Merge pull request #1957 from RustPython/coolreader18/json-encode_basestring

Implement _json.encode_basestring{,_ascii}
This commit is contained in:
Noah
2020-06-06 17:13:00 -05:00
committed by GitHub
13 changed files with 232 additions and 23 deletions

View File

@@ -14,8 +14,6 @@ class TestSpeedups(CTest):
self.assertEqual(self.json.decoder.scanstring.__module__, "_json")
self.assertIs(self.json.decoder.scanstring, self.json.decoder.c_scanstring)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_encode_basestring_ascii(self):
self.assertEqual(self.json.encoder.encode_basestring_ascii.__module__,
"_json")

View File

@@ -591,7 +591,7 @@ impl<O: OutputStream> Compiler<O> {
{
return Err(self.error_loc(
CompileErrorType::AsyncReturnValue,
statement.location.clone(),
statement.location,
));
}
self.compile_expression(v)?;

View File

@@ -120,7 +120,7 @@ libz-sys = "1.0"
winreg = "0.7"
schannel = "0.1"
[target."cfg(windows)".dependencies.winapi]
[target.'cfg(windows)'.dependencies.winapi]
version = "0.3"
features = ["winsock2", "handleapi", "ws2def", "std", "winbase", "wincrypt", "fileapi"]

View File

@@ -1,4 +1,4 @@
use crate::obj::objstr::PyString;
use crate::obj::objstr::{PyString, PyStringRef};
use crate::pyhash;
use crate::pyobject::{IdProtocol, IntoPyObject, PyObjectRef, PyResult};
use crate::vm::VirtualMachine;
@@ -438,6 +438,26 @@ impl DictKey for &PyObjectRef {
}
}
/// Allows a borrowed Python string reference to be used directly as a
/// dictionary key, without first converting it to a generic object reference.
impl DictKey for &PyStringRef {
    /// Hashes the key with the string's own `hash()` method; the VM is not consulted.
    fn do_hash(self, _vm: &VirtualMachine) -> PyResult<HashValue> {
        Ok(self.hash())
    }

    /// Identity comparison against another object.
    fn do_is(self, other: &PyObjectRef) -> bool {
        self.is(other)
    }

    /// Equality comparison against another key.
    ///
    /// Identical objects are equal; another `PyString` payload is compared by
    /// string contents; anything else is delegated to `vm.bool_eq`.
    fn do_eq(self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> {
        if self.is(other_key) {
            return Ok(true);
        }
        match other_key.payload::<PyString>() {
            Some(other_str) => Ok(other_str.as_str() == self.as_str()),
            None => vm.bool_eq(self.clone().into_object(), other_key.clone()),
        }
    }
}
/// Implement trait for the str type, so that we can use strings
/// to index dictionaries.
impl DictKey for &str {

View File

@@ -328,3 +328,17 @@ macro_rules! class_or_notimplemented {
}
};
}
/// Builds a named builtin function object for a module-level native function.
///
/// Expands to `PyContext::new_function_named(&$ctx, <fn>, <module>, <name>)`, where `<fn>` is
/// the identifier formed by pasting `$module`, `_` and `$func` together, and `<module>` /
/// `<name>` are the stringified `$module` and `$func`. For example,
/// `named_function!(ctx, _json, encode_basestring)` refers to the function
/// `_json_encode_basestring` and passes the strings `"_json"` and `"encode_basestring"`.
#[macro_export]
macro_rules! named_function {
($ctx:expr, $module:ident, $func:ident) => {{
paste::expr! {
$crate::pyobject::PyContext::new_function_named(
&$ctx,
[<$module _ $func>],
stringify!($module).to_owned(),
stringify!($func).to_owned(),
)
}
}};
}

View File

@@ -1,6 +1,7 @@
use std::fmt;
use crate::function::{OptionalArg, PyFuncArgs, PyNativeFunc};
use crate::obj::objstr::PyStringRef;
use crate::obj::objtype::PyClassRef;
use crate::pyobject::{
IdProtocol, PyClassImpl, PyContext, PyObjectRef, PyResult, PyValue, TypeProtocol,
@@ -11,12 +12,15 @@ use crate::vm::VirtualMachine;
#[pyclass]
pub struct PyBuiltinFunction {
value: PyNativeFunc,
module: Option<PyStringRef>,
name: Option<PyStringRef>,
}
impl PyValue for PyBuiltinFunction {
fn class(vm: &VirtualMachine) -> PyClassRef {
vm.ctx.builtin_function_or_method_type()
}
const HAVE_DICT: bool = true;
}
impl fmt::Debug for PyBuiltinFunction {
@@ -27,7 +31,19 @@ impl fmt::Debug for PyBuiltinFunction {
impl PyBuiltinFunction {
pub fn new(value: PyNativeFunc) -> Self {
Self { value }
Self {
value,
module: None,
name: None,
}
}
/// Wraps a native function and records the module and name it belongs to.
///
/// Both `module` and `name` are stored as `Some(..)`; `new` leaves them as `None`.
pub fn new_with_name(value: PyNativeFunc, module: PyStringRef, name: PyStringRef) -> Self {
Self {
value,
module: Some(module),
name: Some(name),
}
}
pub fn as_func(&self) -> &PyNativeFunc {
@@ -42,7 +58,16 @@ impl SlotCall for PyBuiltinFunction {
}
#[pyimpl(with(SlotCall))]
impl PyBuiltinFunction {}
impl PyBuiltinFunction {
// Module name recorded at construction, if any (cloned for the caller).
// NOTE(review): `magic` presumably publishes this as `__module__` — confirm against `pyproperty`.
#[pyproperty(magic)]
fn module(&self) -> Option<PyStringRef> {
self.module.clone()
}
// Function name recorded at construction, if any (cloned for the caller).
// NOTE(review): `magic` presumably publishes this as `__name__` — confirm against `pyproperty`.
#[pyproperty(magic)]
fn name(&self) -> Option<PyStringRef> {
self.name.clone()
}
}
#[pyclass]
pub struct PyBuiltinMethod {
@@ -64,7 +89,12 @@ impl fmt::Debug for PyBuiltinMethod {
impl PyBuiltinMethod {
pub fn new(value: PyNativeFunc) -> Self {
Self {
function: PyBuiltinFunction { value },
function: PyBuiltinFunction::new(value),
}
}
/// Wraps a native function as a builtin method that carries a module and a name.
///
/// The inner `PyBuiltinFunction` is built with `PyBuiltinFunction::new_with_name`, so the
/// module and name are stored on it.
pub fn new_with_name(value: PyNativeFunc, module: PyStringRef, name: PyStringRef) -> Self {
Self {
function: PyBuiltinFunction::new_with_name(value, module, name),
}
}
@@ -100,9 +130,14 @@ impl SlotCall for PyBuiltinMethod {
#[pyimpl(with(SlotDescriptor, SlotCall))]
impl PyBuiltinMethod {
// TODO: give builtin functions names
#[pyproperty(magic)]
fn name(&self) {}
fn module(&self) -> Option<PyStringRef> {
self.function.module.clone()
}
#[pyproperty(magic)]
fn name(&self) -> Option<PyStringRef> {
self.function.name.clone()
}
}
pub fn init(context: &PyContext) {

View File

@@ -292,7 +292,7 @@ impl PyString {
}
#[pymethod(name = "__hash__")]
fn hash(&self) -> pyhash::PyHash {
pub(crate) fn hash(&self) -> pyhash::PyHash {
self.hash.load().unwrap_or_else(|| {
let hash = pyhash::hash_value(&self.value);
self.hash.store(Some(hash));

View File

@@ -274,6 +274,7 @@ impl PyClassRef {
// Search the bases for the proper metatype to deal with this:
let winner = calculate_meta_class(metatype.clone(), &bases, vm)?;
let metatype = if !winner.is(&metatype) {
#[allow(clippy::redundant_clone)] // false positive
if let Some(ref tp_new) = winner.clone().slots.read().unwrap().new {
// Pass it to the winner

View File

@@ -138,7 +138,7 @@ impl PyContext {
let exceptions = exceptions::ExceptionZoo::new(&types.type_type, &types.object_type);
fn create_object<T: PyObjectPayload + PyValue>(payload: T, cls: &PyClassRef) -> PyRef<T> {
PyRef::new_ref_unchecked(PyObject::new(payload, cls.clone(), None))
PyRef::from_obj_unchecked(PyObject::new(payload, cls.clone(), None))
}
let none_type = create_type("NoneType", &types.type_type, &types.object_type);
@@ -492,6 +492,18 @@ impl PyContext {
)
}
/// Creates a builtin function object that records the module and name it belongs to.
///
/// `module` and `name` are turned into Python string objects of this context's `str` type
/// and stored on the resulting `PyBuiltinFunction`.
pub fn new_function_named<F, T, R, VM>(&self, f: F, module: String, name: String) -> PyObjectRef
where
    F: IntoPyNativeFunc<T, R, VM>,
{
    // Wraps an owned Rust string in a Python `str` object without an attribute dict.
    let make_pystr =
        |text: String| PyRef::new_ref(objstr::PyString::from(text), self.str_type(), None);

    let native = f.into_func();
    let module_ref = make_pystr(module);
    let name_ref = make_pystr(name);
    let builtin = PyBuiltinFunction::new_with_name(native, module_ref, name_ref);

    PyObject::new(builtin, self.builtin_function_or_method_type(), None)
}
pub fn new_method<F, T, R, VM>(&self, f: F) -> PyObjectRef
where
F: IntoPyNativeFunc<T, R, VM>,
@@ -587,7 +599,7 @@ impl PyContext {
bytecode::Constant::Complex { ref value } => self.new_complex(*value),
bytecode::Constant::String { ref value } => self.new_str(value.clone()),
bytecode::Constant::Bytes { ref value } => self.new_bytes(value.clone()),
bytecode::Constant::Boolean { ref value } => self.new_bool(value.clone()),
bytecode::Constant::Boolean { value } => self.new_bool(value),
bytecode::Constant::Code { ref code } => {
self.new_code_object(*code.clone()).into_object()
}
@@ -695,9 +707,14 @@ impl<T> Clone for PyRef<T> {
}
impl<T: PyValue> PyRef<T> {
fn new_ref(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult<Self> {
/// Allocates a new object from `payload`, `typ` and `dict` and returns it as a typed reference.
///
/// The reference is produced with `from_obj_unchecked`; no payload check is performed here.
#[allow(clippy::new_ret_no_self)]
pub fn new_ref(payload: T, typ: PyClassRef, dict: Option<PyDictRef>) -> Self {
Self::from_obj_unchecked(PyObject::new(payload, typ, dict))
}
fn from_obj(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult<Self> {
if obj.payload_is::<T>() {
Ok(Self::new_ref_unchecked(obj))
Ok(Self::from_obj_unchecked(obj))
} else {
Err(vm.new_runtime_error(format!(
"Unexpected payload for type {:?}",
@@ -706,7 +723,7 @@ impl<T: PyValue> PyRef<T> {
}
}
pub(crate) fn new_ref_unchecked(obj: PyObjectRef) -> Self {
pub(crate) fn from_obj_unchecked(obj: PyObjectRef) -> Self {
PyRef {
obj,
_payload: PhantomData,
@@ -747,7 +764,7 @@ where
{
fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<Self> {
if objtype::isinstance(&obj, &T::class(vm)) {
PyRef::new_ref(obj, vm)
PyRef::from_obj(obj, vm)
} else {
let class = T::class(vm);
let expected_type = vm.to_pystr(&class)?;
@@ -1084,6 +1101,12 @@ impl<T> IntoPyObject for PyRef<T> {
}
}
/// Converts a borrowed `PyRef<T>` into an object reference by cloning its inner `obj`.
/// The conversion always succeeds and does not use the VM.
impl<T> IntoPyObject for &PyRef<T> {
fn into_pyobject(self, _vm: &VirtualMachine) -> PyResult {
Ok(self.obj.clone())
}
}
impl IntoPyObject for PyCallable {
fn into_pyobject(self, _vm: &VirtualMachine) -> PyResult {
Ok(self.into_object())
@@ -1145,7 +1168,7 @@ where
where
T: PyValue,
{
PyRef::new_ref_unchecked(self as PyObjectRef)
PyRef::from_obj_unchecked(self as PyObjectRef)
}
}
@@ -1210,7 +1233,7 @@ pub trait PyValue: fmt::Debug + Send + Sync + Sized + 'static {
} else {
Some(vm.ctx.new_dict())
};
PyRef::new_ref(PyObject::new(self, cls, dict), vm)
PyRef::from_obj(PyObject::new(self, cls, dict), vm)
} else {
let subtype = vm.to_str(&cls.obj)?;
let basetype = vm.to_str(&class.obj)?;
@@ -1219,7 +1242,7 @@ pub trait PyValue: fmt::Debug + Send + Sync + Sized + 'static {
}
fn into_ref_with_type_unchecked(self, cls: PyClassRef, dict: Option<PyDictRef>) -> PyRef<Self> {
PyRef::new_ref_unchecked(PyObject::new(self, cls, dict))
PyRef::from_obj_unchecked(PyObject::new(self, cls, dict))
}
}

View File

@@ -39,7 +39,6 @@ impl Scope {
) -> Scope {
if !globals.contains_key("__builtins__", vm) {
globals
.clone()
.set_item("__builtins__", vm.builtins.clone(), vm)
.unwrap();
}

View File

@@ -7,6 +7,8 @@ use crate::VirtualMachine;
use num_bigint::BigInt;
use std::str::FromStr;
mod machinery;
#[pyclass(name = "Scanner")]
#[derive(Debug)]
struct JsonScanner {
@@ -209,11 +211,30 @@ impl JsonScanner {
}
}
/// Encodes `s` as a quoted JSON string literal and returns it as an owned `String`.
///
/// `ascii_only` is forwarded unchanged to `machinery::write_json_string`.
///
/// # Panics
///
/// Panics if the writer reports an error or emits bytes that are not valid UTF-8; either
/// would indicate a bug in `machinery::write_json_string`.
fn encode_string(s: &str, ascii_only: bool) -> String {
    // Room for the input plus the two surrounding quotes; escapes grow the buffer on demand.
    let mut buf = Vec::<u8>::with_capacity(s.len() + 2);
    // Writing into a `Vec<u8>` is not expected to fail. A checked `expect` costs nothing
    // measurable here and turns a broken assumption into a panic rather than undefined
    // behaviour, which an unchecked unreachable hint would cause.
    machinery::write_json_string(s, ascii_only, &mut buf)
        .expect("writing json output to an in-memory buffer failed");
    // TODO: verify that the implementation is correct enough to use `from_utf8_unchecked`
    String::from_utf8(buf).expect("invalid utf-8 in json output")
}
/// Native implementation registered as `_json.encode_basestring`: encodes the string with
/// `ascii_only` disabled.
fn _json_encode_basestring(s: PyStringRef) -> String {
    let ascii_only = false;
    encode_string(s.as_str(), ascii_only)
}
/// Native implementation registered as `_json.encode_basestring_ascii`: encodes the string
/// with `ascii_only` enabled.
fn _json_encode_basestring_ascii(s: PyStringRef) -> String {
    let ascii_only = true;
    encode_string(s.as_str(), ascii_only)
}
/// Builds the `_json` module object.
///
/// The `Scanner` class is created, given a `__module__` attribute of `"_json"`, and exported
/// as `make_scanner`. `encode_basestring` and `encode_basestring_ascii` are exported through
/// `named_function!`, which passes the module and function names along with each function.
pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
let ctx = &vm.ctx;
let scanner_cls = JsonScanner::make_class(ctx);
scanner_cls.set_str_attr("__module__", vm.new_str("_json".to_owned()));
py_module!(vm, "_json", {
"make_scanner" => scanner_cls,
"encode_basestring" => named_function!(ctx, _json, encode_basestring),
"encode_basestring_ascii" => named_function!(ctx, _json, encode_basestring_ascii),
})
}

View File

@@ -0,0 +1,98 @@
// derived from https://github.com/lovasoa/json_in_type
// BSD 2-Clause License
//
// Copyright (c) 2018, Ophir LOJKINE
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
use std::io;
/// Escape sequences for the control characters 0x00..=0x1F, indexed by byte value.
static ESCAPE_CHARS: [&[u8]; 0x20] = [
    b"\\u0000", b"\\u0001", b"\\u0002", b"\\u0003", b"\\u0004", b"\\u0005", b"\\u0006", b"\\u0007",
    b"\\b", b"\\t", b"\\n", b"\\u000b", b"\\f", b"\\r", b"\\u000e", b"\\u000f", b"\\u0010",
    b"\\u0011", b"\\u0012", b"\\u0013", b"\\u0014", b"\\u0015", b"\\u0016", b"\\u0017", b"\\u0018",
    b"\\u0019", b"\\u001a", b"\\u001b", b"\\u001c", b"\\u001d", b"\\u001e", b"\\u001f",
];

// This bitset records, per byte value, whether the byte can be copied as-is into a JSON
// string (0) or has an escaped form (1).
// The bytes with an escaped form are 0x00 to 0x1F, 0x22 ("), 0x5C (\) and 0x7F (DEL).
// Bytes >= 0x80 (parts of non-ASCII UTF-8 sequences) are never flagged.
static NEEDS_ESCAPING_BITSET: [u64; 4] = [
    //fedcba9876543210_fedcba9876543210_fedcba9876543210_fedcba9876543210
    0b0000000000000000_0000000000000100_1111111111111111_1111111111111111, // 3_2_1_0
    0b1000000000000000_0000000000000000_0001000000000000_0000000000000000, // 7_6_5_4
    0b0000000000000000_0000000000000000_0000000000000000_0000000000000000, // B_A_9_8
    0b0000000000000000_0000000000000000_0000000000000000_0000000000000000, // F_E_D_C
];

/// The DEL control character. It has an escaped form, but JSON does not require escaping it,
/// so it is only rewritten when the output must be pure printable ASCII.
const DEL: u8 = 0x7F;

/// Returns the escape sequence for byte `c`, or `None` when the byte has no escaped form.
#[inline(always)]
fn json_escaped_char(c: u8) -> Option<&'static [u8]> {
    // One bit per byte value: word `c / 64`, bit `c % 64`.
    let bitset_value = NEEDS_ESCAPING_BITSET[(c / 64) as usize] & (1 << (c % 64));
    if bitset_value == 0 {
        None
    } else {
        Some(match c {
            x if x < 0x20 => ESCAPE_CHARS[c as usize],
            b'\\' => &b"\\\\"[..],
            b'\"' => &b"\\\""[..],
            DEL => &b"\\u007f"[..],
            // The bitset only flags the byte values handled above.
            _ => unreachable!(),
        })
    }
}

/// Writes `s` to `w` as a double-quoted JSON string literal.
///
/// `"`, `\` and the control characters 0x00..=0x1F are always escaped. When `ascii_only` is
/// true, DEL and every non-ASCII character are additionally written as `\uXXXX` escapes
/// (lowercase hex; characters outside the BMP become a surrogate pair), so the output is
/// pure ASCII. When `ascii_only` is false, DEL and non-ASCII characters are copied through
/// unchanged.
///
/// # Errors
///
/// Returns any error reported by the underlying writer.
pub fn write_json_string<W: io::Write>(s: &str, ascii_only: bool, w: &mut W) -> io::Result<()> {
    w.write_all(b"\"")?;
    // Start of the pending run of bytes that can be copied verbatim.
    let mut write_start_idx = 0;
    let bytes = s.as_bytes();
    if ascii_only {
        for (idx, c) in s.char_indices() {
            if c.is_ascii() {
                if let Some(escaped) = json_escaped_char(c as u8) {
                    w.write_all(&bytes[write_start_idx..idx])?;
                    w.write_all(escaped)?;
                    write_start_idx = idx + 1;
                }
            } else {
                w.write_all(&bytes[write_start_idx..idx])?;
                write_start_idx = idx + c.len_utf8();
                // codepoints outside the BMP get 2 '\uxxxx' sequences to represent them
                for point in c.encode_utf16(&mut [0; 2]) {
                    write!(w, "\\u{:04x}", point)?;
                }
            }
        }
    } else {
        for (idx, c) in s.bytes().enumerate() {
            // DEL is not among the characters JSON requires to be escaped, so outside
            // ASCII-only mode it stays in the verbatim run like any other byte.
            if c == DEL {
                continue;
            }
            if let Some(escaped) = json_escaped_char(c) {
                w.write_all(&bytes[write_start_idx..idx])?;
                w.write_all(escaped)?;
                write_start_idx = idx + 1;
            }
        }
    }
    // Flush whatever verbatim tail is left, then close the literal.
    w.write_all(&bytes[write_start_idx..])?;
    w.write_all(b"\"")
}

View File

@@ -338,8 +338,8 @@ fn init_type_hierarchy() -> (PyClassRef, PyClassRef) {
type_type,
);
let type_type = PyClassRef::new_ref_unchecked(Arc::from_raw(type_type_ptr));
let object_type = PyClassRef::new_ref_unchecked(Arc::from_raw(object_type_ptr));
let type_type = PyClassRef::from_obj_unchecked(Arc::from_raw(type_type_ptr));
let object_type = PyClassRef::from_obj_unchecked(Arc::from_raw(object_type_ptr));
(*type_type_ptr).payload.mro = vec![object_type.clone()];
(*type_type_ptr).payload.bases = vec![object_type.clone()];