forked from Rust-related/RustPython
Merge pull request #1889 from RustPython/coolreader18/_json-optimizations
Add the _json module and _json.make_scanner
This commit is contained in:
@@ -6,11 +6,12 @@ import unittest
|
||||
from test import support
|
||||
|
||||
# import json with and without accelerations
|
||||
cjson = support.import_fresh_module('json', fresh=['_json'])
|
||||
# XXX RUSTPYTHON: we don't import _json as fresh since the fresh module isn't placed
|
||||
# into the sys.modules cache, and therefore the vm can't recognize the _json.Scanner class
|
||||
cjson = support.import_fresh_module('json') #, fresh=['_json'])
|
||||
pyjson = support.import_fresh_module('json', blocked=['_json'])
|
||||
# JSONDecodeError is cached inside the _json module
|
||||
# XXX RustPython TODO: _json module
|
||||
# cjson.JSONDecodeError = cjson.decoder.JSONDecodeError = json.JSONDecodeError
|
||||
cjson.JSONDecodeError = cjson.decoder.JSONDecodeError = json.JSONDecodeError
|
||||
|
||||
# create two base classes that will be used by the other tests
|
||||
class PyTest(unittest.TestCase):
|
||||
@@ -38,6 +39,7 @@ class TestPyTest(PyTest):
|
||||
'json.encoder')
|
||||
|
||||
class TestCTest(CTest):
|
||||
@unittest.expectedFailure
|
||||
def test_cjson(self):
|
||||
self.assertEqual(self.json.scanner.make_scanner.__module__, '_json')
|
||||
self.assertEqual(self.json.decoder.scanstring.__module__, '_json')
|
||||
|
||||
@@ -3,6 +3,8 @@ from io import StringIO
|
||||
from collections import OrderedDict
|
||||
from test.test_json import PyTest, CTest
|
||||
|
||||
import unittest
|
||||
|
||||
|
||||
class TestDecode:
|
||||
def test_decimal(self):
|
||||
@@ -55,6 +57,7 @@ class TestDecode:
|
||||
self.assertIs(a, c)
|
||||
self.assertIs(b, d)
|
||||
|
||||
@unittest.skip("TODO: RUSTPYTHON: cache/memoize keys")
|
||||
def test_keys_reuse(self):
|
||||
s = '[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]'
|
||||
self.check_keys_reuse(s, self.loads)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from test.test_json import CTest
|
||||
|
||||
import unittest
|
||||
|
||||
class BadBool:
|
||||
def __bool__(self):
|
||||
@@ -7,10 +8,14 @@ class BadBool:
|
||||
|
||||
|
||||
class TestSpeedups(CTest):
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_scanstring(self):
|
||||
self.assertEqual(self.json.decoder.scanstring.__module__, "_json")
|
||||
self.assertIs(self.json.decoder.scanstring, self.json.decoder.c_scanstring)
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_encode_basestring_ascii(self):
|
||||
self.assertEqual(self.json.encoder.encode_basestring_ascii.__module__,
|
||||
"_json")
|
||||
@@ -38,6 +43,7 @@ class TestEncode(CTest):
|
||||
b"\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75",
|
||||
None)
|
||||
|
||||
@unittest.skip("TODO: RUSTPYTHON, translate the encoder to Rust")
|
||||
def test_bad_str_encoder(self):
|
||||
# Issue #31505: There shouldn't be an assertion failure in case
|
||||
# c_make_encoder() receives a bad encoder() argument.
|
||||
@@ -59,6 +65,8 @@ class TestEncode(CTest):
|
||||
with self.assertRaises(ZeroDivisionError):
|
||||
enc('spam', 4)
|
||||
|
||||
# TODO: RUSTPYTHON, translate the encoder to Rust
|
||||
@unittest.expectedFailure
|
||||
def test_bad_bool_args(self):
|
||||
def test(name):
|
||||
self.json.encoder.JSONEncoder(**{name: BadBool()}).encode({'a': 1})
|
||||
|
||||
@@ -80,6 +80,10 @@ pub fn new_stop_iteration(vm: &VirtualMachine) -> PyBaseExceptionRef {
|
||||
let stop_iteration_type = vm.ctx.exceptions.stop_iteration.clone();
|
||||
vm.new_exception_empty(stop_iteration_type)
|
||||
}
|
||||
pub fn stop_iter_with_value(val: PyObjectRef, vm: &VirtualMachine) -> PyBaseExceptionRef {
|
||||
let stop_iteration_type = vm.ctx.exceptions.stop_iteration.clone();
|
||||
vm.new_exception(stop_iteration_type, vec![val])
|
||||
}
|
||||
|
||||
pub fn stop_iter_value(vm: &VirtualMachine, exc: &PyBaseExceptionRef) -> PyResult {
|
||||
let args = exc.args();
|
||||
|
||||
219
vm/src/stdlib/json.rs
Normal file
219
vm/src/stdlib/json.rs
Normal file
@@ -0,0 +1,219 @@
|
||||
use crate::obj::objiter;
|
||||
use crate::obj::objstr::PyStringRef;
|
||||
use crate::obj::{objbool, objtype::PyClassRef};
|
||||
use crate::pyobject::{IdProtocol, PyClassImpl, PyObjectRef, PyRef, PyResult, PyValue};
|
||||
use crate::VirtualMachine;
|
||||
|
||||
use num_bigint::BigInt;
|
||||
use std::str::FromStr;
|
||||
|
||||
#[pyclass(name = "Scanner")]
|
||||
#[derive(Debug)]
|
||||
struct JsonScanner {
|
||||
strict: bool,
|
||||
object_hook: Option<PyObjectRef>,
|
||||
object_pairs_hook: Option<PyObjectRef>,
|
||||
parse_float: Option<PyObjectRef>,
|
||||
parse_int: Option<PyObjectRef>,
|
||||
parse_constant: PyObjectRef,
|
||||
ctx: PyObjectRef,
|
||||
}
|
||||
|
||||
impl PyValue for JsonScanner {
|
||||
fn class(vm: &VirtualMachine) -> PyClassRef {
|
||||
vm.class("_json", "make_scanner")
|
||||
}
|
||||
}
|
||||
|
||||
#[pyimpl]
|
||||
impl JsonScanner {
|
||||
#[pyslot]
|
||||
fn tp_new(cls: PyClassRef, ctx: PyObjectRef, vm: &VirtualMachine) -> PyResult<PyRef<Self>> {
|
||||
let strict = objbool::boolval(vm, vm.get_attribute(ctx.clone(), "strict")?)?;
|
||||
let object_hook = vm.option_if_none(vm.get_attribute(ctx.clone(), "object_hook")?);
|
||||
let object_pairs_hook =
|
||||
vm.option_if_none(vm.get_attribute(ctx.clone(), "object_pairs_hook")?);
|
||||
let parse_float = vm.get_attribute(ctx.clone(), "parse_float")?;
|
||||
let parse_float = if vm.is_none(&parse_float) || parse_float.is(&vm.ctx.types.float_type) {
|
||||
None
|
||||
} else {
|
||||
Some(parse_float)
|
||||
};
|
||||
let parse_int = vm.get_attribute(ctx.clone(), "parse_int")?;
|
||||
let parse_int = if vm.is_none(&parse_int) || parse_int.is(&vm.ctx.types.int_type) {
|
||||
None
|
||||
} else {
|
||||
Some(parse_int)
|
||||
};
|
||||
let parse_constant = vm.get_attribute(ctx.clone(), "parse_constant")?;
|
||||
|
||||
Self {
|
||||
strict,
|
||||
object_hook,
|
||||
object_pairs_hook,
|
||||
parse_float,
|
||||
parse_int,
|
||||
parse_constant,
|
||||
ctx,
|
||||
}
|
||||
.into_ref_with_type(vm, cls)
|
||||
}
|
||||
|
||||
fn parse(
|
||||
&self,
|
||||
s: &str,
|
||||
pystr: PyStringRef,
|
||||
idx: usize,
|
||||
scan_once: PyObjectRef,
|
||||
vm: &VirtualMachine,
|
||||
) -> PyResult {
|
||||
let c = s
|
||||
.chars()
|
||||
.next()
|
||||
.ok_or_else(|| objiter::stop_iter_with_value(vm.new_int(idx), vm))?;
|
||||
let next_idx = idx + c.len_utf8();
|
||||
match c {
|
||||
'"' => {
|
||||
// TODO: parse the string in rust
|
||||
let parse_str = vm.get_attribute(self.ctx.clone(), "parse_string")?;
|
||||
return vm.invoke(
|
||||
&parse_str,
|
||||
vec![
|
||||
pystr.into_object(),
|
||||
vm.new_int(next_idx),
|
||||
vm.new_bool(self.strict),
|
||||
],
|
||||
);
|
||||
}
|
||||
'{' => {
|
||||
// TODO: parse the object in rust
|
||||
let parse_obj = vm.get_attribute(self.ctx.clone(), "parse_object")?;
|
||||
return vm.invoke(
|
||||
&parse_obj,
|
||||
vec![
|
||||
vm.ctx
|
||||
.new_tuple(vec![pystr.into_object(), vm.new_int(next_idx)]),
|
||||
vm.new_bool(self.strict),
|
||||
scan_once,
|
||||
self.object_hook.clone().unwrap_or_else(|| vm.get_none()),
|
||||
self.object_pairs_hook
|
||||
.clone()
|
||||
.unwrap_or_else(|| vm.get_none()),
|
||||
],
|
||||
);
|
||||
}
|
||||
'[' => {
|
||||
// TODO: parse the array in rust
|
||||
let parse_array = vm.get_attribute(self.ctx.clone(), "parse_array")?;
|
||||
return vm.invoke(
|
||||
&parse_array,
|
||||
vec![
|
||||
vm.ctx
|
||||
.new_tuple(vec![pystr.into_object(), vm.new_int(next_idx)]),
|
||||
scan_once,
|
||||
],
|
||||
);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
macro_rules! parse_const {
|
||||
($s:literal, $val:expr) => {
|
||||
if s.starts_with($s) {
|
||||
return Ok(vm.ctx.new_tuple(vec![$val, vm.new_int(idx + $s.len())]));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
parse_const!("null", vm.get_none());
|
||||
parse_const!("true", vm.new_bool(true));
|
||||
parse_const!("false", vm.new_bool(false));
|
||||
|
||||
if let Some((res, len)) = self.parse_number(s, vm) {
|
||||
return Ok(vm.ctx.new_tuple(vec![res?, vm.new_int(idx + len)]));
|
||||
}
|
||||
|
||||
macro_rules! parse_constant {
|
||||
($s:literal) => {
|
||||
if s.starts_with($s) {
|
||||
return Ok(vm.ctx.new_tuple(vec![
|
||||
vm.invoke(&self.parse_constant, vec![vm.new_str($s.to_owned())])?,
|
||||
vm.new_int(idx + $s.len()),
|
||||
]));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
parse_constant!("NaN");
|
||||
parse_constant!("Infinity");
|
||||
parse_constant!("-Infinity");
|
||||
|
||||
Err(objiter::stop_iter_with_value(vm.new_int(idx), vm))
|
||||
}
|
||||
|
||||
fn parse_number(&self, s: &str, vm: &VirtualMachine) -> Option<(PyResult, usize)> {
|
||||
let mut has_neg = false;
|
||||
let mut has_decimal = false;
|
||||
let mut has_exponent = false;
|
||||
let mut has_e_sign = false;
|
||||
let mut i = 0;
|
||||
for c in s.chars() {
|
||||
match c {
|
||||
'-' if i == 0 => has_neg = true,
|
||||
n if n.is_ascii_digit() => {}
|
||||
'.' if !has_decimal => has_decimal = true,
|
||||
'e' | 'E' if !has_exponent => has_exponent = true,
|
||||
'+' | '-' if !has_e_sign => has_e_sign = true,
|
||||
_ => break,
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
if i == 0 || (i == 1 && has_neg) {
|
||||
return None;
|
||||
}
|
||||
let buf = &s[..i];
|
||||
let ret = if has_decimal || has_exponent {
|
||||
// float
|
||||
if let Some(ref parse_float) = self.parse_float {
|
||||
vm.invoke(parse_float, vec![vm.new_str(buf.to_owned())])
|
||||
} else {
|
||||
Ok(vm.ctx.new_float(f64::from_str(buf).unwrap()))
|
||||
}
|
||||
} else if let Some(ref parse_int) = self.parse_int {
|
||||
vm.invoke(parse_int, vec![vm.new_str(buf.to_owned())])
|
||||
} else {
|
||||
Ok(vm.new_int(BigInt::from_str(buf).unwrap()))
|
||||
};
|
||||
Some((ret, buf.len()))
|
||||
}
|
||||
|
||||
#[pyslot]
|
||||
fn call(zelf: PyRef<Self>, pystr: PyStringRef, idx: isize, vm: &VirtualMachine) -> PyResult {
|
||||
if idx < 0 {
|
||||
return Err(vm.new_value_error("idx cannot be negative".to_owned()));
|
||||
}
|
||||
let idx = idx as usize;
|
||||
let mut chars = pystr.as_str().chars();
|
||||
if idx > 0 {
|
||||
chars
|
||||
.nth(idx - 1)
|
||||
.ok_or_else(|| objiter::stop_iter_with_value(vm.new_int(idx), vm))?;
|
||||
}
|
||||
zelf.parse(
|
||||
chars.as_str(),
|
||||
pystr.clone(),
|
||||
idx,
|
||||
zelf.clone().into_object(),
|
||||
vm,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
|
||||
let ctx = &vm.ctx;
|
||||
let scanner_cls = JsonScanner::make_class(ctx);
|
||||
scanner_cls.set_str_attr("__module__", vm.new_str("_json".to_owned()));
|
||||
py_module!(vm, "_json", {
|
||||
"make_scanner" => scanner_cls,
|
||||
})
|
||||
}
|
||||
@@ -15,6 +15,7 @@ mod hashlib;
|
||||
mod imp;
|
||||
pub mod io;
|
||||
mod itertools;
|
||||
mod json;
|
||||
#[cfg(feature = "rustpython-parser")]
|
||||
mod keyword;
|
||||
mod marshal;
|
||||
@@ -71,14 +72,15 @@ pub fn get_module_inits() -> HashMap<String, StdlibInitFunc> {
|
||||
let mut modules = hashmap! {
|
||||
"array".to_owned() => Box::new(array::make_module) as StdlibInitFunc,
|
||||
"binascii".to_owned() => Box::new(binascii::make_module),
|
||||
"dis".to_owned() => Box::new(dis::make_module),
|
||||
"_collections".to_owned() => Box::new(collections::make_module),
|
||||
"_csv".to_owned() => Box::new(csv::make_module),
|
||||
"_functools".to_owned() => Box::new(functools::make_module),
|
||||
"dis".to_owned() => Box::new(dis::make_module),
|
||||
"errno".to_owned() => Box::new(errno::make_module),
|
||||
"_functools".to_owned() => Box::new(functools::make_module),
|
||||
"hashlib".to_owned() => Box::new(hashlib::make_module),
|
||||
"itertools".to_owned() => Box::new(itertools::make_module),
|
||||
"_io".to_owned() => Box::new(io::make_module),
|
||||
"_json".to_owned() => Box::new(json::make_module),
|
||||
"marshal".to_owned() => Box::new(marshal::make_module),
|
||||
"math".to_owned() => Box::new(math::make_module),
|
||||
"_operator".to_owned() => Box::new(operator::make_module),
|
||||
|
||||
@@ -582,6 +582,13 @@ impl VirtualMachine {
|
||||
pub fn is_none(&self, obj: &PyObjectRef) -> bool {
|
||||
obj.is(&self.get_none())
|
||||
}
|
||||
pub fn option_if_none(&self, obj: PyObjectRef) -> Option<PyObjectRef> {
|
||||
if self.is_none(&obj) {
|
||||
None
|
||||
} else {
|
||||
Some(obj)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_type(&self) -> PyClassRef {
|
||||
self.ctx.type_type()
|
||||
|
||||
Reference in New Issue
Block a user