improve (de)serialization: no crash on BigInts, non-string map keys

This commit is contained in:
silmeth
2019-06-30 14:43:36 +02:00
parent a332b74ad8
commit 5dddd02bca
3 changed files with 64 additions and 50 deletions

View File

@@ -1,3 +1,4 @@
from testutils import assert_raises
import json
def round_trip_test(obj):
@@ -27,6 +28,12 @@ assert [1, "string", 1.0, True] == json.loads(json.dumps((1, "string", 1.0, True
assert '{}' == json.dumps({})
round_trip_test({'a': 'b'})
# should reject non-str keys in jsons
assert_raises(json.JSONDecodeError, lambda: json.loads('{3: "abc"}'))
# should serialize non-str keys as strings
assert json.dumps({'3': 'abc'}) == json.dumps({3: 'abc'})
assert 1 == json.loads("1")
assert -1 == json.loads("-1")
assert 1.0 == json.loads("1.0")
@@ -44,12 +51,23 @@ class String(str): pass
assert "string" == json.loads(String('"string"'))
assert '"string"' == json.dumps(String("string"))
# TODO: Uncomment and test once int/float construction is supported
# class Int(int): pass
# class Float(float): pass
class Int(int): pass
class Float(float): pass
# TODO: Uncomment and test once sequence/dict subclasses are supported by
# json.dumps
# class List(list): pass
# class Tuple(tuple): pass
# class Dict(dict): pass
assert '1' == json.dumps(Int(1))
assert '0.5' == json.dumps(Float(0.5))
class List(list): pass
class Tuple(tuple): pass
class Dict(dict): pass
assert '[1]' == json.dumps(List([1]))
assert json.dumps((1, "string", 1.0, True)) == json.dumps(Tuple((1, "string", 1.0, True)))
assert json.dumps({'a': 'b'}) == json.dumps(Dict({'a': 'b'}))
# big ints should not crash VM
# TODO: test for correct output when actual serialization implemented and doesnt throw
try:
json.dumps(7*500)
except:
pass

View File

@@ -33,11 +33,11 @@ assert_raises(
assert_raises(
OverflowError,
lambda: round(float('inf')),
'OverflowError: cannot convert float NaN to integer')
'OverflowError: cannot convert float infinity to integer')
assert_raises(
OverflowError,
lambda: round(-float('inf')),
'OverflowError: cannot convert float NaN to integer')
'OverflowError: cannot convert float infinity to integer')
assert pow(0, 0) == 1
assert pow(2, 2) == 4

View File

@@ -4,16 +4,11 @@ use serde;
use serde::de::{DeserializeSeed, Visitor};
use serde::ser::{Serialize, SerializeMap, SerializeSeq};
use crate::obj::{
objbool,
objdict::PyDictRef,
objfloat, objint, objsequence,
objstr::{self, PyString},
objtype,
};
use crate::obj::{objbool, objdict::PyDictRef, objfloat, objint, objsequence, objstr, objtype};
use crate::pyobject::{IdProtocol, ItemProtocol, PyObjectRef, TypeProtocol};
use crate::VirtualMachine;
use num_traits::cast::ToPrimitive;
use num_traits::sign::Signed;
#[inline]
pub fn serialize<S>(
@@ -79,9 +74,16 @@ impl<'s> serde::Serialize for PyObjectSerializer<'s> {
serializer.serialize_bool(objbool::get_value(self.pyobject))
} else if objtype::isinstance(self.pyobject, &self.vm.ctx.int_type()) {
let v = objint::get_value(self.pyobject);
serializer.serialize_i64(v.to_i64().unwrap())
// Although this may seem nice, it does not give the right result:
// v.serialize(serializer)
let int_too_large = || serde::ser::Error::custom("int too large to serialize");
// TODO: serialize BigInt when it does not fit into i64
// BigInt implements serialization to a tuple of sign and a list of u32s,
// eg. -1 is [-1, [1]], 0 is [0, []], 12345678900000654321 is [1, [2710766577,2874452364]]
// CPython serializes big ints as long decimal integer literals
if v.is_positive() {
serializer.serialize_u64(v.to_u64().ok_or_else(int_too_large)?)
} else {
serializer.serialize_i64(v.to_i64().ok_or_else(int_too_large)?)
}
} else if objtype::isinstance(self.pyobject, &self.vm.ctx.list_type()) {
let elements = objsequence::get_elements_list(self.pyobject);
serialize_seq_elements(serializer, &elements)
@@ -138,35 +140,26 @@ impl<'de> Visitor<'de> for PyObjectDeserializer<'de> {
formatter.write_str("a type that can deserialise in Python")
}
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
fn visit_bool<E>(self, value: bool) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(self.vm.ctx.new_str(value.to_string()))
}
fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(self.vm.ctx.new_str(value))
Ok(self.vm.ctx.new_bool(value))
}
// Other signed integers delegate to this method by default, its the only one needed
fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
// The JSON deserializer always uses the i64/u64 deserializers, so we only need to
// implement those for now
Ok(self.vm.ctx.new_int(value))
}
// Other unsigned integers delegate to this method by default, its the only one needed
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
// The JSON deserializer always uses the i64/u64 deserializers, so we only need to
// implement those for now
Ok(self.vm.ctx.new_int(value))
}
@@ -177,11 +170,26 @@ impl<'de> Visitor<'de> for PyObjectDeserializer<'de> {
Ok(self.vm.ctx.new_float(value))
}
fn visit_bool<E>(self, value: bool) -> Result<Self::Value, E>
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(self.vm.ctx.new_bool(value))
// Owned value needed anyway, delegate to visit_string
self.visit_string(value.to_string())
}
fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(self.vm.ctx.new_str(value))
}
fn visit_unit<E>(self) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(self.vm.get_none())
}
fn visit_seq<A>(self, mut access: A) -> Result<Self::Value, A::Error>
@@ -200,23 +208,11 @@ impl<'de> Visitor<'de> for PyObjectDeserializer<'de> {
M: serde::de::MapAccess<'de>,
{
let dict = self.vm.ctx.new_dict();
// TODO: Given keys must be strings, we can probably do something more efficient
// than wrapping the given object up and then unwrapping it to determine whether or
// not it is a string
// Although JSON keys must be strings, implementation accepts any keys
// and can be reused by other deserializers without such limit
while let Some((key_obj, value)) = access.next_entry_seed(self.clone(), self.clone())? {
let key: String = match key_obj.payload::<PyString>() {
Some(PyString { ref value }) => value.clone(),
_ => unimplemented!("map keys must be strings"),
};
dict.set_item(&key, value, self.vm).unwrap();
dict.set_item(key_obj, value, self.vm).unwrap();
}
Ok(dict.into_object())
}
fn visit_unit<E>(self) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(self.vm.get_none())
}
}