From 5dddd02bca1bb0bb34e329d44fe8b7186cae0075 Mon Sep 17 00:00:00 2001 From: silmeth Date: Sun, 30 Jun 2019 14:43:36 +0200 Subject: [PATCH] improve (de)serialization: no crash on BigInts, non-string map keys --- tests/snippets/json_snippet.py | 34 +++++++++++---- tests/snippets/math_basics.py | 4 +- vm/src/py_serde.rs | 76 ++++++++++++++++------------------ 3 files changed, 64 insertions(+), 50 deletions(-) diff --git a/tests/snippets/json_snippet.py b/tests/snippets/json_snippet.py index 224fde8ca..a8c0ecc19 100644 --- a/tests/snippets/json_snippet.py +++ b/tests/snippets/json_snippet.py @@ -1,3 +1,4 @@ +from testutils import assert_raises import json def round_trip_test(obj): @@ -27,6 +28,12 @@ assert [1, "string", 1.0, True] == json.loads(json.dumps((1, "string", 1.0, True assert '{}' == json.dumps({}) round_trip_test({'a': 'b'}) +# should reject non-str keys in jsons +assert_raises(json.JSONDecodeError, lambda: json.loads('{3: "abc"}')) + +# should serialize non-str keys as strings +assert json.dumps({'3': 'abc'}) == json.dumps({3: 'abc'}) + assert 1 == json.loads("1") assert -1 == json.loads("-1") assert 1.0 == json.loads("1.0") @@ -44,12 +51,23 @@ class String(str): pass assert "string" == json.loads(String('"string"')) assert '"string"' == json.dumps(String("string")) -# TODO: Uncomment and test once int/float construction is supported -# class Int(int): pass -# class Float(float): pass +class Int(int): pass +class Float(float): pass -# TODO: Uncomment and test once sequence/dict subclasses are supported by -# json.dumps -# class List(list): pass -# class Tuple(tuple): pass -# class Dict(dict): pass +assert '1' == json.dumps(Int(1)) +assert '0.5' == json.dumps(Float(0.5)) + +class List(list): pass +class Tuple(tuple): pass +class Dict(dict): pass + +assert '[1]' == json.dumps(List([1])) +assert json.dumps((1, "string", 1.0, True)) == json.dumps(Tuple((1, "string", 1.0, True))) +assert json.dumps({'a': 'b'}) == json.dumps(Dict({'a': 'b'})) + +# big ints should not crash VM +# TODO: test for correct output when actual serialization implemented and doesn’t throw +try: + json.dumps(7*500) +except: + pass diff --git a/tests/snippets/math_basics.py b/tests/snippets/math_basics.py index d950b13a9..f02ba3e60 100644 --- a/tests/snippets/math_basics.py +++ b/tests/snippets/math_basics.py @@ -33,11 +33,11 @@ assert_raises( assert_raises( OverflowError, lambda: round(float('inf')), - 'OverflowError: cannot convert float NaN to integer') + 'OverflowError: cannot convert float infinity to integer') assert_raises( OverflowError, lambda: round(-float('inf')), - 'OverflowError: cannot convert float NaN to integer') + 'OverflowError: cannot convert float infinity to integer') assert pow(0, 0) == 1 assert pow(2, 2) == 4 diff --git a/vm/src/py_serde.rs b/vm/src/py_serde.rs index 10901bf7a..5e43c43a7 100644 --- a/vm/src/py_serde.rs +++ b/vm/src/py_serde.rs @@ -4,16 +4,11 @@ use serde; use serde::de::{DeserializeSeed, Visitor}; use serde::ser::{Serialize, SerializeMap, SerializeSeq}; -use crate::obj::{ - objbool, - objdict::PyDictRef, - objfloat, objint, objsequence, - objstr::{self, PyString}, - objtype, -}; +use crate::obj::{objbool, objdict::PyDictRef, objfloat, objint, objsequence, objstr, objtype}; use crate::pyobject::{IdProtocol, ItemProtocol, PyObjectRef, TypeProtocol}; use crate::VirtualMachine; use num_traits::cast::ToPrimitive; +use num_traits::sign::Signed; #[inline] pub fn serialize( @@ -79,9 +74,16 @@ impl<'s> serde::Serialize for PyObjectSerializer<'s> { serializer.serialize_bool(objbool::get_value(self.pyobject)) } else if objtype::isinstance(self.pyobject, &self.vm.ctx.int_type()) { let v = objint::get_value(self.pyobject); - serializer.serialize_i64(v.to_i64().unwrap()) - // Although this may seem nice, it does not give the right result: - // v.serialize(serializer) + let int_too_large = || serde::ser::Error::custom("int too large to serialize"); + // TODO: serialize BigInt when it does not fit into i64 + // BigInt implements serialization to a tuple of sign and a list of u32s, + // eg. -1 is [-1, [1]], 0 is [0, []], 12345678900000654321 is [1, [2710766577,2874452364]] + // CPython serializes big ints as long decimal integer literals + if v.is_positive() { + serializer.serialize_u64(v.to_u64().ok_or_else(int_too_large)?) + } else { + serializer.serialize_i64(v.to_i64().ok_or_else(int_too_large)?) + } } else if objtype::isinstance(self.pyobject, &self.vm.ctx.list_type()) { let elements = objsequence::get_elements_list(self.pyobject); serialize_seq_elements(serializer, &elements) @@ -138,35 +140,26 @@ impl<'de> Visitor<'de> for PyObjectDeserializer<'de> { formatter.write_str("a type that can deserialise in Python") } - fn visit_str(self, value: &str) -> Result + fn visit_bool(self, value: bool) -> Result where E: serde::de::Error, { - Ok(self.vm.ctx.new_str(value.to_string())) - } - - fn visit_string(self, value: String) -> Result - where - E: serde::de::Error, - { - Ok(self.vm.ctx.new_str(value)) + Ok(self.vm.ctx.new_bool(value)) } + // Other signed integers delegate to this method by default, it’s the only one needed fn visit_i64(self, value: i64) -> Result where E: serde::de::Error, { - // The JSON deserializer always uses the i64/u64 deserializers, so we only need to - // implement those for now Ok(self.vm.ctx.new_int(value)) } + // Other unsigned integers delegate to this method by default, it’s the only one needed fn visit_u64(self, value: u64) -> Result where E: serde::de::Error, { - // The JSON deserializer always uses the i64/u64 deserializers, so we only need to - // implement those for now Ok(self.vm.ctx.new_int(value)) } @@ -177,11 +170,26 @@ impl<'de> Visitor<'de> for PyObjectDeserializer<'de> { Ok(self.vm.ctx.new_float(value)) } - fn visit_bool(self, value: bool) -> Result + fn visit_str(self, value: &str) -> Result where E: serde::de::Error, { - Ok(self.vm.ctx.new_bool(value)) + // Owned value needed anyway, delegate to visit_string + self.visit_string(value.to_string()) + } + + fn visit_string(self, value: String) -> Result + where + E: serde::de::Error, + { + Ok(self.vm.ctx.new_str(value)) + } + + fn visit_unit(self) -> Result + where + E: serde::de::Error, + { + Ok(self.vm.get_none()) } fn visit_seq(self, mut access: A) -> Result @@ -200,23 +208,11 @@ impl<'de> Visitor<'de> for PyObjectDeserializer<'de> { M: serde::de::MapAccess<'de>, { let dict = self.vm.ctx.new_dict(); - // TODO: Given keys must be strings, we can probably do something more efficient - // than wrapping the given object up and then unwrapping it to determine whether or - // not it is a string + // Although JSON keys must be strings, implementation accepts any keys + // and can be reused by other deserializers without such limit while let Some((key_obj, value)) = access.next_entry_seed(self.clone(), self.clone())? { - let key: String = match key_obj.payload::() { - Some(PyString { ref value }) => value.clone(), - _ => unimplemented!("map keys must be strings"), - }; - dict.set_item(&key, value, self.vm).unwrap(); + dict.set_item(key_obj, value, self.vm).unwrap(); } Ok(dict.into_object()) } - - fn visit_unit(self) -> Result - where - E: serde::de::Error, - { - Ok(self.vm.get_none()) - } }