Files
RustPython/extra_tests/snippets/stdlib_json.py
Changjoon 625e5bf012 Report invalid \uXXXX escape position at the u character (#7676)
* Report invalid \uXXXX escape position at the u character

CPython's json decoder reports the position of the `u` specifier
when a \uXXXX escape fails to parse, but RustPython was reporting
the preceding `\`. For surrogate-pair cases (\uXXXX\uYYYY) the
second call was passing char_offset + next_char_i + 1, which
lands on the first hex digit of the first escape -- unrelated to
the actual failure site.

Pass next_char_i (position of the primary `u`) to the primary
decode_unicode call, and capture the second `u`'s char index from
the next_tuple peek to pass to the surrogate-pair decode_unicode
call.

Verified: 13 targeted probes across invalid-hex, short, and pair
cases now all match CPython positions. test.test_json 214 tests
pass with no regressions.

* Add regression test for invalid \uXXXX escape position

* Use raise AssertionError instead of assert False (B011)
2026-04-26 12:33:34 +09:00

258 lines
6.6 KiB
Python

import json
from io import BytesIO, StringIO
from testutils import assert_raises
def round_trip_test(obj):
# serde_json and Python's json module produce slightly differently spaced
# output; direct string comparison can't pass on both so we use this as a
# proxy
return obj == json.loads(json.dumps(obj))
def json_dump(obj):
f = StringIO()
json.dump(obj, f)
f.seek(0)
return f.getvalue()
def json_load(obj):
f = StringIO(obj) if isinstance(obj, str) else BytesIO(bytes(obj))
return json.load(f)
assert '"string"' == json.dumps("string")
assert '"string"' == json_dump("string")
assert "1" == json.dumps(1)
assert "1" == json_dump(1)
assert "1.0" == json.dumps(1.0)
assert "1.0" == json_dump(1.0)
assert "true" == json.dumps(True)
assert "true" == json_dump(True)
assert "false" == json.dumps(False)
assert "false" == json_dump(False)
assert "null" == json.dumps(None)
assert "null" == json_dump(None)
assert "[]" == json.dumps([])
assert "[]" == json_dump([])
assert "[1]" == json.dumps([1])
assert "[1]" == json_dump([1])
assert "[[1]]" == json.dumps([[1]])
assert "[[1]]" == json_dump([[1]])
assert round_trip_test([1, "string", 1.0, True])
assert "[]" == json.dumps(())
assert "[]" == json_dump(())
assert "[1]" == json.dumps((1,))
assert "[1]" == json_dump((1,))
assert "[[1]]" == json.dumps(((1,),))
assert "[[1]]" == json_dump(((1,),))
# tuples don't round-trip through json
assert [1, "string", 1.0, True] == json.loads(json.dumps((1, "string", 1.0, True)))
assert "{}" == json.dumps({})
assert "{}" == json_dump({})
assert round_trip_test({"a": "b"})
# should reject non-str keys in jsons
assert_raises(json.JSONDecodeError, lambda: json.loads('{3: "abc"}'))
assert_raises(json.JSONDecodeError, lambda: json_load('{3: "abc"}'))
# should serialize non-str keys as strings
assert json.dumps({"3": "abc"}) == json.dumps({3: "abc"})
assert 1 == json.loads("1")
assert 1 == json.loads(b"1")
assert 1 == json.loads(bytearray(b"1"))
assert 1 == json_load("1")
assert 1 == json_load(b"1")
assert 1 == json_load(bytearray(b"1"))
assert -1 == json.loads("-1")
assert -1 == json.loads(b"-1")
assert -1 == json.loads(bytearray(b"-1"))
assert -1 == json_load("-1")
assert -1 == json_load(b"-1")
assert -1 == json_load(bytearray(b"-1"))
assert 1.0 == json.loads("1.0")
assert 1.0 == json.loads(b"1.0")
assert 1.0 == json.loads(bytearray(b"1.0"))
assert 1.0 == json_load("1.0")
assert 1.0 == json_load(b"1.0")
assert 1.0 == json_load(bytearray(b"1.0"))
assert -1.0 == json.loads("-1.0")
assert -1.0 == json.loads(b"-1.0")
assert -1.0 == json.loads(bytearray(b"-1.0"))
assert -1.0 == json_load("-1.0")
assert -1.0 == json_load(b"-1.0")
assert -1.0 == json_load(bytearray(b"-1.0"))
assert "str" == json.loads('"str"')
assert "str" == json.loads(b'"str"')
assert "str" == json.loads(bytearray(b'"str"'))
assert "str" == json_load('"str"')
assert "str" == json_load(b'"str"')
assert "str" == json_load(bytearray(b'"str"'))
assert True is json.loads("true")
assert True is json.loads(b"true")
assert True is json.loads(bytearray(b"true"))
assert True is json_load("true")
assert True is json_load(b"true")
assert True is json_load(bytearray(b"true"))
assert False is json.loads("false")
assert False is json.loads(b"false")
assert False is json.loads(bytearray(b"false"))
assert False is json_load("false")
assert False is json_load(b"false")
assert False is json_load(bytearray(b"false"))
assert None is json.loads("null")
assert None is json.loads(b"null")
assert None is json.loads(bytearray(b"null"))
assert None is json_load("null")
assert None is json_load(b"null")
assert None is json_load(bytearray(b"null"))
assert [] == json.loads("[]")
assert [] == json.loads(b"[]")
assert [] == json.loads(bytearray(b"[]"))
assert [] == json_load("[]")
assert [] == json_load(b"[]")
assert [] == json_load(bytearray(b"[]"))
assert ["a"] == json.loads('["a"]')
assert ["a"] == json.loads(b'["a"]')
assert ["a"] == json.loads(bytearray(b'["a"]'))
assert ["a"] == json_load('["a"]')
assert ["a"] == json_load(b'["a"]')
assert ["a"] == json_load(bytearray(b'["a"]'))
assert [["a"], "b"] == json.loads('[["a"], "b"]')
assert [["a"], "b"] == json.loads(b'[["a"], "b"]')
assert [["a"], "b"] == json.loads(bytearray(b'[["a"], "b"]'))
assert [["a"], "b"] == json_load('[["a"], "b"]')
assert [["a"], "b"] == json_load(b'[["a"], "b"]')
assert [["a"], "b"] == json_load(bytearray(b'[["a"], "b"]'))
class String(str):
pass
class Bytes(bytes):
pass
class ByteArray(bytearray):
pass
assert "string" == json.loads(String('"string"'))
assert "string" == json.loads(Bytes(b'"string"'))
assert "string" == json.loads(ByteArray(b'"string"'))
assert "string" == json_load(String('"string"'))
assert "string" == json_load(Bytes(b'"string"'))
assert "string" == json_load(ByteArray(b'"string"'))
assert '"string"' == json.dumps(String("string"))
assert '"string"' == json_dump(String("string"))
class Int(int):
pass
class Float(float):
pass
assert "1" == json.dumps(Int(1))
assert "1" == json_dump(Int(1))
assert "0.5" == json.dumps(Float(0.5))
assert "0.5" == json_dump(Float(0.5))
class List(list):
pass
class Tuple(tuple):
pass
class Dict(dict):
pass
assert "[1]" == json.dumps(List([1]))
assert "[1]" == json_dump(List([1]))
assert json.dumps((1, "string", 1.0, True)) == json.dumps(
Tuple((1, "string", 1.0, True))
)
assert json_dump((1, "string", 1.0, True)) == json_dump(Tuple((1, "string", 1.0, True)))
assert json.dumps({"a": "b"}) == json.dumps(Dict({"a": "b"}))
assert json_dump({"a": "b"}) == json_dump(Dict({"a": "b"}))
i = 7**500
assert json.dumps(i) == str(i)
assert json.decoder.scanstring('✨x"', 1) == ("x", 3)
# Recursion guard: deeply-nested input must raise RecursionError instead of
# overflowing the native stack (SIGSEGV). Matches CPython's
# _Py_EnterRecursiveCall in Modules/_json.c.
_deep = 100_000 # well above the ~45k native-stack crash threshold
# Array nesting
assert_raises(RecursionError, lambda: json.loads("[" * _deep + "]" * _deep))
# Object nesting
assert_raises(
RecursionError,
lambda: json.loads('{"a":' * _deep + "1" + "}" * _deep),
)
# Alternating array/object nesting
assert_raises(
RecursionError,
lambda: json.loads(('[{"x":' * _deep) + "1" + ("}]" * _deep)),
)
# Invalid \uXXXX escape: error position points at the 'u', matching CPython.
try:
json.loads('"\\uXYZW"')
except json.JSONDecodeError as e:
assert e.pos == 2, f"expected pos=2, got {e.pos}"
else:
raise AssertionError("expected JSONDecodeError")
try:
json.loads('"abc\\uZZZZ"')
except json.JSONDecodeError as e:
assert e.pos == 5, f"expected pos=5, got {e.pos}"
else:
raise AssertionError("expected JSONDecodeError")