mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
* Fix stack overflow on deeply-nested JSON in json.loads()
json.loads() on a deeply-nested array or object payload (e.g.
'[' * 50000 + ']' * 50000) overflowed the native Rust stack and
crashed the interpreter process with SIGSEGV. CPython raises
RecursionError on the same input via _Py_EnterRecursiveCall in
Modules/_json.c.
The recursion lives in the mutual call chain:
JsonScanner::parse_object / parse_array
-> JsonScanner::call_scan_once
-> JsonScanner::parse_object / parse_array
Every descent funnels through call_scan_once, so wrapping its body
with vm.with_recursion covers both '{' and '[' paths (and their
mixed nesting) with a single guard.
Before:
./rustpython -c "import json; json.loads('[' * 50000 + ']' * 50000)"
-> SIGSEGV (exit 139)
After:
-> RecursionError: maximum recursion depth exceeded while
decoding a JSON object from a string
Verified:
- extra_tests/snippets/stdlib_json.py: all assertions pass
(includes 3 new regression cases: array, object, alternating
nesting at depth 100000)
- cargo run -- -m test test_json: 214 passed, 0 regressed
(9 skipped, 13 expected failures, all pre-existing)
- depth 500000 no longer crashes (RecursionError)
- shallow parsing unchanged
* Enable test_highly_nested_objects_decoding
Per @ShaharNaveh's review on #7632: this test was previously marked
`@unittest.skip("TODO: RUSTPYTHON; crashes")` because json.loads
would SIGSEGV on the 500_000-deep input. The recursion-guard added
in this PR makes it raise RecursionError like CPython, so the skip
decorator can be removed.
$ cargo run -- -m unittest \
test.test_json.test_recursion.TestCRecursion.test_highly_nested_objects_decoding \
test.test_json.test_recursion.TestPyRecursion.test_highly_nested_objects_decoding
...
Ran 2 tests in 0.825s
OK
$ cargo run -- -m test test_json
Ran 214 tests (7 skipped, 13 expected failures) — all pass.
123 lines
4.2 KiB
Python
Vendored
123 lines
4.2 KiB
Python
Vendored
from test import support
|
|
from test.test_json import PyTest, CTest
|
|
|
|
import unittest # XXX: RUSTPYTHON; importing to be able to skip tests
|
|
|
|
|
|
class JSONTestObject:
    """Sentinel type for the ``default()`` recursion test.

    The class object itself (not an instance) is handed to the encoder and
    recognised by identity inside the custom ``default`` hook.
    """
|
|
|
|
|
|
class TestRecursion:
    """Recursion and circular-reference checks for json encoding/decoding.

    This is a mixin: it relies on the concrete test class to provide
    ``self.json``, ``self.dumps`` and ``self.loads`` together with the
    usual ``unittest`` assertion helpers (``assertEqual``, ``assertRaises``,
    ``fail``).
    """

    def test_listrecursion(self):
        # A list that directly contains itself must be rejected.
        x = []
        x.append(x)
        try:
            self.dumps(x)
        except ValueError as exc:
            self.assertEqual(exc.__notes__, ["when serializing list item 0"])
        else:
            self.fail("didn't raise ValueError on list recursion")
        # A cycle that passes through a second list: x -> y -> x.
        x = []
        y = [x]
        x.append(y)
        try:
            self.dumps(x)
        except ValueError as exc:
            self.assertEqual(exc.__notes__, ["when serializing list item 0"]*2)
        else:
            self.fail("didn't raise ValueError on alternating list recursion")
        # The same list referenced twice is shared, not circular.
        y = []
        x = [y, y]
        # ensure that the marker is cleared
        self.dumps(x)

    def test_dictrecursion(self):
        # A dict that directly contains itself must be rejected.
        x = {}
        x["test"] = x
        try:
            self.dumps(x)
        except ValueError as exc:
            self.assertEqual(exc.__notes__, ["when serializing dict item 'test'"])
        else:
            self.fail("didn't raise ValueError on dict recursion")
        # The same dict referenced twice is shared, not circular.
        x = {}
        y = {"a": x, "b": x}
        # ensure that the marker is cleared
        # (dump the container ``y``; dumping the empty ``x`` would not
        # exercise the shared-reference path at all)
        self.dumps(y)

    def test_defaultrecursion(self):
        class RecursiveJSONEncoder(self.json.JSONEncoder):
            # When True, default() wraps the sentinel in a list that again
            # contains the sentinel, so encoding can never terminate.
            recurse = False

            def default(self, o):
                if o is JSONTestObject:
                    if self.recurse:
                        return [JSONTestObject]
                    else:
                        return 'JSONTestObject'
                # Inside this method ``self`` is the encoder, not the test
                # case, so defer to the base encoder via super().
                return super().default(o)

        enc = RecursiveJSONEncoder()
        self.assertEqual(enc.encode(JSONTestObject), '"JSONTestObject"')
        enc.recurse = True
        try:
            enc.encode(JSONTestObject)
        except ValueError as exc:
            self.assertEqual(exc.__notes__,
                             ["when serializing list item 0",
                              "when serializing type object"])
        else:
            self.fail("didn't raise ValueError on default recursion")

    @support.skip_if_unlimited_stack_size
    @support.skip_emscripten_stack_overflow()
    @support.skip_wasi_stack_overflow()
    def test_highly_nested_objects_decoding(self):
        very_deep = 500_000
        # test that loading highly-nested objects doesn't segfault when C
        # accelerations are used. See #12017
        # Three shapes are covered: nested objects, nested objects ending in
        # an array, and nested arrays.
        with self.assertRaises(RecursionError):
            with support.infinite_recursion():
                self.loads('{"a":' * very_deep + '1' + '}' * very_deep)
        with self.assertRaises(RecursionError):
            with support.infinite_recursion():
                self.loads('{"a":' * very_deep + '[1]' + '}' * very_deep)
        with self.assertRaises(RecursionError):
            with support.infinite_recursion():
                self.loads('[' * very_deep + '1' + ']' * very_deep)

    @support.skip_if_unlimited_stack_size
    @support.skip_wasi_stack_overflow()
    @support.skip_emscripten_stack_overflow()
    @support.requires_resource('cpu')
    def test_highly_nested_objects_encoding(self):
        # See #12051
        # Build a list and a dict nested 500_000 levels deep by wrapping the
        # previous value on every iteration.
        l, d = [], {}
        for _ in range(500_000):
            l, d = [l], {'k':d}
        with self.assertRaises(RecursionError):
            with support.infinite_recursion(5000):
                self.dumps(l)
        with self.assertRaises(RecursionError):
            with support.infinite_recursion(5000):
                self.dumps(d)

    @support.skip_if_unlimited_stack_size
    @support.skip_emscripten_stack_overflow()
    @support.skip_wasi_stack_overflow()
    def test_endless_recursion(self):
        # See #12051
        class EndlessJSONEncoder(self.json.JSONEncoder):
            def default(self, o):
                """If check_circular is False, this will keep adding another list."""
                return [o]

        # 5j is passed to default(), which wraps it in a list whose element
        # is again passed to default(), and so on.
        with self.assertRaises(RecursionError):
            with support.infinite_recursion(1000):
                EndlessJSONEncoder(check_circular=False).encode(5j)
|
|
|
|
|
|
class TestPyRecursion(TestRecursion, PyTest):
    """Run the TestRecursion checks with the PyTest base class."""
|
|
class TestCRecursion(TestRecursion, CTest):
    """Run the TestRecursion checks with the CTest base class."""
|