Files
RustPython/Lib/test/test_json/test_recursion.py
Changjoon 175f12b664 Fix stack overflow on deeply-nested JSON in json.loads() (#7632)
* Fix stack overflow on deeply-nested JSON in json.loads()

json.loads() on a deeply-nested array or object payload (e.g.
'[' * 50000 + ']' * 50000) overflowed the native Rust stack and
crashed the interpreter process with SIGSEGV. CPython raises
RecursionError on the same input via _Py_EnterRecursiveCall in
Modules/_json.c.

The recursion lives in the mutual call chain:
  JsonScanner::parse_object / parse_array
    -> JsonScanner::call_scan_once
      -> JsonScanner::parse_object / parse_array

Every descent funnels through call_scan_once, so wrapping its body
with vm.with_recursion covers both '{' and '[' paths (and their
mixed nesting) with a single guard.

Before:
  ./rustpython -c "import json; json.loads('[' * 50000 + ']' * 50000)"
    -> SIGSEGV (exit 139)

After:
  -> RecursionError: maximum recursion depth exceeded while
     decoding a JSON object from a string

Verified:
  - extra_tests/snippets/stdlib_json.py: all assertions pass
    (includes 3 new regression cases: array, object, alternating
    nesting at depth 100000)
  - cargo run -- -m test test_json: 214 passed, 0 regressed
    (9 skipped, 13 expected failures, all pre-existing)
  - depth 500000 no longer crashes (RecursionError)
  - shallow parsing unchanged

* Enable test_highly_nested_objects_decoding

Per @ShaharNaveh's review on #7632: this test was previously marked
`@unittest.skip("TODO: RUSTPYTHON; crashes")` because json.loads
would SIGSEGV on the 500_000-deep input. The recursion-guard added
in this PR makes it raise RecursionError like CPython, so the skip
decorator can be removed.

  $ cargo run -- -m unittest \
        test.test_json.test_recursion.TestCRecursion.test_highly_nested_objects_decoding \
        test.test_json.test_recursion.TestPyRecursion.test_highly_nested_objects_decoding
  ...
  Ran 2 tests in 0.825s
  OK

  $ cargo run -- -m test test_json
  Ran 214 tests (7 skipped, 13 expected failures) — all pass.
2026-04-20 21:52:17 +09:00

123 lines
4.2 KiB
Python
Vendored

from test import support
from test.test_json import PyTest, CTest
import unittest # XXX: RUSTPYTHON; importing to be able to skip tests
class JSONTestObject:
pass
class TestRecursion:
def test_listrecursion(self):
x = []
x.append(x)
try:
self.dumps(x)
except ValueError as exc:
self.assertEqual(exc.__notes__, ["when serializing list item 0"])
else:
self.fail("didn't raise ValueError on list recursion")
x = []
y = [x]
x.append(y)
try:
self.dumps(x)
except ValueError as exc:
self.assertEqual(exc.__notes__, ["when serializing list item 0"]*2)
else:
self.fail("didn't raise ValueError on alternating list recursion")
y = []
x = [y, y]
# ensure that the marker is cleared
self.dumps(x)
def test_dictrecursion(self):
x = {}
x["test"] = x
try:
self.dumps(x)
except ValueError as exc:
self.assertEqual(exc.__notes__, ["when serializing dict item 'test'"])
else:
self.fail("didn't raise ValueError on dict recursion")
x = {}
y = {"a": x, "b": x}
# ensure that the marker is cleared
self.dumps(x)
def test_defaultrecursion(self):
class RecursiveJSONEncoder(self.json.JSONEncoder):
recurse = False
def default(self, o):
if o is JSONTestObject:
if self.recurse:
return [JSONTestObject]
else:
return 'JSONTestObject'
return self.json.JSONEncoder.default(o)
enc = RecursiveJSONEncoder()
self.assertEqual(enc.encode(JSONTestObject), '"JSONTestObject"')
enc.recurse = True
try:
enc.encode(JSONTestObject)
except ValueError as exc:
self.assertEqual(exc.__notes__,
["when serializing list item 0",
"when serializing type object"])
else:
self.fail("didn't raise ValueError on default recursion")
@support.skip_if_unlimited_stack_size
@support.skip_emscripten_stack_overflow()
@support.skip_wasi_stack_overflow()
def test_highly_nested_objects_decoding(self):
very_deep = 500_000
# test that loading highly-nested objects doesn't segfault when C
# accelerations are used. See #12017
with self.assertRaises(RecursionError):
with support.infinite_recursion():
self.loads('{"a":' * very_deep + '1' + '}' * very_deep)
with self.assertRaises(RecursionError):
with support.infinite_recursion():
self.loads('{"a":' * very_deep + '[1]' + '}' * very_deep)
with self.assertRaises(RecursionError):
with support.infinite_recursion():
self.loads('[' * very_deep + '1' + ']' * very_deep)
@support.skip_if_unlimited_stack_size
@support.skip_wasi_stack_overflow()
@support.skip_emscripten_stack_overflow()
@support.requires_resource('cpu')
def test_highly_nested_objects_encoding(self):
# See #12051
l, d = [], {}
for x in range(500_000):
l, d = [l], {'k':d}
with self.assertRaises(RecursionError):
with support.infinite_recursion(5000):
self.dumps(l)
with self.assertRaises(RecursionError):
with support.infinite_recursion(5000):
self.dumps(d)
@support.skip_if_unlimited_stack_size
@support.skip_emscripten_stack_overflow()
@support.skip_wasi_stack_overflow()
def test_endless_recursion(self):
# See #12051
class EndlessJSONEncoder(self.json.JSONEncoder):
def default(self, o):
"""If check_circular is False, this will keep adding another list."""
return [o]
with self.assertRaises(RecursionError):
with support.infinite_recursion(1000):
EndlessJSONEncoder(check_circular=False).encode(5j)
class TestPyRecursion(TestRecursion, PyTest): pass
class TestCRecursion(TestRecursion, CTest): pass