Align codegen metadata with CPython (#7952)

This commit is contained in:
Jeong, YunWon
2026-05-23 20:16:03 +09:00
committed by GitHub
parent f3b83efcee
commit d3272e752b
19 changed files with 5044 additions and 719 deletions

View File

@@ -5,6 +5,7 @@ argtypes
asdl
asname
atopen
atext
attro
augassign
badcert
@@ -104,6 +105,7 @@ inlinedepth
inplace
inpos
isbytecode
ishidden
ismine
ISPOINTER
isoctal
@@ -113,6 +115,7 @@ keeped
kwnames
kwonlyarg
kwonlyargs
kwonlydefaults
lasti
libffi
linearise
@@ -164,6 +167,7 @@ patma
peepholer
phcount
platstdlib
ploc
posonlyarg
posonlyargs
prec
@@ -209,6 +213,7 @@ staticbase
stginfo
storefast
stringlib
stringized
structseq
subkwargs
subparams

View File

@@ -1249,7 +1249,6 @@ class TestSpecifics(unittest.TestCase):
last_line = line
return res
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_lineno_attribute(self):
def load_attr():
return (
@@ -1294,7 +1293,6 @@ class TestSpecifics(unittest.TestCase):
code_lines = self.get_code_lines(func.__code__)
self.assertEqual(lines, code_lines)
@unittest.expectedFailure # TODO: RUSTPYTHON; + [0]
def test_line_number_genexp(self):
def return_genexp():

View File

@@ -1215,7 +1215,6 @@ class DisTests(DisTestBase):
def test_disassemble_with(self):
self.do_disassembly_test(_with, dis_with)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_disassemble_asyncwith(self):
self.do_disassembly_test(_asyncwith, dis_asyncwith)
@@ -1991,26 +1990,22 @@ class InstructionTests(InstructionTestCase):
actual = dis.get_instructions(simple, first_line=None)
self.assertInstructionsEqual(list(actual), expected_opinfo_simple)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_outer(self):
actual = dis.get_instructions(outer, first_line=expected_outer_line)
self.assertInstructionsEqual(list(actual), expected_opinfo_outer)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_nested(self):
with captured_stdout():
f = outer()
actual = dis.get_instructions(f, first_line=expected_f_line)
self.assertInstructionsEqual(list(actual), expected_opinfo_f)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_doubly_nested(self):
with captured_stdout():
inner = outer()()
actual = dis.get_instructions(inner, first_line=expected_inner_line)
self.assertInstructionsEqual(list(actual), expected_opinfo_inner)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_jumpy(self):
actual = dis.get_instructions(jumpy, first_line=expected_jumpy_line)
self.assertInstructionsEqual(list(actual), expected_opinfo_jumpy)
@@ -2314,7 +2309,6 @@ class BytecodeTests(InstructionTestCase, DisTestBase):
via_generator = list(dis.get_instructions(obj))
self.assertInstructionsEqual(via_object, via_generator)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_explicit_first_line(self):
actual = dis.Bytecode(outer, first_line=expected_outer_line)
self.assertInstructionsEqual(list(actual), expected_opinfo_outer)

View File

@@ -2245,7 +2245,6 @@ class AssertionErrorTests(unittest.TestCase):
result = run_script(source)
self.assertEqual(result[-3:], expected)
@unittest.expectedFailure # TODO: RUSTPYTHON
@force_not_colorized
def test_multiline_not_highlighted(self):
cases = [

View File

@@ -237,7 +237,6 @@ class TestPredicates(IsTestBase):
self.assertFalse(inspect.ispackage(FakePackage()))
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: False is not true
def test_iscoroutine(self):
async_gen_coro = async_generator_function_example(1)
gen_coro = gen_coroutine_function_example(1)

View File

@@ -132,7 +132,6 @@ class PyCompileTestsBase:
finally:
os.chmod(self.directory, mode.st_mode)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_bad_coding(self):
bad_coding = os.path.join(os.path.dirname(__file__),
'tokenizedata',
@@ -198,7 +197,6 @@ class PyCompileTestsBase:
fp.read(), 'test', {})
self.assertEqual(flags, 0b1)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_quiet(self):
bad_coding = os.path.join(os.path.dirname(__file__),
'tokenizedata',

View File

@@ -173,7 +173,6 @@ class StrtodTests(unittest.TestCase):
s = '{}e{}'.format(digits, exponent)
self.check_strtod(s)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_boundaries(self):
# boundaries expressed as triples (n, e, u), where
# n*10**e is an approximation to the boundary value and
@@ -194,7 +193,6 @@ class StrtodTests(unittest.TestCase):
u *= 10
e -= 1
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_underflow_boundary(self):
# test values close to 2**-1075, the underflow boundary; similar
# to boundary_tests, except that the random error doesn't scale
@@ -206,7 +204,6 @@ class StrtodTests(unittest.TestCase):
s = '{}e{}'.format(digits, exponent)
self.check_strtod(s)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_bigcomp(self):
for ndigs in 5, 10, 14, 15, 16, 17, 18, 19, 20, 40, 41, 50:
dig10 = 10**ndigs
@@ -284,7 +281,6 @@ class StrtodTests(unittest.TestCase):
self.assertEqual(float(negative_exp(20000)), 1.0)
self.assertEqual(float(negative_exp(30000)), 1.0)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_particular(self):
# inputs that produced crashes or incorrectly rounded results with
# previous versions of dtoa.c, for various reasons

View File

@@ -1488,8 +1488,6 @@ class JumpTestCase(unittest.TestCase):
output.append(11)
output.append(12)
# TODO: RUSTPYTHON
@unittest.expectedFailure
@jump_test(5, 11, [2, 4], (ValueError, 'after'))
def test_no_jump_over_return_try_finally_in_finally_block(output):
try:

View File

@@ -296,7 +296,6 @@ class TestAsyncCase(unittest.TestCase):
test.doCleanups()
self.assertEqual(events, ['asyncSetUp', 'test', 'asyncTearDown', 'cleanup2', 'cleanup1'])
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_deprecation_of_return_val_from_test(self):
# Issue 41322 - deprecate return of value that is not None from a test
class Nothing:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -32,6 +32,9 @@ pub struct SymbolTable {
// Return True if the block is a nested class or function
pub is_nested: bool,
/// Whether this function-like scope was created directly in a class block.
pub is_method: bool,
/// A set of symbols present on this scope level.
pub symbols: IndexMap<String, Symbol>,
@@ -90,6 +93,7 @@ impl SymbolTable {
typ,
line_number,
is_nested,
is_method: false,
symbols: IndexMap::default(),
sub_tables: vec![],
next_sub_table: 0,
@@ -1103,6 +1107,7 @@ impl SymbolTableBuilder {
| CompilerScope::Lambda
| CompilerScope::Comprehension
| CompilerScope::Annotation
| CompilerScope::TypeParams
)
}
@@ -1118,11 +1123,17 @@ impl SymbolTableBuilder {
}
fn enter_scope(&mut self, name: &str, typ: CompilerScope, line_number: u32) {
let is_nested = self.tables.last().is_some_and(|table| {
table.is_nested
|| matches!(
table.typ,
CompilerScope::Function | CompilerScope::AsyncFunction
let parent = self.tables.last();
let is_nested =
parent.is_some_and(|table| table.is_nested || Self::is_function_like_scope(table.typ));
let is_method = parent.is_some_and(|table| {
table.typ == CompilerScope::Class
&& matches!(
typ,
CompilerScope::Function
| CompilerScope::AsyncFunction
| CompilerScope::Lambda
| CompilerScope::Comprehension
)
});
// Inherit mangled_names from parent for non-class scopes
@@ -1132,6 +1143,7 @@ impl SymbolTableBuilder {
.and_then(|t| t.mangled_names.clone())
.filter(|_| typ != CompilerScope::Class);
let mut table = SymbolTable::new(name.to_owned(), typ, line_number, is_nested);
table.is_method = is_method;
table.future_annotations = self.future_annotations;
table.mangled_names = inherited_mangled_names;
self.tables.push(table);
@@ -1145,6 +1157,8 @@ impl SymbolTableBuilder {
name: &str,
line_number: u32,
for_class: bool,
has_defaults: bool,
has_kwdefaults: bool,
) -> SymbolTableResult {
// Check if we're in a class scope
let in_class = self
@@ -1174,6 +1188,12 @@ impl SymbolTableBuilder {
if for_class {
self.register_name(".generic_base", SymbolUsage::Assigned, TextRange::default())?;
}
if has_defaults {
self.register_name(".defaults", SymbolUsage::Parameter, TextRange::default())?;
}
if has_kwdefaults {
self.register_name(".kwdefaults", SymbolUsage::Parameter, TextRange::default())?;
}
Ok(())
}
@@ -1195,6 +1215,7 @@ impl SymbolTableBuilder {
let can_see_class_scope =
current.typ == CompilerScope::Class || current.can_see_class_scope;
let has_conditional = current.has_conditional_annotations;
let is_nested = current.is_nested || Self::is_function_like_scope(current.typ);
// Create annotation block if not exists
if current.annotation_block.is_none() {
@@ -1202,7 +1223,7 @@ impl SymbolTableBuilder {
"__annotate__".to_owned(),
CompilerScope::Annotation,
line_number,
true, // is_nested
is_nested,
);
// Annotation scope in class can see class scope
annotation_table.can_see_class_scope = can_see_class_scope;
@@ -1488,6 +1509,8 @@ impl SymbolTableBuilder {
&format!("<generic parameters of {}>", name.as_str()),
self.line_index_start(type_params.range),
false,
true,
Self::has_kwonlydefaults(parameters),
)?;
self.scan_type_params(type_params)?;
}
@@ -1536,6 +1559,8 @@ impl SymbolTableBuilder {
&format!("<generic parameters of {}>", name.as_str()),
self.line_index_start(type_params.range),
true, // for_class: enable selective mangling
false,
false,
)?;
// Set class_name for mangling in type param scope
self.class_name = Some(name.to_string());
@@ -1847,6 +1872,8 @@ impl SymbolTableBuilder {
&format!("<generic parameters of {alias_name}>"),
self.line_index_start(type_params.range),
false,
false,
false,
)?;
self.scan_type_params(type_params)?;
}
@@ -2583,6 +2610,13 @@ impl SymbolTableBuilder {
Ok(())
}
/// Report whether at least one keyword-only parameter in `parameters`
/// declares a default value.
fn has_kwonlydefaults(parameters: &ast::Parameters) -> bool {
    for kwonly in parameters.kwonlyargs.iter() {
        if kwonly.default.is_some() {
            return true;
        }
    }
    false
}
fn enter_scope_with_parameters(
&mut self,
name: &str,
@@ -2704,17 +2738,6 @@ impl SymbolTableBuilder {
Ok(())
}
fn add_varname_to_scope(&mut self, table_idx: usize, name: &str) {
let varnames = if table_idx + 1 == self.tables.len() {
&mut self.current_varnames
} else {
&mut self.varnames_stack[table_idx + 1]
};
if !varnames.iter().any(|existing| existing == name) {
varnames.push(name.to_owned());
}
}
// Mirrors CPython symtable_extend_namedexpr_scope(): assignment expressions
// inside comprehensions bind in the nearest function/module-like scope, not
// in the synthetic comprehension scope itself.
@@ -2752,9 +2775,6 @@ impl SymbolTableBuilder {
match table_type {
CompilerScope::Function | CompilerScope::AsyncFunction | CompilerScope::Lambda => {
let current_comp_inlined = self.tables.last().is_some_and(|table| {
table.typ == CompilerScope::Comprehension && table.comp_inlined
});
let parent_is_global = self.tables[table_idx]
.symbols
.get(mangled.as_str())
@@ -2777,9 +2797,6 @@ impl SymbolTableBuilder {
.entry(mangled.clone())
.or_insert_with(|| Symbol::new(mangled.as_str()));
symbol.flags.insert(SymbolFlags::ASSIGNED);
if !parent_is_global && current_comp_inlined {
self.add_varname_to_scope(table_idx, mangled.as_str());
}
return Ok(());
}
CompilerScope::Module => {

View File

@@ -460,9 +460,12 @@ bitflags! {
const GENERATOR = 0x0020;
const COROUTINE = 0x0080;
const ITERABLE_COROUTINE = 0x0100;
const ASYNC_GENERATOR = 0x0200;
const FUTURE_ANNOTATIONS = 0x1000000;
/// If a code object represents a function and has a docstring,
/// this bit is set and the first item in co_consts is the docstring.
const HAS_DOCSTRING = 0x4000000;
const METHOD = 0x8000000;
}
}
@@ -906,8 +909,6 @@ impl PartialEq for ConstantData {
match (self, other) {
(Integer { value: a }, Integer { value: b }) => a == b,
// we want to compare floats *by actual value* - if we have the *exact same* float
// already in a constant cache, we want to use that
(Float { value: a }, Float { value: b }) => a.to_bits() == b.to_bits(),
(Complex { value: a }, Complex { value: b }) => {
a.re.to_bits() == b.re.to_bits() && a.im.to_bits() == b.im.to_bits()

View File

@@ -3,7 +3,7 @@ use alloc::borrow::ToOwned;
use alloc::format;
use alloc::string::{String, ToString};
use core::f64;
use num_traits::{Float, Zero};
use num_traits::Zero;
pub fn parse_str(literal: &str) -> Option<f64> {
parse_inner(literal.trim().as_bytes())
@@ -209,6 +209,111 @@ pub fn format_general(
}
}
/// Post-process the scientific-notation text `s` produced for `value`,
/// possibly lowering its last significand digit by one.
///
/// The lowered text is returned only when every one of these holds:
/// * `s` contains an `e` with at least one ASCII digit before it;
/// * the last digit before the `e` is odd, so lowering it yields an even digit;
/// * the lowered text still parses (via `parse_str`) to exactly the bits of
///   `value`;
/// * `decimal_distance_to_f64` reports the lowered text to be at least as
///   close to `value` as `s` is.
///
/// In every other case, including when a distance cannot be computed, `s` is
/// returned unchanged.
fn prefer_cpython_tie_repr(s: String, value: f64) -> String {
    let last_digit_pos = s
        .find('e')
        .and_then(|e_pos| s[..e_pos].bytes().rposition(|b| b.is_ascii_digit()));
    let Some(last_digit_pos) = last_digit_pos else {
        return s;
    };

    let last_digit = s.as_bytes()[last_digit_pos];
    // Only an odd digit can be lowered to an even one; an even digit
    // (which includes '0', the one digit that cannot be lowered) is kept.
    if (last_digit - b'0').is_multiple_of(2) {
        return s;
    }

    let lowered = format!(
        "{}{}{}",
        &s[..last_digit_pos],
        (last_digit - 1) as char,
        &s[last_digit_pos + 1..],
    );

    // The lowered text must round-trip to the very same float.
    match parse_str(&lowered) {
        Some(parsed) if parsed.to_bits() == value.to_bits() => {}
        _ => return s,
    }

    match (
        decimal_distance_to_f64(&s, value),
        decimal_distance_to_f64(&lowered, value),
    ) {
        (Some(current), Some(candidate)) if candidate <= current => lowered,
        _ => s,
    }
}
/// Raise `base` to the power `exp`.
///
/// Returns `None` when the result does not fit in a `u128`. Delegates to the
/// standard library's `u128::checked_pow`, which uses exponentiation by
/// squaring instead of `exp` sequential multiplications.
fn checked_pow_u128(base: u128, exp: u32) -> Option<u128> {
    base.checked_pow(exp)
}
/// Split a scientific-notation decimal such as `"2.5e-01"` into `(int, scale)`
/// so that the magnitude of the number equals `int / 10^scale`.
///
/// A leading `-` on the significand is dropped, so only the magnitude is
/// represented. When the decimal point would fall to the right of the digits
/// (negative scale), the digits are multiplied by the matching power of ten
/// and a scale of `0` is returned.
///
/// Returns `None` when `s` has no `e`, when the exponent is not an `i32`,
/// when the digits do not fit in a `u128`, or when any intermediate
/// arithmetic would overflow.
fn parse_decimal_rational(s: &str) -> Option<(u128, u32)> {
    let (mantissa_text, exponent_text) = s.split_once('e')?;
    let exponent = exponent_text.parse::<i32>().ok()?;
    let significand = mantissa_text.strip_prefix('-').unwrap_or(mantissa_text);

    // Number of digits after the decimal point, if there is one.
    let frac_digits = significand
        .find('.')
        .map_or(0, |dot| significand.len().saturating_sub(dot + 1));
    let digits: String = significand.chars().filter(|&ch| ch != '.').collect();
    let int = digits.parse::<u128>().ok()?;

    // Checked subtraction: an extreme exponent (e.g. `i32::MIN`) must yield
    // `None` rather than overflow.
    let scale = i32::try_from(frac_digits).ok()?.checked_sub(exponent)?;
    if scale < 0 {
        // Fold the positive power of ten into the integer part.
        let factor = 10u128.checked_pow(scale.unsigned_abs())?;
        return Some((int.checked_mul(factor)?, 0));
    }
    Some((int, scale.unsigned_abs()))
}
/// Decompose the magnitude of `value` into `(mantissa, exponent)` with
/// `|value| == mantissa * 2^exponent`.
///
/// Subnormal numbers (and zero) use the stored fraction as-is with the fixed
/// exponent `-1074`; normal numbers get the implicit leading bit added to the
/// fraction. Infinities and NaNs yield `None`.
fn f64_mantissa_exponent(value: f64) -> Option<(u128, i32)> {
    const FRACTION_BITS: u32 = 52;
    const EXPONENT_BIAS: i32 = 1023;

    let raw = value.abs().to_bits();
    let stored_fraction = raw & ((1u64 << FRACTION_BITS) - 1);
    match ((raw >> FRACTION_BITS) & 0x7ff) as i32 {
        // All exponent bits set: infinity or NaN.
        0x7ff => None,
        // Subnormal or zero: no implicit leading bit.
        0 => Some((
            u128::from(stored_fraction),
            1 - EXPONENT_BIAS - FRACTION_BITS as i32,
        )),
        biased => Some((
            u128::from((1u64 << FRACTION_BITS) | stored_fraction),
            biased - EXPONENT_BIAS - FRACTION_BITS as i32,
        )),
    }
}
/// Measure how far the decimal text `s` is from the exact value of `value`.
///
/// Both numbers are brought onto the common denominator
/// `2^shared_twos * 5^decimal_scale`, and the absolute difference of the
/// resulting integer numerators is returned. Results are therefore only
/// comparable between texts that share the same decimal scale.
///
/// Returns `None` when `s` cannot be parsed, when `value` is not finite, when
/// `value` has a non-negative binary exponent, when the decimal scale exceeds
/// 38, or when any intermediate product overflows a `u128`.
fn decimal_distance_to_f64(s: &str, value: f64) -> Option<u128> {
    let (decimal_int, decimal_scale) = parse_decimal_rational(s)?;
    let (mantissa, binary_exponent) = f64_mantissa_exponent(value)?;
    if decimal_scale > 38 || binary_exponent >= 0 {
        return None;
    }

    let binary_scale = u32::try_from(-binary_exponent).ok()?;
    let shared_twos = binary_scale.max(decimal_scale);

    // Powers of two that lift each side onto the shared denominator.
    let decimal_shift = checked_pow_u128(2, shared_twos - decimal_scale)?;
    let binary_shift = checked_pow_u128(2, shared_twos - binary_scale)?;

    let decimal_units = decimal_int.checked_mul(decimal_shift)?;
    let binary_units = mantissa
        .checked_mul(binary_shift)?
        .checked_mul(checked_pow_u128(5, decimal_scale)?)?;

    Some(binary_units.abs_diff(decimal_units))
}
// TODO: rewrite using format_general
pub fn to_string(value: f64) -> String {
let lit = format!("{value:e}");
@@ -223,7 +328,7 @@ pub fn to_string(value: f64) -> String {
value.to_string()
}
} else {
format!("{significand}e{exponent:+#03}")
prefer_cpython_tie_repr(format!("{significand}e{exponent:+#03}"), value)
}
} else {
let mut s = value.to_string();
@@ -232,6 +337,22 @@ pub fn to_string(value: f64) -> String {
}
}
// Regression tests pinning the exact text `to_string` produces for a few
// small values that are rendered in scientific notation.
#[cfg(test)]
mod tests {
use super::to_string;
// 2^-25 is exactly 2.98023223876953125e-08, i.e. it lies halfway between the
// 17-digit texts ending in ...12 and ...13; the expected output is the one
// ending in the even digit, for both signs.
#[test]
fn repr_uses_cpython_tie_digit_for_power_of_two() {
assert_eq!(to_string(2.0f64.powi(-25)), "2.9802322387695312e-08");
assert_eq!(to_string((-2.0f64).powi(-25)), "-2.9802322387695312e-08");
// The remaining cases already end in an even digit and must come out as-is.
assert_eq!(to_string(2.0f64.powi(-26)), "1.4901161193847656e-08");
assert_eq!(
to_string(2.0f64.powi(-14) - 2.0f64.powi(-25)),
"6.1005353927612305e-05"
);
}
}
pub fn from_hex(s: &str) -> Option<f64> {
if let Ok(f) = hexf_parse::parse_hexf64(s, false) {
return Some(f);
@@ -281,22 +402,23 @@ pub fn from_hex(s: &str) -> Option<f64> {
}
pub fn to_hex(value: f64) -> String {
let (mantissa, exponent, sign) = value.integer_decode();
let sign_fmt = if sign < 0 { "-" } else { "" };
let bits = value.to_bits();
let sign_fmt = if bits >> 63 != 0 { "-" } else { "" };
match value {
value if value.is_zero() => format!("{sign_fmt}0x0.0p+0"),
value if value.is_infinite() => format!("{sign_fmt}inf"),
value if value.is_nan() => "nan".to_owned(),
_ => {
const BITS: i16 = 52;
const FRACT_MASK: u64 = 0xf_ffff_ffff_ffff;
format!(
"{}{:#x}.{:013x}p{:+}",
sign_fmt,
mantissa >> BITS,
mantissa & FRACT_MASK,
exponent + BITS
)
const FRACT_MASK: u64 = (1u64 << 52) - 1;
const EXP_MASK: u64 = 0x7ff;
let exponent = (bits >> 52) & EXP_MASK;
let fraction = bits & FRACT_MASK;
if exponent == 0 {
format!("{sign_fmt}0x0.{fraction:013x}p-1022")
} else {
let exponent = i32::try_from(exponent).unwrap() - 1023;
format!("{sign_fmt}0x1.{fraction:013x}p{exponent:+}")
}
}
}
}
@@ -304,6 +426,10 @@ pub fn to_hex(value: f64) -> String {
#[test]
fn test_to_hex() {
use rand::Rng;
assert_eq!(to_hex(f64::from_bits(1)), "0x0.0000000000001p-1022");
assert_eq!(to_hex(f64::from_bits(2)), "0x0.0000000000002p-1022");
assert_eq!(to_hex(-f64::from_bits(1)), "-0x0.0000000000001p-1022");
assert_eq!(to_hex(f64::MIN_POSITIVE), "0x1.0000000000000p-1022");
for _ in 0..20000 {
let bytes = rand::rng().random::<u64>();
let f = f64::from_bits(bytes);

View File

@@ -13,9 +13,9 @@ expression: "dis(r#\"\nasync def test():\n for stop_exc in (StopIteration('sp
Disassembly of <code object test at 0xdeadbeef file "<?>", line 1>:
1 RETURN_GENERATOR
POP_TOP
RESUME 0
L1: RESUME 0
2 L1: LOAD_GLOBAL 1 (StopIteration + NULL)
2 LOAD_GLOBAL 1 (StopIteration + NULL)
LOAD_CONST 0 ('spam')
CALL 1
LOAD_GLOBAL 3 (StopAsyncIteration + NULL)
@@ -90,10 +90,12 @@ Disassembly of <code object test at 0xdeadbeef file "<?>", line 1>:
POP_TOP
POP_TOP
JUMP_FORWARD 3 (to L25)
L24: COPY 3
-- L24: COPY 3
POP_EXCEPT
RERAISE 1
L25: NOP
5 L25: NOP
10 L26: LOAD_GLOBAL 4 (self)
LOAD_ATTR 13 (fail + NULL|self)
@@ -153,11 +155,11 @@ Disassembly of <code object test at 0xdeadbeef file "<?>", line 1>:
POP_TOP
POP_TOP
JUMP_BACKWARD 205 (to L2)
L39: COPY 3
-- L39: COPY 3
POP_EXCEPT
RERAISE 1
-- L40: CALL_INTRINSIC_1 3 (INTRINSIC_STOPITERATION_ERROR)
L40: CALL_INTRINSIC_1 3 (INTRINSIC_STOPITERATION_ERROR)
RERAISE 1
ExceptionTable:
L1 to L3 -> L40 [0] lasti

View File

@@ -564,7 +564,8 @@ impl Py<PyFunction> {
let is_gen = code.flags.contains(bytecode::CodeFlags::GENERATOR);
let is_coro = code.flags.contains(bytecode::CodeFlags::COROUTINE);
let use_datastack = !(is_gen || is_coro);
let is_async_gen = code.flags.contains(bytecode::CodeFlags::ASYNC_GENERATOR);
let use_datastack = !(is_gen || is_coro || is_async_gen);
// Construct frame:
let frame = Frame::new(
@@ -579,35 +580,30 @@ impl Py<PyFunction> {
.into_ref(&vm.ctx);
self.fill_locals_from_args(&frame, func_args, vm)?;
match (is_gen, is_coro) {
(true, false) => {
let obj = PyGenerator::new(frame.clone(), self.__name__(), self.__qualname__())
.into_pyobject(vm);
frame.set_generator(&obj);
Ok(obj)
}
(false, true) => {
let obj = PyCoroutine::new(frame.clone(), self.__name__(), self.__qualname__())
.into_pyobject(vm);
frame.set_generator(&obj);
Ok(obj)
}
(true, true) => {
let obj = PyAsyncGen::new(frame.clone(), self.__name__(), self.__qualname__())
.into_pyobject(vm);
frame.set_generator(&obj);
Ok(obj)
}
(false, false) => {
let result = vm.run_frame(frame.clone());
// Release data stack memory after frame execution completes.
unsafe {
if let Some(base) = frame.materialize_localsplus() {
vm.datastack_pop(base);
}
if is_async_gen {
let obj = PyAsyncGen::new(frame.clone(), self.__name__(), self.__qualname__())
.into_pyobject(vm);
frame.set_generator(&obj);
Ok(obj)
} else if is_gen {
let obj = PyGenerator::new(frame.clone(), self.__name__(), self.__qualname__())
.into_pyobject(vm);
frame.set_generator(&obj);
Ok(obj)
} else if is_coro {
let obj = PyCoroutine::new(frame.clone(), self.__name__(), self.__qualname__())
.into_pyobject(vm);
frame.set_generator(&obj);
Ok(obj)
} else {
let result = vm.run_frame(frame.clone());
// Release data stack memory after frame execution completes.
unsafe {
if let Some(base) = frame.materialize_localsplus() {
vm.datastack_pop(base);
}
result
}
result
}
}
@@ -689,11 +685,11 @@ impl Py<PyFunction> {
.intersects(bytecode::CodeFlags::VARARGS | bytecode::CodeFlags::VARKEYWORDS)
);
debug_assert_eq!(code.kwonlyarg_count, 0);
debug_assert!(
!code
.flags
.intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE)
);
debug_assert!(!code.flags.intersects(
bytecode::CodeFlags::GENERATOR
| bytecode::CodeFlags::COROUTINE
| bytecode::CodeFlags::ASYNC_GENERATOR,
));
let locals = if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) {
None
@@ -741,10 +737,11 @@ impl Py<PyFunction> {
// Generator/coroutine code objects are SIMPLE_FUNCTION in call
// specialization classification, but their call path must still
// go through invoke() to produce generator/coroutine objects.
if code
.flags
.intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE)
{
if code.flags.intersects(
bytecode::CodeFlags::GENERATOR
| bytecode::CodeFlags::COROUTINE
| bytecode::CodeFlags::ASYNC_GENERATOR,
) {
return self.invoke(FuncArgs::from(args), vm);
}
let frame = self.prepare_exact_args_frame(args, vm);
@@ -760,10 +757,11 @@ impl Py<PyFunction> {
}
pub(crate) fn datastack_frame_size_bytes_for_code(code: &Py<PyCode>) -> Option<usize> {
if code
.flags
.intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE)
{
if code.flags.intersects(
bytecode::CodeFlags::GENERATOR
| bytecode::CodeFlags::COROUTINE
| bytecode::CodeFlags::ASYNC_GENERATOR,
) {
return None;
}
let nlocalsplus = code.localspluskinds.len();
@@ -1468,9 +1466,11 @@ pub(crate) fn vectorcall_function(
&& !code.flags.contains(bytecode::CodeFlags::VARARGS)
&& !code.flags.contains(bytecode::CodeFlags::VARKEYWORDS)
&& code.kwonlyarg_count == 0
&& !code
.flags
.intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE);
&& !code.flags.intersects(
bytecode::CodeFlags::GENERATOR
| bytecode::CodeFlags::COROUTINE
| bytecode::CodeFlags::ASYNC_GENERATOR,
);
if is_simple && nargs == code.arg_count as usize {
// FAST PATH: simple positional-only call, exact arg count.

View File

@@ -710,10 +710,11 @@ impl Frame {
// For generators/coroutines, initialize prev_line to the def line
// so that preamble instructions (RETURN_GENERATOR, POP_TOP) don't
// fire spurious LINE events.
let prev_line = if code
.flags
.intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE)
{
let prev_line = if code.flags.intersects(
bytecode::CodeFlags::GENERATOR
| bytecode::CodeFlags::COROUTINE
| bytecode::CodeFlags::ASYNC_GENERATOR,
) {
code.first_line_number.map_or(0, |line| line.get() as u32)
} else {
0
@@ -9523,9 +9524,7 @@ impl ExecutingFrame<'_> {
// Returns the exception object; RERAISE will re-raise it
if arg.fast_isinstance(vm.ctx.exceptions.stop_iteration) {
let flags = &self.code.flags;
let msg = if flags
.contains(bytecode::CodeFlags::COROUTINE | bytecode::CodeFlags::GENERATOR)
{
let msg = if flags.contains(bytecode::CodeFlags::ASYNC_GENERATOR) {
"async generator raised StopIteration"
} else if flags.contains(bytecode::CodeFlags::COROUTINE) {
"coroutine raised StopIteration"

View File

@@ -160,7 +160,11 @@ mod builtins {
.map(|&b| b as char)
.collect();
if name.is_empty() { None } else { Some(name) }
if name.is_empty() {
None
} else {
Some(normalize_source_encoding(&name))
}
}
// Split into lines (first two only)
@@ -186,15 +190,39 @@ mod builtins {
lines.next().and_then(find_encoding_in_line)
}
/// Match CPython's Parser/tokenizer/helpers.c:get_normal_name().
#[cfg(feature = "parser")]
fn normalize_source_encoding(name: &str) -> String {
    // Spellings that all collapse to the canonical "iso-8859-1".
    const LATIN1_ALIASES: [&str; 3] = ["latin-1", "iso-8859-1", "iso-latin-1"];

    // Only the first 12 characters take part in the comparison; they are
    // lowercased and `_` is treated as `-`.
    let head: String = name
        .chars()
        .take(12)
        .map(|ch| if ch == '_' { '-' } else { ch.to_ascii_lowercase() })
        .collect();

    // A name matches a canonical spelling when it is exactly that spelling or
    // that spelling followed by a `-` suffix (e.g. "utf-8-sig").
    let is_alias_of = |canonical: &str| {
        head.strip_prefix(canonical)
            .is_some_and(|rest| rest.is_empty() || rest.starts_with('-'))
    };

    if is_alias_of("utf-8") {
        return "utf-8".to_owned();
    }
    if LATIN1_ALIASES.iter().any(|&alias| is_alias_of(alias)) {
        return "iso-8859-1".to_owned();
    }
    // Anything else is passed through untouched, original casing included.
    name.to_owned()
}
/// Decode source bytes to a string, handling PEP 263 encoding declarations
/// and BOM. Raises SyntaxError for invalid UTF-8 without an encoding
/// declaration.
/// Check if an encoding name is a UTF-8 variant after normalization.
/// Matches: utf-8, utf_8, utf8, UTF-8, etc.
#[cfg(feature = "parser")]
fn is_utf8_encoding(name: &str) -> bool {
let normalized: String = name.chars().filter(|&c| c != '-' && c != '_').collect();
normalized.eq_ignore_ascii_case("utf8")
name == "utf-8"
}
#[cfg(feature = "parser")]
@@ -206,9 +234,10 @@ mod builtins {
// Validate BOM + encoding combination
if has_bom && !is_utf8 {
let enc = encoding.as_deref().unwrap_or("utf-8");
return Err(vm.new_exception_msg(
vm.ctx.exceptions.syntax_error.to_owned(),
format!("encoding problem for '{filename}': utf-8").into(),
format!("encoding problem: {enc} with BOM").into(),
));
}

View File

@@ -18,7 +18,6 @@ import dis
import json
import os
import re
import struct
import sys
import types
@@ -109,22 +108,6 @@ def _normalize_argrepr(argrepr):
return argrepr
def _normalize_const_repr(value):
"""Return a cross-interpreter representation for LOAD_CONST values."""
if isinstance(value, float):
return f"float:{struct.pack('>d', value).hex()}"
if isinstance(value, tuple):
if not value:
return "()"
parts = [_normalize_const_repr(item) for item in value]
trailing = "," if len(parts) == 1 else ""
return f"({', '.join(parts)}{trailing})"
if isinstance(value, frozenset):
parts = sorted(_normalize_const_repr(item) for item in value)
return f"frozenset({{{', '.join(parts)}}})"
return _normalize_argrepr(repr(value))
_IS_RUSTPYTHON = (
hasattr(sys, "implementation") and sys.implementation.name == "rustpython"
)
@@ -168,7 +151,7 @@ def _resolve_arg_fallback(code, opname, arg):
return _resolve_localsplus_name(code, arg)
elif opname == "LOAD_CONST":
if 0 <= arg < len(code.co_consts):
return _normalize_const_repr(code.co_consts[arg])
return _normalize_argrepr(repr(code.co_consts[arg]))
elif opname in (
"LOAD_DEREF",
"STORE_DEREF",
@@ -311,10 +294,7 @@ def _extract_instructions(code):
elif inst.arg is not None and inst.argrepr:
# If argrepr is just a number, try to resolve it via fallback
# (RustPython may return raw index instead of variable name)
if opname == "LOAD_CONST" and 0 <= inst.arg < len(code.co_consts):
argrepr = _normalize_const_repr(code.co_consts[inst.arg])
else:
argrepr = inst.argrepr
argrepr = inst.argrepr
if argrepr.isdigit() or (argrepr.startswith("-") and argrepr[1:].isdigit()):
resolved = _resolve_arg_fallback(code, opname, inst.arg)
if isinstance(resolved, str) and not resolved.isdigit():