mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
Tighten CPython parity for str format spec, %-format, and str() constructor (#7769)
Five related CPython parity gaps in `str` formatting and construction:
1. **`str(bytes, errors=...)` triggers decode mode.** Previously, only
`encoding=` triggered decode; passing only `errors=` fell back to
`repr()`. CPython's behavior: presence of `encoding` OR `errors`
triggers decode mode (default UTF-8 when only `errors` is given).
2. **`'{...}'.format() IndexError wording.** Generic Rust "tuple index
out of range" replaced with CPython's "Replacement index N out of
range for positional args tuple".
3. **`{0:3.2s}.format('abc')` → 'ab '.** String format spec applied
precision after width padding; CPython truncates BEFORE padding.
Reorder the operations.
4. **`%x` / `%o` / `%X` / `%c` accept `__index__` objects.** Previously
only `PyInt` downcast was attempted. Mirror CPython's
PyNumber_Index dispatch via `try_index_opt`.
5. **`%d` / `%u` / `%i` error wording.** "a number is required" →
"a real number is required" (matches CPython).
Also adds `not <type>` suffix to `%c` error messages so the type is
visible in TypeError text (matches CPython structure even without
fully-qualified names).
Verified byte-identical with CPython 3.14.4 across 25+ probes covering
the format/spec/constructor combinations. Unmasks
`test_str.test_constructor_keyword_args` and
`test_str.test_constructor_defaults`. test_str/test_bytes/test_format/
test_codecs/test_io/test_unicode_identifiers — 1,429 tests pass, 0
regressions. All 188 `extra_tests/snippets/*.py` pass under the CI
feature set.
`test_str.test_format` and `test_str.test_formatting` markers retained:
`test_format` still trips on `'{0:08s}'.format('result')` (numeric
zero-pad treated as fill+left-align by CPython for str type — separate
format-spec parser concern). `test_formatting` still trips on
`%c` error message expecting fully qualified `module.qualname` (RP
returns bare class name — separate broader concern).
This commit is contained in:
1
Lib/test/test_descr.py
vendored
1
Lib/test/test_descr.py
vendored
@@ -3120,7 +3120,6 @@ class ClassPropertiesAndMethods(unittest.TestCase):
|
||||
## pass
|
||||
## os_helper.unlink(os_helper.TESTFN)
|
||||
|
||||
@unittest.expectedFailure # TODO: RUSTPYTHON
|
||||
def test_keywords(self):
|
||||
# Testing keyword args to basic type constructors ...
|
||||
with self.assertRaisesRegex(TypeError, 'keyword argument'):
|
||||
|
||||
6
Lib/test/test_str.py
vendored
6
Lib/test/test_str.py
vendored
@@ -1074,7 +1074,7 @@ class StrTest(string_tests.StringLikeTest,
|
||||
'\U00100000'.ljust(3, '\U00010000')
|
||||
'\U00100000'.rjust(3, '\U00010000')
|
||||
|
||||
@unittest.expectedFailure # TODO: RUSTPYTHON; ? +
|
||||
@unittest.expectedFailure # TODO: RUSTPYTHON; '{0:08s}'.format('result') misalign — '0' fill treated as numeric zero-pad for str type
|
||||
def test_format(self):
|
||||
self.assertEqual(''.format(), '')
|
||||
self.assertEqual('a'.format(), 'a')
|
||||
@@ -1503,7 +1503,7 @@ class StrTest(string_tests.StringLikeTest,
|
||||
self.assertEqual('{:{f}}{g}{}'.format(1, 3, g='g', f=2), ' 1g3')
|
||||
self.assertEqual('{f:{}}{}{g}'.format(2, 4, f=1, g='g'), ' 14g')
|
||||
|
||||
@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: %x format: an integer is required, not PseudoInt
|
||||
@unittest.expectedFailure # TODO: RUSTPYTHON; %c error wording uses bare class name; CPython uses fully qualified module.qualname (e.g. test.test_str.PseudoFloat)
|
||||
def test_formatting(self):
|
||||
string_tests.StringLikeTest.test_formatting(self)
|
||||
# Testing Unicode formatting strings...
|
||||
@@ -1752,7 +1752,6 @@ class StrTest(string_tests.StringLikeTest,
|
||||
'character buffers are decoded to unicode'
|
||||
)
|
||||
|
||||
@unittest.expectedFailure # TODO: RUSTPYTHON; Pass various keyword argument combinations to the constructor.
|
||||
def test_constructor_keyword_args(self):
|
||||
"""Pass various keyword argument combinations to the constructor."""
|
||||
# The object argument can be passed as a keyword.
|
||||
@@ -1762,7 +1761,6 @@ class StrTest(string_tests.StringLikeTest,
|
||||
self.assertEqual(str(b'foo', errors='strict'), 'foo') # not "b'foo'"
|
||||
self.assertEqual(str(object=b'foo', errors='strict'), 'foo')
|
||||
|
||||
@unittest.expectedFailure # TODO: RUSTPYTHON; Check the constructor argument defaults.
|
||||
def test_constructor_defaults(self):
|
||||
"""Check the constructor argument defaults."""
|
||||
# The object argument defaults to '' or b''.
|
||||
|
||||
@@ -876,14 +876,15 @@ impl FormatSpec {
|
||||
{
|
||||
self.validate_format(FormatType::String)?;
|
||||
match self.format_type {
|
||||
Some(FormatType::String) | None => self
|
||||
.format_sign_and_align(s, "", FormatAlign::Left)
|
||||
.map(|mut value| {
|
||||
if let Some(precision) = self.precision {
|
||||
value.truncate(precision);
|
||||
}
|
||||
value
|
||||
}),
|
||||
Some(FormatType::String) | None => {
|
||||
// CPython parity: precision truncates BEFORE width pads.
|
||||
// `'{:3.2s}'.format('abc')` -> 'ab ' (truncate to 'ab', pad to 3).
|
||||
let truncated: String = match self.precision {
|
||||
Some(p) => s.deref().chars().take(p).collect(),
|
||||
None => s.deref().to_owned(),
|
||||
};
|
||||
self.format_sign_and_align(&truncated, "", FormatAlign::Left)
|
||||
}
|
||||
_ => {
|
||||
let ch = char::from(self.format_type.as_ref().unwrap());
|
||||
Err(FormatSpecError::UnknownFormatCode(ch, "str"))
|
||||
@@ -1081,6 +1082,12 @@ impl CharLen for AsciiStr<'_> {
|
||||
}
|
||||
}
|
||||
|
||||
impl CharLen for String {
|
||||
fn char_len(&self) -> usize {
|
||||
self.chars().count()
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for AsciiStr<'_> {
|
||||
type Target = str;
|
||||
|
||||
|
||||
@@ -411,8 +411,12 @@ impl Constructor for PyStr {
|
||||
// result as-is so any str subclass type the user returned is preserved
|
||||
// (matches unicode_new_impl which only invokes unicode_subtype_new when
|
||||
// type != &PyUnicode_Type).
|
||||
// CPython parity: `errors` without `encoding` also triggers decode
|
||||
// mode (with default UTF-8). The fast-path repr only applies when
|
||||
// BOTH `encoding` and `errors` are missing.
|
||||
if cls.is(vm.ctx.types.str_type)
|
||||
&& args.encoding.is_missing()
|
||||
&& args.errors.is_missing()
|
||||
&& let OptionalArg::Present(input) = &args.object
|
||||
{
|
||||
return Ok(input.str(vm)?.into());
|
||||
@@ -425,13 +429,31 @@ impl Constructor for PyStr {
|
||||
fn py_new(_cls: &Py<PyType>, args: Self::Args, vm: &VirtualMachine) -> PyResult<Self> {
|
||||
match args.object {
|
||||
OptionalArg::Present(input) => {
|
||||
if let OptionalArg::Present(enc) = args.encoding {
|
||||
let s = vm.state.codec_registry.decode_text(
|
||||
input,
|
||||
enc.as_str(),
|
||||
args.errors.into_option(),
|
||||
vm,
|
||||
)?;
|
||||
let encoding = args.encoding.into_option();
|
||||
let errors = args.errors.into_option();
|
||||
// CPython parity: presence of `encoding` OR `errors` triggers
|
||||
// decode mode. When `errors` is given alone, the encoding
|
||||
// defaults to UTF-8.
|
||||
if encoding.is_some() || errors.is_some() {
|
||||
// CPython rejects str / non-bytes-like input early with
|
||||
// specific TypeError wording (unicode_new_impl).
|
||||
if input.fast_isinstance(vm.ctx.types.str_type) {
|
||||
return Err(vm.new_type_error("decoding str is not supported"));
|
||||
}
|
||||
if !input.fast_isinstance(vm.ctx.types.bytes_type)
|
||||
&& !input.fast_isinstance(vm.ctx.types.bytearray_type)
|
||||
&& crate::protocol::PyBuffer::try_from_borrowed_object(vm, &input).is_err()
|
||||
{
|
||||
return Err(vm.new_type_error(format!(
|
||||
"decoding to str: need a bytes-like object, {} found",
|
||||
input.class().name()
|
||||
)));
|
||||
}
|
||||
let enc_str = encoding.as_ref().map(|e| e.as_str()).unwrap_or("utf-8");
|
||||
let s = vm
|
||||
.state
|
||||
.codec_registry
|
||||
.decode_text(input, enc_str, errors, vm)?;
|
||||
Ok(Self::from(s.as_wtf8().to_owned()))
|
||||
} else {
|
||||
let s = input.str(vm)?;
|
||||
|
||||
@@ -64,6 +64,13 @@ fn spec_format_bytes(
|
||||
Ok(spec.format_number(&bigint).into_bytes())
|
||||
}
|
||||
obj => {
|
||||
// CPython parity: `%d` / `%i` / `%u` accept any object
|
||||
// with `__index__` (preferred) or `__int__`.
|
||||
if let Some(int_result) = obj.try_index_opt(vm) {
|
||||
let i = int_result?;
|
||||
check_int_to_str_digits(i.as_bigint(), vm)?;
|
||||
return Ok(spec.format_number(i.as_bigint()).into_bytes());
|
||||
}
|
||||
if let Some(method) = vm.get_method(obj.clone(), identifier!(vm, __int__)) {
|
||||
let result = method?.call((), vm)?;
|
||||
if let Some(i) = result.downcast_ref::<PyInt>() {
|
||||
@@ -72,7 +79,7 @@ fn spec_format_bytes(
|
||||
}
|
||||
}
|
||||
Err(vm.new_type_error(format!(
|
||||
"%{} format: a number is required, not {}",
|
||||
"%{} format: a real number is required, not {}",
|
||||
spec.format_type.to_char(),
|
||||
obj.class().name()
|
||||
)))
|
||||
@@ -80,8 +87,13 @@ fn spec_format_bytes(
|
||||
})
|
||||
}
|
||||
_ => {
|
||||
// CPython parity: `%x` / `%o` / `%X` accept any object with
|
||||
// `__index__`, not just PyInt. Mirrors PyNumber_Index dispatch.
|
||||
if let Some(i) = obj.downcast_ref::<PyInt>() {
|
||||
Ok(spec.format_number(i.as_bigint()).into_bytes())
|
||||
} else if let Some(int_result) = obj.try_index_opt(vm) {
|
||||
let i = int_result?;
|
||||
Ok(spec.format_number(i.as_bigint()).into_bytes())
|
||||
} else {
|
||||
Err(vm.new_type_error(format!(
|
||||
"%{} format: an integer is required, not {}",
|
||||
@@ -105,12 +117,8 @@ fn spec_format_bytes(
|
||||
Ok(spec.format_float(value.into()).into_bytes())
|
||||
}
|
||||
CFormatType::Character(CCharacterType::Character) => {
|
||||
if let Some(i) = obj.downcast_ref::<PyInt>() {
|
||||
let ch = i
|
||||
.try_to_primitive::<u8>(vm)
|
||||
.map_err(|_| vm.new_overflow_error("%c arg not in range(256)"))?;
|
||||
return Ok(spec.format_char(ch));
|
||||
}
|
||||
// CPython parity: bytes `%c` accepts a single byte or any object
|
||||
// with `__index__` in range(256).
|
||||
if let Some(b) = obj.downcast_ref::<PyBytes>() {
|
||||
if b.len() == 1 {
|
||||
return Ok(spec.format_char(b.as_bytes()[0]));
|
||||
@@ -121,7 +129,20 @@ fn spec_format_bytes(
|
||||
return Ok(spec.format_char(buf[0]));
|
||||
}
|
||||
}
|
||||
Err(vm.new_type_error("%c requires an integer in range(256) or a single byte"))
|
||||
let int = if let Some(i) = obj.downcast_ref::<PyInt>() {
|
||||
i.to_owned()
|
||||
} else if let Some(int_result) = obj.try_index_opt(vm) {
|
||||
int_result?
|
||||
} else {
|
||||
return Err(vm.new_type_error(format!(
|
||||
"%c requires an integer in range(256) or a single byte, not {}",
|
||||
obj.class().name()
|
||||
)));
|
||||
};
|
||||
let ch = int
|
||||
.try_to_primitive::<u8>(vm)
|
||||
.map_err(|_| vm.new_overflow_error("%c arg not in range(256)"))?;
|
||||
Ok(spec.format_char(ch))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -161,6 +182,13 @@ fn spec_format_string(
|
||||
Ok(spec.format_number(&bigint).into())
|
||||
}
|
||||
obj => {
|
||||
// CPython parity: `%d` / `%i` / `%u` accept any object
|
||||
// with `__index__` (preferred) or `__int__`.
|
||||
if let Some(int_result) = obj.try_index_opt(vm) {
|
||||
let i = int_result?;
|
||||
check_int_to_str_digits(i.as_bigint(), vm)?;
|
||||
return Ok(spec.format_number(i.as_bigint()).into());
|
||||
}
|
||||
if let Some(method) = vm.get_method(obj.clone(), identifier!(vm, __int__)) {
|
||||
let result = method?.call((), vm)?;
|
||||
if let Some(i) = result.downcast_ref::<PyInt>() {
|
||||
@@ -169,7 +197,7 @@ fn spec_format_string(
|
||||
}
|
||||
}
|
||||
Err(vm.new_type_error(format!(
|
||||
"%{} format: a number is required, not {}",
|
||||
"%{} format: a real number is required, not {}",
|
||||
spec.format_type.to_char(),
|
||||
obj.class().name()
|
||||
)))
|
||||
@@ -177,8 +205,13 @@ fn spec_format_string(
|
||||
})
|
||||
}
|
||||
_ => {
|
||||
// CPython parity: `%x` / `%o` / `%X` accept any object with
|
||||
// `__index__`, not just PyInt. Mirrors PyNumber_Index dispatch.
|
||||
if let Some(i) = obj.downcast_ref::<PyInt>() {
|
||||
Ok(spec.format_number(i.as_bigint()).into())
|
||||
} else if let Some(int_result) = obj.try_index_opt(vm) {
|
||||
let i = int_result?;
|
||||
Ok(spec.format_number(i.as_bigint()).into())
|
||||
} else {
|
||||
Err(vm.new_type_error(format!(
|
||||
"%{} format: an integer is required, not {}",
|
||||
@@ -193,20 +226,29 @@ fn spec_format_string(
|
||||
Ok(spec.format_float(value.into()).into())
|
||||
}
|
||||
CFormatType::Character(CCharacterType::Character) => {
|
||||
if let Some(i) = obj.downcast_ref::<PyInt>() {
|
||||
let ch = i
|
||||
.as_bigint()
|
||||
.to_u32()
|
||||
.and_then(CodePoint::from_u32)
|
||||
.ok_or_else(|| vm.new_overflow_error("%c arg not in range(0x110000)"))?;
|
||||
return Ok(spec.format_char(ch));
|
||||
}
|
||||
// CPython parity: `%c` accepts a single-char str or any object with
|
||||
// `__index__` (the latter via PyNumber_Index dispatch).
|
||||
if let Some(s) = obj.downcast_ref::<PyStr>()
|
||||
&& let Ok(ch) = s.as_wtf8().code_points().exactly_one()
|
||||
{
|
||||
return Ok(spec.format_char(ch));
|
||||
}
|
||||
Err(vm.new_type_error("%c requires int or char"))
|
||||
let int = if let Some(i) = obj.downcast_ref::<PyInt>() {
|
||||
i.to_owned()
|
||||
} else if let Some(int_result) = obj.try_index_opt(vm) {
|
||||
int_result?
|
||||
} else {
|
||||
return Err(vm.new_type_error(format!(
|
||||
"%c requires an int or a unicode character, not {}",
|
||||
obj.class().name()
|
||||
)));
|
||||
};
|
||||
let ch = int
|
||||
.as_bigint()
|
||||
.to_u32()
|
||||
.and_then(CodePoint::from_u32)
|
||||
.ok_or_else(|| vm.new_overflow_error("%c arg not in range(0x110000)"))?;
|
||||
Ok(spec.format_char(ch))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -198,11 +198,12 @@ pub(crate) fn format(
|
||||
));
|
||||
}
|
||||
auto_argument_index += 1;
|
||||
arguments
|
||||
.args
|
||||
.get(auto_argument_index - 1)
|
||||
.cloned()
|
||||
.ok_or_else(|| vm.new_index_error("tuple index out of range"))
|
||||
let idx = auto_argument_index - 1;
|
||||
arguments.args.get(idx).cloned().ok_or_else(|| {
|
||||
vm.new_index_error(format!(
|
||||
"Replacement index {idx} out of range for positional args tuple"
|
||||
))
|
||||
})
|
||||
}
|
||||
FieldType::Index(index) => {
|
||||
if auto_argument_index != 0 {
|
||||
@@ -211,11 +212,11 @@ pub(crate) fn format(
|
||||
));
|
||||
}
|
||||
seen_index = true;
|
||||
arguments
|
||||
.args
|
||||
.get(index)
|
||||
.cloned()
|
||||
.ok_or_else(|| vm.new_index_error("tuple index out of range"))
|
||||
arguments.args.get(index).cloned().ok_or_else(|| {
|
||||
vm.new_index_error(format!(
|
||||
"Replacement index {index} out of range for positional args tuple"
|
||||
))
|
||||
})
|
||||
}
|
||||
FieldType::Keyword(keyword) => keyword
|
||||
.as_str()
|
||||
|
||||
Reference in New Issue
Block a user