mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
Parse surrogates in string literals properly
This commit is contained in:
3
Cargo.lock
generated
3
Cargo.lock
generated
@@ -2319,6 +2319,7 @@ dependencies = [
|
||||
"itertools 0.14.0",
|
||||
"log",
|
||||
"malachite-bigint",
|
||||
"memchr",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"ruff_python_ast",
|
||||
@@ -2330,6 +2331,7 @@ dependencies = [
|
||||
"rustpython-compiler-core",
|
||||
"rustpython-compiler-source",
|
||||
"thiserror 2.0.11",
|
||||
"unicode_names2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2387,6 +2389,7 @@ dependencies = [
|
||||
"ruff_python_ast",
|
||||
"ruff_python_parser",
|
||||
"ruff_source_file",
|
||||
"rustpython-common",
|
||||
"serde",
|
||||
]
|
||||
|
||||
|
||||
6
Lib/test/test_codeccallbacks.py
vendored
6
Lib/test/test_codeccallbacks.py
vendored
@@ -536,8 +536,6 @@ class CodecCallbackTest(unittest.TestCase):
|
||||
("".join("&#%d;" % c for c in cs), 1 + len(s))
|
||||
)
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_badandgoodbackslashreplaceexceptions(self):
|
||||
# "backslashreplace" complains about a non-exception passed in
|
||||
self.assertRaises(
|
||||
@@ -596,8 +594,6 @@ class CodecCallbackTest(unittest.TestCase):
|
||||
(r, 2)
|
||||
)
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_badandgoodnamereplaceexceptions(self):
|
||||
# "namereplace" complains about a non-exception passed in
|
||||
self.assertRaises(
|
||||
@@ -644,8 +640,6 @@ class CodecCallbackTest(unittest.TestCase):
|
||||
(r, 1 + len(s))
|
||||
)
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_badandgoodsurrogateescapeexceptions(self):
|
||||
surrogateescape_errors = codecs.lookup_error('surrogateescape')
|
||||
# "surrogateescape" complains about a non-exception passed in
|
||||
|
||||
@@ -401,7 +401,7 @@ pub mod errors {
|
||||
let mut out = String::with_capacity(num_chars * 4);
|
||||
for c in err_str.code_points() {
|
||||
let c_u32 = c.to_u32();
|
||||
if let Some(c_name) = unicode_names2::name(c.to_char_lossy()) {
|
||||
if let Some(c_name) = c.to_char().and_then(unicode_names2::name) {
|
||||
write!(out, "\\N{{{c_name}}}").unwrap();
|
||||
} else if c_u32 >= 0x10000 {
|
||||
write!(out, "\\U{c_u32:08x}").unwrap();
|
||||
|
||||
@@ -574,6 +574,12 @@ impl<W: AsRef<Wtf8>> FromIterator<W> for Wtf8Buf {
|
||||
}
|
||||
}
|
||||
|
||||
impl Hash for Wtf8Buf {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
Wtf8::hash(self, state)
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<Wtf8> for Wtf8Buf {
|
||||
fn as_ref(&self) -> &Wtf8 {
|
||||
self
|
||||
@@ -692,6 +698,13 @@ impl Default for &Wtf8 {
|
||||
}
|
||||
}
|
||||
|
||||
impl Hash for Wtf8 {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
state.write(self.as_bytes());
|
||||
state.write_u8(0xff);
|
||||
}
|
||||
}
|
||||
|
||||
impl Wtf8 {
|
||||
/// Creates a WTF-8 slice from a UTF-8 `&str` slice.
|
||||
///
|
||||
@@ -722,6 +735,32 @@ impl Wtf8 {
|
||||
unsafe { &mut *(value as *mut [u8] as *mut Wtf8) }
|
||||
}
|
||||
|
||||
/// Create a WTF-8 slice from a WTF-8 byte slice.
|
||||
//
|
||||
// whooops! using WTF-8 for interchange!
|
||||
#[inline]
|
||||
pub fn from_bytes(b: &[u8]) -> Option<&Self> {
|
||||
let mut rest = b;
|
||||
while let Err(e) = std::str::from_utf8(rest) {
|
||||
rest = &rest[e.valid_up_to()..];
|
||||
Self::decode_surrogate(rest)?;
|
||||
rest = &rest[3..];
|
||||
}
|
||||
Some(unsafe { Wtf8::from_bytes_unchecked(b) })
|
||||
}
|
||||
|
||||
fn decode_surrogate(b: &[u8]) -> Option<CodePoint> {
|
||||
let [a, b, c, ..] = *b else { return None };
|
||||
if (a & 0xf0) == 0xe0 && (b & 0xc0) == 0x80 && (c & 0xc0) == 0x80 {
|
||||
// it's a three-byte code
|
||||
let c = ((a as u32 & 0x0f) << 12) + ((b as u32 & 0x3f) << 6) + (c as u32 & 0x3f);
|
||||
let 0xD800..=0xDFFF = c else { return None };
|
||||
Some(CodePoint { value: c })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the length, in WTF-8 bytes.
|
||||
#[inline]
|
||||
pub fn len(&self) -> usize {
|
||||
@@ -875,6 +914,14 @@ impl Wtf8 {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn final_lead_surrogate(&self) -> Option<u16> {
|
||||
match self.bytes {
|
||||
[.., 0xED, b2 @ 0xA0..=0xAF, b3] => Some(decode_surrogate(b2, b3)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Method form of the module-level `is_code_point_boundary` function:
/// forwards `self` and `index` to it and returns its result unchanged.
// NOTE(review): presumably reports whether `index` falls between two code
// points of this slice; confirm against the free function's definition.
pub fn is_code_point_boundary(&self, index: usize) -> bool {
|
||||
is_code_point_boundary(self, index)
|
||||
}
|
||||
@@ -1481,6 +1528,12 @@ impl From<Wtf8Buf> for Box<Wtf8> {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Box<Wtf8>> for Wtf8Buf {
    /// Converts a boxed WTF-8 slice into an owned buffer by delegating to
    /// `Wtf8Buf::from_box`.
    fn from(boxed: Box<Wtf8>) -> Self {
        Self::from_box(boxed)
    }
}
|
||||
|
||||
impl From<String> for Box<Wtf8> {
|
||||
fn from(s: String) -> Self {
|
||||
s.into_boxed_str().into()
|
||||
|
||||
@@ -30,6 +30,8 @@ num-complex = { workspace = true }
|
||||
num-traits = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
malachite-bigint = { workspace = true }
|
||||
memchr = { workspace = true }
|
||||
unicode_names2 = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
# rustpython-parser = { workspace = true }
|
||||
|
||||
@@ -21,13 +21,14 @@ use ruff_python_ast::{
|
||||
Alias, Arguments, BoolOp, CmpOp, Comprehension, ConversionFlag, DebugText, Decorator, DictItem,
|
||||
ExceptHandler, ExceptHandlerExceptHandler, Expr, ExprAttribute, ExprBoolOp, ExprFString,
|
||||
ExprList, ExprName, ExprStarred, ExprSubscript, ExprTuple, ExprUnaryOp, FString,
|
||||
FStringElement, FStringElements, FStringPart, Int, Keyword, MatchCase, ModExpression,
|
||||
ModModule, Operator, Parameters, Pattern, PatternMatchAs, PatternMatchValue, Stmt, StmtExpr,
|
||||
TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, TypeParams, UnaryOp,
|
||||
WithItem,
|
||||
FStringElement, FStringElements, FStringFlags, FStringPart, Int, Keyword, MatchCase,
|
||||
ModExpression, ModModule, Operator, Parameters, Pattern, PatternMatchAs, PatternMatchValue,
|
||||
Stmt, StmtExpr, TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple,
|
||||
TypeParams, UnaryOp, WithItem,
|
||||
};
|
||||
use ruff_source_file::OneIndexed;
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
use rustpython_common::wtf8::Wtf8Buf;
|
||||
// use rustpython_ast::located::{self as located_ast, Located};
|
||||
use rustpython_compiler_core::{
|
||||
Mode,
|
||||
@@ -375,7 +376,9 @@ impl Compiler<'_> {
|
||||
|
||||
let (doc, statements) = split_doc(&body.body, &self.opts);
|
||||
if let Some(value) = doc {
|
||||
self.emit_load_const(ConstantData::Str { value });
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: value.into(),
|
||||
});
|
||||
let doc = self.name("__doc__");
|
||||
emit!(self, Instruction::StoreGlobal(doc))
|
||||
}
|
||||
@@ -636,14 +639,12 @@ impl Compiler<'_> {
|
||||
statement.range(),
|
||||
));
|
||||
}
|
||||
vec![ConstantData::Str {
|
||||
value: "*".to_owned(),
|
||||
}]
|
||||
vec![ConstantData::Str { value: "*".into() }]
|
||||
} else {
|
||||
names
|
||||
.iter()
|
||||
.map(|n| ConstantData::Str {
|
||||
value: n.name.to_string(),
|
||||
value: n.name.as_str().into(),
|
||||
})
|
||||
.collect()
|
||||
};
|
||||
@@ -954,7 +955,7 @@ impl Compiler<'_> {
|
||||
self.pop_symbol_table();
|
||||
}
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: name_string.clone(),
|
||||
value: name_string.clone().into(),
|
||||
});
|
||||
emit!(self, Instruction::TypeAlias);
|
||||
self.store_name(&name_string)?;
|
||||
@@ -1028,7 +1029,7 @@ impl Compiler<'_> {
|
||||
let default_kw_count = kw_with_defaults.len();
|
||||
for (arg, default) in kw_with_defaults.iter() {
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: arg.name.to_string(),
|
||||
value: arg.name.as_str().into(),
|
||||
});
|
||||
self.compile_expression(default)?;
|
||||
}
|
||||
@@ -1101,7 +1102,7 @@ impl Compiler<'_> {
|
||||
if let Some(expr) = &bound {
|
||||
self.compile_expression(expr)?;
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: name.to_string(),
|
||||
value: name.as_str().into(),
|
||||
});
|
||||
emit!(self, Instruction::TypeVarWithBound);
|
||||
emit!(self, Instruction::Duplicate);
|
||||
@@ -1109,7 +1110,7 @@ impl Compiler<'_> {
|
||||
} else {
|
||||
// self.store_name(type_name.as_str())?;
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: name.to_string(),
|
||||
value: name.as_str().into(),
|
||||
});
|
||||
emit!(self, Instruction::TypeVar);
|
||||
emit!(self, Instruction::Duplicate);
|
||||
@@ -1118,7 +1119,7 @@ impl Compiler<'_> {
|
||||
}
|
||||
TypeParam::ParamSpec(TypeParamParamSpec { name, .. }) => {
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: name.to_string(),
|
||||
value: name.as_str().into(),
|
||||
});
|
||||
emit!(self, Instruction::ParamSpec);
|
||||
emit!(self, Instruction::Duplicate);
|
||||
@@ -1126,7 +1127,7 @@ impl Compiler<'_> {
|
||||
}
|
||||
TypeParam::TypeVarTuple(TypeParamTypeVarTuple { name, .. }) => {
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: name.to_string(),
|
||||
value: name.as_str().into(),
|
||||
});
|
||||
emit!(self, Instruction::TypeVarTuple);
|
||||
emit!(self, Instruction::Duplicate);
|
||||
@@ -1363,7 +1364,7 @@ impl Compiler<'_> {
|
||||
if let Some(annotation) = returns {
|
||||
// key:
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: "return".to_owned(),
|
||||
value: "return".into(),
|
||||
});
|
||||
// value:
|
||||
self.compile_annotation(annotation)?;
|
||||
@@ -1380,7 +1381,7 @@ impl Compiler<'_> {
|
||||
for param in parameters_iter {
|
||||
if let Some(annotation) = ¶m.annotation {
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: self.mangle(param.name.as_str()).into_owned(),
|
||||
value: self.mangle(param.name.as_str()).into_owned().into(),
|
||||
});
|
||||
self.compile_annotation(annotation)?;
|
||||
num_annotations += 1;
|
||||
@@ -1410,7 +1411,7 @@ impl Compiler<'_> {
|
||||
code: Box::new(code),
|
||||
});
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: qualified_name,
|
||||
value: qualified_name.into(),
|
||||
});
|
||||
|
||||
// Turn code object into function object:
|
||||
@@ -1418,7 +1419,9 @@ impl Compiler<'_> {
|
||||
|
||||
if let Some(value) = doc_str {
|
||||
emit!(self, Instruction::Duplicate);
|
||||
self.emit_load_const(ConstantData::Str { value });
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: value.into(),
|
||||
});
|
||||
emit!(self, Instruction::Rotate2);
|
||||
let doc = self.name("__doc__");
|
||||
emit!(self, Instruction::StoreAttr { idx: doc });
|
||||
@@ -1547,7 +1550,7 @@ impl Compiler<'_> {
|
||||
let dunder_module = self.name("__module__");
|
||||
emit!(self, Instruction::StoreLocal(dunder_module));
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: qualified_name,
|
||||
value: qualified_name.into(),
|
||||
});
|
||||
let qualname = self.name("__qualname__");
|
||||
emit!(self, Instruction::StoreLocal(qualname));
|
||||
@@ -1608,16 +1611,12 @@ impl Compiler<'_> {
|
||||
self.emit_load_const(ConstantData::Code {
|
||||
code: Box::new(code),
|
||||
});
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: name.to_owned(),
|
||||
});
|
||||
self.emit_load_const(ConstantData::Str { value: name.into() });
|
||||
|
||||
// Turn code object into function object:
|
||||
emit!(self, Instruction::MakeFunction(func_flags));
|
||||
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: name.to_owned(),
|
||||
});
|
||||
self.emit_load_const(ConstantData::Str { value: name.into() });
|
||||
|
||||
// Call the __build_class__ builtin
|
||||
let call = if let Some(arguments) = arguments {
|
||||
@@ -1638,7 +1637,7 @@ impl Compiler<'_> {
|
||||
|
||||
// Doc string value:
|
||||
self.emit_load_const(match doc_str {
|
||||
Some(doc) => ConstantData::Str { value: doc },
|
||||
Some(doc) => ConstantData::Str { value: doc.into() },
|
||||
None => ConstantData::None, // set docstring None if not declared
|
||||
});
|
||||
}
|
||||
@@ -2031,7 +2030,7 @@ impl Compiler<'_> {
|
||||
let ident = Default::default();
|
||||
let codegen = ruff_python_codegen::Generator::new(&ident, Default::default());
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: codegen.expr(annotation),
|
||||
value: codegen.expr(annotation).into(),
|
||||
});
|
||||
} else {
|
||||
self.compile_expression(annotation)?;
|
||||
@@ -2063,7 +2062,7 @@ impl Compiler<'_> {
|
||||
let annotations = self.name("__annotations__");
|
||||
emit!(self, Instruction::LoadNameAny(annotations));
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: self.mangle(id.as_str()).into_owned(),
|
||||
value: self.mangle(id.as_str()).into_owned().into(),
|
||||
});
|
||||
emit!(self, Instruction::StoreSubscript);
|
||||
} else {
|
||||
@@ -2538,7 +2537,7 @@ impl Compiler<'_> {
|
||||
self.emit_load_const(ConstantData::Code {
|
||||
code: Box::new(code),
|
||||
});
|
||||
self.emit_load_const(ConstantData::Str { value: name });
|
||||
self.emit_load_const(ConstantData::Str { value: name.into() });
|
||||
// Turn code object into function object:
|
||||
emit!(self, Instruction::MakeFunction(func_flags));
|
||||
|
||||
@@ -2679,9 +2678,23 @@ impl Compiler<'_> {
|
||||
self.compile_expr_fstring(fstring)?;
|
||||
}
|
||||
Expr::StringLiteral(string) => {
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: string.value.to_str().to_owned(),
|
||||
});
|
||||
let value = string.value.to_str();
|
||||
if value.contains(char::REPLACEMENT_CHARACTER) {
|
||||
let value = string
|
||||
.value
|
||||
.iter()
|
||||
.map(|lit| {
|
||||
let source = self.source_code.get_range(lit.range);
|
||||
crate::string_parser::parse_string_literal(source, lit.flags.into())
|
||||
})
|
||||
.collect();
|
||||
// might have a surrogate literal; should reparse to be sure
|
||||
self.emit_load_const(ConstantData::Str { value });
|
||||
} else {
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: value.into(),
|
||||
});
|
||||
}
|
||||
}
|
||||
Expr::BytesLiteral(bytes) => {
|
||||
let iter = bytes.value.iter().flat_map(|x| x.iter().copied());
|
||||
@@ -2732,7 +2745,7 @@ impl Compiler<'_> {
|
||||
for keyword in sub_keywords {
|
||||
if let Some(name) = &keyword.arg {
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: name.to_string(),
|
||||
value: name.as_str().into(),
|
||||
});
|
||||
self.compile_expression(&keyword.value)?;
|
||||
sub_size += 1;
|
||||
@@ -2822,7 +2835,7 @@ impl Compiler<'_> {
|
||||
for keyword in &arguments.keywords {
|
||||
if let Some(name) = &keyword.arg {
|
||||
kwarg_names.push(ConstantData::Str {
|
||||
value: name.to_string(),
|
||||
value: name.as_str().into(),
|
||||
});
|
||||
} else {
|
||||
// This means **kwargs!
|
||||
@@ -3058,9 +3071,7 @@ impl Compiler<'_> {
|
||||
});
|
||||
|
||||
// List comprehension function name:
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: name.to_owned(),
|
||||
});
|
||||
self.emit_load_const(ConstantData::Str { value: name.into() });
|
||||
|
||||
// Turn code object into function object:
|
||||
emit!(self, Instruction::MakeFunction(func_flags));
|
||||
@@ -3358,9 +3369,19 @@ impl Compiler<'_> {
|
||||
fn compile_fstring_part(&mut self, part: &FStringPart) -> CompileResult<()> {
|
||||
match part {
|
||||
FStringPart::Literal(string) => {
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: string.value.to_string(),
|
||||
});
|
||||
if string.value.contains(char::REPLACEMENT_CHARACTER) {
|
||||
// might have a surrogate literal; should reparse to be sure
|
||||
let source = self.source_code.get_range(string.range);
|
||||
let value =
|
||||
crate::string_parser::parse_string_literal(source, string.flags.into());
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: value.into(),
|
||||
});
|
||||
} else {
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: string.value.to_string().into(),
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
FStringPart::FString(fstring) => self.compile_fstring(fstring),
|
||||
@@ -3368,19 +3389,32 @@ impl Compiler<'_> {
|
||||
}
|
||||
|
||||
fn compile_fstring(&mut self, fstring: &FString) -> CompileResult<()> {
|
||||
self.compile_fstring_elements(&fstring.elements)
|
||||
self.compile_fstring_elements(fstring.flags, &fstring.elements)
|
||||
}
|
||||
|
||||
fn compile_fstring_elements(
|
||||
&mut self,
|
||||
flags: FStringFlags,
|
||||
fstring_elements: &FStringElements,
|
||||
) -> CompileResult<()> {
|
||||
for element in fstring_elements {
|
||||
match element {
|
||||
FStringElement::Literal(string) => {
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: string.value.to_string(),
|
||||
});
|
||||
if string.value.contains(char::REPLACEMENT_CHARACTER) {
|
||||
// might have a surrogate literal; should reparse to be sure
|
||||
let source = self.source_code.get_range(string.range);
|
||||
let value = crate::string_parser::parse_fstring_literal_element(
|
||||
source.into(),
|
||||
flags.into(),
|
||||
);
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: value.into(),
|
||||
});
|
||||
} else {
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: string.value.to_string().into(),
|
||||
});
|
||||
}
|
||||
}
|
||||
FStringElement::Expression(fstring_expr) => {
|
||||
let mut conversion = fstring_expr.conversion;
|
||||
@@ -3393,11 +3427,13 @@ impl Compiler<'_> {
|
||||
let source = source.to_string();
|
||||
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: leading.to_string(),
|
||||
value: leading.to_string().into(),
|
||||
});
|
||||
self.emit_load_const(ConstantData::Str { value: source });
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: trailing.to_string(),
|
||||
value: source.into(),
|
||||
});
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: trailing.to_string().into(),
|
||||
});
|
||||
|
||||
3
|
||||
@@ -3407,7 +3443,7 @@ impl Compiler<'_> {
|
||||
match &fstring_expr.format_spec {
|
||||
None => {
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: String::new(),
|
||||
value: Wtf8Buf::new(),
|
||||
});
|
||||
// Match CPython behavior: If debug text is present, apply repr conversion.
|
||||
// See: https://github.com/python/cpython/blob/f61afca262d3a0aa6a8a501db0b1936c60858e35/Parser/action_helpers.c#L1456
|
||||
@@ -3416,7 +3452,7 @@ impl Compiler<'_> {
|
||||
}
|
||||
}
|
||||
Some(format_spec) => {
|
||||
self.compile_fstring_elements(&format_spec.elements)?;
|
||||
self.compile_fstring_elements(flags, &format_spec.elements)?;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3449,7 +3485,7 @@ impl Compiler<'_> {
|
||||
if element_count == 0 {
|
||||
// ensure to put an empty string on the stack if there aren't any fstring elements
|
||||
self.emit_load_const(ConstantData::Str {
|
||||
value: String::new(),
|
||||
value: Wtf8Buf::new(),
|
||||
});
|
||||
} else if element_count > 1 {
|
||||
emit!(
|
||||
|
||||
@@ -11,6 +11,7 @@ type IndexSet<T> = indexmap::IndexSet<T, ahash::RandomState>;
|
||||
pub mod compile;
|
||||
pub mod error;
|
||||
pub mod ir;
|
||||
mod string_parser;
|
||||
pub mod symboltable;
|
||||
|
||||
pub use compile::CompileOpts;
|
||||
|
||||
287
compiler/codegen/src/string_parser.rs
Normal file
287
compiler/codegen/src/string_parser.rs
Normal file
@@ -0,0 +1,287 @@
|
||||
//! A stripped-down version of ruff's string literal parser, modified to
|
||||
//! handle surrogates in string literals and output WTF-8.
|
||||
//!
|
||||
//! Any `unreachable!()` statements in this file are because we only get here
|
||||
//! after ruff has already successfully parsed the string literal, meaning
|
||||
//! we don't need to do any validation or error handling.
|
||||
|
||||
use std::convert::Infallible;
|
||||
|
||||
use ruff_python_ast::{AnyStringFlags, StringFlags};
|
||||
use rustpython_common::wtf8::{CodePoint, Wtf8, Wtf8Buf};
|
||||
|
||||
// use ruff_python_parser::{LexicalError, LexicalErrorType};
|
||||
type LexicalError = Infallible;
|
||||
|
||||
/// Result of parsing the character(s) that follow a backslash.
enum EscapedChar {
|
||||
/// A recognised escape sequence, already resolved to the code point it
/// denotes; callers push this code point directly.
Literal(CodePoint),
|
||||
/// An unrecognised escape; holds the character that followed the
/// backslash. Callers emit a backslash followed by this character.
Escape(char),
|
||||
}
|
||||
|
||||
/// Cursor-based parser over the content of one string literal (or one
/// f-string literal element) that produces WTF-8 output.
struct StringParser {
|
||||
/// The raw content of the string e.g., the `foo` part in `"foo"`.
|
||||
source: Box<str>,
|
||||
/// Current position of the parser in the source, as a byte offset.
|
||||
cursor: usize,
|
||||
/// Flags that can be used to query information about the string.
|
||||
flags: AnyStringFlags,
|
||||
}
|
||||
|
||||
impl StringParser {
|
||||
fn new(source: Box<str>, flags: AnyStringFlags) -> Self {
|
||||
Self {
|
||||
source,
|
||||
cursor: 0,
|
||||
flags,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn skip_bytes(&mut self, bytes: usize) -> &str {
|
||||
let skipped_str = &self.source[self.cursor..self.cursor + bytes];
|
||||
self.cursor += bytes;
|
||||
skipped_str
|
||||
}
|
||||
|
||||
/// Returns the next byte in the string, if there is one.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When the next byte is a part of a multi-byte character.
|
||||
#[inline]
|
||||
fn next_byte(&mut self) -> Option<u8> {
|
||||
self.source[self.cursor..].as_bytes().first().map(|&byte| {
|
||||
self.cursor += 1;
|
||||
byte
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn next_char(&mut self) -> Option<char> {
|
||||
self.source[self.cursor..].chars().next().inspect(|c| {
|
||||
self.cursor += c.len_utf8();
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn peek_byte(&self) -> Option<u8> {
|
||||
self.source[self.cursor..].as_bytes().first().copied()
|
||||
}
|
||||
|
||||
fn parse_unicode_literal(&mut self, literal_number: usize) -> Result<CodePoint, LexicalError> {
|
||||
let mut p: u32 = 0u32;
|
||||
for i in 1..=literal_number {
|
||||
match self.next_char() {
|
||||
Some(c) => match c.to_digit(16) {
|
||||
Some(d) => p += d << ((literal_number - i) * 4),
|
||||
None => unreachable!(),
|
||||
},
|
||||
None => unreachable!(),
|
||||
}
|
||||
}
|
||||
Ok(CodePoint::from_u32(p).unwrap())
|
||||
}
|
||||
|
||||
fn parse_octet(&mut self, o: u8) -> char {
|
||||
let mut radix_bytes = [o, 0, 0];
|
||||
let mut len = 1;
|
||||
|
||||
while len < 3 {
|
||||
let Some(b'0'..=b'7') = self.peek_byte() else {
|
||||
break;
|
||||
};
|
||||
|
||||
radix_bytes[len] = self.next_byte().unwrap();
|
||||
len += 1;
|
||||
}
|
||||
|
||||
// OK because radix_bytes is always going to be in the ASCII range.
|
||||
let radix_str = std::str::from_utf8(&radix_bytes[..len]).expect("ASCII bytes");
|
||||
let value = u32::from_str_radix(radix_str, 8).unwrap();
|
||||
char::from_u32(value).unwrap()
|
||||
}
|
||||
|
||||
fn parse_unicode_name(&mut self) -> Result<char, LexicalError> {
|
||||
let Some('{') = self.next_char() else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let Some(close_idx) = self.source[self.cursor..].find('}') else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let name_and_ending = self.skip_bytes(close_idx + 1);
|
||||
let name = &name_and_ending[..name_and_ending.len() - 1];
|
||||
|
||||
unicode_names2::character(name).ok_or_else(|| unreachable!())
|
||||
}
|
||||
|
||||
/// Parse an escaped character, returning the new character.
|
||||
fn parse_escaped_char(&mut self) -> Result<Option<EscapedChar>, LexicalError> {
|
||||
let Some(first_char) = self.next_char() else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let new_char = match first_char {
|
||||
'\\' => '\\'.into(),
|
||||
'\'' => '\''.into(),
|
||||
'\"' => '"'.into(),
|
||||
'a' => '\x07'.into(),
|
||||
'b' => '\x08'.into(),
|
||||
'f' => '\x0c'.into(),
|
||||
'n' => '\n'.into(),
|
||||
'r' => '\r'.into(),
|
||||
't' => '\t'.into(),
|
||||
'v' => '\x0b'.into(),
|
||||
o @ '0'..='7' => self.parse_octet(o as u8).into(),
|
||||
'x' => self.parse_unicode_literal(2)?,
|
||||
'u' if !self.flags.is_byte_string() => self.parse_unicode_literal(4)?,
|
||||
'U' if !self.flags.is_byte_string() => self.parse_unicode_literal(8)?,
|
||||
'N' if !self.flags.is_byte_string() => self.parse_unicode_name()?.into(),
|
||||
// Special cases where the escape sequence is not a single character
|
||||
'\n' => return Ok(None),
|
||||
'\r' => {
|
||||
if self.peek_byte() == Some(b'\n') {
|
||||
self.next_byte();
|
||||
}
|
||||
|
||||
return Ok(None);
|
||||
}
|
||||
_ => return Ok(Some(EscapedChar::Escape(first_char))),
|
||||
};
|
||||
|
||||
Ok(Some(EscapedChar::Literal(new_char)))
|
||||
}
|
||||
|
||||
fn parse_fstring_middle(mut self) -> Result<Box<Wtf8>, LexicalError> {
|
||||
// Fast-path: if the f-string doesn't contain any escape sequences, return the literal.
|
||||
let Some(mut index) = memchr::memchr3(b'{', b'}', b'\\', self.source.as_bytes()) else {
|
||||
return Ok(self.source.into());
|
||||
};
|
||||
|
||||
let mut value = Wtf8Buf::with_capacity(self.source.len());
|
||||
loop {
|
||||
// Add the characters before the escape sequence (or curly brace) to the string.
|
||||
let before_with_slash_or_brace = self.skip_bytes(index + 1);
|
||||
let before = &before_with_slash_or_brace[..before_with_slash_or_brace.len() - 1];
|
||||
value.push_str(before);
|
||||
|
||||
// Add the escaped character to the string.
|
||||
match &self.source.as_bytes()[self.cursor - 1] {
|
||||
// If there are any curly braces inside a `FStringMiddle` token,
|
||||
// then they were escaped (i.e. `{{` or `}}`). This means that
|
||||
// we need increase the location by 2 instead of 1.
|
||||
b'{' => value.push_char('{'),
|
||||
b'}' => value.push_char('}'),
|
||||
// We can encounter a `\` as the last character in a `FStringMiddle`
|
||||
// token which is valid in this context. For example,
|
||||
//
|
||||
// ```python
|
||||
// f"\{foo} \{bar:\}"
|
||||
// # ^ ^^ ^
|
||||
// ```
|
||||
//
|
||||
// Here, the `FStringMiddle` token content will be "\" and " \"
|
||||
// which is invalid if we look at the content in isolation:
|
||||
//
|
||||
// ```python
|
||||
// "\"
|
||||
// ```
|
||||
//
|
||||
// However, the content is syntactically valid in the context of
|
||||
// the f-string because it's a substring of the entire f-string.
|
||||
// This is still an invalid escape sequence, but we don't want to
|
||||
// raise a syntax error as is done by the CPython parser. It might
|
||||
// be supported in the future, refer to point 3: https://peps.python.org/pep-0701/#rejected-ideas
|
||||
b'\\' => {
|
||||
if !self.flags.is_raw_string() && self.peek_byte().is_some() {
|
||||
match self.parse_escaped_char()? {
|
||||
None => {}
|
||||
Some(EscapedChar::Literal(c)) => value.push(c),
|
||||
Some(EscapedChar::Escape(c)) => {
|
||||
value.push_char('\\');
|
||||
value.push_char(c);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
value.push_char('\\');
|
||||
}
|
||||
}
|
||||
ch => {
|
||||
unreachable!("Expected '{{', '}}', or '\\' but got {:?}", ch);
|
||||
}
|
||||
}
|
||||
|
||||
let Some(next_index) =
|
||||
memchr::memchr3(b'{', b'}', b'\\', self.source[self.cursor..].as_bytes())
|
||||
else {
|
||||
// Add the rest of the string to the value.
|
||||
let rest = &self.source[self.cursor..];
|
||||
value.push_str(rest);
|
||||
break;
|
||||
};
|
||||
|
||||
index = next_index;
|
||||
}
|
||||
|
||||
Ok(value.into())
|
||||
}
|
||||
|
||||
fn parse_string(mut self) -> Result<Box<Wtf8>, LexicalError> {
|
||||
if self.flags.is_raw_string() {
|
||||
// For raw strings, no escaping is necessary.
|
||||
return Ok(self.source.into());
|
||||
}
|
||||
|
||||
let Some(mut escape) = memchr::memchr(b'\\', self.source.as_bytes()) else {
|
||||
// If the string doesn't contain any escape sequences, return the owned string.
|
||||
return Ok(self.source.into());
|
||||
};
|
||||
|
||||
// If the string contains escape sequences, we need to parse them.
|
||||
let mut value = Wtf8Buf::with_capacity(self.source.len());
|
||||
|
||||
loop {
|
||||
// Add the characters before the escape sequence to the string.
|
||||
let before_with_slash = self.skip_bytes(escape + 1);
|
||||
let before = &before_with_slash[..before_with_slash.len() - 1];
|
||||
value.push_str(before);
|
||||
|
||||
// Add the escaped character to the string.
|
||||
match self.parse_escaped_char()? {
|
||||
None => {}
|
||||
Some(EscapedChar::Literal(c)) => value.push(c),
|
||||
Some(EscapedChar::Escape(c)) => {
|
||||
value.push_char('\\');
|
||||
value.push_char(c);
|
||||
}
|
||||
}
|
||||
|
||||
let Some(next_escape) = self.source[self.cursor..].find('\\') else {
|
||||
// Add the rest of the string to the value.
|
||||
let rest = &self.source[self.cursor..];
|
||||
value.push_str(rest);
|
||||
break;
|
||||
};
|
||||
|
||||
// Update the position of the next escape sequence.
|
||||
escape = next_escape;
|
||||
}
|
||||
|
||||
Ok(value.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn parse_string_literal(source: &str, flags: AnyStringFlags) -> Box<Wtf8> {
|
||||
let source = &source[flags.opener_len().to_usize()..];
|
||||
let source = &source[..source.len() - flags.quote_len().to_usize()];
|
||||
StringParser::new(source.into(), flags)
|
||||
.parse_string()
|
||||
.unwrap_or_else(|x| match x {})
|
||||
}
|
||||
|
||||
pub(crate) fn parse_fstring_literal_element(source: Box<str>, flags: AnyStringFlags) -> Box<Wtf8> {
|
||||
StringParser::new(source, flags)
|
||||
.parse_fstring_middle()
|
||||
.unwrap_or_else(|x| match x {})
|
||||
}
|
||||
@@ -13,6 +13,7 @@ license.workspace = true
|
||||
ruff_python_ast = { workspace = true }
|
||||
ruff_python_parser = { workspace = true }
|
||||
ruff_source_file = { workspace = true }
|
||||
rustpython-common = { workspace = true }
|
||||
|
||||
bitflags = { workspace = true }
|
||||
itertools = { workspace = true }
|
||||
|
||||
@@ -8,6 +8,7 @@ use num_complex::Complex64;
|
||||
pub use ruff_python_ast::ConversionFlag;
|
||||
// use rustpython_parser_core::source_code::{OneIndexed, SourceLocation};
|
||||
use ruff_source_file::{OneIndexed, SourceLocation};
|
||||
use rustpython_common::wtf8::{Wtf8, Wtf8Buf};
|
||||
use std::marker::PhantomData;
|
||||
use std::{collections::BTreeSet, fmt, hash, mem};
|
||||
|
||||
@@ -678,7 +679,7 @@ pub enum ConstantData {
|
||||
Float { value: f64 },
|
||||
Complex { value: Complex64 },
|
||||
Boolean { value: bool },
|
||||
Str { value: String },
|
||||
Str { value: Wtf8Buf },
|
||||
Bytes { value: Vec<u8> },
|
||||
Code { code: Box<CodeObject> },
|
||||
None,
|
||||
@@ -738,7 +739,7 @@ pub enum BorrowedConstant<'a, C: Constant> {
|
||||
Float { value: f64 },
|
||||
Complex { value: Complex64 },
|
||||
Boolean { value: bool },
|
||||
Str { value: &'a str },
|
||||
Str { value: &'a Wtf8 },
|
||||
Bytes { value: &'a [u8] },
|
||||
Code { code: &'a CodeObject<C> },
|
||||
Tuple { elements: &'a [C] },
|
||||
|
||||
@@ -2,6 +2,7 @@ use crate::bytecode::*;
|
||||
use malachite_bigint::{BigInt, Sign};
|
||||
use num_complex::Complex64;
|
||||
use ruff_source_file::{OneIndexed, SourceLocation};
|
||||
use rustpython_common::wtf8::Wtf8;
|
||||
use std::convert::Infallible;
|
||||
|
||||
pub const FORMAT_VERSION: u32 = 4;
|
||||
@@ -117,6 +118,9 @@ pub trait Read {
|
||||
/// Reads `len` bytes and returns them as a `&str`, failing if the read
/// fails or the bytes are not valid UTF-8.
fn read_str(&mut self, len: u32) -> Result<&str> {
    let bytes = self.read_slice(len)?;
    let text = std::str::from_utf8(bytes)?;
    Ok(text)
}
|
||||
fn read_wtf8(&mut self, len: u32) -> Result<&Wtf8> {
|
||||
Wtf8::from_bytes(self.read_slice(len)?).ok_or(MarshalError::InvalidUtf8)
|
||||
}
|
||||
/// Reads a single byte.
fn read_u8(&mut self) -> Result<u8> {
    let raw: [u8; 1] = *self.read_array()?;
    Ok(u8::from_le_bytes(raw))
}
|
||||
@@ -262,7 +266,7 @@ pub trait MarshalBag: Copy {
|
||||
fn make_ellipsis(&self) -> Self::Value;
|
||||
fn make_float(&self, value: f64) -> Self::Value;
|
||||
fn make_complex(&self, value: Complex64) -> Self::Value;
|
||||
fn make_str(&self, value: &str) -> Self::Value;
|
||||
fn make_str(&self, value: &Wtf8) -> Self::Value;
|
||||
fn make_bytes(&self, value: &[u8]) -> Self::Value;
|
||||
fn make_int(&self, value: BigInt) -> Self::Value;
|
||||
fn make_tuple(&self, elements: impl Iterator<Item = Self::Value>) -> Self::Value;
|
||||
@@ -299,7 +303,7 @@ impl<Bag: ConstantBag> MarshalBag for Bag {
|
||||
/// Builds a complex-number value by wrapping `value` in `BorrowedConstant::Complex`
/// and handing it to `make_constant`.
fn make_complex(&self, value: Complex64) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::Complex { value })
|
||||
}
|
||||
fn make_str(&self, value: &str) -> Self::Value {
|
||||
fn make_str(&self, value: &Wtf8) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::Str { value })
|
||||
}
|
||||
fn make_bytes(&self, value: &[u8]) -> Self::Value {
|
||||
@@ -368,7 +372,7 @@ pub fn deserialize_value<R: Read, Bag: MarshalBag>(rdr: &mut R, bag: Bag) -> Res
|
||||
}
|
||||
Type::Ascii | Type::Unicode => {
|
||||
let len = rdr.read_u32()?;
|
||||
let value = rdr.read_str(len)?;
|
||||
let value = rdr.read_wtf8(len)?;
|
||||
bag.make_str(value)
|
||||
}
|
||||
Type::Tuple => {
|
||||
@@ -422,7 +426,7 @@ pub enum DumpableValue<'a, D: Dumpable> {
|
||||
Float(f64),
|
||||
Complex(Complex64),
|
||||
Boolean(bool),
|
||||
Str(&'a str),
|
||||
Str(&'a Wtf8),
|
||||
Bytes(&'a [u8]),
|
||||
Code(&'a CodeObject<D::Constant>),
|
||||
Tuple(&'a [D]),
|
||||
|
||||
@@ -53,7 +53,9 @@ enum StackValue {
|
||||
impl From<ConstantData> for StackValue {
|
||||
fn from(value: ConstantData) -> Self {
|
||||
match value {
|
||||
ConstantData::Str { value } => StackValue::String(value),
|
||||
ConstantData::Str { value } => {
|
||||
StackValue::String(value.into_string().expect("surrogate in test code"))
|
||||
}
|
||||
ConstantData::None => StackValue::None,
|
||||
ConstantData::Code { code } => StackValue::Code(code),
|
||||
c => unimplemented!("constant {:?} isn't yet supported in py_function!", c),
|
||||
|
||||
@@ -74,7 +74,7 @@ fn borrow_obj_constant(obj: &PyObject) -> BorrowedConstant<'_, Literal> {
|
||||
ref c @ super::complex::PyComplex => BorrowedConstant::Complex {
|
||||
value: c.to_complex()
|
||||
},
|
||||
ref s @ super::pystr::PyStr => BorrowedConstant::Str { value: s.as_str() },
|
||||
ref s @ super::pystr::PyStr => BorrowedConstant::Str { value: s.as_wtf8() },
|
||||
ref b @ super::bytes::PyBytes => BorrowedConstant::Bytes {
|
||||
value: b.as_bytes()
|
||||
},
|
||||
|
||||
@@ -1815,6 +1815,18 @@ impl AsRef<str> for PyExact<PyStr> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Lets a `PyRefExact<PyStr>` be borrowed as a `Wtf8` slice.
impl AsRef<Wtf8> for PyRefExact<PyStr> {
|
||||
/// Returns the result of `as_wtf8()` on this reference.
fn as_ref(&self) -> &Wtf8 {
|
||||
self.as_wtf8()
|
||||
}
|
||||
}
|
||||
|
||||
/// Lets a `PyExact<PyStr>` be borrowed as a `Wtf8` slice.
impl AsRef<Wtf8> for PyExact<PyStr> {
|
||||
/// Returns the result of `as_wtf8()` on this value.
fn as_ref(&self) -> &Wtf8 {
|
||||
self.as_wtf8()
|
||||
}
|
||||
}
|
||||
|
||||
impl AnyStrWrapper<Wtf8> for PyStrRef {
|
||||
fn as_ref(&self) -> Option<&Wtf8> {
|
||||
Some(self.as_wtf8())
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use rustpython_common::wtf8::{Wtf8, Wtf8Buf};
|
||||
|
||||
use crate::{
|
||||
AsObject, Py, PyExact, PyObject, PyObjectRef, PyPayload, PyRef, PyRefExact, VirtualMachine,
|
||||
builtins::{PyStr, PyStrInterned, PyTypeRef},
|
||||
@@ -86,29 +88,29 @@ pub struct CachedPyStrRef {
|
||||
|
||||
impl std::hash::Hash for CachedPyStrRef {
|
||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||
self.inner.as_str().hash(state)
|
||||
self.inner.as_wtf8().hash(state)
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for CachedPyStrRef {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.inner.as_str() == other.inner.as_str()
|
||||
self.inner.as_wtf8() == other.inner.as_wtf8()
|
||||
}
|
||||
}
|
||||
|
||||
// `Eq` declares no methods; this empty impl only marks the type's `PartialEq` as a full equivalence relation.
impl Eq for CachedPyStrRef {}
|
||||
|
||||
impl std::borrow::Borrow<str> for CachedPyStrRef {
|
||||
impl std::borrow::Borrow<Wtf8> for CachedPyStrRef {
|
||||
#[inline]
|
||||
fn borrow(&self) -> &str {
|
||||
self.inner.as_str()
|
||||
fn borrow(&self) -> &Wtf8 {
|
||||
self.as_wtf8()
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<str> for CachedPyStrRef {
|
||||
impl AsRef<Wtf8> for CachedPyStrRef {
|
||||
#[inline]
|
||||
fn as_ref(&self) -> &str {
|
||||
self.as_str()
|
||||
fn as_ref(&self) -> &Wtf8 {
|
||||
self.as_wtf8()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -121,8 +123,8 @@ impl CachedPyStrRef {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_str(&self) -> &str {
|
||||
self.inner.as_str()
|
||||
fn as_wtf8(&self) -> &Wtf8 {
|
||||
self.inner.as_wtf8()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -209,6 +211,8 @@ impl<T: PyPayload> ToPyObject for &'static PyInterned<T> {
|
||||
}
|
||||
|
||||
mod sealed {
|
||||
use rustpython_common::wtf8::{Wtf8, Wtf8Buf};
|
||||
|
||||
use crate::{
|
||||
builtins::PyStr,
|
||||
object::{Py, PyExact, PyRefExact},
|
||||
@@ -218,11 +222,14 @@ mod sealed {
|
||||
|
||||
impl SealedInternable for String {}
|
||||
impl SealedInternable for &str {}
|
||||
impl SealedInternable for Wtf8Buf {}
|
||||
impl SealedInternable for &Wtf8 {}
|
||||
impl SealedInternable for PyRefExact<PyStr> {}
|
||||
|
||||
pub trait SealedMaybeInterned {}
|
||||
|
||||
impl SealedMaybeInterned for str {}
|
||||
impl SealedMaybeInterned for Wtf8 {}
|
||||
impl SealedMaybeInterned for PyExact<PyStr> {}
|
||||
impl SealedMaybeInterned for Py<PyStr> {}
|
||||
}
|
||||
@@ -250,6 +257,21 @@ impl InternableString for &str {
|
||||
}
|
||||
}
|
||||
|
||||
/// Makes an owned `Wtf8Buf` usable as an internable string; its interned form is `Wtf8`.
impl InternableString for Wtf8Buf {
|
||||
type Interned = Wtf8;
|
||||
/// Consumes the buffer, creates a new `PyStr` object typed as `str_type`,
/// and wraps the resulting reference as `PyRefExact<PyStr>`.
fn into_pyref_exact(self, str_type: PyTypeRef) -> PyRefExact<PyStr> {
|
||||
let obj = PyRef::new_ref(PyStr::from(self), str_type, None);
|
||||
// NOTE(review): `new_unchecked` performs no exactness check here; this presumably relies
// on callers passing the exact `str` type as `str_type` — confirm against call sites.
unsafe { PyRefExact::new_unchecked(obj) }
|
||||
}
|
||||
}
|
||||
|
||||
/// Makes a borrowed `&Wtf8` usable as an internable string; its interned form is `Wtf8`.
impl InternableString for &Wtf8 {
|
||||
type Interned = Wtf8;
|
||||
/// Copies the borrowed data with `to_owned()` and delegates to the owned value's
/// `into_pyref_exact`, forwarding `str_type` unchanged.
fn into_pyref_exact(self, str_type: PyTypeRef) -> PyRefExact<PyStr> {
|
||||
self.to_owned().into_pyref_exact(str_type)
|
||||
}
|
||||
}
|
||||
|
||||
impl InternableString for PyRefExact<PyStr> {
|
||||
type Interned = Py<PyStr>;
|
||||
#[inline]
|
||||
@@ -259,7 +281,7 @@ impl InternableString for PyRefExact<PyStr> {
|
||||
}
|
||||
|
||||
pub trait MaybeInternedString:
|
||||
AsRef<str> + crate::dictdatatype::DictKey + sealed::SealedMaybeInterned
|
||||
AsRef<Wtf8> + crate::dictdatatype::DictKey + sealed::SealedMaybeInterned
|
||||
{
|
||||
fn as_interned(&self) -> Option<&'static PyStrInterned>;
|
||||
}
|
||||
@@ -271,6 +293,13 @@ impl MaybeInternedString for str {
|
||||
}
|
||||
}
|
||||
|
||||
/// `MaybeInternedString` for a plain `Wtf8` slice.
impl MaybeInternedString for Wtf8 {
|
||||
/// Always returns `None`: this impl never reports an interned object for a bare slice.
#[inline(always)]
|
||||
fn as_interned(&self) -> Option<&'static PyStrInterned> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl MaybeInternedString for PyExact<PyStr> {
|
||||
#[inline(always)]
|
||||
fn as_interned(&self) -> Option<&'static PyStrInterned> {
|
||||
@@ -296,7 +325,7 @@ impl PyObject {
|
||||
if self.is_interned() {
|
||||
s.unwrap().as_interned()
|
||||
} else if let Some(s) = s {
|
||||
vm.ctx.interned_str(s.as_str())
|
||||
vm.ctx.interned_str(s.as_wtf8())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ mod decl {
|
||||
PyBool, PyByteArray, PyBytes, PyCode, PyComplex, PyDict, PyEllipsis, PyFloat,
|
||||
PyFrozenSet, PyInt, PyList, PyNone, PySet, PyStopIteration, PyStr, PyTuple,
|
||||
},
|
||||
common::wtf8::Wtf8,
|
||||
convert::ToPyObject,
|
||||
function::{ArgBytesLike, OptionalArg},
|
||||
object::AsObject,
|
||||
@@ -53,7 +54,7 @@ mod decl {
|
||||
f(Complex(pycomplex.to_complex64()))
|
||||
}
|
||||
ref pystr @ PyStr => {
|
||||
f(Str(pystr.as_str()))
|
||||
f(Str(pystr.as_wtf8()))
|
||||
}
|
||||
ref pylist @ PyList => {
|
||||
f(List(&pylist.borrow_vec()))
|
||||
@@ -139,7 +140,7 @@ mod decl {
|
||||
/// Creates a complex object through the context's `new_complex` (reached via `self.0.ctx`)
/// and converts it into `Self::Value` with `.into()`.
fn make_complex(&self, value: Complex64) -> Self::Value {
|
||||
self.0.ctx.new_complex(value).into()
|
||||
}
|
||||
fn make_str(&self, value: &str) -> Self::Value {
|
||||
fn make_str(&self, value: &Wtf8) -> Self::Value {
|
||||
self.0.ctx.new_str(value).into()
|
||||
}
|
||||
fn make_bytes(&self, value: &[u8]) -> Self::Value {
|
||||
|
||||
Reference in New Issue
Block a user