Split out wtf8 into its own crate

This commit is contained in:
Noa
2025-03-27 00:08:29 -05:00
committed by Jeong, YunWon
parent 6b72d2ef5d
commit 030243a6f9
17 changed files with 74 additions and 14 deletions

17
Cargo.lock generated
View File

@@ -2327,9 +2327,9 @@ dependencies = [
"ruff_python_parser",
"ruff_source_file",
"ruff_text_size",
"rustpython-common",
"rustpython-compiler-core",
"rustpython-compiler-source",
"rustpython-wtf8",
"thiserror 2.0.11",
"unicode_names2",
]
@@ -2355,6 +2355,7 @@ dependencies = [
"radium",
"rand 0.9.0",
"rustpython-literal",
"rustpython-wtf8",
"siphasher 0.3.11",
"unicode_names2",
"volatile",
@@ -2388,7 +2389,7 @@ dependencies = [
"ruff_python_ast",
"ruff_python_parser",
"ruff_source_file",
"rustpython-common",
"rustpython-wtf8",
"serde",
]
@@ -2476,7 +2477,7 @@ dependencies = [
"criterion",
"num_enum",
"optional",
"rustpython-common",
"rustpython-wtf8",
]
[[package]]
@@ -2634,6 +2635,16 @@ dependencies = [
"winreg",
]
[[package]]
name = "rustpython-wtf8"
version = "0.4.0"
dependencies = [
"ascii",
"bstr",
"itertools 0.14.0",
"memchr",
]
[[package]]
name = "rustpython_wasm"
version = "0.4.0"

View File

@@ -115,7 +115,7 @@ template = "installer-config/installer.wxs"
resolver = "2"
members = [
"compiler", "compiler/core", "compiler/codegen", "compiler/literal", "compiler/source",
".", "common", "derive", "jit", "vm", "vm/sre_engine", "pylib", "stdlib", "derive-impl",
".", "common", "derive", "jit", "vm", "vm/sre_engine", "pylib", "stdlib", "derive-impl", "wtf8",
"wasm/lib",
]
@@ -141,6 +141,7 @@ rustpython-vm = { path = "vm", default-features = false, version = "0.4.0" }
rustpython-pylib = { path = "pylib", version = "0.4.0" }
rustpython-stdlib = { path = "stdlib", default-features = false, version = "0.4.0" }
rustpython-sre_engine = { path = "vm/sre_engine", version = "0.4.0" }
rustpython-wtf8 = { path = "wtf8", version = "0.4.0" }
rustpython-doc = { git = "https://github.com/RustPython/__doc__", tag = "0.3.0", version = "0.3.0" }
ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", tag = "0.11.0" }

View File

@@ -13,6 +13,7 @@ threading = ["parking_lot"]
[dependencies]
rustpython-literal = { workspace = true }
rustpython-wtf8 = { workspace = true }
ascii = { workspace = true }
bitflags = { workspace = true }

View File

@@ -29,7 +29,8 @@ pub mod static_cell;
pub mod str;
#[cfg(windows)]
pub mod windows;
pub mod wtf8;
pub use rustpython_wtf8 as wtf8;
pub mod vendored {
pub use ascii;

View File

@@ -11,10 +11,10 @@ license.workspace = true
[dependencies]
# rustpython-ast = { workspace = true, features=["unparse", "constant-optimization"] }
rustpython-common = { workspace = true }
# rustpython-parser-core = { workspace = true }
rustpython-compiler-core = { workspace = true }
rustpython-compiler-source = {workspace = true }
rustpython-wtf8 = { workspace = true }
ruff_python_parser = { workspace = true }
ruff_python_ast = { workspace = true }
ruff_text_size = { workspace = true }

View File

@@ -28,7 +28,7 @@ use ruff_python_ast::{
};
use ruff_source_file::OneIndexed;
use ruff_text_size::{Ranged, TextRange};
use rustpython_common::wtf8::Wtf8Buf;
use rustpython_wtf8::Wtf8Buf;
// use rustpython_ast::located::{self as located_ast, Located};
use rustpython_compiler_core::{
Mode,
@@ -3529,7 +3529,7 @@ impl EmitArg<bytecode::Label> for ir::BlockIdx {
/// The code has been ported from `_PyCompile_CleanDoc` in cpython.
/// `inspect.cleandoc` is also a good reference, but has a few incompatibilities.
fn clean_doc(doc: &str) -> String {
let doc = rustpython_common::str::expandtabs(doc, 8);
let doc = expandtabs(doc, 8);
// First pass: find minimum indentation of any non-blank lines
// after first line.
let margin = doc
@@ -3558,6 +3558,37 @@ fn clean_doc(doc: &str) -> String {
}
}
/// Expands every tab in `input` into spaces up to the next multiple of `tab_size`.
///
/// Columns are counted in `char`s and restart at zero after a `'\r'` or `'\n'`.
/// A `tab_size` of zero gives tabs no width, so they are simply dropped rather than
/// underflowing the column arithmetic.
///
/// Adapted from `rustpython_common::str`, so we don't have to depend on it just for this
/// function.
fn expandtabs(input: &str, tab_size: usize) -> String {
    let mut expanded = String::with_capacity(input.len());
    let mut column = 0usize;
    for ch in input.chars() {
        match ch {
            '\t' => {
                // `checked_rem` is `None` only for a zero tab size, where a tab has no width.
                let padding = column
                    .checked_rem(tab_size)
                    .map_or(0, |used| tab_size - used);
                expanded.extend(std::iter::repeat(' ').take(padding));
                column += padding;
            }
            '\r' | '\n' => {
                expanded.push(ch);
                column = 0;
            }
            _ => {
                expanded.push(ch);
                column += 1;
            }
        }
    }
    expanded
}
fn split_doc<'a>(body: &'a [Stmt], opts: &CompileOpts) -> (Option<String>, &'a [Stmt]) {
if let Some((Stmt::Expr(expr), body_rest)) = body.split_first() {
let doc_comment = match &*expr.value {

View File

@@ -8,7 +8,7 @@
use std::convert::Infallible;
use ruff_python_ast::{AnyStringFlags, StringFlags};
use rustpython_common::wtf8::{CodePoint, Wtf8, Wtf8Buf};
use rustpython_wtf8::{CodePoint, Wtf8, Wtf8Buf};
// use ruff_python_parser::{LexicalError, LexicalErrorType};
type LexicalError = Infallible;

View File

@@ -13,7 +13,7 @@ license.workspace = true
ruff_python_ast = { workspace = true }
ruff_python_parser = { workspace = true }
ruff_source_file = { workspace = true }
rustpython-common = { workspace = true }
rustpython-wtf8 = { workspace = true }
bitflags = { workspace = true }
itertools = { workspace = true }

View File

@@ -8,7 +8,7 @@ use num_complex::Complex64;
pub use ruff_python_ast::ConversionFlag;
// use rustpython_parser_core::source_code::{OneIndexed, SourceLocation};
use ruff_source_file::{OneIndexed, SourceLocation};
use rustpython_common::wtf8::{Wtf8, Wtf8Buf};
use rustpython_wtf8::{Wtf8, Wtf8Buf};
use std::marker::PhantomData;
use std::{collections::BTreeSet, fmt, hash, mem};

View File

@@ -2,7 +2,7 @@ use crate::bytecode::*;
use malachite_bigint::{BigInt, Sign};
use num_complex::Complex64;
use ruff_source_file::{OneIndexed, SourceLocation};
use rustpython_common::wtf8::Wtf8;
use rustpython_wtf8::Wtf8;
use std::convert::Infallible;
pub const FORMAT_VERSION: u32 = 4;

View File

@@ -15,7 +15,7 @@ name = "benches"
harness = false
[dependencies]
rustpython-common = { workspace = true }
rustpython-wtf8 = { workspace = true }
num_enum = { workspace = true }
bitflags = { workspace = true }
optional = { workspace = true }

View File

@@ -1,4 +1,4 @@
use rustpython_common::wtf8::Wtf8;
use rustpython_wtf8::Wtf8;
#[derive(Debug, Clone, Copy)]
pub struct StringCursor {

15
wtf8/Cargo.toml Normal file
View File

@@ -0,0 +1,15 @@
[package]
name = "rustpython-wtf8"
description = "An implementation of WTF-8 for use in RustPython"
version.workspace = true
authors.workspace = true
edition.workspace = true
rust-version.workspace = true
repository.workspace = true
license.workspace = true
[dependencies]
ascii = { workspace = true }
bstr = { workspace = true }
itertools = { workspace = true }
memchr = { workspace = true }