Compare commits

..

10 Commits

Author SHA1 Message Date
Jeong, YunWon
18657330c9 Drop old PyObjectRef outside type lock to prevent deadlock
Dropping values inside with_type_lock can trigger weakref callbacks,
which may access attributes (LOAD_ATTR specialization) and re-acquire
the non-reentrant type mutex, causing deadlock.

Return old values from lock closures so they drop after lock release.
2026-03-21 00:39:45 +09:00
Jeong, YunWon
ea2d66e799 type lock 2026-03-20 22:47:55 +09:00
Jeong, YunWon
38de7462c0 Fix Constants newtype usage in init_cleanup_code 2026-03-20 22:47:55 +09:00
Jeong, YunWon
cb2db07463 Extract datastack_frame_size_bytes_for_code, skip monitoring for init_cleanup frames, guard trace dispatch
- Extract datastack_frame_size_bytes_for_code as free function, use it
  to compute init_cleanup stack bytes instead of hardcoded constant
- Add monitoring_disabled_for_code to skip instrumentation for
  synthetic init_cleanup code object in RESUME and execute_instrumented
- Add is_trace_event guard so profile-only events skip trace_func dispatch
- Reformat core.rs (rustfmt)
2026-03-20 22:47:55 +09:00
Jeong, YunWon
76e6ece941 address review: check datastack space for extra_bytes, require CO_OPTIMIZED in vectorcall fast path 2026-03-20 22:47:55 +09:00
Jeong, YunWon
fb0dfa102c address review: invalidate init cache on type modification, add cspell words 2026-03-20 22:47:55 +09:00
Jeong, YunWon
9df4787aed Align call-init frame flow and spec cache atomic ordering 2026-03-20 22:47:55 +09:00
Jeong, YunWon
e19335e8f2 Tighten CALL_ALLOC_AND_ENTER_INIT stack-space guard 2026-03-20 22:47:55 +09:00
Jeong, YunWon
b3daabf169 Align type _spec_cache and latin1 singleton string paths 2026-03-20 22:47:55 +09:00
Jeong, YunWon
471fe551fa Align BINARY_OP_EXTEND with CPython descriptor cache model 2026-03-20 22:47:55 +09:00
125 changed files with 3181 additions and 9675 deletions

View File

@@ -109,7 +109,6 @@ lineiterator
linetable
loadfast
localsplus
localspluskinds
Lshift
lsprof
MAXBLOCKS

View File

@@ -61,6 +61,9 @@
"dedents",
"deduped",
"deoptimize",
"downcastable",
"downcasted",
"dumpable",
"emscripten",
"excs",
"interps",
@@ -70,7 +73,6 @@
"lossily",
"mcache",
"oparg",
"opargs",
"pyc",
"significand",
"summands",

View File

@@ -5,11 +5,6 @@ updates:
directory: /
schedule:
interval: weekly
cooldown:
default-days: 7
semver-major-days: 30
semver-minor-days: 7
semver-patch-days: 3
groups:
criterion:
patterns:
@@ -148,20 +143,7 @@ updates:
directory: /
schedule:
interval: weekly
cooldown:
default-days: 7
- package-ecosystem: npm
directory: /
schedule:
interval: weekly
cooldown:
default-days: 7
semver-major-days: 30
semver-minor-days: 7
semver-patch-days: 3
- package-ecosystem: pre-commit
directory: /
schedule:
interval: weekly
cooldown:
default-days: 7

View File

@@ -8,14 +8,11 @@ on:
name: CI
permissions:
contents: read
# Cancel previous runs of the same workflow on the same ref (branch/tag)
# with the same event (push/pull_request), even if they are still in progress.
# This setting helps reduce the number of duplicated workflow runs.
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.event.pull_request.number || github.sha }}
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
cancel-in-progress: true
env:
@@ -30,8 +27,6 @@ env:
PYTHON_VERSION: "3.14.3"
X86_64_PC_WINDOWS_MSVC_OPENSSL_LIB_DIR: C:\Program Files\OpenSSL\lib\VC\x64\MD
X86_64_PC_WINDOWS_MSVC_OPENSSL_INCLUDE_DIR: C:\Program Files\OpenSSL\include
CARGO_INCREMENTAL: 0
CARGO_TERM_COLOR: always
jobs:
rust_tests:
@@ -70,9 +65,6 @@ jobs:
- name: check compilation without threading
run: cargo check ${{ env.CARGO_ARGS }}
- run: cargo doc --locked
if: runner.os == 'Linux'
- name: check compilation without host_env (sandbox mode)
run: |
cargo check -p rustpython-vm --no-default-features --features compiler
@@ -112,39 +104,41 @@ jobs:
cargo_check:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }}
name: cargo check
name: Ensure compilation on various targets
runs-on: ${{ matrix.os }}
strategy:
matrix:
include:
- os: ubuntu-latest
target: aarch64-linux-android
- os: ubuntu-latest
target: i686-unknown-linux-gnu
targets:
- aarch64-linux-android
- i686-unknown-linux-gnu
- i686-unknown-linux-musl
- wasm32-wasip2
- x86_64-unknown-freebsd
dependencies:
gcc-multilib: true
- os: ubuntu-latest
target: i686-unknown-linux-musl
dependencies:
musl-tools: true
- os: ubuntu-latest
target: wasm32-wasip2
- os: ubuntu-latest
target: x86_64-unknown-freebsd
- os: ubuntu-latest
target: aarch64-unknown-linux-gnu
targets:
- aarch64-unknown-linux-gnu
dependencies:
gcc-aarch64-linux-gnu: true
gcc-aarch64-linux-gnu: true # conflict with `gcc-multilib`
- os: macos-latest
target: aarch64-apple-ios
- os: macos-latest
target: x86_64-apple-darwin
targets:
- aarch64-apple-ios
- x86_64-apple-darwin
fail-fast: false
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
with:
prefix-key: v0-rust-${{ join(matrix.targets, '-') }}
save-if: ${{ github.ref == 'refs/heads/main' }}
- name: Install dependencies
uses: ./.github/actions/install-linux-deps
# zizmor has an issue with dynamic `with`
@@ -154,27 +148,12 @@ jobs:
musl-tools: ${{ matrix.dependencies.musl-tools || false }}
gcc-aarch64-linux-gnu: ${{ matrix.dependencies.gcc-aarch64-linux-gnu || false }}
- name: Restore cache
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
if: ${{ github.ref != 'refs/heads/main' }} # Never restore on main
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
target/
# key won't match, will rely on restore-keys
key: cargo-check-${{ runner.os }}-${{ matrix.target }}
restore-keys: |
cargo-check-${{ runner.os }}-${{ matrix.target }}-
- uses: dtolnay/rust-toolchain@stable
with:
target: ${{ matrix.target }}
targets: ${{ join(matrix.targets, ',') }}
- name: Setup Android NDK
if: ${{ matrix.target == 'aarch64-linux-android' }}
if: ${{ contains(matrix.targets, 'aarch64-linux-android') }}
id: setup-ndk
uses: nttld/setup-ndk@v1
with:
@@ -190,24 +169,18 @@ jobs:
# args: --ignore-rust-version
- name: Check compilation
run: cargo check --target "${{ matrix.target }}" ${{ env.CARGO_ARGS_NO_SSL }}
run: |
for target in ${{ join(matrix.targets, ' ') }}
do
echo "::group::${target}"
cargo check --target $target ${{ env.CARGO_ARGS_NO_SSL }}
echo "::endgroup::"
done
env:
CC_aarch64_linux_android: ${{ steps.setup-ndk.outputs.ndk-path }}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang
AR_aarch64_linux_android: ${{ steps.setup-ndk.outputs.ndk-path }}/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-ar
CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER: ${{ steps.setup-ndk.outputs.ndk-path }}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang
- name: Save cache
if: ${{ github.ref == 'refs/heads/main' }} # only save on main
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
target/
key: cargo-check-${{ runner.os }}-${{ matrix.target }}-${{ hashFiles('**/Cargo.toml') }}-${{ hashFiles('Cargo.lock') }}-${{ github.sha }}
snippets_cpython:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }}
env:
@@ -215,7 +188,6 @@ jobs:
# Tests that can be flaky when running with multiple processes `-j 2`. We will use `-j 1` for these.
FLAKY_MP_TESTS: >-
test_class
test_concurrent_futures
test_eintr
test_multiprocessing_fork
test_multiprocessing_forkserver
@@ -258,7 +230,7 @@ jobs:
with:
save-if: ${{ github.ref == 'refs/heads/main' }}
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
- uses: actions/setup-python@v6.2.0
with:
python-version: ${{ env.PYTHON_VERSION }}
@@ -279,7 +251,7 @@ jobs:
shell: bash
run: |
cores=$(python -c 'print(__import__("os").process_cpu_count())')
echo "cores=${cores}" >> "$GITHUB_OUTPUT"
echo "cores=${cores}" >> $GITHUB_OUTPUT
- name: Run CPython tests
run: |
@@ -298,32 +270,28 @@ jobs:
- name: run cpython tests to check if env polluters have stopped polluting
shell: bash
run: |
IFS=' ' read -r -a target_array <<< "$TARGETS"
for thing in "${target_array[@]}"; do
for thing in ${{ join(matrix.env_polluting_tests, ' ') }}; do
for i in $(seq 1 10); do
set +e
target/release/rustpython -m test -j 1 --slowest --fail-env-changed --timeout 600 -v "${thing}"
target/release/rustpython -m test -j 1 --slowest --fail-env-changed --timeout 600 -v ${thing}
exit_code=$?
set -e
if [ "${exit_code}" -eq 3 ]; then
if [ ${exit_code} -eq 3 ]; then
echo "Test ${thing} polluted the environment on attempt ${i}."
break
fi
done
if [ "${exit_code}" -ne 3 ]; then
if [ ${exit_code} -ne 3 ]; then
echo "Test ${thing} is no longer polluting the environment after ${i} attempts!"
echo "Please remove ${thing} from matrix.env_polluting_tests in '.github/workflows/ci.yaml'."
echo "Please also remove the skip decorators that include the word 'POLLUTERS' in ${thing}."
if [ "${exit_code}" -ne 0 ]; then
if [ ${exit_code} -ne 0 ]; then
echo "Test ${thing} failed with exit code ${exit_code}."
echo "Please investigate which test item in ${thing} is failing and either mark it as an expected failure or a skip."
fi
exit 1
fi
done
env:
TARGETS: ${{ join(matrix.env_polluting_tests, ' ') }}
timeout-minutes: 15
- if: runner.os != 'Windows'
@@ -349,67 +317,63 @@ jobs:
run: python -I scripts/whats_left.py ${{ env.CARGO_ARGS }} --features jit
lint:
name: Lint
name: Lint Rust & Python code
runs-on: ubuntu-latest
permissions:
contents: read
checks: write
pull-requests: write
security-events: write # for zizmor
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
- uses: actions/setup-python@v6.2.0
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Check for redundant test patches
run: python scripts/check_redundant_patches.py
- uses: dtolnay/rust-toolchain@stable
with:
components: rustfmt
components: clippy
- uses: cargo-bins/cargo-binstall@113a77a4ce971c41332f2129c3d995df993cf746 # v1.17.8
- name: run clippy on wasm
run: cargo clippy --manifest-path=crates/wasm/Cargo.toml -- -Dwarnings
- name: cargo shear
- name: Ensure docs generate no warnings
run: cargo doc --locked
- name: Ensure Lib/_opcode_metadata is updated
run: |
cargo binstall --no-confirm cargo-shear
cargo shear
python scripts/generate_opcode_metadata.py
if [ -n "$(git status --porcelain)" ]; then
exit 1
fi
- name: actionlint
uses: reviewdog/action-actionlint@0d952c597ef8459f634d7145b0b044a9699e5e43 # v1.71.0
- name: zizmor
uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2
- name: restore prek cache
if: ${{ github.ref != 'refs/heads/main' }} # never restore on main
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
- name: Install ruff
uses: astral-sh/ruff-action@4919ec5cf1f49eff0871dbcea0da843445b837e6 # v3.6.1
with:
key: prek-${{ hashFiles('.pre-commit-config.yaml') }}
path: ~/.cache/prek
version: "0.15.5"
args: "--version"
- name: prek
id: prek
uses: j178/prek-action@53276d8b0d10f8b6672aa85b4588c6921d0370cc # v2.0.1
with:
cache: false
show-verbose-logs: false
continue-on-error: true
- run: ruff check --diff
- name: save prek cache
if: ${{ github.ref == 'refs/heads/main' }} # only save on main
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
with:
key: prek-${{ hashFiles('.pre-commit-config.yaml') }}
path: ~/.cache/prek
- run: ruff format --check
- name: reviewdog
uses: reviewdog/action-suggester@aa38384ceb608d00f84b4690cacc83a5aba307ff # 1.24.0
- name: install prettier
run: |
yarn global add prettier
yarn global bin >> "$GITHUB_PATH"
- name: check wasm code with prettier
# prettier doesn't handle ignore files very well: https://github.com/prettier/prettier/issues/8506
run: cd wasm && git ls-files -z | xargs -0 prettier --check -u
# Keep the cspell check as the last step. This is an optional test.
- name: install extra dictionaries
run: npm install @cspell/dict-en_us @cspell/dict-cpp @cspell/dict-python @cspell/dict-rust @cspell/dict-win32 @cspell/dict-shell
- name: spell checker
uses: streetsidesoftware/cspell-action@v8
with:
level: warning
fail_level: error
cleanup: false
files: "**/*.rs"
incremental_files_only: true
miri:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }}
@@ -450,16 +414,11 @@ jobs:
persist-credentials: false
- uses: dtolnay/rust-toolchain@stable
with:
components: clippy
- uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
with:
save-if: ${{ github.ref == 'refs/heads/main' }}
- name: cargo clippy
run: cargo clippy --manifest-path=crates/wasm/Cargo.toml -- -Dwarnings
- name: install wasm-pack
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
- name: install geckodriver
@@ -467,14 +426,12 @@ jobs:
wget https://github.com/mozilla/geckodriver/releases/download/v0.36.0/geckodriver-v0.36.0-linux64.tar.gz
mkdir geckodriver
tar -xzf geckodriver-v0.36.0-linux64.tar.gz -C geckodriver
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
- uses: actions/setup-python@v6.2.0
with:
python-version: ${{ env.PYTHON_VERSION }}
- run: python -m pip install -r requirements.txt
working-directory: ./wasm/tests
- uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
- uses: actions/setup-node@v6
with:
cache: "npm"
cache-dependency-path: "wasm/demo/package-lock.json"
@@ -545,6 +502,32 @@ jobs:
- name: build rustpython
run: cargo build --release --target wasm32-wasip1 --features freeze-stdlib,stdlib --verbose
- name: run snippets
run: wasmer run --dir "$(pwd)" target/wasm32-wasip1/release/rustpython.wasm -- "$(pwd)/extra_tests/snippets/stdlib_random.py"
run: wasmer run --dir $(pwd) target/wasm32-wasip1/release/rustpython.wasm -- "$(pwd)/extra_tests/snippets/stdlib_random.py"
- name: run cpython unittest
run: wasmer run --dir "$(pwd)" target/wasm32-wasip1/release/rustpython.wasm -- "$(pwd)/Lib/test/test_int.py"
run: wasmer run --dir $(pwd) target/wasm32-wasip1/release/rustpython.wasm -- "$(pwd)/Lib/test/test_int.py"
cargo-shear:
name: cargo shear
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: cargo-bins/cargo-binstall@1800853f2578f8c34492ec76154caef8e163fbca # v1.17.7
- run: cargo binstall --no-confirm cargo-shear
- run: cargo shear
security-lint:
runs-on: ubuntu-latest
permissions:
security-events: write
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Run zizmor
uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2

View File

@@ -18,6 +18,4 @@ jobs:
steps:
# Using REST API and not `gh issue edit`. https://github.com/cli/cli/issues/6235#issuecomment-1243487651
- run: |
curl -H "Authorization: token ${{ github.token }}" -d '{"assignees": ["${{ env.USER }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
env:
USER: ${{ github.event.comment.user.login }}
curl -H "Authorization: token ${{ github.token }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees

View File

@@ -7,7 +7,7 @@ on:
- "Lib/**"
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.event.pull_request.number }}
group: lib-deps-${{ github.event.pull_request.number }}
cancel-in-progress: true
env:
@@ -74,7 +74,7 @@ jobs:
- name: Setup Python
if: steps.changed-files.outputs.modules != ''
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
uses: actions/setup-python@v6.2.0
with:
python-version: "${{ env.PYTHON_VERSION }}"
@@ -83,15 +83,22 @@ jobs:
id: deps-check
run: |
# Run deps for all modules at once
echo "deps_output<<EOF" >> "$GITHUB_OUTPUT"
output=$(python scripts/update_lib deps "${MODULES}" --depth 2 2>&1 || true)
echo "$output" >> "$GITHUB_OUTPUT"
echo "EOF" >> "$GITHUB_OUTPUT"
env:
MODULES: ${{ steps.changed-files.outputs.modules }}
python scripts/update_lib deps ${{ steps.changed-files.outputs.modules }} --depth 2 > /tmp/deps_output.txt 2>&1 || true
# Read output for GitHub Actions
echo "deps_output<<EOF" >> $GITHUB_OUTPUT
cat /tmp/deps_output.txt >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
# Check if there's any meaningful output
if [ -s /tmp/deps_output.txt ]; then
echo "has_output=true" >> $GITHUB_OUTPUT
else
echo "has_output=false" >> $GITHUB_OUTPUT
fi
- name: Post comment
if: steps.deps-check.outputs.deps_output != ''
if: steps.deps-check.outputs.has_output == 'true'
uses: marocchino/sticky-pull-request-comment@v3
with:
header: lib-deps-check

74
.github/workflows/pr-format.yaml vendored Normal file
View File

@@ -0,0 +1,74 @@
name: Format Check
# This workflow triggers when a PR is opened/updated
# Posts inline suggestion comments instead of auto-committing
on:
pull_request:
types: [opened, synchronize, reopened]
branches:
- main
- release
concurrency:
group: format-check-${{ github.event.pull_request.number }}
cancel-in-progress: true
env:
PYTHON_VERSION: "3.14.3"
jobs:
format_check:
permissions:
contents: read
pull-requests: write
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: reviewdog/action-actionlint@0d952c597ef8459f634d7145b0b044a9699e5e43 # v1.71.0
- name: Setup Rust
uses: dtolnay/rust-toolchain@stable
with:
components: rustfmt
- name: Run cargo fmt
run: cargo fmt --all
- name: Install ruff
uses: astral-sh/ruff-action@4919ec5cf1f49eff0871dbcea0da843445b837e6 # v3.6.1
with:
version: "0.15.4"
args: "--version"
- name: Run ruff format
run: ruff format
- name: Run ruff check import sorting
run: ruff check --select I --fix
- uses: actions/setup-python@v6.2.0
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Run generate_opcode_metadata.py
run: python scripts/generate_opcode_metadata.py
- name: Check for formatting changes
run: |
if ! git diff --exit-code; then
echo "::error::Formatting changes detected. Please run 'cargo fmt --all', 'ruff format', and 'ruff check --select I --fix' locally."
exit 1
fi
- name: Post formatting suggestions
if: failure()
uses: reviewdog/action-suggester@v1
with:
tool_name: auto-format
github_token: ${{ secrets.GITHUB_TOKEN }}
level: warning
filter_mode: diff_context

View File

@@ -12,40 +12,44 @@ on:
required: false
default: true
permissions:
contents: write
env:
CARGO_ARGS: --no-default-features --features stdlib,importlib,encodings,sqlite,ssl
X86_64_PC_WINDOWS_MSVC_OPENSSL_LIB_DIR: C:\Program Files\OpenSSL\lib\VC\x64\MD
X86_64_PC_WINDOWS_MSVC_OPENSSL_INCLUDE_DIR: C:\Program Files\OpenSSL\include
jobs:
build:
runs-on: ${{ matrix.os }}
runs-on: ${{ matrix.platform.runner }}
# Disable this scheduled job when running on a fork.
if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }}
strategy:
matrix:
include:
- os: ubuntu-latest
platform:
- runner: ubuntu-latest
target: x86_64-unknown-linux-gnu
- os: macos-latest
# - runner: ubuntu-latest
# target: i686-unknown-linux-gnu
# - runner: ubuntu-latest
# target: aarch64-unknown-linux-gnu
# - runner: ubuntu-latest
# target: armv7-unknown-linux-gnueabi
# - runner: ubuntu-latest
# target: s390x-unknown-linux-gnu
# - runner: ubuntu-latest
# target: powerpc64le-unknown-linux-gnu
- runner: macos-latest
target: aarch64-apple-darwin
- os: windows-2025
# - runner: macos-latest
# target: x86_64-apple-darwin
- runner: windows-2025
target: x86_64-pc-windows-msvc
# - os: ubuntu-latest
# target: i686-unknown-linux-gnu
# - os: ubuntu-latest
# target: aarch64-unknown-linux-gnu
# - os: ubuntu-latest
# target: armv7-unknown-linux-gnueabi
# - os: ubuntu-latest
# target: s390x-unknown-linux-gnu
# - os: ubuntu-latest
# target: powerpc64le-unknown-linux-gnu
# - os: macos-latest
# target: x86_64-apple-darwin
# - os: windows-2025
# target: i686-pc-windows-msvc
# - os: windows-2025
# target: aarch64-pc-windows-msvc
# - runner: windows-2025
# target: i686-pc-windows-msvc
# - runner: windows-2025
# target: aarch64-pc-windows-msvc
fail-fast: false
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -53,32 +57,34 @@ jobs:
persist-credentials: false
- uses: dtolnay/rust-toolchain@stable
with:
target: ${{ matrix.target }}
- uses: cargo-bins/cargo-binstall@main
- name: Install macOS dependencies
uses: ./.github/actions/install-macos-deps
with:
autoconf: true
automake: true
libtool: true
- name: Set up Environment
shell: bash
run: rustup target add ${{ matrix.platform.target }}
- name: Set up MacOS Environment
run: brew install autoconf automake libtool
if: runner.os == 'macOS'
- name: Build RustPython
run: cargo build --release --target=${{ matrix.target }} --verbose --no-default-features --features stdlib,stdio,importlib,encodings,sqlite,host_env,ssl-rustls,threading,jit
run: cargo build --release --target=${{ matrix.platform.target }} --verbose --features=threading ${{ env.CARGO_ARGS }}
if: runner.os == 'macOS'
- name: Build RustPython
run: cargo build --release --target=${{ matrix.platform.target }} --verbose --features=threading ${{ env.CARGO_ARGS }},jit
if: runner.os != 'macOS'
- name: Rename Binary
run: cp target/${{ matrix.target }}/release/rustpython target/rustpython-release-${{ runner.os }}-${{ matrix.target }}
run: cp target/${{ matrix.platform.target }}/release/rustpython target/rustpython-release-${{ runner.os }}-${{ matrix.platform.target }}
if: runner.os != 'Windows'
- name: Rename Binary
run: cp target/${{ matrix.target }}/release/rustpython.exe target/rustpython-release-${{ runner.os }}-${{ matrix.target }}.exe
run: cp target/${{ matrix.platform.target }}/release/rustpython.exe target/rustpython-release-${{ runner.os }}-${{ matrix.platform.target }}.exe
if: runner.os == 'Windows'
- name: Upload Binary Artifacts
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
uses: actions/upload-artifact@v7.0.0
with:
name: rustpython-release-${{ runner.os }}-${{ matrix.target }}
path: target/rustpython-release-${{ runner.os }}-${{ matrix.target }}*
name: rustpython-release-${{ runner.os }}-${{ matrix.platform.target }}
path: target/rustpython-release-${{ runner.os }}-${{ matrix.platform.target }}*
build-wasm:
runs-on: ubuntu-latest
@@ -100,22 +106,16 @@ jobs:
run: cp target/wasm32-wasip1/release/rustpython.wasm target/rustpython-release-wasm32-wasip1.wasm
- name: Upload Binary Artifacts
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
uses: actions/upload-artifact@v7.0.0
with:
name: rustpython-release-wasm32-wasip1
path: target/rustpython-release-wasm32-wasip1.wasm
- name: install wasm-pack
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
- uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
with:
package-manager-cache: false
- uses: mwilliamson/setup-wabt-action@febe2a12b7ccb999a6e5d953a8362a3b7ffcf148 # v3.2.0
with:
wabt-version: "1.0.30"
- uses: actions/setup-node@v6
- uses: mwilliamson/setup-wabt-action@v3
with: { wabt-version: "1.0.30" }
- name: build demo
run: |
npm install
@@ -123,7 +123,6 @@ jobs:
env:
NODE_OPTIONS: "--openssl-legacy-provider"
working-directory: ./wasm/demo
- name: build notebook demo
run: |
npm install
@@ -132,10 +131,8 @@ jobs:
env:
NODE_OPTIONS: "--openssl-legacy-provider"
working-directory: ./wasm/notebook
- name: Deploy demo to Github Pages
if: ${{ github.repository == 'RustPython/RustPython' }}
uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0
uses: peaceiris/actions-gh-pages@v4
with:
deploy_key: ${{ secrets.ACTIONS_DEMO_DEPLOY_KEY }}
publish_dir: ./wasm/demo/dist
@@ -153,21 +150,26 @@ jobs:
persist-credentials: false
- name: Download Binary Artifacts
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
uses: actions/download-artifact@v8.0.1
with:
path: bin
pattern: rustpython-*
merge-multiple: true
- name: Create Lib Archive
run: zip -r bin/rustpython-lib.zip Lib/
run: |
zip -r bin/rustpython-lib.zip Lib/
- name: List Binaries
run: |
ls -lah bin/
file bin/*
- name: Create Release
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
tag: ${{ github.ref_name }}
run: ${{ github.run_number }}
PRE_RELEASE_INPUT: ${{ github.event.inputs.pre-release }}
run: |
if [[ "${PRE_RELEASE_INPUT}" == "false" ]]; then
RELEASE_TYPE_NAME=Release
@@ -186,8 +188,3 @@ jobs:
--generate-notes \
$PRERELEASE_ARG \
bin/rustpython-release-*
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
tag: ${{ github.ref_name }}
run: ${{ github.run_number }}
PRE_RELEASE_INPUT: ${{ github.event.inputs.pre-release }}

View File

@@ -1,6 +1,8 @@
name: Update doc DB
permissions: {}
permissions:
contents: write
pull-requests: write
on:
workflow_dispatch:
@@ -20,8 +22,6 @@ defaults:
jobs:
generate:
permissions:
contents: read
runs-on: ${{ matrix.os }}
strategy:
matrix:
@@ -54,19 +54,17 @@ jobs:
merge:
runs-on: ubuntu-latest
needs: generate
permissions:
contents: write
pull-requests: write
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: true
ref: ${{ inputs.base-ref }}
token: ${{ secrets.AUTO_COMMIT_PAT }}
- name: Create update branch
run: git switch -c "update-doc-${PYTHON_VERSION}"
env:
PYTHON_VERSION: ${{ inputs.python-version }}
run: git switch -c "update-doc-${PYTHON_VERSION}"
- name: Download generated doc DBs
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
@@ -109,7 +107,7 @@ jobs:
- name: Commit, push and create PR
env:
GH_TOKEN: ${{ github.token }}
GH_TOKEN: ${{ secrets.AUTO_COMMIT_PAT }}
PYTHON_VERSION: ${{ inputs.python-version }}
BASE_REF: ${{ inputs.base-ref }}
run: |

View File

@@ -58,7 +58,7 @@ jobs:
comment_repo: ""
steps:
- name: Setup Scripts
uses: github/gh-aw/actions/setup@48d8fdfddc8cad854ac0c70ceb573f09fb8f9c9b # v0.62.5
uses: github/gh-aw/actions/setup@08a903b1fb2e493a84a57577778fe5dd711f9468 # v0.58.3
with:
destination: /opt/gh-aw/actions
- name: Check workflow file timestamps
@@ -99,7 +99,7 @@ jobs:
secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }}
steps:
- name: Setup Scripts
uses: github/gh-aw/actions/setup@48d8fdfddc8cad854ac0c70ceb573f09fb8f9c9b # v0.62.5
uses: github/gh-aw/actions/setup@08a903b1fb2e493a84a57577778fe5dd711f9468 # v0.58.3
with:
destination: /opt/gh-aw/actions
- name: Checkout repository
@@ -114,7 +114,7 @@ jobs:
run: bash /opt/gh-aw/actions/create_gh_aw_tmp_dir.sh
# Cache configuration from frontmatter processed below
- name: Cache (cpython-lib-${{ env.PYTHON_VERSION }})
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
with:
key: cpython-lib-${{ env.PYTHON_VERSION }}
path: cpython
@@ -804,7 +804,7 @@ jobs:
total_count: ${{ steps.missing_tool.outputs.total_count }}
steps:
- name: Setup Scripts
uses: github/gh-aw/actions/setup@48d8fdfddc8cad854ac0c70ceb573f09fb8f9c9b # v0.62.5
uses: github/gh-aw/actions/setup@08a903b1fb2e493a84a57577778fe5dd711f9468 # v0.58.3
with:
destination: /opt/gh-aw/actions
- name: Download agent output artifact
@@ -925,7 +925,7 @@ jobs:
success: ${{ steps.parse_results.outputs.success }}
steps:
- name: Setup Scripts
uses: github/gh-aw/actions/setup@48d8fdfddc8cad854ac0c70ceb573f09fb8f9c9b # v0.62.5
uses: github/gh-aw/actions/setup@08a903b1fb2e493a84a57577778fe5dd711f9468 # v0.58.3
with:
destination: /opt/gh-aw/actions
- name: Download agent artifacts
@@ -1037,7 +1037,7 @@ jobs:
process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }}
steps:
- name: Setup Scripts
uses: github/gh-aw/actions/setup@48d8fdfddc8cad854ac0c70ceb573f09fb8f9c9b # v0.62.5
uses: github/gh-aw/actions/setup@08a903b1fb2e493a84a57577778fe5dd711f9468 # v0.58.3
with:
destination: /opt/gh-aw/actions
- name: Download agent output artifact

14
.github/zizmor.yml vendored
View File

@@ -1,14 +0,0 @@
rules:
unpinned-uses:
config:
policies:
# dtolnay/rust-toolchain is a trusted action that uses lightweight branch
# refs (@stable, @nightly, etc.) by design. Pinning to a hash would break
# the intended usage pattern.
# We can remove this once https://github.com/dtolnay/rust-toolchain/issues/180 is resolved
dtolnay/rust-toolchain: any
# dtolnay/rust-toolchain handles component installation, target addition, and
# override configuration beyond what a bare `rustup` invocation provides.
# See: https://github.com/zizmorcore/zizmor/issues/1817
superfluous-actions:
disable: true

View File

@@ -1,71 +0,0 @@
# NOTE: The reason for not using `prek.toml` is that Dependabot supports `pre-commit` as an ecosystem.
# See: https://github.blog/changelog/2026-03-10-dependabot-now-supports-pre-commit-hooks/
fail_fast: false
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: check-merge-conflict
priority: 0
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.7
hooks:
- id: ruff-format
priority: 0
- id: ruff-check
args: [--select, I, --fix, --exit-non-zero-on-fix]
types_or: [python]
require_serial: true
priority: 1
- repo: local
hooks:
- id: redundant-test-patches
name: check redundant test patches
entry: scripts/check_redundant_patches.py
files: '^Lib/test/.*\.py$'
language: script
types: [python]
priority: 0
- repo: local
hooks:
- id: rustfmt
name: rustfmt
entry: rustfmt
language: system
types: [rust]
priority: 0
- id: generate-opcode-metadata
name: generate opcode metadata
entry: python scripts/generate_opcode_metadata.py
files: '^(crates/compiler-core/src/bytecode/instruction\.rs|scripts/generate_opcode_metadata\.py)$'
pass_filenames: false
language: system
require_serial: true
priority: 1 # so rustfmt runs first
- repo: https://github.com/streetsidesoftware/cspell-cli
rev: v9.7.0
hooks:
- id: cspell
types: [rust]
additional_dependencies:
- '@cspell/dict-en_us'
- '@cspell/dict-cpp'
- '@cspell/dict-python'
- '@cspell/dict-rust'
- '@cspell/dict-win32'
- '@cspell/dict-shell'
priority: 0
- repo: https://github.com/rbubley/mirrors-prettier
rev: v3.8.1
hooks:
- id: prettier
files: '^wasm/.*$'
priority: 0

676
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -153,23 +153,15 @@ rustpython-vm = { path = "crates/vm", default-features = false, version = "0.5.0
rustpython-pylib = { path = "crates/pylib", version = "0.5.0" }
rustpython-stdlib = { path = "crates/stdlib", default-features = false, version = "0.5.0" }
rustpython-sre_engine = { path = "crates/sre_engine", version = "0.5.0" }
rustpython-unicode = { path = "crates/unicode", default-features = false, version = "0.5.0" }
rustpython-wtf8 = { path = "crates/wtf8", version = "0.5.0" }
rustpython-doc = { path = "crates/doc", version = "0.5.0" }
# Use RustPython-packaged Ruff crates from the published fork while keeping
# existing crate names in the codebase.
ruff_python_parser = { package = "rustpython-ruff_python_parser", version = "0.15.8" }
ruff_python_ast = { package = "rustpython-ruff_python_ast", version = "0.15.8" }
ruff_text_size = { package = "rustpython-ruff_text_size", version = "0.15.8" }
ruff_source_file = { package = "rustpython-ruff_source_file", version = "0.15.8" }
# To update ruff crates, comment out the above lines and uncomment the following lines to pull directly from the Ruff repository at the specified commit hash.
# Ruff tag 0.15.8 is based on commit c2a8815842f9dc5d24ec19385eae0f1a7188b0d9
# Ruff tag 0.15.6 is based on commit e4c7f357777a2fdd34dbe6a98b1b7d3e7488f675
# at the time of this capture. We use the commit hash to ensure reproducible builds.
# ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", rev = "c2a8815842f9dc5d24ec19385eae0f1a7188b0d9" }
# ruff_python_ast = { git = "https://github.com/astral-sh/ruff.git", rev = "c2a8815842f9dc5d24ec19385eae0f1a7188b0d9" }
# ruff_text_size = { git = "https://github.com/astral-sh/ruff.git", rev = "c2a8815842f9dc5d24ec19385eae0f1a7188b0d9" }
# ruff_source_file = { git = "https://github.com/astral-sh/ruff.git", rev = "c2a8815842f9dc5d24ec19385eae0f1a7188b0d9" }
ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", rev = "e4c7f357777a2fdd34dbe6a98b1b7d3e7488f675" }
ruff_python_ast = { git = "https://github.com/astral-sh/ruff.git", rev = "e4c7f357777a2fdd34dbe6a98b1b7d3e7488f675" }
ruff_text_size = { git = "https://github.com/astral-sh/ruff.git", rev = "e4c7f357777a2fdd34dbe6a98b1b7d3e7488f675" }
ruff_source_file = { git = "https://github.com/astral-sh/ruff.git", rev = "e4c7f357777a2fdd34dbe6a98b1b7d3e7488f675" }
phf = { version = "0.13.1", default-features = false, features = ["macros"]}
ahash = "0.8.12"
@@ -215,19 +207,23 @@ rand_core = { version = "0.9", features = ["os_rng"] }
rustix = { version = "1.1", features = ["event"] }
rustyline = "17.0.1"
serde = { package = "serde_core", version = "1.0.225", default-features = false, features = ["alloc"] }
schannel = "0.1.29"
schannel = "0.1.28"
scoped-tls = "1"
scopeguard = "1"
static_assertions = "1.1"
strum = "0.28"
strum = "0.27"
strum_macros = "0.28"
syn = "2"
thiserror = "2.0"
icu_properties = "2"
icu_normalizer = "2"
unicode-casing = "0.1.1"
unic-char-property = "0.9.0"
unic-normal = "0.9.0"
unic-ucd-age = "0.9.0"
unic-ucd-bidi = "0.9.0"
unic-ucd-category = "0.9.0"
unic-ucd-ident = "0.9.0"
unicode_names2 = "2.0.0"
unicode-bidi-mirroring = "0.4"
widestring = "1.2.0"
windows-sys = "0.61.2"
wasm-bindgen = "0.2.106"

View File

@@ -163,6 +163,7 @@ class OSEINTRTest(EINTRBaseTest):
self.assertEqual(os.readinto(fd, buffer), len(expected))
self.assertEqual(buffer, expected)
@unittest.expectedFailure # TODO: RUSTPYTHON; InterruptedError: [Errno 4] Interrupted system call
def test_write(self):
rd, wr = os.pipe()
self.addCleanup(os.close, wr)

View File

@@ -4813,9 +4813,9 @@ class _TestFinalize(BaseTestCase):
result = [obj for obj in iter(conn.recv, 'STOP')]
self.assertEqual(result, ['a', 'b', 'd10', 'd03', 'd02', 'd01', 'e'])
# TODO: RUSTPYTHON; SIGSEGV due to dict thread-safety issue under aggressive GC
@unittest.skip("TODO: RUSTPYTHON")
@support.requires_resource('cpu')
# TODO: RUSTPYTHON; dict iteration races with concurrent GC mutations
@unittest.expectedFailure
def test_thread_safety(self):
# bpo-24484: _run_finalizers() should be thread-safe
def cb():

View File

@@ -1,5 +1,6 @@
"Test the functionality of Python classes implementing operators."
import sys
import unittest
from test import support
from test.support import cpython_only, import_helper, script_helper
@@ -614,6 +615,7 @@ class ClassTests(unittest.TestCase):
with self.assertRaises(TypeError):
a >= b
@unittest.skipIf(sys.platform == "win32", "TODO: RUSTPYTHON; flaky on Windows")
def testHashComparisonOfMethods(self):
# Test comparison and hash of methods
class A:

View File

@@ -475,6 +475,8 @@ class CmdLineTest(unittest.TestCase):
self.assertRegex(err.decode('ascii', 'ignore'), 'SyntaxError')
self.assertEqual(b'', out)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_stdout_flush_at_shutdown(self):
# Issue #5319: if stdout.flush() fails at shutdown, an error should
# be printed out.

View File

@@ -2486,6 +2486,7 @@ class TestSourcePositions(unittest.TestCase):
class TestStaticAttributes(unittest.TestCase):
@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: type object 'C' has no attribute '__static_attributes__'
def test_basic(self):
class C:
def f(self):
@@ -2517,6 +2518,7 @@ class TestStaticAttributes(unittest.TestCase):
self.assertEqual(sorted(C.__static_attributes__), ['u', 'v', 'x', 'y', 'z'])
@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: type object 'C' has no attribute '__static_attributes__'
def test_nested_class(self):
class C:
def f(self):
@@ -2531,6 +2533,7 @@ class TestStaticAttributes(unittest.TestCase):
self.assertEqual(sorted(C.__static_attributes__), ['x', 'y'])
self.assertEqual(sorted(C.D.__static_attributes__), ['y', 'z'])
@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: type object 'C' has no attribute '__static_attributes__'
def test_subclass(self):
class C:
def f(self):
@@ -2590,6 +2593,7 @@ class TestExpressionStackSize(unittest.TestCase):
def test_set(self):
self.check_stack_size("{" + "x, " * self.N + "x}")
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 202 not less than or equal to 7
def test_dict(self):
self.check_stack_size("{" + "x:x, " * self.N + "x:x}")

View File

@@ -99,6 +99,8 @@ class ContextManagerTestCase(unittest.TestCase):
raise ZeroDivisionError()
self.assertEqual(state, [1, 42, 999])
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_contextmanager_traceback(self):
@contextmanager
def f():

View File

@@ -252,6 +252,7 @@ class AsyncContextManagerTestCase(unittest.IsolatedAsyncioTestCase):
raise ZeroDivisionError(999)
self.assertEqual(state, [1, 42, 999])
@unittest.expectedFailure # TODO: RUSTPYTHON
async def test_contextmanager_except_stopiter(self):
@asynccontextmanager
async def woohoo():

View File

@@ -4987,6 +4987,7 @@ class ClassPropertiesAndMethods(unittest.TestCase):
self.assertEqual(Y.__qualname__, 'Y')
self.assertEqual(Y.Inside.__qualname__, 'Y.Inside')
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_qualname_dict(self):
ns = {'__qualname__': 'some.name'}
tp = type('Foo', (), ns)
@@ -5129,6 +5130,7 @@ class ClassPropertiesAndMethods(unittest.TestCase):
gc.collect()
self.assertEqual(Parent.__subclasses__(), [])
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_instance_method_get_behavior(self):
# test case for gh-113157
@@ -5178,6 +5180,7 @@ class DictProxyTests(unittest.TestCase):
pass
self.C = C
@unittest.expectedFailure # TODO: RUSTPYTHON
@unittest.skipIf(hasattr(sys, 'gettrace') and sys.gettrace(),
'trace function introduces __local__')
def test_iter_keys(self):
@@ -5191,6 +5194,7 @@ class DictProxyTests(unittest.TestCase):
'__static_attributes__', '__weakref__',
'meth'])
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 5 != 7
@unittest.skipIf(hasattr(sys, 'gettrace') and sys.gettrace(),
'trace function introduces __local__')
def test_iter_values(self):
@@ -5200,6 +5204,7 @@ class DictProxyTests(unittest.TestCase):
values = list(it)
self.assertEqual(len(values), 7)
@unittest.expectedFailure # TODO: RUSTPYTHON
@unittest.skipIf(hasattr(sys, 'gettrace') and sys.gettrace(),
'trace function introduces __local__')
def test_iter_items(self):

View File

@@ -1134,6 +1134,7 @@ class DisTests(DisTestBase):
# Test that value is displayed for keyword argument names:
self.do_disassembly_test(wrap_func_w_kwargs, dis_kw_names)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_intrinsic_1(self):
# Test that argrepr is displayed for CALL_INTRINSIC_1
self.do_disassembly_test("from math import *", dis_intrinsic_1_2)

View File

@@ -2519,6 +2519,7 @@ class SyntaxErrorTests(unittest.TestCase):
self.assertEqual(error, the_exception.text)
self.assertEqual("bad bad", the_exception.msg)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_incorrect_constructor(self):
args = ("bad.py", 1, 2)
self.assertRaises(TypeError, SyntaxError, "bad bad", args)

13
Lib/test/test_file.py vendored
View File

@@ -126,7 +126,7 @@ class AutoFileTests:
# it must also return None if an exception was given
try:
1/0
except ZeroDivisionError:
except:
self.assertEqual(self.f.__exit__(*sys.exc_info()), None)
def testReadWhenWriting(self):
@@ -216,16 +216,6 @@ class OtherFileTests:
with self.assertWarnsRegex(RuntimeWarning, 'line buffering'):
self._checkBufferSize(1)
def testDefaultBufferSize(self):
with self.open(TESTFN, 'wb') as f:
blksize = f.raw._blksize
f.write(b"\0" * 5_000_000)
with self.open(TESTFN, 'rb') as f:
data = f.read1()
expected_size = max(min(blksize, 8192 * 1024), io.DEFAULT_BUFFER_SIZE)
self.assertEqual(len(data), expected_size)
def testTruncateOnWindows(self):
# SF bug <https://bugs.python.org/issue801631>
# "file.truncate fault on windows"
@@ -354,6 +344,7 @@ class OtherFileTests:
class COtherFileTests(OtherFileTests, unittest.TestCase):
open = io.open
class PyOtherFileTests(OtherFileTests, unittest.TestCase):
open = staticmethod(pyio.open)

View File

@@ -385,6 +385,7 @@ class PluralFormsTests:
x = ngettext(singular, plural, None)
self.assertEqual(x, tplural)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_plural_forms(self):
self._test_plural_forms(
self.ngettext, self.gettext,
@@ -395,6 +396,7 @@ class PluralFormsTests:
'%d file deleted', '%d files deleted',
'%d file deleted', '%d files deleted')
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_plural_context_forms(self):
ngettext = partial(self.npgettext, 'With context')
gettext = partial(self.pgettext, 'With context')
@@ -407,6 +409,7 @@ class PluralFormsTests:
'%d file deleted', '%d files deleted',
'%d file deleted', '%d files deleted')
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_plural_wrong_context_forms(self):
self._test_plural_forms(
partial(self.npgettext, 'Unknown context'),
@@ -439,6 +442,7 @@ class GNUTranslationsWithDomainPluralFormsTestCase(PluralFormsTests, GettextBase
self.pgettext = partial(gettext.dpgettext, 'gettext')
self.npgettext = partial(gettext.dnpgettext, 'gettext')
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_plural_forms_wrong_domain(self):
self._test_plural_forms(
partial(gettext.dngettext, 'unknown'),
@@ -447,6 +451,7 @@ class GNUTranslationsWithDomainPluralFormsTestCase(PluralFormsTests, GettextBase
'There is %s file', 'There are %s files',
numbers_only=False)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_plural_context_forms_wrong_domain(self):
self._test_plural_forms(
partial(gettext.dnpgettext, 'unknown', 'With context'),
@@ -467,6 +472,7 @@ class GNUTranslationsClassPluralFormsTestCase(PluralFormsTests, GettextBaseTest)
self.pgettext = t.pgettext
self.npgettext = t.npgettext
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_plural_forms_null_translations(self):
t = gettext.NullTranslations()
self._test_plural_forms(
@@ -475,6 +481,7 @@ class GNUTranslationsClassPluralFormsTestCase(PluralFormsTests, GettextBaseTest)
'There is %s file', 'There are %s files',
numbers_only=False)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_plural_context_forms_null_translations(self):
t = gettext.NullTranslations()
self._test_plural_forms(

View File

@@ -961,6 +961,7 @@ class TestGettingSourceOfToplevelFrames(GetSourceBase):
class TestDecorators(GetSourceBase):
fodderModule = mod2
@unittest.expectedFailure # TODO: RUSTPYTHON; pass
def test_wrapped_decorator(self):
self.assertSourceEqual(mod2.wrapped, 14, 17)
@@ -1258,6 +1259,7 @@ class TestNoEOL(GetSourceBase):
class TestComplexDecorator(GetSourceBase):
fodderModule = mod2
@unittest.expectedFailure # TODO: RUSTPYTHON; return foo + bar()
def test_parens_in_decorator(self):
self.assertSourceEqual(self.fodderModule.complex_decorated, 273, 275)

View File

@@ -2,12 +2,13 @@
"""
import os
import stat
import sys
import unittest
import socket
import shutil
import threading
from test.support import requires, bigmemtest, requires_resource
from test.support import requires, bigmemtest
from test.support import SHORT_TIMEOUT
from test.support import socket_helper
from test.support.os_helper import TESTFN, unlink
@@ -28,7 +29,7 @@ class LargeFileTest:
mode = 'w+b'
with self.open(TESTFN, mode) as f:
current_size = os.fstat(f.fileno()).st_size
current_size = os.fstat(f.fileno())[stat.ST_SIZE]
if current_size == size+1:
return
@@ -39,13 +40,13 @@ class LargeFileTest:
f.seek(size)
f.write(b'a')
f.flush()
self.assertEqual(os.fstat(f.fileno()).st_size, size+1)
self.assertEqual(os.fstat(f.fileno())[stat.ST_SIZE], size+1)
@classmethod
def tearDownClass(cls):
with cls.open(TESTFN, 'wb'):
pass
if not os.stat(TESTFN).st_size == 0:
if not os.stat(TESTFN)[stat.ST_SIZE] == 0:
raise cls.failureException('File was not truncated by opening '
'with mode "wb"')
unlink(TESTFN2)
@@ -66,7 +67,7 @@ class TestFileMethods(LargeFileTest):
self.assertEqual(f.tell(), size + 1)
def test_osstat(self):
self.assertEqual(os.stat(TESTFN).st_size, size+1)
self.assertEqual(os.stat(TESTFN)[stat.ST_SIZE], size+1)
def test_seek_read(self):
with self.open(TESTFN, 'rb') as f:
@@ -141,9 +142,6 @@ class TestFileMethods(LargeFileTest):
f.truncate(1)
self.assertEqual(f.tell(), 0) # else pointer moved
f.seek(0)
# Verify readall on a truncated file is well behaved. read()
# without a size can be unbounded, this should get just the byte
# that remains.
self.assertEqual(len(f.read()), 1) # else wasn't truncated
def test_seekable(self):
@@ -154,22 +152,6 @@ class TestFileMethods(LargeFileTest):
f.seek(pos)
self.assertTrue(f.seekable())
@bigmemtest(size=size, memuse=2, dry_run=False)
def test_seek_readall(self, _size):
# Seek which doesn't change position should readall successfully.
with self.open(TESTFN, 'rb') as f:
self.assertEqual(f.seek(0, os.SEEK_CUR), 0)
self.assertEqual(len(f.read()), size + 1)
# Seek which changes (or might change) position should readall
# successfully.
with self.open(TESTFN, 'rb') as f:
self.assertEqual(f.seek(20, os.SEEK_SET), 20)
self.assertEqual(len(f.read()), size - 19)
with self.open(TESTFN, 'rb') as f:
self.assertEqual(f.seek(-3, os.SEEK_END), size - 2)
self.assertEqual(len(f.read()), 3)
def skip_no_disk_space(path, required):
def decorator(fun):
@@ -191,7 +173,6 @@ class TestCopyfile(LargeFileTest, unittest.TestCase):
# Exact required disk space would be (size * 2), but let's give it a
# bit more tolerance.
@skip_no_disk_space(TESTFN, size * 2.5)
@requires_resource('cpu')
def test_it(self):
# Internally shutil.copyfile() can use "fast copy" methods like
# os.sendfile().
@@ -241,7 +222,6 @@ class TestSocketSendfile(LargeFileTest, unittest.TestCase):
# Exact required disk space would be (size * 2), but let's give it a
# bit more tolerance.
@skip_no_disk_space(TESTFN, size * 2.5)
@requires_resource('cpu')
def test_it(self):
port = socket_helper.find_unused_port()
with socket.create_server(("", port)) as sock:

View File

@@ -49,6 +49,7 @@ class IntTestCase(unittest.TestCase, HelperMixin):
self.helper(expected)
n = n >> 1
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_int64(self):
# Simulate int marshaling with TYPE_INT64.
maxint64 = (1 << 63) - 1
@@ -140,6 +141,7 @@ class CodeTestCase(unittest.TestCase):
self.assertEqual(co1.co_filename, "f1")
self.assertEqual(co2.co_filename, "f2")
@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: Unexpected keyword argument allow_code
def test_no_allow_code(self):
data = {'a': [({0},)]}
dump = marshal.dumps(data, allow_code=False)
@@ -232,12 +234,14 @@ class BufferTestCase(unittest.TestCase, HelperMixin):
new = marshal.loads(marshal.dumps(b))
self.assertEqual(type(new), bytes)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_memoryview(self):
b = memoryview(b"abc")
self.helper(b)
new = marshal.loads(marshal.dumps(b))
self.assertEqual(type(new), bytes)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_array(self):
a = array.array('B', b"abc")
new = marshal.loads(marshal.dumps(a))
@@ -270,6 +274,7 @@ class BugsTestCase(unittest.TestCase):
except Exception:
pass
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_loads_recursion(self):
def run_tests(N, check):
# (((...None...),),)
@@ -290,7 +295,7 @@ class BugsTestCase(unittest.TestCase):
run_tests(2**20, check)
@unittest.skipIf(support.is_android, "TODO: RUSTPYTHON; segfault")
@unittest.skipIf(os.name == 'nt', "TODO: RUSTPYTHON; write depth limit is 2000 not 1000")
@unittest.expectedFailure # TODO: RUSTPYTHON; segfault
def test_recursion_limit(self):
# Create a deeply nested structure.
head = last = []
@@ -319,6 +324,7 @@ class BugsTestCase(unittest.TestCase):
last.append([0])
self.assertRaises(ValueError, marshal.dumps, head)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_exact_type_match(self):
# Former bug:
# >>> class Int(int): pass
@@ -342,6 +348,7 @@ class BugsTestCase(unittest.TestCase):
invalid_string = b'l\x02\x00\x00\x00\x00\x00\x00\x00'
self.assertRaises(ValueError, marshal.loads, invalid_string)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_multiple_dumps_and_loads(self):
# Issue 12291: marshal.load() should be callable multiple times
# with interleaved data written by non-marshal code
@@ -525,56 +532,66 @@ class InstancingTestCase(unittest.TestCase, HelperMixin):
else:
self.assertGreaterEqual(len(s2), len(s3))
@unittest.expectedFailure # TODO: RUSTPYTHON
def testInt(self):
intobj = 123321
self.helper(intobj)
self.helper3(intobj, simple=True)
@unittest.expectedFailure # TODO: RUSTPYTHON
def testFloat(self):
floatobj = 1.2345
self.helper(floatobj)
self.helper3(floatobj)
@unittest.expectedFailure # TODO: RUSTPYTHON
def testStr(self):
strobj = "abcde"*3
self.helper(strobj)
self.helper3(strobj)
@unittest.expectedFailure # TODO: RUSTPYTHON
def testBytes(self):
bytesobj = b"abcde"*3
self.helper(bytesobj)
self.helper3(bytesobj)
@unittest.expectedFailure # TODO: RUSTPYTHON
def testList(self):
for obj in self.keys:
listobj = [obj, obj]
self.helper(listobj)
self.helper3(listobj)
@unittest.expectedFailure # TODO: RUSTPYTHON
def testTuple(self):
for obj in self.keys:
tupleobj = (obj, obj)
self.helper(tupleobj)
self.helper3(tupleobj)
@unittest.expectedFailure # TODO: RUSTPYTHON
def testSet(self):
for obj in self.keys:
setobj = {(obj, 1), (obj, 2)}
self.helper(setobj)
self.helper3(setobj)
@unittest.expectedFailure # TODO: RUSTPYTHON
def testFrozenSet(self):
for obj in self.keys:
frozensetobj = frozenset({(obj, 1), (obj, 2)})
self.helper(frozensetobj)
self.helper3(frozensetobj)
@unittest.expectedFailure # TODO: RUSTPYTHON
def testDict(self):
for obj in self.keys:
dictobj = {"hello": obj, "goodbye": obj, obj: "hello"}
self.helper(dictobj)
self.helper3(dictobj)
@unittest.expectedFailure # TODO: RUSTPYTHON
def testModule(self):
with open(__file__, "rb") as f:
code = f.read()
@@ -634,6 +651,7 @@ class InterningTestCase(unittest.TestCase, HelperMixin):
self.assertNotEqual(id(s2), id(s))
class SliceTestCase(unittest.TestCase, HelperMixin):
@unittest.expectedFailure # TODO: RUSTPYTHON; NotImplementedError: TODO: not implemented yet or marshal unsupported type
def test_slice(self):
for obj in (
slice(None), slice(1), slice(1, 2), slice(1, 2, 3),

View File

@@ -867,6 +867,7 @@ class MmapTests(unittest.TestCase):
finally:
f.close()
@unittest.expectedFailure # TODO: RUSTPYTHON
@unittest.skipUnless(os.name == 'nt', 'requires Windows')
def test_resize_succeeds_with_error_for_second_named_mapping(self):
"""If a more than one mapping exists of the same name, none of them can

View File

@@ -1,5 +1,4 @@
# Test the module type
import importlib.machinery
import unittest
import weakref
from test.support import gc_collect
@@ -30,7 +29,7 @@ class ModuleTests(unittest.TestCase):
self.fail("__name__ = %s" % repr(s))
except AttributeError:
pass
self.assertEqual(foo.__doc__, ModuleType.__doc__ or '')
self.assertEqual(foo.__doc__, ModuleType.__doc__)
def test_uninitialized_missing_getattr(self):
# Issue 8297
@@ -103,7 +102,8 @@ class ModuleTests(unittest.TestCase):
gc_collect()
self.assertEqual(f().__dict__["bar"], 4)
@unittest.expectedFailure # TODO: RUSTPYTHON
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_clear_dict_in_ref_cycle(self):
destroyed = []
m = ModuleType("foo")
@@ -152,13 +152,15 @@ a = A(destroyed)"""
if 'test.test_module.bad_getattr2' in sys.modules:
del sys.modules['test.test_module.bad_getattr2']
@unittest.expectedFailure # TODO: RUSTPYTHON
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_module_dir(self):
import test.test_module.good_getattr as gga
self.assertEqual(dir(gga), ['a', 'b', 'c'])
del sys.modules['test.test_module.good_getattr']
@unittest.expectedFailure # TODO: RUSTPYTHON
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_module_dir_errors(self):
import test.test_module.bad_getattr as bga
from test.test_module import bad_getattr2
@@ -268,38 +270,11 @@ a = A(destroyed)"""
self.assertEqual(r[-len(ends_with):], ends_with,
'{!r} does not end with {!r}'.format(r, ends_with))
def test_module_repr_with_namespace_package(self):
m = ModuleType('foo')
loader = importlib.machinery.NamespaceLoader('foo', ['bar'], 'baz')
spec = importlib.machinery.ModuleSpec('foo', loader)
m.__loader__ = loader
m.__spec__ = spec
self.assertEqual(repr(m), "<module 'foo' (namespace) from ['bar']>")
def test_module_repr_with_namespace_package_and_custom_loader(self):
m = ModuleType('foo')
loader = BareLoader()
spec = importlib.machinery.ModuleSpec('foo', loader)
m.__loader__ = loader
m.__spec__ = spec
expected_repr_pattern = r"<module 'foo' \(<.*\.BareLoader object at .+>\)>"
self.assertRegex(repr(m), expected_repr_pattern)
self.assertNotIn('from', repr(m))
def test_module_repr_with_fake_namespace_package(self):
m = ModuleType('foo')
loader = BareLoader()
loader._path = ['spam']
spec = importlib.machinery.ModuleSpec('foo', loader)
m.__loader__ = loader
m.__spec__ = spec
expected_repr_pattern = r"<module 'foo' \(<.*\.BareLoader object at .+>\)>"
self.assertRegex(repr(m), expected_repr_pattern)
self.assertNotIn('from', repr(m))
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_module_finalization_at_shutdown(self):
# Module globals and builtins should still be available during shutdown
rc, out, err = assert_python_ok("-c", "from test.test_module import final_a")
rc, out, err = assert_python_ok("-c", "from test import final_a")
self.assertFalse(err)
lines = out.splitlines()
self.assertEqual(set(lines), {

View File

@@ -1,19 +0,0 @@
"""
Fodder for module finalization tests in test_module.
"""
import shutil
import test.test_module.final_b
x = 'a'
class C:
def __del__(self):
# Inspect module globals and builtins
print("x =", x)
print("final_b.x =", test.test_module.final_b.x)
print("shutil.rmtree =", getattr(shutil.rmtree, '__name__', None))
print("len =", getattr(len, '__name__', None))
c = C()
_underscored = C()

View File

@@ -1,19 +0,0 @@
"""
Fodder for module finalization tests in test_module.
"""
import shutil
import test.test_module.final_a
x = 'b'
class C:
def __del__(self):
# Inspect module globals and builtins
print("x =", x)
print("final_a.x =", test.test_module.final_a.x)
print("shutil.rmtree =", getattr(shutil.rmtree, '__name__', None))
print("len =", getattr(len, '__name__', None))
c = C()
_underscored = C()

View File

@@ -1261,6 +1261,7 @@ class TestLineAndInstructionEvents(CheckEvents):
('instruction', 'func2', 46),
('line', 'get_events', 11)])
@unittest.expectedFailure # TODO: RUSTPYTHON; - instruction offsets differ from CPython
def test_try_except(self):
def func3():

File diff suppressed because it is too large Load Diff

View File

@@ -1,90 +0,0 @@
import unittest
import types
from test.support import import_helper
_testinternalcapi = import_helper.import_module("_testinternalcapi")
class TestRareEventCounters(unittest.TestCase):
def setUp(self):
_testinternalcapi.reset_rare_event_counters()
def test_set_class(self):
class A:
pass
class B:
pass
a = A()
orig_counter = _testinternalcapi.get_rare_event_counters()["set_class"]
a.__class__ = B
self.assertEqual(
orig_counter + 1,
_testinternalcapi.get_rare_event_counters()["set_class"]
)
def test_set_bases(self):
class A:
pass
class B:
pass
class C(B):
pass
orig_counter = _testinternalcapi.get_rare_event_counters()["set_bases"]
C.__bases__ = (A,)
self.assertEqual(
orig_counter + 1,
_testinternalcapi.get_rare_event_counters()["set_bases"]
)
def test_set_eval_frame_func(self):
orig_counter = _testinternalcapi.get_rare_event_counters()["set_eval_frame_func"]
_testinternalcapi.set_eval_frame_record([])
self.assertEqual(
orig_counter + 1,
_testinternalcapi.get_rare_event_counters()["set_eval_frame_func"]
)
_testinternalcapi.set_eval_frame_default()
def test_builtin_dict(self):
orig_counter = _testinternalcapi.get_rare_event_counters()["builtin_dict"]
if isinstance(__builtins__, types.ModuleType):
builtins = __builtins__.__dict__
else:
builtins = __builtins__
builtins["FOO"] = 42
self.assertEqual(
orig_counter + 1,
_testinternalcapi.get_rare_event_counters()["builtin_dict"]
)
del builtins["FOO"]
def test_func_modification(self):
def func(x=0):
pass
for attribute in (
"__code__",
"__defaults__",
"__kwdefaults__"
):
orig_counter = _testinternalcapi.get_rare_event_counters()["func_modification"]
setattr(func, attribute, getattr(func, attribute))
self.assertEqual(
orig_counter + 1,
_testinternalcapi.get_rare_event_counters()["func_modification"]
)
class TestOptimizerSymbols(unittest.TestCase):
@unittest.skipUnless(hasattr(_testinternalcapi, "uop_symbols_test"),
"requires _testinternalcapi.uop_symbols_test")
def test_optimizer_symbols(self):
_testinternalcapi.uop_symbols_test()
if __name__ == "__main__":
unittest.main()

View File

@@ -132,6 +132,7 @@ class TestTranforms(BytecodeTestCase):
self.assertInBytecode(f, 'LOAD_CONST', None)
self.check_lnotab(f)
@unittest.expectedFailure # TODO: RUSTPYTHON; RETURN_VALUE
def test_while_one(self):
# Skip over: LOAD_CONST trueconst POP_JUMP_IF_FALSE xx
def f():
@@ -529,6 +530,7 @@ class TestTranforms(BytecodeTestCase):
self.assertEqual(len(returns), 1)
self.check_lnotab(f)
@unittest.expectedFailure # TODO: RUSTPYTHON; KeyError: 20
def test_elim_jump_to_return(self):
# JUMP_FORWARD to RETURN --> RETURN
def f(cond, true_value, false_value):
@@ -543,6 +545,7 @@ class TestTranforms(BytecodeTestCase):
self.assertEqual(len(returns), 2)
self.check_lnotab(f)
@unittest.expectedFailure # TODO: RUSTPYTHON; absolute jump encoding
def test_elim_jump_to_uncond_jump(self):
# POP_JUMP_IF_FALSE to JUMP_FORWARD --> POP_JUMP_IF_FALSE to non-jump
def f():
@@ -609,6 +612,7 @@ class TestTranforms(BytecodeTestCase):
print(i)
self.check_jump_targets(f)
@unittest.expectedFailure # TODO: RUSTPYTHON; 611 JUMP_BACKWARD 16
def test_elim_jump_after_return1(self):
# Eliminate dead code: jumps immediately after returns can't be reached
def f(cond1, cond2):
@@ -642,6 +646,7 @@ class TestTranforms(BytecodeTestCase):
self.assertEqual(count_instr_recursively(containtest, 'BUILD_LIST'), 0)
self.check_lnotab(containtest)
@unittest.expectedFailure # TODO: RUSTPYTHON; no BUILD_LIST to BUILD_TUPLE optimization
def test_iterate_literal_list(self):
def forloop():
for x in [a, b]:
@@ -858,6 +863,7 @@ class TestMarkingVariablesAsUnKnown(BytecodeTestCase):
self.addCleanup(sys.settrace, sys.gettrace())
sys.settrace(None)
@unittest.expectedFailure # TODO: RUSTPYTHON; BINARY_OP 0 (+)
def test_load_fast_known_simple(self):
def f():
x = 1

View File

@@ -1,23 +0,0 @@
import os
import sys
import unittest
try:
from _testinternalcapi import perf_map_state_teardown, write_perf_map_entry
except ImportError:
raise unittest.SkipTest("requires _testinternalcapi")
if sys.platform != 'linux':
raise unittest.SkipTest('Linux only')
class TestPerfMapWriting(unittest.TestCase):
def test_write_perf_map_entry(self):
self.assertEqual(write_perf_map_entry(0x1234, 5678, "entry1"), 0)
self.assertEqual(write_perf_map_entry(0x2345, 6789, "entry2"), 0)
with open(f"/tmp/perf-{os.getpid()}.map") as f:
perf_file_contents = f.read()
self.assertIn("1234 162e entry1", perf_file_contents)
self.assertIn("2345 1a85 entry2", perf_file_contents)
perf_map_state_teardown()

View File

@@ -692,6 +692,7 @@ class ScopeTests(unittest.TestCase):
self.assertEqual(c.dec(), 1)
self.assertEqual(c.dec(), 0)
@unittest.expectedFailure # TODO: RUSTPYTHON; figure out how to communicate that `y = 9` should be stored as a global rather than a STORE_NAME, even when the `global y` is in a nested subscope
def testGlobalInParallelNestedFunctions(self):
# A symbol table bug leaked the global statement from one
# function to other nested functions in the same block.

View File

@@ -854,7 +854,6 @@ class StrTest(string_tests.StringLikeTest,
self.assertTrue('\U0001F46F'.isprintable())
self.assertFalse('\U000E0020'.isprintable())
@unittest.expectedFailure # TODO: RUSTPYTHON
@support.requires_resource('cpu')
def test_isprintable_invariant(self):
for codepoint in range(sys.maxunicode + 1):

View File

@@ -1903,6 +1903,7 @@ class RunFuncTestCase(BaseTestCase):
res = subprocess.run(args)
self.assertEqual(res.returncode, 57)
@unittest.skipIf(mswindows, "TODO: RUSTPYTHON; empty env block fails nondeterministically")
@unittest.skipUnless(mswindows, "Maybe test trigger a leak on Ubuntu")
def test_run_with_an_empty_env(self):
# gh-105436: fix subprocess.run(..., env={}) broken on Windows

View File

@@ -209,6 +209,7 @@ class TestSuper(unittest.TestCase):
self.assertIs(test_class, A)
@unittest.expectedFailure # TODO: RUSTPYTHON
def test___classcell___expected_behaviour(self):
# See issue #23722
class Meta(type):

View File

@@ -878,6 +878,7 @@ class SysModuleTest(unittest.TestCase):
def test_sys_version_info_no_instantiation(self):
self.assert_raise_on_new_sys_type(sys.version_info)
@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError not raised for getwindowsversion instantiation
def test_sys_getwindowsversion_no_instantiation(self):
# Skip if not being run on Windows.
test.support.get_attribute(sys, "getwindowsversion")

View File

@@ -1420,6 +1420,8 @@ class JumpTestCase(unittest.TestCase):
output.append(6)
output.append(7)
# TODO: RUSTPYTHON
@unittest.expectedFailure
@async_jump_test(4, 5, [3, 5])
async def test_jump_out_of_async_for_block_forwards(output):
for i in [1]:
@@ -1428,6 +1430,8 @@ class JumpTestCase(unittest.TestCase):
output.append(4)
output.append(5)
# TODO: RUSTPYTHON
@unittest.expectedFailure
@async_jump_test(5, 2, [2, 4, 2, 4, 5, 6])
async def test_jump_out_of_async_for_block_backwards(output):
for i in [1]:
@@ -1535,6 +1539,8 @@ class JumpTestCase(unittest.TestCase):
output.append(2)
output.append(3)
# TODO: RUSTPYTHON
@unittest.expectedFailure
@async_jump_test(2, 3, [1, 3])
async def test_jump_forwards_out_of_async_with_block(output):
async with asynctracecontext(output, 1):
@@ -1547,6 +1553,8 @@ class JumpTestCase(unittest.TestCase):
with tracecontext(output, 2):
output.append(3)
# TODO: RUSTPYTHON
@unittest.expectedFailure
@async_jump_test(3, 1, [1, 2, 1, 2, 3, -2])
async def test_jump_backwards_out_of_async_with_block(output):
output.append(1)
@@ -1616,6 +1624,8 @@ class JumpTestCase(unittest.TestCase):
with tracecontext(output, 4):
output.append(5)
# TODO: RUSTPYTHON
@unittest.expectedFailure
@async_jump_test(2, 4, [1, 4, 5, -4])
async def test_jump_across_async_with(output):
output.append(1)
@@ -1633,6 +1643,8 @@ class JumpTestCase(unittest.TestCase):
output.append(5)
output.append(6)
# TODO: RUSTPYTHON
@unittest.expectedFailure
@async_jump_test(4, 5, [1, 3, 5, 6])
async def test_jump_out_of_async_with_block_within_for_block(output):
output.append(1)
@@ -1651,6 +1663,8 @@ class JumpTestCase(unittest.TestCase):
output.append(5)
output.append(6)
# TODO: RUSTPYTHON
@unittest.expectedFailure
@async_jump_test(4, 5, [1, 2, 3, 5, -2, 6])
async def test_jump_out_of_async_with_block_within_with_block(output):
output.append(1)
@@ -1670,6 +1684,8 @@ class JumpTestCase(unittest.TestCase):
output.append(6)
output.append(7)
# TODO: RUSTPYTHON
@unittest.expectedFailure
@async_jump_test(5, 6, [2, 4, 6, 7])
async def test_jump_out_of_async_with_block_within_finally_block(output):
try:
@@ -1703,6 +1719,8 @@ class JumpTestCase(unittest.TestCase):
output.append(4)
output.append(5)
# TODO: RUSTPYTHON
@unittest.expectedFailure
@async_jump_test(3, 5, [1, 2, 5])
async def test_jump_out_of_async_with_assignment(output):
output.append(1)
@@ -1750,6 +1768,8 @@ class JumpTestCase(unittest.TestCase):
output.append(7)
output.append(8)
# TODO: RUSTPYTHON
@unittest.expectedFailure
@async_jump_test(1, 7, [7, 8])
async def test_jump_over_async_for_block_before_else(output):
output.append(1)
@@ -2033,6 +2053,8 @@ class JumpTestCase(unittest.TestCase):
with tracecontext(output, 4):
output.append(5)
# TODO: RUSTPYTHON
@unittest.expectedFailure
@async_jump_test(3, 5, [1, 2, 5, -2])
async def test_jump_between_async_with_blocks(output):
output.append(1)
@@ -2041,6 +2063,8 @@ class JumpTestCase(unittest.TestCase):
async with asynctracecontext(output, 4):
output.append(5)
# TODO: RUSTPYTHON
@unittest.expectedFailure
@jump_test(5, 7, [2, 4], (ValueError, "after"))
def test_no_jump_over_return_out_of_finally_block(output):
try:

View File

@@ -232,6 +232,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
b = 'C\u0338' * 20 + '\xC7'
self.assertEqual(self.db.normalize('NFC', a), b)
@unittest.expectedFailure # TODO: RUSTPYTHON; ? +
def test_issue29456(self):
# Fix #29456
u1176_str_a = '\u1100\u1176\u11a8'
@@ -388,7 +389,6 @@ class NormalizationTest(unittest.TestCase):
data = [int(x, 16) for x in data.split(" ")]
return "".join([chr(x) for x in data])
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: False is not true : 13055
@requires_resource('network')
@requires_resource('cpu')
def test_normalization(self):

View File

@@ -14,7 +14,6 @@ std = ["thiserror/std", "itertools/use_std"]
[dependencies]
rustpython-compiler-core = { workspace = true }
rustpython-unicode = { workspace = true, default-features = false }
rustpython-literal = {workspace = true }
rustpython-wtf8 = { workspace = true }
ruff_python_ast = { workspace = true }
@@ -30,6 +29,7 @@ num-traits = { workspace = true }
thiserror = { workspace = true }
malachite-bigint = { workspace = true }
memchr = { workspace = true }
unicode_names2 = { workspace = true }
[dev-dependencies]
ruff_python_parser = { workspace = true }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,23 +1,21 @@
---
source: crates/codegen/src/compile.rs
assertion_line: 9553
assertion_line: 9100
expression: "compile_exec(\"\\\nif True and False and False:\n pass\n\")"
---
1 0 RESUME (0)
1 LOAD_CONST (True)
2 POP_JUMP_IF_FALSE (11)
>> 3 CACHE
>> 1 LOAD_CONST (True)
2 POP_JUMP_IF_FALSE (9)
3 CACHE
4 NOT_TAKEN
5 LOAD_CONST (False)
6 POP_JUMP_IF_FALSE (7)
>> 7 CACHE
>> 5 LOAD_CONST (False)
6 POP_JUMP_IF_FALSE (5)
7 CACHE
8 NOT_TAKEN
9 LOAD_CONST (False)
10 POP_JUMP_IF_FALSE (3)
>> 11 CACHE
>> 9 LOAD_CONST (False)
10 POP_JUMP_IF_FALSE (1)
11 CACHE
12 NOT_TAKEN
2 13 LOAD_CONST (None)
14 RETURN_VALUE
15 LOAD_CONST (None)
16 RETURN_VALUE

View File

@@ -1,27 +1,25 @@
---
source: crates/codegen/src/compile.rs
assertion_line: 9563
assertion_line: 9110
expression: "compile_exec(\"\\\nif (True and False) or (False and True):\n pass\n\")"
---
1 0 RESUME (0)
1 LOAD_CONST (True)
>> 1 LOAD_CONST (True)
2 POP_JUMP_IF_FALSE (5)
>> 3 CACHE
3 CACHE
4 NOT_TAKEN
>> 5 LOAD_CONST (False)
6 POP_JUMP_IF_TRUE (9)
>> 7 CACHE
7 CACHE
8 NOT_TAKEN
>> 9 LOAD_CONST (False)
10 POP_JUMP_IF_FALSE (7)
10 POP_JUMP_IF_FALSE (5)
11 CACHE
12 NOT_TAKEN
13 LOAD_CONST (True)
14 POP_JUMP_IF_FALSE (3)
14 POP_JUMP_IF_FALSE (1)
15 CACHE
16 NOT_TAKEN
2 17 LOAD_CONST (None)
18 RETURN_VALUE
19 LOAD_CONST (None)
20 RETURN_VALUE

View File

@@ -1,23 +1,21 @@
---
source: crates/codegen/src/compile.rs
assertion_line: 9543
assertion_line: 9090
expression: "compile_exec(\"\\\nif True or False or False:\n pass\n\")"
---
1 0 RESUME (0)
1 LOAD_CONST (True)
>> 1 LOAD_CONST (True)
2 POP_JUMP_IF_TRUE (9)
>> 3 CACHE
3 CACHE
4 NOT_TAKEN
>> 5 LOAD_CONST (False)
6 POP_JUMP_IF_TRUE (5)
7 CACHE
8 NOT_TAKEN
>> 9 LOAD_CONST (False)
10 POP_JUMP_IF_FALSE (3)
10 POP_JUMP_IF_FALSE (1)
11 CACHE
12 NOT_TAKEN
2 13 LOAD_CONST (None)
14 RETURN_VALUE
15 LOAD_CONST (None)
16 RETURN_VALUE

View File

@@ -1,6 +1,5 @@
---
source: crates/codegen/src/compile.rs
assertion_line: 9688
expression: "compile_exec(\"\\\nx = Test() and False or False\n\")"
---
1 0 RESUME (0)
@@ -10,26 +9,18 @@ expression: "compile_exec(\"\\\nx = Test() and False or False\n\")"
4 CACHE
5 CACHE
6 CACHE
7 COPY (1)
8 TO_BOOL
>> 7 COPY (1)
8 POP_JUMP_IF_FALSE (7)
9 CACHE
10 CACHE
>> 11 CACHE
12 POP_JUMP_IF_FALSE (11)
13 CACHE
14 NOT_TAKEN
15 POP_TOP
16 LOAD_CONST (False)
17 COPY (1)
18 TO_BOOL
19 CACHE
20 CACHE
21 CACHE
22 POP_JUMP_IF_TRUE (3)
23 CACHE
24 NOT_TAKEN
25 POP_TOP
26 LOAD_CONST (False)
27 STORE_NAME (1, x)
28 LOAD_CONST (None)
29 RETURN_VALUE
10 NOT_TAKEN
11 POP_TOP
12 LOAD_CONST (False)
13 COPY (1)
14 POP_JUMP_IF_TRUE (3)
15 CACHE
16 NOT_TAKEN
17 POP_TOP
18 LOAD_CONST (False)
19 STORE_NAME (1, x)
20 LOAD_CONST (None)
21 RETURN_VALUE

View File

@@ -1,5 +1,6 @@
---
source: crates/codegen/src/compile.rs
assertion_line: 9089
expression: "compile_exec(\"\\\nasync def test():\n for stop_exc in (StopIteration('spam'), StopAsyncIteration('ham')):\n with self.subTest(type=type(stop_exc)):\n try:\n async with egg():\n raise stop_exc\n except Exception as ex:\n self.assertIs(ex, stop_exc)\n else:\n self.fail(f'{stop_exc} was suppressed')\n\")"
---
1 0 RESUME (0)
@@ -22,7 +23,7 @@ expression: "compile_exec(\"\\\nasync def test():\n for stop_exc in (StopIter
15 CACHE
16 CACHE
17 CACHE
18 LOAD_CONST ("ham")
>> 18 LOAD_CONST ("ham")
19 CALL (1)
20 CACHE
21 CACHE
@@ -31,15 +32,15 @@ expression: "compile_exec(\"\\\nasync def test():\n for stop_exc in (StopIter
24 GET_ITER
25 FOR_ITER (71)
26 CACHE
27 STORE_FAST (0, stop_exc)
>> 27 STORE_FAST (0, stop_exc)
3 28 LOAD_GLOBAL (4, self)
3 >> 28 LOAD_GLOBAL (4, self)
29 CACHE
30 CACHE
31 CACHE
>> 32 CACHE
32 CACHE
33 LOAD_ATTR (7, subTest, method=true)
34 CACHE
>> 34 CACHE
35 CACHE
36 CACHE
37 CACHE
@@ -52,8 +53,8 @@ expression: "compile_exec(\"\\\nasync def test():\n for stop_exc in (StopIter
44 CACHE
45 CACHE
46 CACHE
47 CACHE
>> 48 LOAD_FAST (0, stop_exc)
>> 47 CACHE
48 LOAD_FAST (0, stop_exc)
49 CALL (1)
50 CACHE
51 CACHE
@@ -66,8 +67,8 @@ expression: "compile_exec(\"\\\nasync def test():\n for stop_exc in (StopIter
58 COPY (1)
59 LOAD_SPECIAL (__exit__)
60 SWAP (2)
61 SWAP (3)
62 LOAD_SPECIAL (__enter__)
61 LOAD_SPECIAL (__enter__)
62 PUSH_NULL
63 CALL (0)
64 CACHE
65 CACHE
@@ -88,8 +89,8 @@ expression: "compile_exec(\"\\\nasync def test():\n for stop_exc in (StopIter
78 COPY (1)
79 LOAD_SPECIAL (__aexit__)
80 SWAP (2)
81 SWAP (3)
82 LOAD_SPECIAL (__aenter__)
81 LOAD_SPECIAL (__aenter__)
82 PUSH_NULL
83 CALL (0)
84 CACHE
85 CACHE
@@ -114,141 +115,162 @@ expression: "compile_exec(\"\\\nasync def test():\n for stop_exc in (StopIter
5 102 CLEANUP_THROW
103 JUMP_BACKWARD_NO_INTERRUPT(10)
104 PUSH_EXC_INFO
105 WITH_EXCEPT_START
106 GET_AWAITABLE (2)
107 LOAD_CONST (None)
108 SEND (4)
109 CACHE
110 YIELD_VALUE (1)
111 RESUME (3)
112 JUMP_BACKWARD_NO_INTERRUPT(5)
113 CLEANUP_THROW
114 END_SEND
115 TO_BOOL
116 CACHE
117 CACHE
118 CACHE
119 POP_JUMP_IF_TRUE (2)
120 CACHE
121 NOT_TAKEN
122 RERAISE (2)
123 POP_TOP
124 POP_EXCEPT
125 POP_TOP
126 POP_TOP
127 POP_TOP
128 JUMP_FORWARD (48)
129 COPY (3)
130 POP_EXCEPT
131 RERAISE (1)
132 PUSH_EXC_INFO
7 133 LOAD_GLOBAL (12, Exception)
134 CACHE
135 CACHE
6 104 NOP
5 105 PUSH_NULL
106 LOAD_CONST (None)
107 LOAD_CONST (None)
108 LOAD_CONST (None)
109 CALL (3)
110 CACHE
111 CACHE
112 CACHE
113 GET_AWAITABLE (2)
114 LOAD_CONST (None)
115 SEND (4)
116 CACHE
117 YIELD_VALUE (1)
118 RESUME (3)
119 JUMP_BACKWARD_NO_INTERRUPT(5)
120 CLEANUP_THROW
121 END_SEND
122 POP_TOP
123 JUMP_FORWARD (27)
124 PUSH_EXC_INFO
125 WITH_EXCEPT_START
126 GET_AWAITABLE (2)
127 LOAD_CONST (None)
128 SEND (4)
129 CACHE
130 YIELD_VALUE (1)
131 RESUME (3)
132 JUMP_BACKWARD_NO_INTERRUPT(5)
133 CLEANUP_THROW
134 END_SEND
135 TO_BOOL
136 CACHE
137 CACHE
138 CHECK_EXC_MATCH
139 POP_JUMP_IF_FALSE (32)
138 CACHE
139 POP_JUMP_IF_TRUE (2)
140 CACHE
141 NOT_TAKEN
142 STORE_FAST (1, ex)
142 RERAISE (2)
143 POP_TOP
144 POP_EXCEPT
145 POP_TOP
146 POP_TOP
147 JUMP_FORWARD (3)
148 COPY (3)
149 POP_EXCEPT
150 RERAISE (1)
151 JUMP_FORWARD (47)
152 PUSH_EXC_INFO
8 143 LOAD_GLOBAL (4, self)
144 CACHE
145 CACHE
146 CACHE
147 CACHE
148 LOAD_ATTR (15, assertIs, method=true)
149 CACHE
150 CACHE
151 CACHE
152 CACHE
153 CACHE
7 153 LOAD_GLOBAL (12, Exception)
154 CACHE
155 CACHE
156 CACHE
157 CACHE
158 LOAD_FAST_LOAD_FAST (ex, stop_exc)
159 CALL (2)
158 CHECK_EXC_MATCH
159 POP_JUMP_IF_FALSE (34)
160 CACHE
161 CACHE
162 CACHE
163 POP_TOP
164 POP_EXCEPT
165 LOAD_CONST (None)
166 STORE_FAST (1, ex)
167 DELETE_FAST (1, ex)
168 JUMP_FORWARD (32)
169 LOAD_CONST (None)
170 STORE_FAST (1, ex)
171 DELETE_FAST (1, ex)
172 RERAISE (1)
173 RERAISE (0)
174 COPY (3)
175 POP_EXCEPT
176 RERAISE (1)
161 NOT_TAKEN
162 STORE_FAST (1, ex)
10 177 LOAD_GLOBAL (4, self)
178 CACHE
179 CACHE
180 CACHE
8 163 LOAD_GLOBAL (4, self)
164 CACHE
165 CACHE
166 CACHE
167 CACHE
168 LOAD_ATTR (15, assertIs, method=true)
169 CACHE
170 CACHE
171 CACHE
172 CACHE
173 CACHE
174 CACHE
175 CACHE
176 CACHE
177 CACHE
178 LOAD_FAST (1, ex)
179 LOAD_FAST (0, stop_exc)
180 CALL (2)
181 CACHE
182 LOAD_ATTR (17, fail, method=true)
182 CACHE
183 CACHE
184 CACHE
185 CACHE
186 CACHE
>> 187 CACHE
188 CACHE
189 CACHE
190 CACHE
191 CACHE
192 LOAD_FAST_BORROW (0, stop_exc)
193 FORMAT_SIMPLE
194 LOAD_CONST (" was suppressed")
195 BUILD_STRING (2)
196 CALL (1)
197 CACHE
198 CACHE
199 CACHE
200 POP_TOP
201 NOP
184 POP_TOP
185 JUMP_FORWARD (4)
186 LOAD_CONST (None)
187 STORE_FAST (1, ex)
188 DELETE_FAST (1, ex)
189 RERAISE (1)
190 POP_EXCEPT
191 LOAD_CONST (None)
192 STORE_FAST (1, ex)
193 DELETE_FAST (1, ex)
194 JUMP_FORWARD (28)
195 RERAISE (0)
196 COPY (3)
197 POP_EXCEPT
198 RERAISE (1)
3 202 LOAD_CONST (None)
203 LOAD_CONST (None)
>> 204 LOAD_CONST (None)
205 CALL (3)
10 199 LOAD_GLOBAL (4, self)
200 CACHE
201 CACHE
202 CACHE
203 CACHE
204 LOAD_ATTR (17, fail, method=true)
205 CACHE
206 CACHE
207 CACHE
208 CACHE
209 POP_TOP
210 JUMP_BACKWARD (187)
209 CACHE
210 CACHE
211 CACHE
212 PUSH_EXC_INFO
213 WITH_EXCEPT_START
214 TO_BOOL
215 CACHE
216 CACHE
217 CACHE
218 POP_JUMP_IF_TRUE (2)
212 CACHE
213 CACHE
214 LOAD_FAST_BORROW (0, stop_exc)
215 FORMAT_SIMPLE
216 LOAD_CONST (" was suppressed")
217 BUILD_STRING (2)
218 CALL (1)
219 CACHE
220 NOT_TAKEN
221 RERAISE (2)
220 CACHE
221 CACHE
222 POP_TOP
223 POP_EXCEPT
224 POP_TOP
225 POP_TOP
226 POP_TOP
227 JUMP_BACKWARD (204)
228 CACHE
229 COPY (3)
230 POP_EXCEPT
231 RERAISE (1)
223 NOP
2 232 CALL_INTRINSIC_1 (StopIterationError)
233 RERAISE (1)
3 224 PUSH_NULL
225 LOAD_CONST (None)
226 LOAD_CONST (None)
227 LOAD_CONST (None)
228 CALL (3)
>> 229 CACHE
230 CACHE
231 CACHE
232 POP_TOP
233 JUMP_FORWARD (18)
234 PUSH_EXC_INFO
235 WITH_EXCEPT_START
236 TO_BOOL
237 CACHE
238 CACHE
239 CACHE
240 POP_JUMP_IF_TRUE (2)
241 CACHE
242 NOT_TAKEN
243 RERAISE (2)
244 POP_TOP
245 POP_EXCEPT
246 POP_TOP
247 POP_TOP
248 JUMP_FORWARD (3)
249 COPY (3)
250 POP_EXCEPT
251 RERAISE (1)
252 JUMP_BACKWARD (229)
253 CACHE
2 MAKE_FUNCTION
3 STORE_NAME (0, test)

View File

@@ -113,9 +113,7 @@ impl StringParser {
let name_and_ending = self.skip_bytes(close_idx + 1);
let name = &name_and_ending[..name_and_ending.len() - 1];
rustpython_unicode::data::lookup(name)
.and_then(char::from_u32)
.ok_or_else(|| unreachable!())
unicode_names2::character(name).ok_or_else(|| unreachable!())
}
/// Parse an escaped character, returning the new character.

View File

@@ -54,9 +54,6 @@ pub struct SymbolTable {
/// Whether this type param scope can see the parent class scope
pub can_see_class_scope: bool,
/// Whether this scope contains yield/yield from (is a generator function)
pub is_generator: bool,
/// Whether this comprehension scope should be inlined (PEP 709)
/// True for list/set/dict comprehensions in non-generator expressions
pub comp_inlined: bool,
@@ -92,7 +89,6 @@ impl SymbolTable {
needs_class_closure: false,
needs_classdict: false,
can_see_class_scope: false,
is_generator: false,
comp_inlined: false,
annotation_block: None,
has_conditional_annotations: false,
@@ -296,20 +292,6 @@ fn drop_class_free(symbol_table: &mut SymbolTable, newfree: &mut IndexSet<String
symbol_table.needs_classdict = true;
}
// Classes with function definitions need __classdict__ for PEP 649
// (but not when `from __future__ import annotations` is active)
if !symbol_table.needs_classdict && !symbol_table.future_annotations {
let has_functions = symbol_table.sub_tables.iter().any(|t| {
matches!(
t.typ,
CompilerScope::Function | CompilerScope::AsyncFunction
)
});
if has_functions {
symbol_table.needs_classdict = true;
}
}
// Check if __conditional_annotations__ is in the free variables collected from children
// Remove it from free set - it's handled specially in class scope
if newfree.shift_remove("__conditional_annotations__") {
@@ -317,88 +299,6 @@ fn drop_class_free(symbol_table: &mut SymbolTable, newfree: &mut IndexSet<String
}
}
/// Report whether `expr` itself, or any sub-expression reached through
/// `ast::visitor::walk_expr`, is an `await` expression.
///
/// The traversal stops descending as soon as the first `await` is seen.
// NOTE(review): whether the walker descends into nested scopes (lambdas,
// nested comprehensions) depends on `walk_expr`; confirm before relying on
// this being a shallow check.
fn expr_contains_await(expr: &ast::Expr) -> bool {
    use ast::visitor::Visitor;

    /// Visitor that records whether an `await` node has been encountered.
    struct AwaitScan {
        found: bool,
    }

    impl ast::visitor::Visitor<'_> for AwaitScan {
        fn visit_expr(&mut self, node: &ast::Expr) {
            // Once an `await` has been found there is nothing more to learn.
            if self.found {
                return;
            }
            match node {
                ast::Expr::Await(_) => self.found = true,
                _ => ast::visitor::walk_expr(self, node),
            }
        }
    }

    let mut scan = AwaitScan { found: false };
    scan.visit_expr(expr);
    scan.found
}
/// PEP 709: Merge symbols from an inlined comprehension into the parent scope.
/// Matches symtable.c inline_comprehension().
///
/// * `parent_symbols` - symbol map of the enclosing scope; names missing from it
///   are inserted as clones of the comprehension's symbols.
/// * `comp` - symbol table of the comprehension being inlined (read-only).
/// * `comp_free` - free-variable set collected for the comprehension; names are
///   removed from it in the cases described inline below.
/// * `inlined_cells` - receives every non-parameter name whose comprehension
///   symbol has `Cell` scope or carries the `COMP_CELL` flag.
/// * `parent_type` - kind of the enclosing scope; `Class` parents are treated
///   specially for `__class__` and for already-bound names.
fn inline_comprehension(
parent_symbols: &mut SymbolMap,
comp: &SymbolTable,
comp_free: &mut IndexSet<String>,
inlined_cells: &mut IndexSet<String>,
parent_type: CompilerScope,
) {
for (name, sub_symbol) in &comp.symbols {
// Skip the .0 parameter
// (any symbol flagged PARAMETER is skipped, not only one named ".0")
if sub_symbol.flags.contains(SymbolFlags::PARAMETER) {
continue;
}
// Track inlined cells
if sub_symbol.scope == SymbolScope::Cell
|| sub_symbol.flags.contains(SymbolFlags::COMP_CELL)
{
inlined_cells.insert(name.clone());
}
// Handle __class__ in ClassBlock
// A Free `__class__` under a class parent is dropped from the comprehension's
// free set and treated as GlobalImplicit; every other symbol keeps its scope.
let scope = if sub_symbol.scope == SymbolScope::Free
&& parent_type == CompilerScope::Class
&& name == "__class__"
{
comp_free.swap_remove(name);
SymbolScope::GlobalImplicit
} else {
sub_symbol.scope
};
if let Some(existing) = parent_symbols.get(name) {
// Name exists in parent
// The parent's entry is left untouched; only `comp_free` may change here.
if existing.is_bound() && parent_type != CompilerScope::Class {
// Check if the name is free in any child of the comprehension
let is_free_in_child = comp.sub_tables.iter().any(|child| {
child
.symbols
.get(name)
.is_some_and(|s| s.scope == SymbolScope::Free)
});
// No direct child table of the comprehension has the name as Free,
// so it is removed from the comprehension's free set.
if !is_free_in_child {
comp_free.swap_remove(name);
}
}
} else {
// Name doesn't exist in parent, copy from comprehension.
// Reset scope to Unknown so analyze_symbol will resolve it
// in the parent's context.
// Unbound symbols instead keep the `scope` computed above.
let mut symbol = sub_symbol.clone();
symbol.scope = if sub_symbol.is_bound() {
SymbolScope::Unknown
} else {
scope
};
parent_symbols.insert(name.clone(), symbol);
}
}
}
type SymbolMap = IndexMap<String, Symbol>;
mod stack {
@@ -492,9 +392,14 @@ impl SymbolTableAnalyzer {
let symbols = core::mem::take(&mut symbol_table.symbols);
let sub_tables = &mut *symbol_table.sub_tables;
// Collect free variables from all child scopes
let mut newfree = IndexSet::default();
let annotation_block = &mut symbol_table.annotation_block;
// PEP 649: Determine class_entry to pass to children
// If current scope is a class with annotation block that can_see_class_scope,
// we need to pass class symbols to the annotation scope
let is_class = symbol_table.typ == CompilerScope::Class;
// Clone class symbols if needed for child scopes with can_see_class_scope
@@ -513,16 +418,12 @@ impl SymbolTableAnalyzer {
None
};
// Collect (child_free, is_inlined) pairs from child scopes.
// We need to process inlined comprehensions after the closure
// when we have access to symbol_table.symbols.
let mut child_frees: Vec<(IndexSet<String>, bool)> = Vec::new();
let mut annotation_free: Option<IndexSet<String>> = None;
let mut info = (symbols, symbol_table.typ);
self.tables.with_append(&mut info, |list| {
let inner_scope = unsafe { &mut *(list as *mut _ as *mut Self) };
// Analyze sub scopes and collect their free variables
for sub_table in sub_tables.iter_mut() {
// Pass class_entry to sub-scopes that can see the class scope
let child_class_entry = if sub_table.can_see_class_scope {
if is_class {
class_symbols_clone.as_ref()
@@ -533,10 +434,12 @@ impl SymbolTableAnalyzer {
None
};
let child_free = inner_scope.analyze_symbol_table(sub_table, child_class_entry)?;
child_frees.push((child_free, sub_table.comp_inlined));
// Propagate child's free variables to this scope
newfree.extend(child_free);
}
// PEP 649: Analyze annotation block if present
if let Some(annotation_table) = annotation_block {
// Pass class symbols to annotation scope if can_see_class_scope
let ann_class_entry = if annotation_table.can_see_class_scope {
if is_class {
class_symbols_clone.as_ref()
@@ -548,59 +451,59 @@ impl SymbolTableAnalyzer {
};
let child_free =
inner_scope.analyze_symbol_table(annotation_table, ann_class_entry)?;
annotation_free = Some(child_free);
// Propagate annotation's free variables to this scope
newfree.extend(child_free);
}
Ok(())
})?;
symbol_table.symbols = info.0;
// PEP 709: Process inlined comprehensions.
// Merge symbols from inlined comps into parent scope without bail-out.
let mut inlined_cells: IndexSet<String> = IndexSet::default();
let mut newfree = IndexSet::default();
for (idx, (mut child_free, is_inlined)) in child_frees.into_iter().enumerate() {
if is_inlined {
inline_comprehension(
&mut symbol_table.symbols,
&sub_tables[idx],
&mut child_free,
&mut inlined_cells,
symbol_table.typ,
);
}
newfree.extend(child_free);
}
if let Some(ann_free) = annotation_free {
// Propagate annotation-scope free names to this scope so
// implicit class-scope cells (__classdict__/__conditional_annotations__)
// can be materialized by drop_class_free when needed.
newfree.extend(ann_free);
}
// PEP 709: Merge symbols from inlined comprehensions into parent scope
// Only merge symbols that are actually bound in the comprehension,
// not references to outer scope variables (Free symbols).
const BOUND_FLAGS: SymbolFlags = SymbolFlags::ASSIGNED
.union(SymbolFlags::PARAMETER)
.union(SymbolFlags::ITER)
.union(SymbolFlags::ASSIGNED_IN_COMPREHENSION);
let sub_tables = &*symbol_table.sub_tables;
for sub_table in sub_tables.iter() {
if sub_table.comp_inlined {
for (name, sub_symbol) in &sub_table.symbols {
// Skip the .0 parameter - it's internal to the comprehension
if name == ".0" {
continue;
}
// Only merge symbols that are bound in the comprehension
// Skip Free references to outer scope variables
if !sub_symbol.flags.intersects(BOUND_FLAGS) {
continue;
}
// If the symbol doesn't exist in parent, add it
if !symbol_table.symbols.contains_key(name) {
let mut symbol = sub_symbol.clone();
// Mark as local in parent scope
symbol.scope = SymbolScope::Local;
symbol_table.symbols.insert(name.clone(), symbol);
}
}
}
}
// Analyze symbols in current scope
for symbol in symbol_table.symbols.values_mut() {
self.analyze_symbol(symbol, symbol_table.typ, sub_tables, class_entry)?;
// Collect free variables from this scope
// These will be propagated to the parent scope
if symbol.scope == SymbolScope::Free || symbol.flags.contains(SymbolFlags::FREE_CLASS) {
newfree.insert(symbol.name.clone());
}
}
// PEP 709: Promote LOCAL to CELL and set COMP_CELL for inlined cell vars
for symbol in symbol_table.symbols.values_mut() {
if inlined_cells.contains(&symbol.name) {
if symbol.scope == SymbolScope::Local {
symbol.scope = SymbolScope::Cell;
}
symbol.flags.insert(SymbolFlags::COMP_CELL);
}
}
// Handle class-specific implicit cells
// This removes __class__ and __classdict__ from newfree if present
// and sets the corresponding flags on the symbol table
if symbol_table.typ == CompilerScope::Class {
drop_class_free(symbol_table, &mut newfree);
}
@@ -762,12 +665,6 @@ impl SymbolTableAnalyzer {
if let Some(decl_depth) = decl_depth {
// decl_depth is the number of tables between the current one and
// the one that declared the cell var
// For implicit class scope variables (__classdict__, __conditional_annotations__),
// only propagate free to annotation/type-param scopes, not regular functions.
// Regular method functions don't need these in their freevars.
let is_class_implicit =
name == "__classdict__" || name == "__conditional_annotations__";
for (table, typ) in self.tables.iter_mut().rev().take(decl_depth) {
if let CompilerScope::Class = typ {
if let Some(free_class) = table.get_mut(name) {
@@ -778,19 +675,10 @@ impl SymbolTableAnalyzer {
symbol.scope = SymbolScope::Free;
table.insert(name.to_owned(), symbol);
}
} else if is_class_implicit
&& matches!(
typ,
CompilerScope::Function
| CompilerScope::AsyncFunction
| CompilerScope::Lambda
)
{
// Skip: don't add __classdict__/__conditional_annotations__
// as free vars in regular functions — only annotation/type scopes need them
} else if !table.contains_key(name) {
let mut symbol = Symbol::new(name);
symbol.scope = SymbolScope::Free;
// symbol.is_referenced = true;
table.insert(name.to_owned(), symbol);
}
}
@@ -806,11 +694,6 @@ impl SymbolTableAnalyzer {
st_typ: CompilerScope,
) -> Option<SymbolScope> {
sub_tables.iter().find_map(|st| {
// PEP 709: For inlined comprehensions, check their children
// instead of the comp itself (its symbols are merged into parent).
if st.comp_inlined {
return self.found_in_inner_scope(&st.sub_tables, name, st_typ);
}
let sym = st.symbols.get(name)?;
if sym.scope == SymbolScope::Free || sym.flags.contains(SymbolFlags::FREE_CLASS) {
if st_typ == CompilerScope::Class && name != "__class__" {
@@ -1035,7 +918,6 @@ impl SymbolTableBuilder {
.and_then(|t| t.mangled_names.clone())
.filter(|_| typ != CompilerScope::Class);
let mut table = SymbolTable::new(name.to_owned(), typ, line_number, is_nested);
table.future_annotations = self.future_annotations;
table.mangled_names = inherited_mangled_names;
self.tables.push(table);
// Save parent's varnames and start fresh for the new scope
@@ -1246,30 +1128,20 @@ impl SymbolTableBuilder {
}
fn scan_annotation(&mut self, annotation: &ast::Expr) -> SymbolTableResult {
self.scan_annotation_inner(annotation, false)
}
/// Scan an annotation from an AnnAssign statement (can be conditional)
fn scan_ann_assign_annotation(&mut self, annotation: &ast::Expr) -> SymbolTableResult {
self.scan_annotation_inner(annotation, true)
}
fn scan_annotation_inner(
&mut self,
annotation: &ast::Expr,
is_ann_assign: bool,
) -> SymbolTableResult {
let current_scope = self.tables.last().map(|t| t.typ);
// PEP 649: Only AnnAssign annotations can be conditional.
// Function parameter/return annotations are never conditional.
if is_ann_assign && !self.future_annotations {
// PEP 649: Check if this is a conditional annotation
// Module-level: always conditional (module may be partially executed)
// Class-level: conditional only when inside if/for/while/etc.
if !self.future_annotations {
let is_conditional = matches!(current_scope, Some(CompilerScope::Module))
|| (matches!(current_scope, Some(CompilerScope::Class))
&& self.in_conditional_block);
if is_conditional && !self.tables.last().unwrap().has_conditional_annotations {
self.tables.last_mut().unwrap().has_conditional_annotations = true;
// Register __conditional_annotations__ as both Assigned and Used so that
// it becomes a Cell variable in class scope (children reference it as Free)
self.register_name(
"__conditional_annotations__",
SymbolUsage::Assigned,
@@ -1601,7 +1473,7 @@ impl SymbolTableBuilder {
// sub_tables that cause mismatch in the annotation scope's sub_table index.
let is_simple_name = *simple && matches!(&**target, Expr::Name(_));
if is_simple_name {
self.scan_ann_assign_annotation(annotation)?;
self.scan_annotation(annotation)?;
} else {
// Still validate annotation for forbidden expressions
// (yield, await, named) even for non-simple targets.
@@ -1857,7 +1729,6 @@ impl SymbolTableBuilder {
node_index: _,
range: _,
}) => {
self.tables.last_mut().unwrap().is_generator = true;
if let Some(expression) = value {
self.scan_expression(expression, context)?;
}
@@ -1867,7 +1738,6 @@ impl SymbolTableBuilder {
node_index: _,
range: _,
}) => {
self.tables.last_mut().unwrap().is_generator = true;
self.scan_expression(value, context)?;
}
Expr::UnaryOp(ExprUnaryOp {
@@ -2166,31 +2036,14 @@ impl SymbolTableBuilder {
CompilerScope::Comprehension,
self.line_index_start(range),
);
// Generator expressions need the is_generator flag
self.tables.last_mut().unwrap().is_generator = is_generator;
// PEP 709: Mark non-generator comprehensions for inlining.
// Only in function-like scopes for now. Module/class scope inlining
// needs more work (Cell name resolution, __class__ handling).
// Also excluded: generator expressions, async comprehensions,
// and annotation scopes nested in classes (can_see_class_scope).
let element_has_await = expr_contains_await(elt1) || elt2.is_some_and(expr_contains_await);
if !is_generator && !has_async_gen && !element_has_await {
let parent = self.tables.iter().rev().nth(1);
let parent_can_see_class = parent.is_some_and(|t| t.can_see_class_scope);
let parent_is_func = parent.is_some_and(|t| {
matches!(
t.typ,
CompilerScope::Function
| CompilerScope::AsyncFunction
| CompilerScope::Lambda
| CompilerScope::Comprehension
)
});
if !parent_can_see_class && parent_is_func {
self.tables.last_mut().unwrap().comp_inlined = true;
}
}
// PEP 709: inlined comprehensions are not yet implemented in the
// compiler (is_inlined_comprehension_context always returns false),
// so do NOT mark comp_inlined here. Setting it would cause the
// symbol-table analyzer to merge comprehension-local symbols into
// the parent scope, while the compiler still emits a separate code
// object — leading to the merged symbols being missing from the
// comprehension's own symbol table lookup.
// Register the passed argument to the generator function as the name ".0"
self.register_name(".0", SymbolUsage::Parameter, range)?;

View File

@@ -16,7 +16,6 @@ wasm_js = ["getrandom/wasm_js"]
[dependencies]
rustpython-literal = { workspace = true }
rustpython-unicode = { workspace = true, default-features = false }
rustpython-wtf8 = { workspace = true }
ascii = { workspace = true }
@@ -30,6 +29,7 @@ malachite-q = { workspace = true }
malachite-base = { workspace = true }
num-traits = { workspace = true }
parking_lot = { workspace = true, optional = true }
unicode_names2 = { workspace = true }
radium = { workspace = true }
lock_api = "0.4"

View File

@@ -414,7 +414,7 @@ pub mod errors {
let mut out = String::with_capacity(num_chars * 4);
for c in err_str.code_points() {
let c_u32 = c.to_u32();
if let Some(c_name) = rustpython_unicode::data::name(c_u32) {
if let Some(c_name) = c.to_char().and_then(unicode_names2::name) {
write!(out, "\\N{{{c_name}}}").unwrap();
} else if c_u32 >= 0x10000 {
write!(out, "\\U{c_u32:08x}").unwrap();

View File

@@ -19,7 +19,7 @@ itertools = { workspace = true }
malachite-bigint = { workspace = true }
num-complex = { workspace = true }
lz4_flex = "0.13"
lz4_flex = "0.12"
[lints]
workspace = true

View File

@@ -3,7 +3,7 @@
use crate::{
marshal::MarshalError,
varint::{read_varint, read_varint_with_start, write_varint_be, write_varint_with_start},
varint::{read_varint, read_varint_with_start, write_varint, write_varint_with_start},
{OneIndexed, SourceLocation},
};
use alloc::{borrow::ToOwned, boxed::Box, collections::BTreeSet, fmt, string::String, vec::Vec};
@@ -27,7 +27,7 @@ pub use crate::bytecode::{
BinaryOperator, BuildSliceArgCount, CommonConstant, ComparisonOperator, ConvertValueOparg,
IntrinsicFunction1, IntrinsicFunction2, Invert, Label, LoadAttr, LoadSuperAttr,
MakeFunctionFlag, MakeFunctionFlags, NameIdx, OpArg, OpArgByte, OpArgState, OpArgType,
RaiseKind, SpecialMethod, UnpackExArgs,
RaiseKind, ResumeType, SpecialMethod, UnpackExArgs,
},
};
@@ -71,9 +71,9 @@ pub fn encode_exception_table(entries: &[ExceptionTableEntry]) -> alloc::boxed::
let depth_lasti = ((entry.depth as u32) << 1) | (entry.push_lasti as u32);
write_varint_with_start(&mut data, entry.start);
write_varint_be(&mut data, size);
write_varint_be(&mut data, entry.target);
write_varint_be(&mut data, depth_lasti);
write_varint(&mut data, size);
write_varint(&mut data, entry.target);
write_varint(&mut data, depth_lasti);
}
data.into_boxed_slice()
}
@@ -135,72 +135,6 @@ pub fn decode_exception_table(table: &[u8]) -> Vec<ExceptionTableEntry> {
entries
}
/// Walk `linetable` and produce one flag per code unit telling whether that
/// unit is covered by a location entry of kind `PyCodeLocationInfoKind::None`
/// (the NO_LOCATION case, line == -1).
///
/// The returned vector always has exactly `num_units` elements. Units that no
/// entry covers (for example when the table ends early) stay `false`.
pub fn build_no_location_mask(linetable: &[u8], num_units: usize) -> Vec<bool> {
    /// Move `cursor` past one varint: consume bytes until one has the 0x40
    /// continuation bit clear, or until the table is exhausted.
    fn skip_varint(table: &[u8], cursor: &mut usize) {
        while let Some(&byte) = table.get(*cursor) {
            *cursor += 1;
            if byte & 0x40 == 0 {
                break;
            }
        }
    }

    let mut mask: Vec<bool> = core::iter::repeat(false).take(num_units).collect();
    let mut cursor = 0;
    let mut next_unit = 0;

    while cursor < linetable.len() && next_unit < num_units {
        let header = linetable[cursor];
        cursor += 1;

        // Header layout: bits 3..=6 hold the kind code, bits 0..=2 hold (run length - 1).
        let kind = (header >> 3) & 0xf;
        let run = usize::from(header & 7) + 1;

        // Step over the payload that follows the header for this kind.
        match kind {
            // Short forms: 1 byte payload
            0..=9 => cursor += 1,
            // OneLine forms: 2 bytes payload
            10..=12 => cursor += 2,
            // NoColumns: signed varint (line delta)
            13 => skip_varint(linetable, &mut cursor),
            // Long form: line delta, then end_line_delta, col+1, end_col+1
            14 => {
                for _ in 0..4 {
                    skip_varint(linetable, &mut cursor);
                }
            }
            // None (15): no payload
            _ => {}
        }

        // Mark the whole run at once, clamped to the requested number of units.
        let no_location = kind == PyCodeLocationInfoKind::None as u8;
        let run_end = (next_unit + run).min(num_units);
        mask[next_unit..run_end].fill(no_location);
        next_unit = run_end;
    }

    mask
}
/// CPython 3.11+ linetable location info codes
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[repr(u8)]
@@ -270,7 +204,7 @@ impl PyCodeLocationInfoKind {
}
}
pub trait Constant: Sized + Clone {
pub trait Constant: Sized {
type Name: AsRef<str>;
/// Transforms the given Constant to a BorrowedConstant
@@ -292,8 +226,6 @@ impl Constant for ConstantData {
Self::Bytes { value } => Bytes { value },
Self::Code { code } => Code { code },
Self::Tuple { elements } => Tuple { elements },
Self::Slice { elements } => Slice { elements },
Self::Frozenset { elements } => Frozenset { elements },
Self::None => None,
Self::Ellipsis => Ellipsis,
}
@@ -402,12 +334,6 @@ impl<T> IndexMut<oparg::VarNum> for [T] {
}
}
/// Per-slot kind flags for localsplus (co_localspluskinds).
pub const CO_FAST_HIDDEN: u8 = 0x10;
pub const CO_FAST_LOCAL: u8 = 0x20;
pub const CO_FAST_CELL: u8 = 0x40;
pub const CO_FAST_FREE: u8 = 0x80;
/// Primary container of a single code object. Each python function has
/// a code object. Also a module has a code object.
#[derive(Clone)]
@@ -426,14 +352,12 @@ pub struct CodeObject<C: Constant = ConstantData> {
pub obj_name: C::Name,
/// Qualified name of the object (like CPython's co_qualname)
pub qualname: C::Name,
pub cell2arg: Option<Box<[i32]>>,
pub constants: Constants<C>,
pub names: Box<[C::Name]>,
pub varnames: Box<[C::Name]>,
pub cellvars: Box<[C::Name]>,
pub freevars: Box<[C::Name]>,
/// Per-slot kind flags: CO_FAST_LOCAL, CO_FAST_CELL, CO_FAST_FREE, CO_FAST_HIDDEN.
/// Length = nlocalsplus (nlocals + ncells + nfrees).
pub localspluskinds: Box<[u8]>,
/// Line number table (CPython 3.11+ format)
pub linetable: Box<[u8]>,
/// Exception handling table
@@ -635,14 +559,6 @@ impl Deref for CodeUnits {
}
impl CodeUnits {
/// Disable adaptive specialization by setting all counters to unreachable.
/// Used for CPython-compiled bytecode where specialization may not be safe.
pub fn disable_specialization(&self) {
for counter in self.adaptive_counters.iter() {
counter.store(UNREACHABLE_BACKOFF, Ordering::Relaxed);
}
}
/// Replace the opcode at `index` in-place without changing the arg byte.
/// Uses atomic Release store to ensure prior cache writes are visible
/// to threads that subsequently read the new opcode with Acquire.
@@ -851,37 +767,14 @@ impl CodeUnits {
/// ```
#[derive(Debug, Clone)]
pub enum ConstantData {
Tuple {
elements: Vec<ConstantData>,
},
Integer {
value: BigInt,
},
Float {
value: f64,
},
Complex {
value: Complex64,
},
Boolean {
value: bool,
},
Str {
value: Wtf8Buf,
},
Bytes {
value: Vec<u8>,
},
Code {
code: Box<CodeObject>,
},
/// Constant slice(start, stop, step)
Slice {
elements: Box<[ConstantData; 3]>,
},
Frozenset {
elements: Vec<ConstantData>,
},
Tuple { elements: Vec<ConstantData> },
Integer { value: BigInt },
Float { value: f64 },
Complex { value: Complex64 },
Boolean { value: bool },
Str { value: Wtf8Buf },
Bytes { value: Vec<u8> },
Code { code: Box<CodeObject> },
None,
Ellipsis,
}
@@ -903,8 +796,6 @@ impl PartialEq for ConstantData {
(Bytes { value: a }, Bytes { value: b }) => a == b,
(Code { code: a }, Code { code: b }) => core::ptr::eq(a.as_ref(), b.as_ref()),
(Tuple { elements: a }, Tuple { elements: b }) => a == b,
(Slice { elements: a }, Slice { elements: b }) => a == b,
(Frozenset { elements: a }, Frozenset { elements: b }) => a == b,
(None, None) => true,
(Ellipsis, Ellipsis) => true,
_ => false,
@@ -931,8 +822,6 @@ impl hash::Hash for ConstantData {
Bytes { value } => value.hash(state),
Code { code } => core::ptr::hash(code.as_ref(), state),
Tuple { elements } => elements.hash(state),
Slice { elements } => elements.hash(state),
Frozenset { elements } => elements.hash(state),
None => {}
Ellipsis => {}
}
@@ -949,8 +838,6 @@ pub enum BorrowedConstant<'a, C: Constant> {
Bytes { value: &'a [u8] },
Code { code: &'a CodeObject<C> },
Tuple { elements: &'a [C] },
Slice { elements: &'a [C; 3] },
Frozenset { elements: &'a [C] },
None,
Ellipsis,
}
@@ -988,28 +875,6 @@ impl<C: Constant> BorrowedConstant<'_, C> {
}
write!(f, ")")
}
BorrowedConstant::Slice { elements } => {
write!(f, "slice(")?;
elements[0].borrow_constant().fmt_display(f)?;
write!(f, ", ")?;
elements[1].borrow_constant().fmt_display(f)?;
write!(f, ", ")?;
elements[2].borrow_constant().fmt_display(f)?;
write!(f, ")")
}
BorrowedConstant::Frozenset { elements } => {
write!(f, "frozenset({{")?;
let mut first = true;
for c in *elements {
if first {
first = false
} else {
write!(f, ", ")?;
}
c.borrow_constant().fmt_display(f)?;
}
write!(f, "}})")
}
BorrowedConstant::None => write!(f, "None"),
BorrowedConstant::Ellipsis => write!(f, "..."),
}
@@ -1040,15 +905,6 @@ impl<C: Constant> BorrowedConstant<'_, C> {
.map(|c| c.borrow_constant().to_owned())
.collect(),
},
BorrowedConstant::Slice { elements } => Slice {
elements: Box::new(elements.each_ref().map(|c| c.borrow_constant().to_owned())),
},
BorrowedConstant::Frozenset { elements } => Frozenset {
elements: elements
.iter()
.map(|c| c.borrow_constant().to_owned())
.collect(),
},
BorrowedConstant::None => None,
BorrowedConstant::Ellipsis => Ellipsis,
}
@@ -1169,7 +1025,7 @@ impl<C: Constant> CodeObject<C> {
}
// arrow and offset
let arrow = if label_targets.contains(&Label::from_u32(offset as u32)) {
let arrow = if label_targets.contains(&Label::new(offset as u32)) {
">>"
} else {
" "
@@ -1224,7 +1080,7 @@ impl<C: Constant> CodeObject<C> {
kwonlyarg_count: self.kwonlyarg_count,
first_line_number: self.first_line_number,
max_stackdepth: self.max_stackdepth,
localspluskinds: self.localspluskinds,
cell2arg: self.cell2arg,
linetable: self.linetable,
exceptiontable: self.exceptiontable,
}
@@ -1256,7 +1112,7 @@ impl<C: Constant> CodeObject<C> {
kwonlyarg_count: self.kwonlyarg_count,
first_line_number: self.first_line_number,
max_stackdepth: self.max_stackdepth,
localspluskinds: self.localspluskinds.clone(),
cell2arg: self.cell2arg.clone(),
linetable: self.linetable.clone(),
exceptiontable: self.exceptiontable.clone(),
}
@@ -1285,8 +1141,7 @@ pub trait InstrDisplayContext {
fn get_varname(&self, var_num: oparg::VarNum) -> &str;
/// Get name for a localsplus index (used by DEREF instructions).
fn get_localsplus_name(&self, var_num: oparg::VarNum) -> &str;
fn get_cell_name(&self, i: usize) -> &str;
}
impl<C: Constant> InstrDisplayContext for CodeObject<C> {
@@ -1304,18 +1159,11 @@ impl<C: Constant> InstrDisplayContext for CodeObject<C> {
self.varnames[var_num].as_ref()
}
fn get_localsplus_name(&self, var_num: oparg::VarNum) -> &str {
let idx = var_num.as_usize();
let nlocals = self.varnames.len();
if idx < nlocals {
self.varnames[idx].as_ref()
} else {
let cell_idx = idx - nlocals;
self.cellvars
.get(cell_idx)
.unwrap_or_else(|| &self.freevars[cell_idx - self.cellvars.len()])
.as_ref()
}
fn get_cell_name(&self, i: usize) -> &str {
self.cellvars
.get(i)
.unwrap_or_else(|| &self.freevars[i - self.cellvars.len()])
.as_ref()
}
}

View File

@@ -130,7 +130,7 @@ pub enum Instruction {
namei: Arg<NameIdx>,
} = 61,
DeleteDeref {
i: Arg<oparg::VarNum>,
i: Arg<NameIdx>,
} = 62,
DeleteFast {
var_num: Arg<oparg::VarNum>,
@@ -189,7 +189,7 @@ pub enum Instruction {
consti: Arg<oparg::ConstIdx>,
} = 82,
LoadDeref {
i: Arg<oparg::VarNum>,
i: Arg<NameIdx>,
} = 83,
LoadFast {
var_num: Arg<oparg::VarNum>,
@@ -210,7 +210,7 @@ pub enum Instruction {
var_nums: Arg<oparg::VarNums>,
} = 89,
LoadFromDictOrDeref {
i: Arg<oparg::VarNum>,
i: Arg<NameIdx>,
} = 90,
LoadFromDictOrGlobals {
i: Arg<NameIdx>,
@@ -231,7 +231,7 @@ pub enum Instruction {
namei: Arg<LoadSuperAttr>,
} = 96,
MakeCell {
i: Arg<oparg::VarNum>,
i: Arg<NameIdx>,
} = 97,
MapAdd {
i: Arg<u32>,
@@ -273,7 +273,7 @@ pub enum Instruction {
namei: Arg<NameIdx>,
} = 110,
StoreDeref {
i: Arg<oparg::VarNum>,
i: Arg<NameIdx>,
} = 111,
StoreFast {
var_num: Arg<oparg::VarNum>,
@@ -304,7 +304,7 @@ pub enum Instruction {
} = 120,
// CPython 3.14 RESUME (128)
Resume {
context: Arg<oparg::ResumeContext>,
context: Arg<oparg::ResumeType>,
} = 128,
// CPython 3.14 specialized opcodes (129-211)
BinaryOpAddFloat = 129, // Placeholder
@@ -1020,7 +1020,7 @@ impl InstructionMetadata for Instruction {
Self::LoadLocals => (1, 0),
Self::LoadName { .. } => (1, 0),
Self::LoadSmallInt { .. } => (1, 0),
Self::LoadSpecial { .. } => (2, 1),
Self::LoadSpecial { .. } => (1, 1),
Self::LoadSuperAttr { .. } => (1 + (oparg & 1), 3),
Self::LoadSuperAttrAttr => (1, 3),
Self::LoadSuperAttrMethod => (2, 3),
@@ -1085,7 +1085,7 @@ impl InstructionMetadata for Instruction {
Self::UnpackSequenceList => (oparg, 1),
Self::UnpackSequenceTuple => (oparg, 1),
Self::UnpackSequenceTwoTuple => (2, 1),
Self::WithExceptStart => (7, 6),
Self::WithExceptStart => (6, 5),
Self::YieldValue { .. } => (1, 1),
};
@@ -1128,7 +1128,7 @@ impl InstructionMetadata for Instruction {
let varname = |var_num: oparg::VarNum| ctx.get_varname(var_num);
let name = |i: u32| ctx.get_name(i as usize);
let cell_name = |i: oparg::VarNum| ctx.get_localsplus_name(i);
let cell_name = |i: u32| ctx.get_cell_name(i as usize);
let fmt_const = |op: &str,
arg: OpArg,

View File

@@ -276,6 +276,48 @@ impl fmt::Display for ConvertValueOparg {
}
}
/// Oparg of the `RESUME` instruction.
///
/// The four named variants correspond to the raw values `0..=3`; any other
/// raw value is carried unchanged in [`ResumeType::Other`].
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
pub enum ResumeType {
    AtFuncStart,
    AfterYield,
    AfterYieldFrom,
    AfterAwait,
    Other(u32),
}

impl From<u32> for ResumeType {
    /// Decodes a raw oparg. Never fails: unknown values become `Other(value)`.
    fn from(value: u32) -> Self {
        // The named variants; each one's raw value is given by `u32::from`.
        const NAMED: [ResumeType; 4] = [
            ResumeType::AtFuncStart,
            ResumeType::AfterYield,
            ResumeType::AfterYieldFrom,
            ResumeType::AfterAwait,
        ];
        NAMED
            .iter()
            .copied()
            .find(|&kind| u32::from(kind) == value)
            .unwrap_or(Self::Other(value))
    }
}

impl From<ResumeType> for u32 {
    /// Encodes the variant back into its raw oparg value.
    fn from(typ: ResumeType) -> Self {
        use ResumeType::{AfterAwait, AfterYield, AfterYieldFrom, AtFuncStart, Other};
        match typ {
            Other(raw) => raw,
            AtFuncStart => 0,
            AfterYield => 1,
            AfterYieldFrom => 2,
            AfterAwait => 3,
        }
    }
}

impl core::fmt::Display for ResumeType {
    /// Formats as the raw numeric oparg, forwarding the formatter unchanged.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let raw = u32::from(*self);
        core::fmt::Display::fmt(&raw, f)
    }
}
impl OpArgType for ResumeType {}
pub type NameIdx = u32;
impl OpArgType for u32 {}
@@ -340,20 +382,16 @@ oparg_enum!(
);
bitflagset::bitflag! {
/// `SET_FUNCTION_ATTRIBUTE` flags.
/// Bitmask: Defaults=0x01, KwOnly=0x02, Annotations=0x04,
/// Closure=0x08, TypeParams=0x10, Annotate=0x20.
/// Stored as bit position (0-5) by `bitflag!` macro.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum MakeFunctionFlag {
Defaults = 0,
KwOnlyDefaults = 1,
Annotations = 2,
Closure = 3,
Closure = 0,
Annotations = 1,
KwOnlyDefaults = 2,
Defaults = 3,
TypeParams = 4,
/// PEP 649: __annotate__ function closure (instead of __annotations__ dict)
Annotate = 4,
TypeParams = 5,
Annotate = 5,
}
}
@@ -365,86 +403,33 @@ bitflagset::bitflagset! {
impl TryFrom<u32> for MakeFunctionFlag {
type Error = MarshalError;
/// Decode from CPython-compatible power-of-two value
fn try_from(value: u32) -> Result<Self, Self::Error> {
match value {
0x01 => Ok(Self::Defaults),
0x02 => Ok(Self::KwOnlyDefaults),
0x04 => Ok(Self::Annotations),
0x08 => Ok(Self::Closure),
0x10 => Ok(Self::Annotate),
0x20 => Ok(Self::TypeParams),
_ => Err(MarshalError::InvalidBytecode),
}
Self::try_from(value as u8).map_err(|_| MarshalError::InvalidBytecode)
}
}
impl From<MakeFunctionFlag> for u32 {
/// Encode as CPython-compatible power-of-two value
fn from(flag: MakeFunctionFlag) -> Self {
1u32 << (flag as u32)
flag as u32
}
}
impl OpArgType for MakeFunctionFlag {}
/// `COMPARE_OP` arg is `(cmp_index << 5) | mask`. Only the upper
/// 3 bits identify the comparison; the lower 5 bits are an inline
/// cache mask for adaptive specialization.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ComparisonOperator {
Less,
LessOrEqual,
Equal,
NotEqual,
Greater,
GreaterOrEqual,
}
impl TryFrom<u8> for ComparisonOperator {
type Error = MarshalError;
fn try_from(value: u8) -> Result<Self, Self::Error> {
Self::try_from(value as u32)
oparg_enum!(
/// The possible comparison operators.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ComparisonOperator {
// be intentional with bits so that we can do eval_ord with just a bitwise and
// bits: | Equal | Greater | Less |
Less = 0b001,
Greater = 0b010,
NotEqual = 0b011,
Equal = 0b100,
LessOrEqual = 0b101,
GreaterOrEqual = 0b110,
}
}
impl TryFrom<u32> for ComparisonOperator {
type Error = MarshalError;
/// Decode from `COMPARE_OP` arg: `(cmp_index << 5) | mask`.
fn try_from(value: u32) -> Result<Self, Self::Error> {
match value >> 5 {
0 => Ok(Self::Less),
1 => Ok(Self::LessOrEqual),
2 => Ok(Self::Equal),
3 => Ok(Self::NotEqual),
4 => Ok(Self::Greater),
5 => Ok(Self::GreaterOrEqual),
_ => Err(MarshalError::InvalidBytecode),
}
}
}
impl From<ComparisonOperator> for u8 {
/// Encode as `cmp_index << 5` (mask bits zero).
fn from(value: ComparisonOperator) -> Self {
match value {
ComparisonOperator::Less => 0,
ComparisonOperator::LessOrEqual => 1 << 5,
ComparisonOperator::Equal => 2 << 5,
ComparisonOperator::NotEqual => 3 << 5,
ComparisonOperator::Greater => 4 << 5,
ComparisonOperator::GreaterOrEqual => 5 << 5,
}
}
}
impl From<ComparisonOperator> for u32 {
fn from(value: ComparisonOperator) -> Self {
Self::from(u8::from(value))
}
}
impl OpArgType for ComparisonOperator {}
);
oparg_enum!(
/// The possible Binary operators
@@ -645,10 +630,6 @@ oparg_enum!(
BuiltinAll = 3,
/// Built-in `any` function
BuiltinAny = 4,
/// Built-in `list` type
BuiltinList = 5,
/// Built-in `set` type
BuiltinSet = 6,
}
);
@@ -660,8 +641,6 @@ impl fmt::Display for CommonConstant {
Self::BuiltinTuple => "tuple",
Self::BuiltinAll => "all",
Self::BuiltinAny => "any",
Self::BuiltinList => "list",
Self::BuiltinSet => "set",
};
write!(f, "{name}")
}
@@ -720,10 +699,16 @@ macro_rules! newtype_oparg {
impl $name {
/// Creates a new [`$name`] instance.
#[must_use]
pub const fn from_u32(value: u32) -> Self {
pub const fn new(value: u32) -> Self {
Self(value)
}
/// Alias to [`$name::new`].
#[must_use]
pub const fn from_u32(value: u32) -> Self {
Self::new(value)
}
/// Returns the oparg as a `u32` value.
#[must_use]
pub const fn as_u32(self) -> u32 {
@@ -801,119 +786,15 @@ newtype_oparg!(
pub struct Label(u32)
);
newtype_oparg!(
/// Context for [`Instruction::Resume`].
///
/// The oparg consists of two parts:
/// 1. [`ResumeContext::location`]: Indicates where the instruction occurs
///    (the bits selected by `LOCATION_MASK`).
/// 2. [`ResumeContext::is_exception_depth1`]: Whether the instruction is at
///    except-depth 1 (the bit selected by `DEPTH1_MASK`).
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct ResumeContext(u32)
);
impl ResumeContext {
    /// [CPython `RESUME_OPARG_LOCATION_MASK`](https://github.com/python/cpython/blob/v3.14.3/Include/internal/pycore_opcode_utils.h#L84)
    pub const LOCATION_MASK: u32 = 0x3;

    /// [CPython `RESUME_OPARG_DEPTH1_MASK`](https://github.com/python/cpython/blob/v3.14.3/Include/internal/pycore_opcode_utils.h#L85)
    pub const DEPTH1_MASK: u32 = 0x4;

    /// Builds the oparg from its two parts: the raw value of `location`,
    /// OR-ed with [`Self::DEPTH1_MASK`] when `is_exception_depth1` is set.
    #[must_use]
    pub const fn new(location: ResumeLocation, is_exception_depth1: bool) -> Self {
        let depth1_bit = if is_exception_depth1 {
            Self::DEPTH1_MASK
        } else {
            0
        };
        Self::from_u32(location.as_u32() | depth1_bit)
    }

    /// Resume location is determined by [`Self::LOCATION_MASK`].
    #[must_use]
    pub const fn location(&self) -> ResumeLocation {
        match self.as_u32() & Self::LOCATION_MASK {
            0 => ResumeLocation::AtFuncStart,
            1 => ResumeLocation::AfterYield,
            2 => ResumeLocation::AfterYieldFrom,
            // The mask keeps two bits, so the only remaining value is 3.
            _ => ResumeLocation::AfterAwait,
        }
    }

    /// True if the bit at [`Self::DEPTH1_MASK`] is on.
    #[must_use]
    pub const fn is_exception_depth1(&self) -> bool {
        (self.as_u32() & Self::DEPTH1_MASK) != 0
    }
}
#[derive(Copy, Clone)]
pub enum ResumeLocation {
/// At the start of a function, which is neither a generator, coroutine nor an async generator.
AtFuncStart,
/// After a `yield` expression.
AfterYield,
/// After a `yield from` expression.
AfterYieldFrom,
/// After an `await` expression.
AfterAwait,
}
impl From<ResumeLocation> for ResumeContext {
fn from(location: ResumeLocation) -> Self {
Self::new(location, false)
}
}
impl TryFrom<u32> for ResumeLocation {
type Error = MarshalError;
fn try_from(value: u32) -> Result<Self, Self::Error> {
Ok(match value {
0 => Self::AtFuncStart,
1 => Self::AfterYield,
2 => Self::AfterYieldFrom,
3 => Self::AfterAwait,
_ => return Err(Self::Error::InvalidBytecode),
})
}
}
impl ResumeLocation {
#[must_use]
pub const fn as_u8(&self) -> u8 {
match self {
Self::AtFuncStart => 0,
Self::AfterYield => 1,
Self::AfterYieldFrom => 2,
Self::AfterAwait => 3,
}
}
#[must_use]
pub const fn as_u32(&self) -> u32 {
self.as_u8() as u32
}
}
impl From<ResumeLocation> for u8 {
fn from(location: ResumeLocation) -> Self {
location.as_u8()
}
}
impl From<ResumeLocation> for u32 {
fn from(location: ResumeLocation) -> Self {
location.as_u32()
}
}
impl VarNums {
#[must_use]
pub const fn idx_1(self) -> VarNum {
VarNum::from_u32(self.0 >> 4)
VarNum::new(self.0 >> 4)
}
#[must_use]
pub const fn idx_2(self) -> VarNum {
VarNum::from_u32(self.0 & 15)
VarNum::new(self.0 & 15)
}
#[must_use]
@@ -924,8 +805,8 @@ impl VarNums {
impl LoadAttr {
#[must_use]
pub const fn new(name_idx: u32, is_method: bool) -> Self {
Self::from_u32((name_idx << 1) | (is_method as u32))
pub fn builder() -> LoadAttrBuilder {
LoadAttrBuilder::default()
}
#[must_use]
@@ -939,10 +820,36 @@ impl LoadAttr {
}
}
/// Step-by-step constructor for a [`LoadAttr`] oparg.
///
/// Both fields start at their `Default` (`0` / `false`); each setter returns
/// an updated copy of the builder.
#[derive(Clone, Copy, Default)]
pub struct LoadAttrBuilder {
    name_idx: u32,
    is_method: bool,
}

impl LoadAttrBuilder {
    /// Packs the fields as `(name_idx << 1) | is_method` and wraps the result.
    #[must_use]
    pub const fn build(self) -> LoadAttr {
        let method_bit = self.is_method as u32;
        let shifted_name = self.name_idx << 1;
        LoadAttr::new(shifted_name | method_bit)
    }

    /// Sets the name index.
    #[must_use]
    pub const fn name_idx(self, value: u32) -> Self {
        Self {
            name_idx: value,
            is_method: self.is_method,
        }
    }

    /// Sets the method flag (lowest bit of the packed value).
    #[must_use]
    pub const fn is_method(self, value: bool) -> Self {
        Self {
            name_idx: self.name_idx,
            is_method: value,
        }
    }
}
impl LoadSuperAttr {
#[must_use]
pub const fn new(name_idx: u32, is_load_method: bool, has_class: bool) -> Self {
Self::from_u32((name_idx << 2) | (is_load_method as u32) | ((has_class as u32) << 1))
pub fn builder() -> LoadSuperAttrBuilder {
LoadSuperAttrBuilder::default()
}
#[must_use]
@@ -960,3 +867,43 @@ impl LoadSuperAttr {
(self.0 & 2) == 2
}
}
/// Step-by-step constructor for a [`LoadSuperAttr`] oparg.
///
/// All fields start at their `Default` (`0` / `false`); each setter returns
/// an updated copy of the builder.
#[derive(Clone, Copy, Default)]
pub struct LoadSuperAttrBuilder {
    name_idx: u32,
    is_load_method: bool,
    has_class: bool,
}

impl LoadSuperAttrBuilder {
    /// Packs the fields as
    /// `(name_idx << 2) | (has_class << 1) | is_load_method` and wraps the result.
    #[must_use]
    pub const fn build(self) -> LoadSuperAttr {
        let method_bit = self.is_load_method as u32;
        let class_bit = (self.has_class as u32) << 1;
        let shifted_name = self.name_idx << 2;
        LoadSuperAttr::new(shifted_name | class_bit | method_bit)
    }

    /// Sets the name index.
    #[must_use]
    pub const fn name_idx(self, value: u32) -> Self {
        Self {
            name_idx: value,
            is_load_method: self.is_load_method,
            has_class: self.has_class,
        }
    }

    /// Sets the load-method flag (bit 0 of the packed value).
    #[must_use]
    pub const fn is_load_method(self, value: bool) -> Self {
        Self {
            name_idx: self.name_idx,
            is_load_method: value,
            has_class: self.has_class,
        }
    }

    /// Sets the has-class flag (bit 1 of the packed value).
    #[must_use]
    pub const fn has_class(self, value: bool) -> Self {
        Self {
            name_idx: self.name_idx,
            is_load_method: self.is_load_method,
            has_class: value,
        }
    }
}

impl From<LoadSuperAttrBuilder> for LoadSuperAttr {
    /// Equivalent to calling [`LoadSuperAttrBuilder::build`].
    fn from(builder: LoadSuperAttrBuilder) -> Self {
        LoadSuperAttrBuilder::build(builder)
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,14 +1,12 @@
//! Variable-length integer encoding utilities.
//!
//! Two encodings are used:
//! - **Little-endian** (low bits first): linetable
//! - **Big-endian** (high bits first): exception tables
//!
//! Both use 6-bit chunks with 0x40 as the continuation bit.
//! Uses 6-bit chunks with a continuation bit (0x40) to encode integers.
//! Used for exception tables and line number tables.
use alloc::vec::Vec;
/// Write a little-endian varint (used by linetable).
/// Write a variable-length unsigned integer using 6-bit chunks.
/// Returns the number of bytes written.
#[inline]
pub fn write_varint(buf: &mut Vec<u8>, mut val: u32) -> usize {
let start_len = buf.len();
@@ -20,10 +18,12 @@ pub fn write_varint(buf: &mut Vec<u8>, mut val: u32) -> usize {
buf.len() - start_len
}
/// Write a little-endian signed varint.
/// Write a variable-length signed integer.
/// Returns the number of bytes written.
#[inline]
pub fn write_signed_varint(buf: &mut Vec<u8>, val: i32) -> usize {
let uval = if val < 0 {
// (0 - val as u32) handles INT_MIN correctly
((0u32.wrapping_sub(val as u32)) << 1) | 1
} else {
(val as u32) << 1
@@ -31,72 +31,70 @@ pub fn write_signed_varint(buf: &mut Vec<u8>, val: i32) -> usize {
write_varint(buf, uval)
}
/// Write a big-endian varint (used by exception tables).
pub fn write_varint_be(buf: &mut Vec<u8>, val: u32) -> usize {
let start_len = buf.len();
if val >= 1 << 30 {
buf.push(0x40 | ((val >> 30) & 0x3f) as u8);
}
if val >= 1 << 24 {
buf.push(0x40 | ((val >> 24) & 0x3f) as u8);
}
if val >= 1 << 18 {
buf.push(0x40 | ((val >> 18) & 0x3f) as u8);
}
if val >= 1 << 12 {
buf.push(0x40 | ((val >> 12) & 0x3f) as u8);
}
if val >= 1 << 6 {
buf.push(0x40 | ((val >> 6) & 0x3f) as u8);
}
buf.push((val & 0x3f) as u8);
buf.len() - start_len
}
/// Write a big-endian varint with the start marker (0x80) on the first byte.
/// Write a variable-length unsigned integer with a start marker (0x80 bit).
/// Used for exception table entries where each entry starts with the marker.
pub fn write_varint_with_start(data: &mut Vec<u8>, val: u32) {
let start_pos = data.len();
write_varint_be(data, val);
write_varint(data, val);
// Set start bit on first byte
if let Some(first) = data.get_mut(start_pos) {
*first |= 0x80;
}
}
/// Read a big-endian varint with start marker (0x80).
/// Read a variable-length unsigned integer that starts with a start marker (0x80 bit).
/// Returns None if not at a valid start byte or end of data.
pub fn read_varint_with_start(data: &[u8], pos: &mut usize) -> Option<u32> {
if *pos >= data.len() {
return None;
}
let first = data[*pos];
if first & 0x80 == 0 {
return None;
return None; // Not a start byte
}
*pos += 1;
let mut val = (first & 0x3f) as u32;
let mut cont = first & 0x40 != 0;
while cont && *pos < data.len() {
let b = data[*pos];
let mut shift = 6;
let mut has_continuation = first & 0x40 != 0;
while has_continuation && *pos < data.len() {
let byte = data[*pos];
if byte & 0x80 != 0 {
break; // Next entry start
}
*pos += 1;
val = (val << 6) | (b & 0x3f) as u32;
cont = b & 0x40 != 0;
val |= ((byte & 0x3f) as u32) << shift;
shift += 6;
has_continuation = byte & 0x40 != 0;
}
Some(val)
}
/// Read a big-endian varint (no start marker).
/// Read a variable-length unsigned integer.
/// Returns None if end of data or malformed.
pub fn read_varint(data: &[u8], pos: &mut usize) -> Option<u32> {
if *pos >= data.len() {
return None;
}
let first = data[*pos];
*pos += 1;
let mut val = (first & 0x3f) as u32;
let mut cont = first & 0x40 != 0;
while cont && *pos < data.len() {
let b = data[*pos];
let mut val = 0u32;
let mut shift = 0;
loop {
if *pos >= data.len() {
return None;
}
let byte = data[*pos];
if byte & 0x80 != 0 && shift > 0 {
break; // Next entry start
}
*pos += 1;
val = (val << 6) | (b & 0x3f) as u32;
cont = b & 0x40 != 0;
val |= ((byte & 0x3f) as u32) << shift;
shift += 6;
if byte & 0x40 == 0 {
break;
}
}
Some(val)
}
@@ -106,39 +104,37 @@ mod tests {
use super::*;
#[test]
fn test_le_varint_roundtrip() {
// Little-endian is only used internally in linetable,
// no read function needed outside of linetable parsing.
fn test_write_read_varint() {
let mut buf = Vec::new();
write_varint(&mut buf, 0);
write_varint(&mut buf, 63);
write_varint(&mut buf, 64);
write_varint(&mut buf, 4095);
// Values: 0, 63, 64, 4095
assert_eq!(buf.len(), 1 + 1 + 2 + 2);
}
#[test]
fn test_be_varint_roundtrip() {
for &val in &[0u32, 1, 63, 64, 127, 128, 4095, 4096, 1_000_000] {
let mut buf = Vec::new();
write_varint_be(&mut buf, val);
let mut pos = 0;
assert_eq!(read_varint(&buf, &mut pos), Some(val), "val={val}");
assert_eq!(pos, buf.len());
}
fn test_write_read_signed_varint() {
let mut buf = Vec::new();
write_signed_varint(&mut buf, 0);
write_signed_varint(&mut buf, 1);
write_signed_varint(&mut buf, -1);
write_signed_varint(&mut buf, i32::MIN);
assert!(!buf.is_empty());
}
#[test]
fn test_be_varint_with_start() {
fn test_varint_with_start() {
let mut buf = Vec::new();
write_varint_with_start(&mut buf, 42);
write_varint_with_start(&mut buf, 100);
write_varint_with_start(&mut buf, 71);
let mut pos = 0;
assert_eq!(read_varint_with_start(&buf, &mut pos), Some(42));
assert_eq!(read_varint_with_start(&buf, &mut pos), Some(100));
assert_eq!(read_varint_with_start(&buf, &mut pos), Some(71));
assert_eq!(read_varint_with_start(&buf, &mut pos), None);
}
}

View File

@@ -1,7 +1,7 @@
[package]
name = "rustpython-compiler-source"
description = "(DEPRECATED) RustPython Source and Index"
version = "0.4.1+deprecated"
version = "0.5.0+deprecated"
authors.workspace = true
edition.workspace = true
rust-version.workspace = true

View File

@@ -1,6 +1,5 @@
[package]
name = "rustpython-doc"
description = "Python __doc__ database for RustPython"
version.workspace = true
authors.workspace = true
edition.workspace = true

View File

@@ -17,9 +17,9 @@ num-traits = { workspace = true }
thiserror = { workspace = true }
libffi = { workspace = true }
cranelift = "0.130.0"
cranelift-jit = "0.130.0"
cranelift-module = "0.130.0"
cranelift = "0.129.1"
cranelift-jit = "0.129.1"
cranelift-module = "0.129.1"
[dev-dependencies]
rustpython-derive = { workspace = true }

View File

@@ -162,7 +162,7 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> {
let target = after
.checked_add(u32::from(arg))
.ok_or(JitCompileError::BadBytecode)?;
Ok(Label::from_u32(target))
Ok(Label::new(target))
}
fn jump_target_backward(
@@ -177,7 +177,7 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> {
let target = after
.checked_sub(u32::from(arg))
.ok_or(JitCompileError::BadBytecode)?;
Ok(Label::from_u32(target))
Ok(Label::new(target))
}
fn instruction_target(
@@ -232,7 +232,7 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> {
let mut in_unreachable_code = false;
for (offset, &raw_instr) in clean_instructions.iter().enumerate() {
let label = Label::from_u32(offset as u32);
let label = Label::new(offset as u32);
let (instruction, arg) = arg_state.get(raw_instr);
// If this is a label that some earlier jump can target,
@@ -624,10 +624,7 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> {
_ => Err(JitCompileError::NotSupported),
}
}
Instruction::ExtendedArg
| Instruction::Cache
| Instruction::MakeCell { .. }
| Instruction::CopyFreeVars { .. } => Ok(()),
Instruction::ExtendedArg | Instruction::Cache => Ok(()),
Instruction::JumpBackward { .. }
| Instruction::JumpBackwardNoInterrupt { .. }
@@ -736,28 +733,6 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> {
let val = self.stack.pop().ok_or(JitCompileError::BadBytecode)?;
self.store_variable(var_num.get(arg), val)
}
Instruction::StoreFastLoadFast { var_nums } => {
let oparg = var_nums.get(arg);
let (store_idx, load_idx) = oparg.indexes();
let val = self.stack.pop().ok_or(JitCompileError::BadBytecode)?;
self.store_variable(store_idx, val)?;
let local = self.variables[load_idx]
.as_ref()
.ok_or(JitCompileError::BadBytecode)?;
self.stack.push(JitValue::from_type_and_value(
local.ty.clone(),
self.builder.use_var(local.var),
));
Ok(())
}
Instruction::StoreFastStoreFast { var_nums } => {
let oparg = var_nums.get(arg);
let (idx1, idx2) = oparg.indexes();
let val1 = self.stack.pop().ok_or(JitCompileError::BadBytecode)?;
self.store_variable(idx1, val1)?;
let val2 = self.stack.pop().ok_or(JitCompileError::BadBytecode)?;
self.store_variable(idx2, val2)
}
Instruction::Swap { i: index } => {
let len = self.stack.len();
let i = len - 1;

View File

@@ -42,7 +42,6 @@ impl Function {
}
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
enum StackValue {
String(String),
@@ -50,8 +49,6 @@ enum StackValue {
Map(HashMap<Wtf8Buf, StackValue>),
Code(Box<CodeObject>),
Function(Function),
Slice(Box<[StackValue; 3]>),
Frozenset(Vec<StackValue>),
}
impl From<ConstantData> for StackValue {
@@ -62,13 +59,6 @@ impl From<ConstantData> for StackValue {
}
ConstantData::None => StackValue::None,
ConstantData::Code { code } => StackValue::Code(code),
ConstantData::Slice { elements } => {
let [start, stop, step] = *elements;
StackValue::Slice(Box::new([start.into(), stop.into(), step.into()]))
}
ConstantData::Frozenset { elements } => {
StackValue::Frozenset(elements.into_iter().map(Into::into).collect())
}
c => unimplemented!("constant {:?} isn't yet supported in py_function!", c),
}
}

View File

@@ -9,13 +9,13 @@ license = { workspace = true }
rust-version = { workspace = true }
[dependencies]
rustpython-unicode = { workspace = true, default-features = false }
rustpython-wtf8 = { workspace = true }
hexf-parse = "0.2.1"
is-macro.workspace = true
lexical-parse-float = { version = "1.0.6", features = ["format"] }
num-traits = { workspace = true }
unic-ucd-category = { workspace = true }
[dev-dependencies]
rand = { workspace = true }

View File

@@ -0,0 +1,15 @@
use unic_ucd_category::GeneralCategory;
/// Returns `true` if `c` is printable in the sense described below.
///
/// According to Python, the following categories aren't printable:
/// * Cc (Other, Control)
/// * Cf (Other, Format)
/// * Cs (Other, Surrogate)
/// * Co (Other, Private Use)
/// * Cn (Other, Not Assigned)
/// * Zl Separator, Line ('\u2028', LINE SEPARATOR)
/// * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
/// * Zs (Separator, Space) other than ASCII space ('\x20').
pub fn is_printable(c: char) -> bool {
    // ASCII space is a separator by category, but it is the one exception
    // listed above, so it must be accepted before the category test.
    if c == ' ' {
        return true;
    }
    let cat = GeneralCategory::of(c);
    !(cat.is_other() || cat.is_separator())
}

View File

@@ -204,7 +204,7 @@ impl UnicodeEscape<'_> {
'\\' | '\t' | '\r' | '\n' => 2,
ch if ch < ' ' || ch as u32 == 0x7f => 4, // \xHH
ch if ch.is_ascii() => 1,
ch if rustpython_unicode::classify::is_repr_printable(ch as u32) => {
ch if crate::char::is_printable(ch) => {
// max = std::cmp::max(ch, max);
ch.len_utf8()
}
@@ -238,9 +238,7 @@ impl UnicodeEscape<'_> {
ch if ch.is_ascii() => {
write!(formatter, "\\x{:02x}", ch as u8)
}
ch if rustpython_unicode::classify::is_repr_printable(ch as u32) => {
formatter.write_char(ch)
}
ch if crate::char::is_printable(ch) => formatter.write_char(ch),
'\0'..='\u{ff}' => {
write!(formatter, "\\x{:02x}", ch as u32)
}

View File

@@ -2,6 +2,7 @@
extern crate alloc;
pub mod char;
pub mod complex;
pub mod escape;
pub mod float;

View File

@@ -2,7 +2,7 @@
name = "rustpython-pylib"
description = "A subset of the Python standard library for use with RustPython"
license-file = "Lib/PSF-LICENSE"
include = ["Cargo.toml", "build.rs", "src/**/*.rs", "Lib/", "!Lib/**/test/", "!Lib/**/*.pyc"]
include = ["Cargo.toml", "src/**/*.rs", "Lib/", "!Lib/**/test/", "!Lib/**/*.pyc"]
authors = ["CPython Developers"]
version.workspace = true
edition.workspace = true

View File

@@ -15,7 +15,6 @@ name = "benches"
harness = false
[dependencies]
rustpython-unicode = { workspace = true, default-features = false }
rustpython-wtf8 = { workspace = true }
num_enum = { workspace = true }
bitflags = { workspace = true }

View File

@@ -1,10 +1,14 @@
// good luck to those that follow; here be dragons
use crate::string::{
is_digit, is_linebreak, is_loc_word, is_space, is_uni_digit, is_uni_linebreak, is_uni_space,
is_uni_word, is_word, lower_ascii, lower_locate, lower_unicode, upper_locate, upper_unicode,
};
use super::{MAXREPEAT, SreAtCode, SreCatCode, SreInfo, SreOpcode, StrDrive, StringCursor};
use alloc::{vec, vec::Vec};
use core::{convert::TryFrom, ptr::null};
use optional::Optioned;
use rustpython_unicode::regex as unicode_regex;
#[derive(Debug, Clone, Copy)]
pub struct Request<'a, S> {
@@ -655,10 +659,10 @@ fn _match<S: StrDrive>(req: &Request<'_, S>, state: &mut State, mut ctx: MatchCo
}
SreOpcode::IN => general_op_in!(charset),
SreOpcode::IN_IGNORE => {
general_op_in!(|set, c| charset(set, unicode_regex::lower_ascii(c)))
general_op_in!(|set, c| charset(set, lower_ascii(c)))
}
SreOpcode::IN_UNI_IGNORE => {
general_op_in!(|set, c| charset(set, unicode_regex::lower_unicode(c)))
general_op_in!(|set, c| charset(set, lower_unicode(c)))
}
SreOpcode::IN_LOC_IGNORE => general_op_in!(charset_loc_ignore),
SreOpcode::MARK => {
@@ -799,31 +803,25 @@ fn _match<S: StrDrive>(req: &Request<'_, S>, state: &mut State, mut ctx: MatchCo
SreOpcode::LITERAL => general_op_literal!(|code, c| code == c),
SreOpcode::NOT_LITERAL => general_op_literal!(|code, c| code != c),
SreOpcode::LITERAL_IGNORE => {
general_op_literal!(|code, c| code == unicode_regex::lower_ascii(c))
general_op_literal!(|code, c| code == lower_ascii(c))
}
SreOpcode::NOT_LITERAL_IGNORE => {
general_op_literal!(|code, c| code != unicode_regex::lower_ascii(c))
general_op_literal!(|code, c| code != lower_ascii(c))
}
SreOpcode::LITERAL_UNI_IGNORE => {
general_op_literal!(|code, c| code == unicode_regex::lower_unicode(c))
general_op_literal!(|code, c| code == lower_unicode(c))
}
SreOpcode::NOT_LITERAL_UNI_IGNORE => {
general_op_literal!(|code, c| code != unicode_regex::lower_unicode(c))
general_op_literal!(|code, c| code != lower_unicode(c))
}
SreOpcode::LITERAL_LOC_IGNORE => general_op_literal!(char_loc_ignore),
SreOpcode::NOT_LITERAL_LOC_IGNORE => {
general_op_literal!(|code, c| !char_loc_ignore(code, c))
}
SreOpcode::GROUPREF => general_op_groupref!(|x| x),
SreOpcode::GROUPREF_IGNORE => {
general_op_groupref!(unicode_regex::lower_ascii)
}
SreOpcode::GROUPREF_LOC_IGNORE => {
general_op_groupref!(unicode_regex::lower_locale)
}
SreOpcode::GROUPREF_UNI_IGNORE => {
general_op_groupref!(unicode_regex::lower_unicode)
}
SreOpcode::GROUPREF_IGNORE => general_op_groupref!(lower_ascii),
SreOpcode::GROUPREF_LOC_IGNORE => general_op_groupref!(lower_locate),
SreOpcode::GROUPREF_UNI_IGNORE => general_op_groupref!(lower_unicode),
SreOpcode::GROUPREF_EXISTS => {
let (group_start, group_end) =
state.marks.get(ctx.peek_code(req, 1) as usize);
@@ -1127,7 +1125,7 @@ impl MatchContext {
}
fn at_linebreak<S: StrDrive>(&self, req: &Request<'_, S>) -> bool {
!self.at_end(req) && unicode_regex::is_linebreak(self.peek_char::<S>())
!self.at_end(req) && is_linebreak(self.peek_char::<S>())
}
fn at_boundary<S: StrDrive, F: FnMut(u32) -> bool>(
@@ -1194,56 +1192,54 @@ impl MatchContext {
fn at<S: StrDrive>(req: &Request<'_, S>, ctx: &MatchContext, at_code: SreAtCode) -> bool {
match at_code {
SreAtCode::BEGINNING | SreAtCode::BEGINNING_STRING => ctx.at_beginning(),
SreAtCode::BEGINNING_LINE => {
ctx.at_beginning() || unicode_regex::is_linebreak(ctx.back_peek_char::<S>())
}
SreAtCode::BOUNDARY => ctx.at_boundary(req, unicode_regex::is_word),
SreAtCode::NON_BOUNDARY => ctx.at_non_boundary(req, unicode_regex::is_word),
SreAtCode::BEGINNING_LINE => ctx.at_beginning() || is_linebreak(ctx.back_peek_char::<S>()),
SreAtCode::BOUNDARY => ctx.at_boundary(req, is_word),
SreAtCode::NON_BOUNDARY => ctx.at_non_boundary(req, is_word),
SreAtCode::END => {
(ctx.remaining_chars(req) == 1 && ctx.at_linebreak(req)) || ctx.at_end(req)
}
SreAtCode::END_LINE => ctx.at_linebreak(req) || ctx.at_end(req),
SreAtCode::END_STRING => ctx.at_end(req),
SreAtCode::LOC_BOUNDARY => ctx.at_boundary(req, unicode_regex::is_locale_word),
SreAtCode::LOC_NON_BOUNDARY => ctx.at_non_boundary(req, unicode_regex::is_locale_word),
SreAtCode::UNI_BOUNDARY => ctx.at_boundary(req, unicode_regex::is_unicode_word),
SreAtCode::UNI_NON_BOUNDARY => ctx.at_non_boundary(req, unicode_regex::is_unicode_word),
SreAtCode::LOC_BOUNDARY => ctx.at_boundary(req, is_loc_word),
SreAtCode::LOC_NON_BOUNDARY => ctx.at_non_boundary(req, is_loc_word),
SreAtCode::UNI_BOUNDARY => ctx.at_boundary(req, is_uni_word),
SreAtCode::UNI_NON_BOUNDARY => ctx.at_non_boundary(req, is_uni_word),
}
}
fn char_loc_ignore(code: u32, c: u32) -> bool {
code == c || code == unicode_regex::lower_locale(c) || code == unicode_regex::upper_locale(c)
code == c || code == lower_locate(c) || code == upper_locate(c)
}
fn charset_loc_ignore(set: &[u32], c: u32) -> bool {
let lo = unicode_regex::lower_locale(c);
let lo = lower_locate(c);
if charset(set, c) {
return true;
}
let up = unicode_regex::upper_locale(c);
let up = upper_locate(c);
up != lo && charset(set, up)
}
fn category(cat_code: SreCatCode, c: u32) -> bool {
match cat_code {
SreCatCode::DIGIT => unicode_regex::is_digit(c),
SreCatCode::NOT_DIGIT => !unicode_regex::is_digit(c),
SreCatCode::SPACE => unicode_regex::is_space(c),
SreCatCode::NOT_SPACE => !unicode_regex::is_space(c),
SreCatCode::WORD => unicode_regex::is_word(c),
SreCatCode::NOT_WORD => !unicode_regex::is_word(c),
SreCatCode::LINEBREAK => unicode_regex::is_linebreak(c),
SreCatCode::NOT_LINEBREAK => !unicode_regex::is_linebreak(c),
SreCatCode::LOC_WORD => unicode_regex::is_locale_word(c),
SreCatCode::LOC_NOT_WORD => !unicode_regex::is_locale_word(c),
SreCatCode::UNI_DIGIT => unicode_regex::is_unicode_digit(c),
SreCatCode::UNI_NOT_DIGIT => !unicode_regex::is_unicode_digit(c),
SreCatCode::UNI_SPACE => unicode_regex::is_unicode_space(c),
SreCatCode::UNI_NOT_SPACE => !unicode_regex::is_unicode_space(c),
SreCatCode::UNI_WORD => unicode_regex::is_unicode_word(c),
SreCatCode::UNI_NOT_WORD => !unicode_regex::is_unicode_word(c),
SreCatCode::UNI_LINEBREAK => unicode_regex::is_unicode_linebreak(c),
SreCatCode::UNI_NOT_LINEBREAK => !unicode_regex::is_unicode_linebreak(c),
SreCatCode::DIGIT => is_digit(c),
SreCatCode::NOT_DIGIT => !is_digit(c),
SreCatCode::SPACE => is_space(c),
SreCatCode::NOT_SPACE => !is_space(c),
SreCatCode::WORD => is_word(c),
SreCatCode::NOT_WORD => !is_word(c),
SreCatCode::LINEBREAK => is_linebreak(c),
SreCatCode::NOT_LINEBREAK => !is_linebreak(c),
SreCatCode::LOC_WORD => is_loc_word(c),
SreCatCode::LOC_NOT_WORD => !is_loc_word(c),
SreCatCode::UNI_DIGIT => is_uni_digit(c),
SreCatCode::UNI_NOT_DIGIT => !is_uni_digit(c),
SreCatCode::UNI_SPACE => is_uni_space(c),
SreCatCode::UNI_NOT_SPACE => !is_uni_space(c),
SreCatCode::UNI_WORD => is_uni_word(c),
SreCatCode::UNI_NOT_WORD => !is_uni_word(c),
SreCatCode::UNI_LINEBREAK => is_uni_linebreak(c),
SreCatCode::UNI_NOT_LINEBREAK => !is_uni_linebreak(c),
}
}
@@ -1324,7 +1320,7 @@ fn charset(set: &[u32], ch: u32) -> bool {
if set[i + 1] <= ch && ch <= set[i + 2] {
return ok;
}
let ch = unicode_regex::upper_unicode(ch);
let ch = upper_unicode(ch);
if set[i + 1] <= ch && ch <= set[i + 2] {
return ok;
}
@@ -1372,14 +1368,10 @@ fn _count<S: StrDrive>(
general_count_literal(req, ctx, end, |code, c| code != c);
}
SreOpcode::LITERAL_IGNORE => {
general_count_literal(req, ctx, end, |code, c| {
code == unicode_regex::lower_ascii(c)
});
general_count_literal(req, ctx, end, |code, c| code == lower_ascii(c));
}
SreOpcode::NOT_LITERAL_IGNORE => {
general_count_literal(req, ctx, end, |code, c| {
code != unicode_regex::lower_ascii(c)
});
general_count_literal(req, ctx, end, |code, c| code != lower_ascii(c));
}
SreOpcode::LITERAL_LOC_IGNORE => {
general_count_literal(req, ctx, end, char_loc_ignore);
@@ -1388,14 +1380,10 @@ fn _count<S: StrDrive>(
general_count_literal(req, ctx, end, |code, c| !char_loc_ignore(code, c));
}
SreOpcode::LITERAL_UNI_IGNORE => {
general_count_literal(req, ctx, end, |code, c| {
code == unicode_regex::lower_unicode(c)
});
general_count_literal(req, ctx, end, |code, c| code == lower_unicode(c));
}
SreOpcode::NOT_LITERAL_UNI_IGNORE => {
general_count_literal(req, ctx, end, |code, c| {
code != unicode_regex::lower_unicode(c)
});
general_count_literal(req, ctx, end, |code, c| code != lower_unicode(c));
}
_ => {
/* General case */

View File

@@ -331,3 +331,136 @@ const fn utf8_is_cont_byte(byte: u8) -> bool {
/// Mask of the value bits of a continuation byte.
const CONT_MASK: u8 = 0b0011_1111;
/// Returns `true` for the six ASCII bytes treated as whitespace here:
/// tab, line feed, vertical tab, form feed, carriage return and space.
const fn is_py_ascii_whitespace(b: u8) -> bool {
    // 0x09..=0x0D is the contiguous run \t \n \x0B \x0C \r; 0x20 is the space.
    matches!(b, 0x09..=0x0D | 0x20)
}
/// ASCII word test: `_`, or an ASCII letter or digit.
///
/// Code points that do not fit in a single byte are never word characters.
#[inline]
pub(crate) fn is_word(ch: u32) -> bool {
    u8::try_from(ch).is_ok_and(|byte| byte == b'_' || byte.is_ascii_alphanumeric())
}
#[inline]
pub(crate) fn is_space(ch: u32) -> bool {
u8::try_from(ch)
.map(is_py_ascii_whitespace)
.unwrap_or(false)
}
/// ASCII digit test: `true` only for the code points of `'0'..='9'`.
#[inline]
pub(crate) fn is_digit(ch: u32) -> bool {
    matches!(ch, 0x30..=0x39)
}
/// Alphanumeric test for locale mode.
///
/// No locale is consulted: only ASCII letters and digits are accepted.
#[inline]
pub(crate) fn is_loc_alnum(ch: u32) -> bool {
    // FIXME: Ignore the locales
    matches!(u8::try_from(ch), Ok(byte) if byte.is_ascii_alphanumeric())
}
/// Word test for locale mode: an underscore, or anything `is_loc_alnum` accepts.
#[inline]
pub(crate) fn is_loc_word(ch: u32) -> bool {
ch == '_' as u32 || is_loc_alnum(ch)
}
/// Returns `true` only for the line feed code point (`'\n'`, U+000A).
#[inline]
pub(crate) const fn is_linebreak(ch: u32) -> bool {
    matches!(ch, 0x0A)
}
/// Lowercases `ch` when it is an ASCII uppercase letter; every other code
/// point (including all values above 0xFF) is returned unchanged.
#[inline]
pub fn lower_ascii(ch: u32) -> u32 {
    match u8::try_from(ch) {
        Ok(byte) => u32::from(byte.to_ascii_lowercase()),
        Err(_) => ch,
    }
}
/// Lowercase mapping for locale mode.
///
/// Currently a plain delegate to `lower_ascii`; no locale is consulted.
#[inline]
pub(crate) fn lower_locate(ch: u32) -> u32 {
// FIXME: Ignore the locales
lower_ascii(ch)
}
/// Uppercase mapping for locale mode.
///
/// No locale is consulted: ASCII lowercase letters are uppercased and every
/// other code point is returned unchanged.
#[inline]
pub(crate) fn upper_locate(ch: u32) -> u32 {
    // FIXME: Ignore the locales
    match u8::try_from(ch) {
        Ok(byte) => u32::from(byte.to_ascii_uppercase()),
        Err(_) => ch,
    }
}
/// Digit test for Unicode mode.
///
/// As written this accepts only the ASCII digits `'0'..='9'`; non-ASCII
/// decimal digits are rejected.
#[inline]
pub(crate) fn is_uni_digit(ch: u32) -> bool {
    // TODO: check with cpython
    matches!(char::from_u32(ch), Some('0'..='9'))
}
/// Whitespace test for Unicode mode.
///
/// Accepts everything `is_space` accepts plus the explicit code points below
/// (C0 separators, NEL, NBSP, and the Unicode space/line/paragraph separators).
#[inline]
pub(crate) fn is_uni_space(ch: u32) -> bool {
    // TODO: check with cpython
    let listed = matches!(
        ch,
        0x0009..=0x000D
            | 0x001C..=0x0020
            | 0x0085
            | 0x00A0
            | 0x1680
            | 0x2000..=0x200A
            | 0x2028
            | 0x2029
            | 0x202F
            | 0x205F
            | 0x3000
    );
    is_space(ch) || listed
}
/// Line-break test for Unicode mode: LF, VT, FF, CR, the FS/GS/RS separators,
/// NEL (U+0085), LINE SEPARATOR (U+2028) and PARAGRAPH SEPARATOR (U+2029).
#[inline]
pub(crate) const fn is_uni_linebreak(ch: u32) -> bool {
    matches!(
        ch,
        0x000A..=0x000D | 0x001C..=0x001E | 0x0085 | 0x2028 | 0x2029
    )
}
/// Alphanumeric test for Unicode mode, using `char::is_alphanumeric`.
///
/// Values that are not Unicode scalar values (surrogates, out of range) are
/// never alphanumeric.
#[inline]
pub(crate) fn is_uni_alnum(ch: u32) -> bool {
    // TODO: check with cpython
    char::from_u32(ch).is_some_and(char::is_alphanumeric)
}
/// Word test for Unicode mode: an underscore, or anything `is_uni_alnum` accepts.
#[inline]
pub(crate) fn is_uni_word(ch: u32) -> bool {
ch == '_' as u32 || is_uni_alnum(ch)
}
/// Maps `ch` to the first code point of its `char::to_lowercase` expansion.
///
/// Values that are not Unicode scalar values are returned unchanged.
#[inline]
pub fn lower_unicode(ch: u32) -> u32 {
    // TODO: check with cpython
    match char::from_u32(ch) {
        // The lowercase expansion always yields at least one char, so the
        // fallback is never taken in practice.
        Some(c) => c.to_lowercase().next().map_or(ch, u32::from),
        None => ch,
    }
}
/// Maps `ch` to the first code point of its `char::to_uppercase` expansion.
///
/// Values that are not Unicode scalar values are returned unchanged.
#[inline]
pub fn upper_unicode(ch: u32) -> u32 {
    // TODO: check with cpython
    match char::from_u32(ch) {
        // The uppercase expansion always yields at least one char, so the
        // fallback is never taken in practice.
        Some(c) => c.to_uppercase().next().map_or(ch, u32::from),
        None => ch,
    }
}

View File

@@ -28,7 +28,6 @@ flame-it = ["flame"]
[dependencies]
# rustpython crates
rustpython-derive = { workspace = true }
rustpython-unicode = { workspace = true, features = ["casefold"] }
rustpython-vm = { workspace = true, default-features = false, features = ["compiler"]}
rustpython-common = { workspace = true }
@@ -77,6 +76,16 @@ pbkdf2 = { version = "0.12", features = ["hmac"] }
constant_time_eq = { workspace = true }
## unicode stuff
unicode_names2 = { workspace = true }
# update version all at the same time
unic-char-property = { workspace = true }
unic-normal = { workspace = true }
unic-ucd-bidi = { workspace = true }
unic-ucd-category = { workspace = true }
unic-ucd-age = { workspace = true }
ucd = "0.1.1"
unicode-bidi-mirroring = { workspace = true }
# compression
adler32 = "1.2.0"
crc32fast = "1.3.2"
@@ -125,12 +134,12 @@ x509-parser = { version = "0.18", optional = true }
der = { version = "0.7", features = ["alloc", "oid"], optional = true }
pem-rfc7468 = { version = "1.0", features = ["alloc"], optional = true }
webpki-roots = { version = "1.0", optional = true }
aws-lc-rs = { version = "1.16.2", optional = true }
aws-lc-rs = { version = "1.16.0", optional = true }
oid-registry = { version = "0.8", features = ["x509", "pkcs1", "nist_algs"], optional = true }
pkcs8 = { version = "0.10", features = ["encryption", "pkcs5", "pem"], optional = true }
[target.'cfg(not(any(target_os = "android", target_arch = "wasm32")))'.dependencies]
libsqlite3-sys = { version = "0.37", features = ["bundled"], optional = true }
libsqlite3-sys = { version = "0.36", features = ["bundled"], optional = true }
liblzma = "0.4"
liblzma-sys = "0.4"

View File

@@ -1204,7 +1204,7 @@ mod mmap {
// Check if this is a Named mmap - these cannot be resized
if let Some(MmapObj::Named(_)) = mmap_guard.as_ref() {
return Err(vm.new_os_error("mmap: cannot resize a named memory mapping"));
return Err(vm.new_system_error("mmap: cannot resize a named memory mapping"));
}
let is_anonymous = handle == INVALID_HANDLE_VALUE as isize;

View File

@@ -6,30 +6,55 @@
pub(crate) use unicodedata::module_def;
use crate::vm::{
PyObject, PyResult, VirtualMachine, builtins::PyStr, convert::TryFromBorrowedObject,
};
/// The four normalization forms that can be requested by name.
enum NormalizeForm {
/// Selected by the string `"NFC"`.
Nfc,
/// Selected by the string `"NFKC"`.
Nfkc,
/// Selected by the string `"NFD"`.
Nfd,
/// Selected by the string `"NFKD"`.
Nfkd,
}
/// Converts a borrowed Python object into a `NormalizeForm`.
impl<'a> TryFromBorrowedObject<'a> for NormalizeForm {
// The object is viewed as a `PyStr` through `try_value_with`; what happens for
// non-str objects is decided by that helper (presumably a type error — confirm).
fn try_from_borrowed_object(vm: &VirtualMachine, obj: &'a PyObject) -> PyResult<Self> {
obj.try_value_with(
// Matching is exact and case-sensitive on the string's raw bytes.
|form: &PyStr| match form.as_bytes() {
b"NFC" => Ok(Self::Nfc),
b"NFKC" => Ok(Self::Nfkc),
b"NFD" => Ok(Self::Nfd),
b"NFKD" => Ok(Self::Nfkd),
// Any other spelling is reported as a value error.
_ => Err(vm.new_value_error("invalid normalization form")),
},
vm,
)
}
}
#[pymodule]
mod unicodedata {
use super::NormalizeForm::*;
use crate::vm::{
Py, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine,
builtins::{PyModule, PyStrRef},
function::OptionalArg,
};
use itertools::Itertools;
use rustpython_common::wtf8::{CodePoint, Wtf8Buf};
use rustpython_unicode::{NormalizeForm, UNICODE_VERSION, UnicodeVersion, data};
fn parse_normalize_form(form: PyStrRef, vm: &VirtualMachine) -> PyResult<NormalizeForm> {
form.to_str()
.ok_or_else(|| vm.new_value_error("invalid normalization form"))?
.parse()
.map_err(|()| vm.new_value_error("invalid normalization form"))
}
use ucd::{Codepoint, DecompositionType, EastAsianWidth, Number, NumericType};
use unic_char_property::EnumeratedCharProperty;
use unic_normal::StrNormalForm;
use unic_ucd_age::{Age, UNICODE_VERSION, UnicodeVersion};
use unic_ucd_bidi::BidiClass;
use unic_ucd_category::GeneralCategory;
use unicode_bidi_mirroring::is_mirroring;
pub(crate) fn module_exec(vm: &VirtualMachine, module: &Py<PyModule>) -> PyResult<()> {
__module_exec(vm, module);
// Add UCD methods as module-level functions
let ucd: PyObjectRef = PyUcd::new(data::Ucd::default()).into_ref(&vm.ctx).into();
let ucd: PyObjectRef = Ucd::new(UNICODE_VERSION).into_ref(&vm.ctx).into();
for attr in [
"category",
@@ -55,40 +80,56 @@ mod unicodedata {
#[pyattr]
#[pyclass(name = "UCD")]
#[derive(Debug, PyPayload)]
pub(super) struct PyUcd(data::Ucd);
pub(super) struct Ucd {
unic_version: UnicodeVersion,
}
impl PyUcd {
pub const fn new(ucd: data::Ucd) -> Self {
Self(ucd)
impl Ucd {
pub const fn new(unic_version: UnicodeVersion) -> Self {
Self { unic_version }
}
fn extract_char(character: PyStrRef, vm: &VirtualMachine) -> PyResult<CodePoint> {
character
fn check_age(&self, c: CodePoint) -> bool {
c.to_char()
.is_none_or(|c| Age::of(c).is_some_and(|age| age.actual() <= self.unic_version))
}
fn extract_char(
&self,
character: PyStrRef,
vm: &VirtualMachine,
) -> PyResult<Option<CodePoint>> {
let c = character
.as_wtf8()
.code_points()
.exactly_one()
.map_err(|_| vm.new_type_error("argument must be a Unicode character, not str"))
.map_err(|_| vm.new_type_error("argument must be an unicode character, not str"))?;
Ok(self.check_age(c).then_some(c))
}
}
#[pyclass(flags(DISALLOW_INSTANTIATION))]
impl PyUcd {
impl Ucd {
#[pymethod]
fn category(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult<String> {
Ok(self
.0
.category(Self::extract_char(character, vm)?.to_u32())
.extract_char(character, vm)?
.map_or(GeneralCategory::Unassigned, |c| {
c.to_char()
.map_or(GeneralCategory::Surrogate, GeneralCategory::of)
})
.abbr_name()
.to_owned())
}
#[pymethod]
fn lookup(&self, name: PyStrRef, vm: &VirtualMachine) -> PyResult<String> {
if let Some(name_str) = name.to_str()
&& let Some(character) = self.0.lookup(name_str)
&& let Some(character) = unicode_names2::character(name_str)
&& self.check_age(character.into())
{
return Ok(char::from_u32(character)
.expect("unicode_names2 only returns Unicode scalar values")
.to_string());
return Ok(character.to_string());
}
Err(vm.new_key_error(
vm.ctx
@@ -104,8 +145,13 @@ mod unicodedata {
default: OptionalArg<PyObjectRef>,
vm: &VirtualMachine,
) -> PyResult {
if let Some(name) = self.0.name(Self::extract_char(character, vm)?.to_u32()) {
return Ok(vm.ctx.new_str(name).into());
let c = self.extract_char(character, vm)?;
if let Some(c) = c
&& self.check_age(c)
&& let Some(name) = c.to_char().and_then(unicode_names2::name)
{
return Ok(vm.ctx.new_str(name.to_string()).into());
}
default.ok_or_else(|| vm.new_value_error("no such name"))
}
@@ -116,9 +162,14 @@ mod unicodedata {
character: PyStrRef,
vm: &VirtualMachine,
) -> PyResult<&'static str> {
Ok(self
.0
.bidirectional(Self::extract_char(character, vm)?.to_u32()))
let bidi = match self.extract_char(character, vm)? {
Some(c) => c
.to_char()
.map_or(BidiClass::LeftToRight, BidiClass::of)
.abbr_name(),
None => "",
};
Ok(bidi)
}
/// NOTE: This function uses 9.0.0 database instead of 3.2.0
@@ -129,51 +180,76 @@ mod unicodedata {
vm: &VirtualMachine,
) -> PyResult<&'static str> {
Ok(self
.0
.east_asian_width(Self::extract_char(character, vm)?.to_u32()))
.extract_char(character, vm)?
.and_then(|c| c.to_char())
.map_or(EastAsianWidth::Neutral, |c| c.east_asian_width())
.abbr_name())
}
#[pymethod]
fn normalize(
&self,
form: PyStrRef,
unistr: PyStrRef,
vm: &VirtualMachine,
) -> PyResult<Wtf8Buf> {
Ok(self
.0
.normalize(parse_normalize_form(form, vm)?, unistr.as_wtf8()))
fn normalize(&self, form: super::NormalizeForm, unistr: PyStrRef) -> PyResult<Wtf8Buf> {
let text = unistr.as_wtf8();
let normalized_text = match form {
Nfc => text.map_utf8(|s| s.nfc()).collect(),
Nfkc => text.map_utf8(|s| s.nfkc()).collect(),
Nfd => text.map_utf8(|s| s.nfd()).collect(),
Nfkd => text.map_utf8(|s| s.nfkd()).collect(),
};
Ok(normalized_text)
}
#[pymethod]
fn is_normalized(
&self,
form: PyStrRef,
unistr: PyStrRef,
vm: &VirtualMachine,
) -> PyResult<bool> {
Ok(self
.0
.is_normalized(parse_normalize_form(form, vm)?, unistr.as_wtf8()))
fn is_normalized(&self, form: super::NormalizeForm, unistr: PyStrRef) -> PyResult<bool> {
let text = unistr.as_wtf8();
let normalized: Wtf8Buf = match form {
Nfc => text.map_utf8(|s| s.nfc()).collect(),
Nfkc => text.map_utf8(|s| s.nfkc()).collect(),
Nfd => text.map_utf8(|s| s.nfd()).collect(),
Nfkd => text.map_utf8(|s| s.nfkd()).collect(),
};
Ok(text == &*normalized)
}
#[pymethod]
fn mirrored(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult<i32> {
Ok(self.0.mirrored(Self::extract_char(character, vm)?.to_u32()) as i32)
match self.extract_char(character, vm)? {
Some(c) => {
if let Some(ch) = c.to_char() {
// Check if the character is mirrored in bidirectional text using Unicode standard
Ok(if is_mirroring(ch) { 1 } else { 0 })
} else {
Ok(0)
}
}
None => Ok(0),
}
}
#[pymethod]
fn combining(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult<u8> {
fn combining(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult<i32> {
Ok(self
.0
.combining(Self::extract_char(character, vm)?.to_u32()))
.extract_char(character, vm)?
.and_then(|c| c.to_char())
.map_or(0, |ch| ch.canonical_combining_class() as i32))
}
#[pymethod]
fn decomposition(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult<String> {
Ok(self
.0
.decomposition(Self::extract_char(character, vm)?.to_u32()))
let ch = match self.extract_char(character, vm)?.and_then(|c| c.to_char()) {
Some(ch) => ch,
None => return Ok(String::new()),
};
let chars: Vec<char> = ch.decomposition_map().collect();
// If decomposition maps to just the character itself, there's no decomposition
if chars.len() == 1 && chars[0] == ch {
return Ok(String::new());
}
let hex_parts = chars.iter().map(|c| format!("{:04X}", *c as u32)).join(" ");
let tag = match ch.decomposition_type() {
Some(DecompositionType::Canonical) | None => return Ok(hex_parts),
Some(dt) => decomposition_type_tag(dt),
};
Ok(format!("<{tag}> {hex_parts}"))
}
#[pymethod]
@@ -183,8 +259,15 @@ mod unicodedata {
default: OptionalArg<PyObjectRef>,
vm: &VirtualMachine,
) -> PyResult {
if let Some(value) = self.0.digit(Self::extract_char(character, vm)?.to_u32()) {
return Ok(vm.ctx.new_int(value).into());
let ch = self.extract_char(character, vm)?.and_then(|c| c.to_char());
if let Some(ch) = ch
&& matches!(
ch.numeric_type(),
Some(NumericType::Decimal) | Some(NumericType::Digit)
)
&& let Some(Number::Integer(n)) = ch.numeric_value()
{
return Ok(vm.ctx.new_int(n).into());
}
default.ok_or_else(|| vm.new_value_error("not a digit"))
}
@@ -196,8 +279,12 @@ mod unicodedata {
default: OptionalArg<PyObjectRef>,
vm: &VirtualMachine,
) -> PyResult {
if let Some(value) = self.0.decimal(Self::extract_char(character, vm)?.to_u32()) {
return Ok(vm.ctx.new_int(value).into());
let ch = self.extract_char(character, vm)?.and_then(|c| c.to_char());
if let Some(ch) = ch
&& ch.numeric_type() == Some(NumericType::Decimal)
&& let Some(Number::Integer(n)) = ch.numeric_value()
{
return Ok(vm.ctx.new_int(n).into());
}
default.ok_or_else(|| vm.new_value_error("not a decimal"))
}
@@ -209,29 +296,75 @@ mod unicodedata {
default: OptionalArg<PyObjectRef>,
vm: &VirtualMachine,
) -> PyResult {
if let Some(value) = self.0.numeric(Self::extract_char(character, vm)?.to_u32()) {
let value = match value {
data::NumericValue::Integer(n) => n as f64,
data::NumericValue::Rational(num, den) => num as f64 / den as f64,
};
return Ok(vm.ctx.new_float(value).into());
let ch = self.extract_char(character, vm)?.and_then(|c| c.to_char());
if let Some(ch) = ch {
match ch.numeric_value() {
Some(Number::Integer(n)) => {
return Ok(vm.ctx.new_float(n as f64).into());
}
Some(Number::Rational(num, den)) => {
return Ok(vm.ctx.new_float(num as f64 / den as f64).into());
}
None => {}
}
}
default.ok_or_else(|| vm.new_value_error("not a numeric character"))
}
#[pygetset]
fn unidata_version(&self) -> String {
self.0.unicode_version().to_string()
self.unic_version.to_string()
}
}
/// Returns the textual tag for a `DecompositionType`.
///
/// Every tag is the lower-case variant name except `Nobreak`, which is
/// spelled `"noBreak"`. The match is exhaustive over the variants listed.
fn decomposition_type_tag(dt: DecompositionType) -> &'static str {
match dt {
DecompositionType::Canonical => "canonical",
DecompositionType::Compat => "compat",
DecompositionType::Circle => "circle",
DecompositionType::Final => "final",
DecompositionType::Font => "font",
DecompositionType::Fraction => "fraction",
DecompositionType::Initial => "initial",
DecompositionType::Isolated => "isolated",
DecompositionType::Medial => "medial",
DecompositionType::Narrow => "narrow",
// Note the camel-case spelling of this one tag.
DecompositionType::Nobreak => "noBreak",
DecompositionType::Small => "small",
DecompositionType::Square => "square",
DecompositionType::Sub => "sub",
DecompositionType::Super => "super",
DecompositionType::Vertical => "vertical",
DecompositionType::Wide => "wide",
}
}
/// Extension trait that gives a value a short, static abbreviation.
trait EastAsianWidthAbbrName {
/// Returns the abbreviated name of `self`.
fn abbr_name(&self) -> &'static str;
}
/// Abbreviations for each `EastAsianWidth` variant.
impl EastAsianWidthAbbrName for EastAsianWidth {
fn abbr_name(&self) -> &'static str {
match self {
Self::Narrow => "Na",
Self::Wide => "W",
Self::Neutral => "N",
Self::Ambiguous => "A",
Self::FullWidth => "F",
Self::HalfWidth => "H",
}
}
}
#[pyattr]
fn ucd_3_2_0(vm: &VirtualMachine) -> PyRef<PyUcd> {
PyUcd::new(data::Ucd::new(UnicodeVersion {
major: 3,
minor: 2,
micro: 0,
}))
fn ucd_3_2_0(vm: &VirtualMachine) -> PyRef<Ucd> {
Ucd {
unic_version: UnicodeVersion {
major: 3,
minor: 2,
micro: 0,
},
}
.into_ref(&vm.ctx)
}

View File

@@ -1,29 +0,0 @@
[package]
name = "rustpython-unicode"
description = "Shared Unicode semantics and data for RustPython and related Python tooling."
version.workspace = true
authors.workspace = true
edition.workspace = true
rust-version.workspace = true
repository.workspace = true
license.workspace = true
[features]
default = ["std", "casefold"]
std = []
casefold = ["std", "dep:caseless"]
[dependencies]
rustpython-wtf8 = { workspace = true }
icu_normalizer = { workspace = true }
icu_properties = { workspace = true }
itertools = { workspace = true }
unicode-casing = { workspace = true }
unicode_names2 = { version = "2.0.0", default-features = false, features = ["no_std"] }
unic-ucd-age = { workspace = true }
ucd = "0.1.1"
caseless = { version = "0.2.2", optional = true }
[lints]
workspace = true

View File

@@ -1,111 +0,0 @@
#[cfg(feature = "casefold")]
use alloc::string::String;
#[cfg(feature = "casefold")]
use rustpython_wtf8::Wtf8Chunk;
use rustpython_wtf8::{Wtf8, Wtf8Buf};
use unicode_casing::CharExt;
use crate::char_from_codepoint;
/// Fixed-capacity result of a case conversion: up to three code points.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CaseMapping {
    /// Number of leading entries of `codepoints` that are meaningful.
    len: u8,
    /// Backing storage for the mapped code points.
    codepoints: [u32; 3],
}

impl CaseMapping {
    /// Builds a one-element mapping that maps `cp` to itself.
    pub const fn identity(cp: u32) -> Self {
        Self {
            codepoints: [cp, 0, 0],
            len: 1,
        }
    }

    /// Returns the first mapped code point, or `None` when the mapping is empty.
    pub const fn first(self) -> Option<u32> {
        match self.len {
            0 => None,
            _ => Some(self.codepoints[0]),
        }
    }

    /// Iterates over the `len` meaningful code points, in order.
    pub fn iter(self) -> impl Iterator<Item = u32> {
        let count = usize::from(self.len);
        IntoIterator::into_iter(self.codepoints).take(count)
    }
}
/// Packs at most the first three chars of `chars` into a `CaseMapping`.
///
/// Any further items of the iterator are not consumed.
fn mapping_from_chars(chars: impl Iterator<Item = char>) -> CaseMapping {
    let mut codepoints = [0u32; 3];
    let mut len = 0u8;
    // Zipping against the slots stops after the array is full.
    for (slot, ch) in codepoints.iter_mut().zip(chars) {
        *slot = u32::from(ch);
        len += 1;
    }
    CaseMapping { len, codepoints }
}
/// Builds a `CaseMapping` from the chars of `text` via `mapping_from_chars`.
#[cfg(feature = "casefold")]
fn mapping_from_string(text: String) -> CaseMapping {
mapping_from_chars(text.chars())
}
/// Lowercase mapping of a single code point.
///
/// When `cp` cannot be converted to a `char`, the identity mapping is returned.
pub fn to_lowercase(cp: u32) -> CaseMapping {
    match char_from_codepoint(cp) {
        Some(ch) => mapping_from_chars(ch.to_lowercase()),
        None => CaseMapping::identity(cp),
    }
}
/// Uppercase mapping of a single code point.
///
/// When `cp` cannot be converted to a `char`, the identity mapping is returned.
pub fn to_uppercase(cp: u32) -> CaseMapping {
    match char_from_codepoint(cp) {
        Some(ch) => mapping_from_chars(ch.to_uppercase()),
        None => CaseMapping::identity(cp),
    }
}
/// Titlecase mapping of a single code point.
///
/// When `cp` cannot be converted to a `char`, the identity mapping is returned.
pub fn to_titlecase(cp: u32) -> CaseMapping {
    match char_from_codepoint(cp) {
        Some(ch) => mapping_from_chars(ch.to_titlecase()),
        None => CaseMapping::identity(cp),
    }
}
/// Lowercases a WTF-8 string, applying `char::to_lowercase` to each char of
/// the text that `map_utf8` hands to the closure.
// NOTE(review): presumably `map_utf8` leaves unpaired surrogates untouched — confirm.
pub fn to_lowercase_wtf8(text: &Wtf8) -> Wtf8Buf {
text.map_utf8(|s| s.chars().flat_map(char::to_lowercase))
.collect()
}
/// Uppercases a WTF-8 string, applying `char::to_uppercase` to each char of
/// the text that `map_utf8` hands to the closure.
// NOTE(review): presumably `map_utf8` leaves unpaired surrogates untouched — confirm.
pub fn to_uppercase_wtf8(text: &Wtf8) -> Wtf8Buf {
text.map_utf8(|s| s.chars().flat_map(char::to_uppercase))
.collect()
}
/// Case-folds a single code point using `caseless::default_case_fold_str`.
///
/// When `cp` cannot be converted to a `char`, the identity mapping is returned.
#[cfg(feature = "casefold")]
pub fn casefold(cp: u32) -> CaseMapping {
    let Some(ch) = char_from_codepoint(cp) else {
        return CaseMapping::identity(cp);
    };
    // Encode the char into a stack buffer so it can be folded as a `&str`.
    let mut utf8 = [0; 4];
    let folded = caseless::default_case_fold_str(ch.encode_utf8(&mut utf8));
    mapping_from_string(folded)
}
/// Case-folds `text`; a direct wrapper around `caseless::default_case_fold_str`.
#[cfg(feature = "casefold")]
pub fn casefold_str(text: &str) -> String {
caseless::default_case_fold_str(text)
}
/// Case-folds a WTF-8 string chunk by chunk and concatenates the results.
#[cfg(feature = "casefold")]
pub fn casefold_wtf8(text: &Wtf8) -> Wtf8Buf {
text.chunks()
.map(|chunk| match chunk {
// UTF-8 chunks are folded with `casefold_str`.
Wtf8Chunk::Utf8(s) => Wtf8Buf::from_string(casefold_str(s)),
// Surrogate chunks are copied through as-is.
Wtf8Chunk::Surrogate(c) => Wtf8Buf::from(c),
})
.collect()
}

View File

@@ -1,67 +0,0 @@
use icu_properties::props::{BidiClass, EnumeratedProperty, GeneralCategory};
use ucd::{Codepoint, NumericType};
use crate::{char_from_codepoint, is_surrogate};
/// Returns the general category of `cp`.
///
/// Surrogate code points report `Surrogate`; any other value that cannot be
/// converted to a `char` reports `Unassigned`.
pub fn general_category(cp: u32) -> GeneralCategory {
    if is_surrogate(cp) {
        return GeneralCategory::Surrogate;
    }
    match char_from_codepoint(cp) {
        Some(ch) => GeneralCategory::for_char(ch),
        None => GeneralCategory::Unassigned,
    }
}
/// `true` when `cp` converts to a `char` for which `char::is_alphabetic` holds.
pub fn is_alpha(cp: u32) -> bool {
    matches!(char_from_codepoint(cp), Some(ch) if ch.is_alphabetic())
}
/// `true` when `cp` converts to a `char` for which `char::is_alphanumeric` holds.
pub fn is_alnum(cp: u32) -> bool {
    matches!(char_from_codepoint(cp), Some(ch) if ch.is_alphanumeric())
}
/// `true` when the general category of `cp` is `DecimalNumber`.
pub fn is_decimal(cp: u32) -> bool {
matches!(general_category(cp), GeneralCategory::DecimalNumber)
}
/// `true` when `cp` is a `char` whose numeric type is `Decimal` or `Digit`.
pub fn is_digit(cp: u32) -> bool {
    let Some(ch) = char_from_codepoint(cp) else {
        return false;
    };
    matches!(
        ch.numeric_type(),
        Some(NumericType::Decimal | NumericType::Digit)
    )
}
/// `true` when `cp` is a `char` that has a numeric value.
pub fn is_numeric(cp: u32) -> bool {
    match char_from_codepoint(cp) {
        Some(ch) => ch.numeric_value().is_some(),
        None => false,
    }
}
/// Whitespace test based on Unicode properties.
///
/// A code point qualifies when it is a `char` and either its general category
/// is `SpaceSeparator` or its bidi class is `WhiteSpace`, `ParagraphSeparator`
/// or `SegmentSeparator`.
pub fn is_space(cp: u32) -> bool {
    let Some(ch) = char_from_codepoint(cp) else {
        return false;
    };
    if matches!(general_category(cp), GeneralCategory::SpaceSeparator) {
        return true;
    }
    matches!(
        BidiClass::for_char(ch),
        BidiClass::WhiteSpace | BidiClass::ParagraphSeparator | BidiClass::SegmentSeparator
    )
}
/// Python's `str.isprintable()` semantics: the ASCII space (U+0020) is
/// printable, and everything else defers to `is_repr_printable`.
pub fn is_printable(cp: u32) -> bool {
    match cp {
        0x20 => true,
        _ => is_repr_printable(cp),
    }
}
/// Repr/escape printable semantics, which exclude all Unicode space separators.
///
/// Returns `false` when the general category of `cp` is one of the categories
/// listed below (separators, control, format, surrogate, private use,
/// unassigned) and `true` for every other category.
pub fn is_repr_printable(cp: u32) -> bool {
!matches!(
general_category(cp),
GeneralCategory::SpaceSeparator
| GeneralCategory::LineSeparator
| GeneralCategory::ParagraphSeparator
| GeneralCategory::Control
| GeneralCategory::Format
| GeneralCategory::Surrogate
| GeneralCategory::PrivateUse
| GeneralCategory::Unassigned
)
}

View File

@@ -1,230 +0,0 @@
use alloc::{format, string::String, vec::Vec};
use icu_properties::{
CodePointSetData,
props::{
BidiClass, BidiMirrored, CanonicalCombiningClass, EastAsianWidth, EnumeratedProperty,
NamedEnumeratedProperty,
},
};
use itertools::Itertools;
use ucd::{Codepoint, DecompositionType, Number, NumericType};
use unic_ucd_age::{Age, UNICODE_VERSION, UnicodeVersion};
use crate::{char_from_codepoint, classify, is_surrogate};
/// Numeric value attached to a code point, as returned by `numeric`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum NumericValue {
/// A whole number.
Integer(i64),
/// A fraction stored as (numerator, denominator).
Rational(i64, i64),
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Ucd {
unic_version: UnicodeVersion,
}
impl Default for Ucd {
fn default() -> Self {
Self::new(UNICODE_VERSION)
}
}
impl Ucd {
pub const fn new(unic_version: UnicodeVersion) -> Self {
Self { unic_version }
}
pub const fn unicode_version(&self) -> UnicodeVersion {
self.unic_version
}
pub fn category(&self, cp: u32) -> &'static str {
if self.contains(cp) {
category(cp)
} else {
"Cn"
}
}
pub fn lookup(&self, name: &str) -> Option<u32> {
let cp = lookup(name)?;
self.contains(cp).then_some(cp)
}
pub fn name(&self, cp: u32) -> Option<String> {
self.contains(cp).then(|| name(cp)).flatten()
}
pub fn bidirectional(&self, cp: u32) -> &'static str {
if self.contains(cp) {
bidirectional(cp)
} else {
""
}
}
pub fn east_asian_width(&self, cp: u32) -> &'static str {
if self.contains(cp) {
east_asian_width(cp)
} else {
"N"
}
}
pub fn normalize(
&self,
form: crate::NormalizeForm,
text: &rustpython_wtf8::Wtf8,
) -> rustpython_wtf8::Wtf8Buf {
crate::normalize::normalize(form, text)
}
pub fn is_normalized(&self, form: crate::NormalizeForm, text: &rustpython_wtf8::Wtf8) -> bool {
crate::normalize::is_normalized(form, text)
}
pub fn mirrored(&self, cp: u32) -> bool {
self.contains(cp) && mirrored(cp)
}
pub fn combining(&self, cp: u32) -> u8 {
if self.contains(cp) { combining(cp) } else { 0 }
}
pub fn decomposition(&self, cp: u32) -> String {
if self.contains(cp) {
decomposition(cp)
} else {
String::new()
}
}
pub fn digit(&self, cp: u32) -> Option<u32> {
self.contains(cp).then(|| digit(cp)).flatten()
}
pub fn decimal(&self, cp: u32) -> Option<u32> {
self.contains(cp).then(|| decimal(cp)).flatten()
}
pub fn numeric(&self, cp: u32) -> Option<NumericValue> {
self.contains(cp).then(|| numeric(cp)).flatten()
}
fn contains(&self, cp: u32) -> bool {
is_assigned_in_version(cp, self.unic_version)
}
}
/// Reports whether `cp` counts as assigned in `version`.
///
/// Surrogates are always accepted. Any other value must convert to a `char`
/// whose `Age` is known and no newer than `version`.
pub fn is_assigned_in_version(cp: u32, version: UnicodeVersion) -> bool {
    if is_surrogate(cp) {
        return true;
    }
    let Some(ch) = char_from_codepoint(cp) else {
        return false;
    };
    matches!(Age::of(ch), Some(age) if age.actual() <= version)
}
/// Short name of the general category of `cp`, as computed by
/// `classify::general_category`.
pub fn category(cp: u32) -> &'static str {
classify::general_category(cp).short_name()
}
/// Looks up a character by name via `unicode_names2::character` and returns
/// its code point, or `None` when the name is not found.
pub fn lookup(name: &str) -> Option<u32> {
unicode_names2::character(name).map(u32::from)
}
/// Returns the character name of `cp` as an owned string.
///
/// `None` when `cp` is not a `char` or `unicode_names2` has no name for it.
pub fn name(cp: u32) -> Option<String> {
    let ch = char_from_codepoint(cp)?;
    let pieces = unicode_names2::name(ch)?;
    Some(pieces.collect())
}
/// Short name of the bidi class of `cp`.
///
/// Values that are not a `char` are reported as `LeftToRight`.
pub fn bidirectional(cp: u32) -> &'static str {
    let class = match char_from_codepoint(cp) {
        Some(ch) => BidiClass::for_char(ch),
        None => BidiClass::LeftToRight,
    };
    class.short_name()
}
/// Short name of the East Asian width of `cp`.
///
/// Values that are not a `char` are reported as `Neutral`.
pub fn east_asian_width(cp: u32) -> &'static str {
    let width = match char_from_codepoint(cp) {
        Some(ch) => EastAsianWidth::for_char(ch),
        None => EastAsianWidth::Neutral,
    };
    width.short_name()
}
/// `true` when `cp` is a `char` contained in the `BidiMirrored` set.
pub fn mirrored(cp: u32) -> bool {
    let Some(ch) = char_from_codepoint(cp) else {
        return false;
    };
    CodePointSetData::new::<BidiMirrored>().contains(ch)
}
/// Canonical combining class of `cp` as a number; `0` when `cp` is not a `char`.
pub fn combining(cp: u32) -> u8 {
    match char_from_codepoint(cp) {
        Some(ch) => CanonicalCombiningClass::for_char(ch).to_icu4c_value(),
        None => 0,
    }
}
/// Formats the decomposition mapping of `cp`.
///
/// The result is the mapped code points as upper-case hex (at least four
/// digits each) joined by spaces. Non-canonical decompositions are prefixed
/// with `<tag> `. An empty string is returned when `cp` is not a `char` or
/// the mapping is just the character itself.
pub fn decomposition(cp: u32) -> String {
    let Some(ch) = char_from_codepoint(cp) else {
        return String::new();
    };
    let mapped: Vec<char> = ch.decomposition_map().collect();
    // A mapping consisting only of the character itself means "no decomposition".
    if mapped == [ch] {
        return String::new();
    }
    let hex_parts = mapped
        .iter()
        .map(|&c| format!("{:04X}", c as u32))
        .join(" ");
    match ch.decomposition_type() {
        None | Some(DecompositionType::Canonical) => hex_parts,
        Some(tagged) => format!("<{}> {hex_parts}", decomposition_type_tag(tagged)),
    }
}
/// Digit value of `cp`.
///
/// `Some` only when the numeric type is `Decimal` or `Digit`, the numeric
/// value is an integer, and that integer fits in a `u32`.
pub fn digit(cp: u32) -> Option<u32> {
    let ch = char_from_codepoint(cp)?;
    match (ch.numeric_type(), ch.numeric_value()) {
        (
            Some(NumericType::Decimal | NumericType::Digit),
            Some(Number::Integer(value)),
        ) => u32::try_from(value).ok(),
        _ => None,
    }
}
/// Decimal value of `cp`.
///
/// `Some` only when the numeric type is `Decimal`, the numeric value is an
/// integer, and that integer fits in a `u32`.
pub fn decimal(cp: u32) -> Option<u32> {
    let ch = char_from_codepoint(cp)?;
    match (ch.numeric_type(), ch.numeric_value()) {
        (Some(NumericType::Decimal), Some(Number::Integer(value))) => u32::try_from(value).ok(),
        _ => None,
    }
}
/// Numeric value of `cp`, converted to this crate's `NumericValue`.
///
/// `None` when `cp` is not a `char` or carries no numeric value.
pub fn numeric(cp: u32) -> Option<NumericValue> {
    let ch = char_from_codepoint(cp)?;
    let converted = match ch.numeric_value()? {
        Number::Integer(whole) => NumericValue::Integer(whole),
        Number::Rational(num, den) => NumericValue::Rational(num.into(), den.into()),
    };
    Some(converted)
}
/// Returns the textual tag for a `DecompositionType`.
///
/// Every tag is the lower-case variant name except `Nobreak`, which is
/// spelled `"noBreak"`. The match is exhaustive over the variants listed.
fn decomposition_type_tag(dt: DecompositionType) -> &'static str {
match dt {
DecompositionType::Canonical => "canonical",
DecompositionType::Compat => "compat",
DecompositionType::Circle => "circle",
DecompositionType::Final => "final",
DecompositionType::Font => "font",
DecompositionType::Fraction => "fraction",
DecompositionType::Initial => "initial",
DecompositionType::Isolated => "isolated",
DecompositionType::Medial => "medial",
DecompositionType::Narrow => "narrow",
// Note the camel-case spelling of this one tag.
DecompositionType::Nobreak => "noBreak",
DecompositionType::Small => "small",
DecompositionType::Square => "square",
DecompositionType::Sub => "sub",
DecompositionType::Super => "super",
DecompositionType::Vertical => "vertical",
DecompositionType::Wide => "wide",
}
}

View File

@@ -1,27 +0,0 @@
use icu_properties::props::{BinaryProperty, XidContinue, XidStart};
use crate::char_from_codepoint;
/// `true` when `cp` is a `char` with the `XID_Start` property.
pub fn is_xid_start(cp: u32) -> bool {
    match char_from_codepoint(cp) {
        Some(ch) => XidStart::for_char(ch),
        None => false,
    }
}
/// `true` when `cp` is a `char` with the `XID_Continue` property.
pub fn is_xid_continue(cp: u32) -> bool {
    match char_from_codepoint(cp) {
        Some(ch) => XidContinue::for_char(ch),
        None => false,
    }
}
/// `true` when `cp` may begin a Python identifier: an underscore, or any
/// code point accepted by `is_xid_start`.
pub fn is_python_identifier_start(cp: u32) -> bool {
cp == '_' as u32 || is_xid_start(cp)
}
/// `true` when `cp` may appear after the first character of a Python
/// identifier; this is exactly `is_xid_continue`.
pub fn is_python_identifier_continue(cp: u32) -> bool {
is_xid_continue(cp)
}
/// Checks whether `text` is a valid Python identifier.
///
/// The first char must satisfy `is_python_identifier_start` and every
/// following char `is_python_identifier_continue`. The empty string is not
/// an identifier.
pub fn is_python_identifier(text: &str) -> bool {
    let mut rest = text.chars();
    match rest.next() {
        Some(first) if is_python_identifier_start(first as u32) => {
            rest.all(|ch| is_python_identifier_continue(ch as u32))
        }
        _ => false,
    }
}

View File

@@ -1,77 +0,0 @@
#![cfg_attr(not(feature = "std"), no_std)]
extern crate alloc;
pub mod case;
pub mod classify;
pub mod data;
pub mod identifier;
pub mod normalize;
pub mod regex;
pub use normalize::NormalizeForm;
pub use unic_ucd_age::{UNICODE_VERSION, UnicodeVersion};
use core::char;
/// Converts a raw code point to a `char`; `None` when `char::from_u32`
/// rejects the value.
pub(crate) fn char_from_codepoint(cp: u32) -> Option<char> {
char::from_u32(cp)
}
/// `true` when `cp` lies in the surrogate range U+D800..=U+DFFF.
pub(crate) const fn is_surrogate(cp: u32) -> bool {
    0xD800 <= cp && cp <= 0xDFFF
}
// Smoke tests that exercise one representative query from each module.
#[cfg(test)]
mod tests {
use alloc::vec::Vec;
use rustpython_wtf8::Wtf8Buf;
use crate::{NormalizeForm, case, classify, data, identifier, normalize, regex};
// Space is printable for `is_printable` but not for `is_repr_printable`.
#[test]
fn printable_and_repr_printable_follow_python_rules() {
assert!(classify::is_printable(' ' as u32));
assert!(!classify::is_repr_printable(' ' as u32));
assert!(!classify::is_printable('\n' as u32));
}
// Non-ASCII letters, digits and spaces are recognised by both modules.
#[test]
fn identifier_and_regex_predicates_share_unicode_tables() {
assert!(identifier::is_python_identifier_start('_' as u32));
assert!(identifier::is_python_identifier("유니코드"));
assert!(regex::is_unicode_word('가' as u32));
assert!(regex::is_unicode_digit('५' as u32));
assert!(regex::is_unicode_space('\u{3000}' as u32));
}
// A one-to-many case mapping and a compose/decompose round trip.
#[test]
fn case_and_normalization_helpers_support_full_mappings() {
let upper: Vec<_> = case::to_uppercase('ß' as u32).iter().collect();
assert_eq!(upper, vec!['S' as u32, 'S' as u32]);
let text = Wtf8Buf::from("e\u{301}");
assert_eq!(
normalize::normalize(NormalizeForm::Nfc, &text),
Wtf8Buf::from("é")
);
assert!(normalize::is_normalized(
NormalizeForm::Nfd,
&normalize::normalize(NormalizeForm::Nfd, &Wtf8Buf::from("é"))
));
}
// Spot checks of the free functions in `data`.
#[test]
fn unicode_data_queries_match_existing_unicodedata_behavior() {
assert_eq!(data::category('A' as u32), "Lu");
assert_eq!(data::category(0xD800), "Cs");
assert_eq!(data::lookup("SNOWMAN"), Some('☃' as u32));
assert_eq!(data::name('☃' as u32).as_deref(), Some("SNOWMAN"));
assert_eq!(data::decimal('५' as u32), Some(5));
assert_eq!(data::digit('²' as u32), Some(2));
assert_eq!(
data::numeric('⅓' as u32),
Some(data::NumericValue::Rational(1, 3))
);
}
}

View File

@@ -1,55 +0,0 @@
use core::str::FromStr;
use icu_normalizer::{ComposingNormalizerBorrowed, DecomposingNormalizerBorrowed};
use rustpython_wtf8::{Wtf8, Wtf8Buf};
/// The four Unicode normalization forms selectable by name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NormalizeForm {
    /// Parsed from the string `"NFC"`.
    Nfc,
    /// Parsed from the string `"NFKC"`.
    Nfkc,
    /// Parsed from the string `"NFD"`.
    Nfd,
    /// Parsed from the string `"NFKD"`.
    Nfkd,
}

impl FromStr for NormalizeForm {
    type Err = ();

    /// Parses the exact, upper-case form name (`"NFC"`, `"NFKC"`, `"NFD"`,
    /// `"NFKD"`). Any other input, including different casing, is `Err(())`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Name/variant table; the lookup is an exact string comparison.
        const FORMS: [(&str, NormalizeForm); 4] = [
            ("NFC", NormalizeForm::Nfc),
            ("NFKC", NormalizeForm::Nfkc),
            ("NFD", NormalizeForm::Nfd),
            ("NFKD", NormalizeForm::Nfkd),
        ];
        FORMS
            .iter()
            .find_map(|&(name, form)| (name == s).then_some(form))
            .ok_or(())
    }
}
/// Returns a new buffer holding `text` normalized to `form`.
///
/// Each arm builds the matching ICU normalizer (composing for NFC/NFKC,
/// decomposing for NFD/NFKD) and runs it over the characters that `map_utf8`
/// hands to the closure. The arms cannot share one binding because the
/// composing and decomposing normalizers are different types.
///
/// NOTE(review): `map_utf8` presumably applies the closure only to the valid
/// UTF-8 portions of the WTF-8 text and passes lone surrogates through
/// unchanged — confirm against `rustpython_wtf8`.
pub fn normalize(form: NormalizeForm, text: &Wtf8) -> Wtf8Buf {
match form {
NormalizeForm::Nfc => {
let normalizer = ComposingNormalizerBorrowed::new_nfc();
text.map_utf8(|s| normalizer.normalize_iter(s.chars()))
.collect()
}
NormalizeForm::Nfkc => {
let normalizer = ComposingNormalizerBorrowed::new_nfkc();
text.map_utf8(|s| normalizer.normalize_iter(s.chars()))
.collect()
}
NormalizeForm::Nfd => {
let normalizer = DecomposingNormalizerBorrowed::new_nfd();
text.map_utf8(|s| normalizer.normalize_iter(s.chars()))
.collect()
}
NormalizeForm::Nfkd => {
let normalizer = DecomposingNormalizerBorrowed::new_nfkd();
text.map_utf8(|s| normalizer.normalize_iter(s.chars()))
.collect()
}
}
}
/// Returns `true` when `text` is already in normalization form `form`.
///
/// Implemented by normalizing `text` into a fresh buffer and comparing it
/// with the input, so every call builds a normalized copy.
pub fn is_normalized(form: NormalizeForm, text: &Wtf8) -> bool {
let normalized = normalize(form, text);
text == &*normalized
}

View File

@@ -1,87 +0,0 @@
use crate::{case, classify};
const UNDERSCORE: u32 = '_' as u32;
/// ASCII whitespace test that, unlike `u8::is_ascii_whitespace`, also accepts
/// the vertical tab (`0x0B`).
const fn is_py_ascii_whitespace(byte: u8) -> bool {
    // Tab, line feed, vertical tab, form feed and carriage return form the
    // contiguous range 0x09..=0x0D; the space is the only value outside it.
    matches!(byte, b'\t'..=b'\r' | b' ')
}
/// ASCII-only word test: `true` for the underscore and for ASCII letters and
/// digits; `false` for every code point that does not fit in a byte.
pub fn is_word(cp: u32) -> bool {
    if cp == UNDERSCORE {
        return true;
    }
    u8::try_from(cp).is_ok_and(|byte| byte.is_ascii_alphanumeric())
}
pub fn is_space(cp: u32) -> bool {
u8::try_from(cp)
.map(is_py_ascii_whitespace)
.unwrap_or(false)
}
/// ASCII-only digit test: `true` exactly for `'0'..='9'`.
pub fn is_digit(cp: u32) -> bool {
    // Converting to `u8` first rejects wide code points instead of truncating.
    matches!(u8::try_from(cp), Ok(byte) if byte.is_ascii_digit())
}
/// Alphanumeric test for the locale-flavoured predicates. As implemented it
/// accepts ASCII letters and digits only; wider code points are rejected.
pub fn is_locale_alnum(cp: u32) -> bool {
    match u8::try_from(cp) {
        Ok(byte) => byte.is_ascii_alphanumeric(),
        Err(_) => false,
    }
}
/// Locale-flavoured word test: the underscore, or anything accepted by
/// `is_locale_alnum`.
pub fn is_locale_word(cp: u32) -> bool {
cp == UNDERSCORE || is_locale_alnum(cp)
}
/// ASCII line-break test: only the line feed (`'\n'`) counts.
pub const fn is_linebreak(cp: u32) -> bool {
cp == '\n' as u32
}
/// Lower-cases ASCII letters. Every other code point, including all values
/// above `0xFF`, is returned unchanged.
pub fn lower_ascii(cp: u32) -> u32 {
    match u8::try_from(cp) {
        Ok(byte) => u32::from(byte.to_ascii_lowercase()),
        Err(_) => cp,
    }
}
/// Locale-flavoured lower-casing; currently identical to `lower_ascii`.
pub fn lower_locale(cp: u32) -> u32 {
lower_ascii(cp)
}
/// Upper-cases ASCII letters. Every other code point, including all values
/// above `0xFF`, is returned unchanged.
pub fn upper_locale(cp: u32) -> u32 {
    u8::try_from(cp).map_or(cp, |byte| u32::from(byte.to_ascii_uppercase()))
}
/// Unicode digit test; delegates to `classify::is_decimal`.
pub fn is_unicode_digit(cp: u32) -> bool {
classify::is_decimal(cp)
}
/// Unicode whitespace test; delegates to `classify::is_space`.
pub fn is_unicode_space(cp: u32) -> bool {
classify::is_space(cp)
}
/// Unicode line-break test over a fixed set of ten code points:
/// `U+000A..=U+000D`, `U+001C..=U+001E`, `U+0085`, `U+2028` and `U+2029`.
pub const fn is_unicode_linebreak(cp: u32) -> bool {
    matches!(
        cp,
        0x000A..=0x000D | 0x001C..=0x001E | 0x0085 | 0x2028 | 0x2029
    )
}
/// Unicode alphanumeric test; delegates to `classify::is_alnum`.
pub fn is_unicode_alnum(cp: u32) -> bool {
classify::is_alnum(cp)
}
/// Unicode word test: the underscore, or anything accepted by
/// `is_unicode_alnum`.
pub fn is_unicode_word(cp: u32) -> bool {
cp == UNDERSCORE || is_unicode_alnum(cp)
}
/// Single-code-point lower-casing: takes the first element of the mapping
/// returned by `case::to_lowercase`, falling back to `cp` itself when
/// `first()` yields nothing. Any further elements of the mapping are ignored.
pub fn lower_unicode(cp: u32) -> u32 {
case::to_lowercase(cp).first().unwrap_or(cp)
}
/// Single-code-point upper-casing: takes the first element of the mapping
/// returned by `case::to_uppercase`, falling back to `cp` itself when
/// `first()` yields nothing. Any further elements of the mapping are ignored.
pub fn upper_unicode(cp: u32) -> u32 {
case::to_uppercase(cp).first().unwrap_or(cp)
}

View File

@@ -41,7 +41,6 @@ ruff_text_size = { workspace = true, optional = true }
rustpython-compiler-core = { workspace = true }
rustpython-literal = { workspace = true }
rustpython-sre_engine = { workspace = true }
rustpython-unicode = { workspace = true, features = ["casefold"] }
ascii = { workspace = true }
ahash = { workspace = true }
@@ -75,6 +74,7 @@ strum_macros = { workspace = true }
thiserror = { workspace = true }
memchr = { workspace = true }
caseless = "0.2.2"
flamer = { version = "0.5", optional = true }
half = "2"
psm = "0.1"
@@ -86,6 +86,10 @@ timsort = "0.1.2"
# TODO: use unic for this; needed for title case:
# https://github.com/RustPython/RustPython/pull/832#discussion_r275428939
unicode-casing = { workspace = true }
# update version all at the same time
unic-ucd-bidi = { workspace = true }
unic-ucd-category = { workspace = true }
unic-ucd-ident = { workspace = true }
[target.'cfg(unix)'.dependencies]
rustix = { workspace = true }
@@ -124,7 +128,6 @@ features = [
"Win32_System_Environment",
"Win32_System_IO",
"Win32_System_Ioctl",
"Win32_System_JobObjects",
"Win32_System_Kernel",
"Win32_System_LibraryLoader",
"Win32_System_Memory",

View File

@@ -396,10 +396,6 @@ pub trait AnyStr {
/// Zero-fills `self` to `width` and returns the result as bytes, delegating
/// the padding itself to `rustpython_common::str::zfill`.
fn py_zfill(&self, width: isize) -> Vec<u8> {
// A width that `to_usize` cannot represent is treated as zero.
let width = width.to_usize().unwrap_or(0);
let char_len = self.elements().count();
// `zfill` is handed a byte slice, so translate the requested width into a
// byte target: current byte length plus the number of missing elements.
// NOTE(review): assumes `elements()` yields characters rather than bytes for
// text types — confirm against the trait implementors.
let width = self
.bytes_len()
.saturating_add(width.saturating_sub(char_len));
rustpython_common::str::zfill(self.as_bytes(), width)
}

View File

@@ -215,15 +215,6 @@ impl PyByteArray {
size_of::<Self>() + self.borrow_buf().len() * size_of::<u8>()
}
/// `str()` slot for bytearray: emits the bytes warning, then returns the
/// object's repr built with its class name.
///
/// Panics with "expected bytearray" if `zelf` is not a `PyByteArray`. An error
/// returned by `warn_on_str` or `repr_with_name` is propagated.
#[pyslot]
fn slot_str(zelf: &PyObject, vm: &VirtualMachine) -> PyResult<PyStrRef> {
let zelf = zelf.downcast_ref::<Self>().expect("expected bytearray");
PyBytesInner::warn_on_str("str() on a bytearray instance", vm)?;
// Use the runtime class name so the repr reflects the instance's own class.
let class_name = zelf.class().name();
let repr = zelf.inner().repr_with_name(&class_name, vm)?;
Ok(vm.ctx.new_str(repr))
}
fn __add__(&self, other: ArgBytesLike) -> Self {
self.inner().add(&other.borrow_buf()).into()
}

View File

@@ -224,13 +224,6 @@ impl PyBytes {
size_of::<Self>() + self.len() * size_of::<u8>()
}
/// `str()` slot for bytes: emits the bytes warning, then returns the bytes
/// repr as a string.
///
/// Panics with "expected bytes" if `zelf` is not a `PyBytes`. An error
/// returned by `warn_on_str` or `repr_bytes` is propagated.
#[pyslot]
fn slot_str(zelf: &PyObject, vm: &VirtualMachine) -> PyResult<PyStrRef> {
let zelf = zelf.downcast_ref::<Self>().expect("expected bytes");
PyBytesInner::warn_on_str("str() on a bytes instance", vm)?;
Ok(vm.ctx.new_str(zelf.inner.repr_bytes(vm)?))
}
fn __add__(&self, other: ArgBytesLike) -> Vec<u8> {
self.inner.add(&other.borrow_buf())
}

View File

@@ -194,12 +194,6 @@ impl From<Literal> for PyObjectRef {
}
}
impl From<PyObjectRef> for Literal {
fn from(obj: PyObjectRef) -> Self {
Literal(obj)
}
}
fn borrow_obj_constant(obj: &PyObject) -> BorrowedConstant<'_, Literal> {
match_class!(match obj {
ref i @ super::int::PyInt => {
@@ -232,23 +226,6 @@ fn borrow_obj_constant(obj: &PyObject) -> BorrowedConstant<'_, Literal> {
}
super::singletons::PyNone => BorrowedConstant::None,
super::slice::PyEllipsis => BorrowedConstant::Ellipsis,
ref s @ super::slice::PySlice => {
// Constant pool slices always store Some() for start/step (even for None).
// Box::leak the array so it outlives the borrow. Leak is acceptable since
// constant pool objects live for the program's lifetime.
let start = s.start.clone().unwrap();
let stop = s.stop.clone();
let step = s.step.clone().unwrap();
let arr = Box::leak(Box::new([Literal(start), Literal(stop), Literal(step)]));
BorrowedConstant::Slice { elements: arr }
}
ref fs @ super::set::PyFrozenSet => {
// Box::leak the elements so they outlive the borrow. Leak is acceptable since
// constant pool objects live for the program's lifetime.
let elems: Vec<Literal> = fs.elements().into_iter().map(Literal).collect();
let elements = Box::leak(elems.into_boxed_slice());
BorrowedConstant::Frozenset { elements }
}
_ => panic!("unexpected payload for constant python value"),
})
}
@@ -300,30 +277,6 @@ impl ConstantBag for PyObjBag<'_> {
.collect();
ctx.new_tuple(elements).into()
}
BorrowedConstant::Slice { elements } => {
let [start, stop, step] = elements;
let start_obj = self.make_constant(start.borrow_constant()).0;
let stop_obj = self.make_constant(stop.borrow_constant()).0;
let step_obj = self.make_constant(step.borrow_constant()).0;
// Store as PySlice with Some() for all fields (even None values)
// so borrow_obj_constant can reference them.
use crate::builtins::PySlice;
PySlice {
start: Some(start_obj),
stop: stop_obj,
step: Some(step_obj),
}
.into_ref(ctx)
.into()
}
BorrowedConstant::Frozenset { elements: _ } => {
// Creating a frozenset requires VirtualMachine for element hashing.
// PyObjBag only has Context, so we cannot construct PyFrozenSet here.
// Frozenset constants from .pyc are handled by PyMarshalBag which has VM access.
unimplemented!(
"frozenset constant in PyObjBag::make_constant requires VirtualMachine"
)
}
BorrowedConstant::None => ctx.none(),
BorrowedConstant::Ellipsis => ctx.ellipsis.clone().into(),
};
@@ -680,38 +633,6 @@ impl Constructor for PyCode {
)],
> = vec![(loc, loc); instructions.len()].into_boxed_slice();
// Build localspluskinds with cell-local merging
let localspluskinds = {
use rustpython_compiler_core::bytecode::*;
let nlocals = varnames.len();
let ncells = cellvars.len();
let nfrees = freevars.len();
let numdropped = cellvars
.iter()
.filter(|cv| varnames.iter().any(|v| *v == **cv))
.count();
let nlocalsplus = nlocals + ncells - numdropped + nfrees;
let mut kinds = vec![0u8; nlocalsplus];
for kind in kinds.iter_mut().take(nlocals) {
*kind = CO_FAST_LOCAL;
}
let mut cell_numdropped = 0usize;
for (i, cv) in cellvars.iter().enumerate() {
let merged_idx = varnames.iter().position(|v| **v == **cv);
if let Some(local_idx) = merged_idx {
kinds[local_idx] |= CO_FAST_CELL;
cell_numdropped += 1;
} else {
kinds[nlocals + i - cell_numdropped] = CO_FAST_CELL;
}
}
let free_start = nlocals + ncells - numdropped;
for i in 0..nfrees {
kinds[free_start + i] = CO_FAST_FREE;
}
kinds.into_boxed_slice()
};
// Build the CodeObject
let code = CodeObject {
instructions,
@@ -729,12 +650,12 @@ impl Constructor for PyCode {
max_stackdepth: args.stacksize,
obj_name: vm.ctx.intern_str(args.name.as_wtf8()),
qualname: vm.ctx.intern_str(args.qualname.as_wtf8()),
cell2arg: None, // TODO: reuse `fn cell2arg`
constants,
names,
varnames,
cellvars,
freevars,
localspluskinds,
linetable: args.linetable.as_bytes().to_vec().into_boxed_slice(),
exceptiontable: args.exceptiontable.as_bytes().to_vec().into_boxed_slice(),
};
@@ -1316,7 +1237,7 @@ impl PyCode {
.collect(),
cellvars,
freevars,
localspluskinds: self.code.localspluskinds.clone(),
cell2arg: self.code.cell2arg.clone(),
linetable,
exceptiontable,
};
@@ -1331,34 +1252,22 @@ impl PyCode {
let idx = usize::try_from(opcode).map_err(|_| idx_err(vm))?;
let varnames_len = self.code.varnames.len();
// Non-parameter cells: cellvars that are NOT also in varnames
let nonparam_cellvars: Vec<_> = self
.code
.cellvars
.iter()
.filter(|s| {
let s_str: &str = s.as_ref();
!self.code.varnames.iter().any(|v| {
let v_str: &str = v.as_ref();
v_str == s_str
})
})
.collect();
let nonparam_len = nonparam_cellvars.len();
let cellvars_len = self.code.cellvars.len();
let name = if idx < varnames_len {
// Index in varnames (includes parameter cells)
// Index in varnames
self.code.varnames.get(idx).ok_or_else(|| idx_err(vm))?
} else if idx < varnames_len + nonparam_len {
// Index in non-parameter cellvars
*nonparam_cellvars
} else if idx < varnames_len + cellvars_len {
// Index in cellvars
self.code
.cellvars
.get(idx - varnames_len)
.ok_or_else(|| idx_err(vm))?
} else {
// Index in freevars
self.code
.freevars
.get(idx - varnames_len - nonparam_len)
.get(idx - varnames_len - cellvars_len)
.ok_or_else(|| idx_err(vm))?
};
Ok(name.to_object())

View File

@@ -64,7 +64,7 @@ pub struct PyFunction {
code: PyAtomicRef<PyCode>,
globals: PyDictRef,
builtins: PyObjectRef,
pub(crate) closure: Option<PyRef<PyTuple<PyCellRef>>>,
closure: Option<PyRef<PyTuple<PyCellRef>>>,
defaults_and_kwdefaults: PyMutex<(Option<PyTupleRef>, Option<PyDictRef>)>,
name: PyMutex<PyStrRef>,
qualname: PyMutex<PyStrRef>,
@@ -443,6 +443,13 @@ impl PyFunction {
}
}
if let Some(cell2arg) = code.cell2arg.as_deref() {
for (cell_idx, arg_idx) in cell2arg.iter().enumerate().filter(|(_, i)| **i != -1) {
let x = fastlocals[*arg_idx as usize].take();
frame.set_cell_contents(cell_idx, x);
}
}
Ok(())
}
@@ -718,6 +725,14 @@ impl Py<PyFunction> {
}
}
if let Some(cell2arg) = code.cell2arg.as_deref() {
let fastlocals = unsafe { frame.fastlocals_mut() };
for (cell_idx, arg_idx) in cell2arg.iter().enumerate().filter(|(_, i)| **i != -1) {
let x = fastlocals[*arg_idx as usize].take();
frame.set_cell_contents(cell_idx, x);
}
}
frame
}
@@ -765,7 +780,11 @@ pub(crate) fn datastack_frame_size_bytes_for_code(code: &Py<PyCode>) -> Option<u
{
return None;
}
let nlocalsplus = code.localspluskinds.len();
let nlocalsplus = code
.varnames
.len()
.checked_add(code.cellvars.len())?
.checked_add(code.freevars.len())?;
let capacity = nlocalsplus.checked_add(code.max_stackdepth as usize)?;
capacity.checked_mul(core::mem::size_of::<usize>())
}
@@ -1197,17 +1216,6 @@ impl GetAttr for PyBoundMethod {
}
}
/// Descriptor access on a bound method is the identity: the instance and
/// owner arguments are ignored and the method itself is returned.
impl GetDescriptor for PyBoundMethod {
fn descr_get(
zelf: PyObjectRef,
_obj: Option<PyObjectRef>,
_cls: Option<PyObjectRef>,
_vm: &VirtualMachine,
) -> PyResult {
Ok(zelf)
}
}
#[derive(FromArgs)]
pub struct PyBoundMethodNewArgs {
#[pyarg(positional)]
@@ -1222,14 +1230,8 @@ impl Constructor for PyBoundMethod {
fn py_new(
_cls: &Py<PyType>,
Self::Args { function, object }: Self::Args,
vm: &VirtualMachine,
_vm: &VirtualMachine,
) -> PyResult<Self> {
if !function.is_callable() {
return Err(vm.new_type_error("first argument must be callable".to_owned()));
}
if vm.is_none(&object) {
return Err(vm.new_type_error("instance must not be None".to_owned()));
}
Ok(Self::new(object, function))
}
}
@@ -1256,15 +1258,7 @@ impl PyBoundMethod {
}
#[pyclass(
with(
Callable,
Comparable,
Hashable,
GetAttr,
GetDescriptor,
Constructor,
Representable
),
with(Callable, Comparable, Hashable, GetAttr, Constructor, Representable),
flags(IMMUTABLETYPE, HAS_WEAKREF)
)]
impl PyBoundMethod {
@@ -1272,11 +1266,11 @@ impl PyBoundMethod {
fn __reduce__(
&self,
vm: &VirtualMachine,
) -> PyResult<(PyObjectRef, (PyObjectRef, PyObjectRef))> {
let builtins_getattr = vm.builtins.get_attr("getattr", vm)?;
) -> (Option<PyObjectRef>, (PyObjectRef, Option<PyObjectRef>)) {
let builtins_getattr = vm.builtins.get_attr("getattr", vm).ok();
let func_self = self.object.clone();
let func_name = self.function.get_attr("__name__", vm)?;
Ok((builtins_getattr, (func_self, func_name)))
let func_name = self.function.get_attr("__name__", vm).ok();
(builtins_getattr, (func_self, func_name))
}
#[pygetset]

View File

@@ -41,9 +41,11 @@ use rustpython_common::{
hash,
lock::PyMutex,
str::DeduceStrKind,
wtf8::{CodePoint, Wtf8, Wtf8Buf, Wtf8Concat},
wtf8::{CodePoint, Wtf8, Wtf8Buf, Wtf8Chunk, Wtf8Concat},
};
use unic_ucd_bidi::BidiClass;
use unic_ucd_category::GeneralCategory;
use unic_ucd_ident::{is_xid_continue, is_xid_start};
use unicode_casing::CharExt;
impl<'a> TryFromBorrowedObject<'a> for String {
@@ -695,7 +697,7 @@ impl PyStr {
match self.as_str_kind() {
PyKindStr::Ascii(s) => s.to_ascii_lowercase().into(),
PyKindStr::Utf8(s) => s.to_lowercase().into(),
PyKindStr::Wtf8(w) => rustpython_unicode::case::to_lowercase_wtf8(w).into(),
PyKindStr::Wtf8(w) => w.to_lowercase().into(),
}
}
@@ -703,9 +705,16 @@ impl PyStr {
#[pymethod]
fn casefold(&self) -> Self {
match self.as_str_kind() {
PyKindStr::Ascii(s) => rustpython_unicode::case::casefold_str(s.as_str()).into(),
PyKindStr::Utf8(s) => rustpython_unicode::case::casefold_str(s).into(),
PyKindStr::Wtf8(w) => rustpython_unicode::case::casefold_wtf8(w).into(),
PyKindStr::Ascii(s) => caseless::default_case_fold_str(s.as_str()).into(),
PyKindStr::Utf8(s) => caseless::default_case_fold_str(s).into(),
PyKindStr::Wtf8(w) => w
.chunks()
.map(|c| match c {
Wtf8Chunk::Utf8(s) => Wtf8Buf::from_string(caseless::default_case_fold_str(s)),
Wtf8Chunk::Surrogate(c) => Wtf8Buf::from(c),
})
.collect::<Wtf8Buf>()
.into(),
}
}
@@ -714,7 +723,7 @@ impl PyStr {
match self.as_str_kind() {
PyKindStr::Ascii(s) => s.to_ascii_uppercase().into(),
PyKindStr::Utf8(s) => s.to_uppercase().into(),
PyKindStr::Wtf8(w) => rustpython_unicode::case::to_uppercase_wtf8(w).into(),
PyKindStr::Wtf8(w) => w.to_uppercase().into(),
}
}
@@ -957,7 +966,7 @@ impl PyStr {
#[pymethod]
fn isdecimal(&self) -> bool {
!self.data.is_empty()
&& self.char_all(|c| rustpython_unicode::classify::is_decimal(c as u32))
&& self.char_all(|c| GeneralCategory::of(c) == GeneralCategory::DecimalNumber)
}
fn __mod__(&self, values: PyObjectRef, vm: &VirtualMachine) -> PyResult<Wtf8Buf> {
@@ -1077,12 +1086,17 @@ impl PyStr {
#[pymethod]
fn isprintable(&self) -> bool {
self.char_all(|c| rustpython_unicode::classify::is_printable(c as u32))
self.char_all(|c| c == '\u{0020}' || rustpython_literal::char::is_printable(c))
}
#[pymethod]
fn isspace(&self) -> bool {
!self.data.is_empty() && self.char_all(|c| rustpython_unicode::classify::is_space(c as u32))
use unic_ucd_bidi::bidi_class::abbr_names::*;
!self.data.is_empty()
&& self.char_all(|c| {
GeneralCategory::of(c) == GeneralCategory::SpaceSeparator
|| matches!(BidiClass::of(c), WS | B | S)
})
}
// Return true if all cased characters in the string are lowercase and there is at least one cased character, false otherwise.
@@ -1339,8 +1353,11 @@ impl PyStr {
#[pymethod]
pub fn isidentifier(&self) -> bool {
self.to_str()
.is_some_and(rustpython_unicode::identifier::is_python_identifier)
let Some(s) = self.to_str() else { return false };
let mut chars = s.chars();
let is_identifier_start = chars.next().is_some_and(|c| c == '_' || is_xid_start(c));
// a string is not an identifier if it has whitespace or starts with a number
is_identifier_start && chars.all(is_xid_continue)
}
// https://docs.python.org/3/library/stdtypes.html#str.translate

View File

@@ -7,7 +7,6 @@ See also [CPython source code.](https://github.com/python/cpython/blob/50b48572d
use super::{PyStr, PyType, PyTypeRef};
use crate::{
AsObject, Context, Py, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine,
builtins::function::PyCell,
class::PyClassImpl,
common::lock::PyRwLock,
function::{FuncArgs, IntoFuncArgs, OptionalArg},
@@ -87,33 +86,27 @@ impl Initializer for PySuper {
return Err(vm.new_runtime_error("super(): no arguments"));
}
// SAFETY: Frame is current and not concurrently mutated.
use rustpython_compiler_core::bytecode::CO_FAST_CELL;
let obj = unsafe { frame.fastlocals() }[0]
.clone()
.and_then(|val| {
// If slot 0 is a merged cell (LOCAL|CELL), extract value from cell
if frame
.code
.localspluskinds
.first()
.is_some_and(|&k| k & CO_FAST_CELL != 0)
{
val.downcast_ref::<PyCell>().and_then(|c| c.get())
.or_else(|| {
if let Some(cell2arg) = frame.code.cell2arg.as_deref() {
cell2arg[..frame.code.cellvars.len()]
.iter()
.enumerate()
.find(|(_, arg_idx)| **arg_idx == 0)
.and_then(|(cell_idx, _)| frame.get_cell_contents(cell_idx))
} else {
Some(val)
None
}
})
.ok_or_else(|| vm.new_runtime_error("super(): arg[0] deleted"))?;
let mut typ = None;
// Search for __class__ in freevars using localspluskinds
let nlocalsplus = frame.code.localspluskinds.len();
let nfrees = frame.code.freevars.len();
let free_start = nlocalsplus - nfrees;
for (i, var) in frame.code.freevars.iter().enumerate() {
if var.as_bytes() == b"__class__" {
let i = frame.code.cellvars.len() + i;
let class = frame
.get_cell_contents(free_start + i)
.get_cell_contents(i)
.ok_or_else(|| vm.new_runtime_error("super(): empty __class__ cell"))?;
typ = Some(class.downcast().map_err(|o| {
vm.new_type_error(format!(

View File

@@ -276,7 +276,6 @@ pub struct TypeSpecializationCache {
pub init: PyAtomicRef<Option<PyFunction>>,
pub getitem: PyAtomicRef<Option<PyFunction>>,
pub getitem_version: AtomicU32,
// Serialize cache writes/invalidation similar to CPython's BEGIN_TYPE_LOCK.
write_lock: PyMutex<()>,
retired: PyRwLock<Vec<PyObjectRef>>,
}
@@ -302,9 +301,6 @@ impl TypeSpecializationCache {
#[inline]
fn swap_init(&self, new_init: Option<PyRef<PyFunction>>, vm: Option<&VirtualMachine>) {
if let Some(vm) = vm {
// Keep replaced refs alive for the currently executing frame, matching
// CPython-style "old pointer remains valid during ongoing execution"
// without accumulating global retired refs.
self.init.swap_to_temporary_refs(new_init, vm);
return;
}
@@ -329,8 +325,6 @@ impl TypeSpecializationCache {
#[inline]
fn invalidate_for_type_modified(&self) {
let _guard = self.write_lock.lock();
// _spec_cache contract: type modification invalidates all cached
// specialization functions.
self.swap_init(None, None);
self.swap_getitem(None, None);
}
@@ -457,9 +451,15 @@ fn is_subtype_with_mro(a_mro: &[PyTypeRef], a: &Py<PyType>, b: &Py<PyType>) -> b
}
impl PyType {
/// Runs `f` while holding `vm.state.type_mutex` and returns its result; the
/// guard is released when this function returns.
///
/// NOTE(review): the type mutex is reportedly non-reentrant, so `f` must not
/// call anything that takes it again, and values whose drop can run arbitrary
/// code should be returned from `f` rather than dropped inside it — confirm.
#[inline]
fn with_type_lock<R>(vm: &VirtualMachine, f: impl FnOnce() -> R) -> R {
let _guard = vm.state.type_mutex.lock();
f()
}
/// Assign a fresh version tag. Returns 0 if the version counter has been
/// exhausted, in which case no new cache entries can be created.
pub fn assign_version_tag(&self) -> u32 {
fn assign_version_tag_inner(&self) -> u32 {
let v = self.tp_version_tag.load(Ordering::Acquire);
if v != 0 {
return v;
@@ -467,7 +467,7 @@ impl PyType {
// Assign versions to all direct bases first (MRO invariant).
for base in self.bases.read().iter() {
if base.assign_version_tag() == 0 {
if base.assign_version_tag_inner() == 0 {
return 0;
}
}
@@ -487,8 +487,23 @@ impl PyType {
}
}
/// Public entry point for version-tag assignment; forwards to
/// `assign_version_tag_inner` without taking the type lock itself.
pub fn assign_version_tag(&self) -> u32 {
self.assign_version_tag_inner()
}
/// Returns the type's version tag for use by a specialization, assigning one
/// under the type lock when the tag is still zero. The result may itself be
/// zero if `assign_version_tag_inner` returns zero.
pub(crate) fn version_for_specialization(&self, vm: &VirtualMachine) -> u32 {
    Self::with_type_lock(vm, || match self.tp_version_tag.load(Ordering::Acquire) {
        0 => self.assign_version_tag_inner(),
        assigned => assigned,
    })
}
/// Invalidate this type's version tag and cascade to all subclasses.
pub fn modified(&self) {
fn modified_inner(&self) {
if let Some(ext) = self.heaptype_ext.as_ref() {
ext.specialization_cache.invalidate_for_type_modified();
}
@@ -505,11 +520,15 @@ impl PyType {
let subclasses = self.subclasses.read();
for weak_ref in subclasses.iter() {
if let Some(sub) = weak_ref.upgrade() {
sub.downcast_ref::<PyType>().unwrap().modified();
sub.downcast_ref::<PyType>().unwrap().modified_inner();
}
}
}
/// Public entry point for type-modification invalidation; forwards to
/// `modified_inner` without taking the type lock itself.
pub fn modified(&self) {
self.modified_inner();
}
pub fn new_simple_heap(
name: &str,
base: &Py<PyType>,
@@ -898,6 +917,74 @@ impl PyType {
self.find_name_in_mro(attr_name)
}
/// CPython-style `_PyType_LookupRefAndVersion` equivalent for interned names.
/// Returns the observed lookup result and the type version used for the lookup.
///
/// Two phases:
/// 1. If the type already has a non-zero version tag, probe `TYPE_CACHE`
///    without taking the type lock and return on a validated hit.
/// 2. Otherwise (or on a miss) take the type lock, make sure a version tag is
///    assigned, walk the MRO uncached, and publish the result to the cache
///    when it is still valid to do so.
///
/// The returned version is `0` when no version tag could be assigned.
pub(crate) fn lookup_ref_and_version_interned(
&self,
name: &'static PyStrInterned,
vm: &VirtualMachine,
) -> (Option<PyObjectRef>, u32) {
let version = self.tp_version_tag.load(Ordering::Acquire);
if version != 0 {
let idx = type_cache_hash(version, name);
let entry = &TYPE_CACHE[idx];
// Interned names are compared by address, not by content.
let name_ptr = name as *const _ as *mut _;
// Lock-free read: every `continue` below restarts the read after
// `end_read` reports that it was not consistent; every `break` gives up and
// falls through to the locked path.
loop {
let seq1 = entry.begin_read();
let entry_version = entry.version.load(Ordering::Acquire);
let type_version = self.tp_version_tag.load(Ordering::Acquire);
// Miss: the slot belongs to another version or another name.
if entry_version != type_version
|| !core::ptr::eq(entry.name.load(Ordering::Relaxed), name_ptr)
{
break;
}
let ptr = entry.value.load(Ordering::Acquire);
if ptr.is_null() {
// A null value is a cached negative result ("name not found").
if entry.end_read(seq1) {
return (None, entry_version);
}
continue;
}
// NOTE(review): presumably this tries to take a new strong reference and
// returns `None` when the object can no longer be owned — confirm the
// contract of `try_to_owned_from_ptr`.
if let Some(cloned) = unsafe { PyObject::try_to_owned_from_ptr(ptr) } {
// Accept the hit only if the slot still holds the same pointer and the
// read is reported as consistent.
let same_ptr = core::ptr::eq(entry.value.load(Ordering::Relaxed), ptr);
if same_ptr && entry.end_read(seq1) {
return (Some(cloned), entry_version);
}
drop(cloned);
continue;
}
break;
}
}
// Slow path: resolve under the type lock.
Self::with_type_lock(vm, || {
let assigned = if self.tp_version_tag.load(Ordering::Acquire) == 0 {
self.assign_version_tag_inner()
} else {
self.tp_version_tag.load(Ordering::Acquire)
};
let result = self.find_name_in_mro_uncached(name);
// Publish only when a version exists, the cache is not being cleared, and
// the version did not change while the MRO was searched.
if assigned != 0
&& !TYPE_CACHE_CLEARING.load(Ordering::Acquire)
&& self.tp_version_tag.load(Ordering::Acquire) == assigned
{
let idx = type_cache_hash(assigned, name);
let entry = &TYPE_CACHE[idx];
let name_ptr = name as *const _ as *mut _;
entry.begin_write();
// Zero the version first so the slot cannot match any type while its
// value and name are being replaced; the real version is stored last.
entry.version.store(0, Ordering::Release);
let new_ptr = result.as_ref().map_or(core::ptr::null_mut(), |found| {
&**found as *const PyObject as *mut _
});
entry.value.store(new_ptr, Ordering::Relaxed);
entry.name.store(name_ptr, Ordering::Relaxed);
entry.version.store(assigned, Ordering::Release);
entry.end_write();
}
(result, assigned)
})
}
/// Cache __init__ for CALL_ALLOC_AND_ENTER_INIT specialization.
/// The cache is valid only when guarded by the type version check.
pub(crate) fn cache_init_for_specialization(
@@ -912,15 +999,17 @@ impl PyType {
if tp_version == 0 {
return false;
}
if self.tp_version_tag.load(Ordering::Acquire) != tp_version {
return false;
}
let _guard = ext.specialization_cache.write_lock.lock();
if self.tp_version_tag.load(Ordering::Acquire) != tp_version {
return false;
}
ext.specialization_cache.swap_init(Some(init), Some(vm));
true
Self::with_type_lock(vm, || {
if self.tp_version_tag.load(Ordering::Acquire) != tp_version {
return false;
}
let _guard = ext.specialization_cache.write_lock.lock();
if self.tp_version_tag.load(Ordering::Acquire) != tp_version {
return false;
}
ext.specialization_cache.swap_init(Some(init), Some(vm));
true
})
}
/// Read cached __init__ for CALL_ALLOC_AND_ENTER_INIT specialization.
@@ -954,26 +1043,27 @@ impl PyType {
if tp_version == 0 {
return false;
}
let _guard = ext.specialization_cache.write_lock.lock();
if self.tp_version_tag.load(Ordering::Acquire) != tp_version {
return false;
}
let func_version = getitem.get_version_for_current_state();
if func_version == 0 {
return false;
}
ext.specialization_cache
.swap_getitem(Some(getitem), Some(vm));
ext.specialization_cache
.getitem_version
.store(func_version, Ordering::Relaxed);
true
Self::with_type_lock(vm, || {
let _guard = ext.specialization_cache.write_lock.lock();
if self.tp_version_tag.load(Ordering::Acquire) != tp_version {
return false;
}
let func_version = getitem.get_version_for_current_state();
if func_version == 0 {
return false;
}
ext.specialization_cache
.getitem_version
.store(func_version, Ordering::Release);
ext.specialization_cache
.swap_getitem(Some(getitem), Some(vm));
true
})
}
/// Read cached __getitem__ for BINARY_OP_SUBSCR_GETITEM specialization.
pub(crate) fn get_cached_getitem_for_specialization(&self) -> Option<(PyRef<PyFunction>, u32)> {
let ext = self.heaptype_ext.as_ref()?;
// Match CPython check order: pointer (Acquire) then function version.
let getitem = ext
.specialization_cache
.getitem
@@ -981,7 +1071,7 @@ impl PyType {
let cached_version = ext
.specialization_cache
.getitem_version
.load(Ordering::Relaxed);
.load(Ordering::Acquire);
if cached_version == 0 {
return None;
}
@@ -1334,38 +1424,41 @@ impl PyType {
// // TODO: how to uniquely identify the subclasses to remove?
// }
*zelf.bases.write() = bases;
// Recursively update the mros of this class and all subclasses
fn update_mro_recursively(cls: &PyType, vm: &VirtualMachine) -> PyResult<()> {
let mut mro =
PyType::resolve_mro(&cls.bases.read()).map_err(|msg| vm.new_type_error(msg))?;
// Preserve self (mro[0]) when updating MRO
mro.insert(0, cls.mro.read()[0].to_owned());
*cls.mro.write() = mro;
for subclass in cls.subclasses.write().iter() {
let subclass = subclass.upgrade().unwrap();
let subclass: &Py<PyType> = subclass.downcast_ref().unwrap();
update_mro_recursively(subclass, vm)?;
Self::with_type_lock(vm, || {
*zelf.bases.write() = bases;
// Recursively update the mros of this class and all subclasses
fn update_mro_recursively(cls: &PyType, vm: &VirtualMachine) -> PyResult<()> {
let mut mro =
PyType::resolve_mro(&cls.bases.read()).map_err(|msg| vm.new_type_error(msg))?;
// Preserve self (mro[0]) when updating MRO
mro.insert(0, cls.mro.read()[0].to_owned());
*cls.mro.write() = mro;
for subclass in cls.subclasses.write().iter() {
let subclass = subclass.upgrade().unwrap();
let subclass: &Py<PyType> = subclass.downcast_ref().unwrap();
update_mro_recursively(subclass, vm)?;
}
Ok(())
}
update_mro_recursively(zelf, vm)?;
// Invalidate inline caches
zelf.modified_inner();
// TODO: do any old slots need to be cleaned up first?
zelf.init_slots(&vm.ctx);
// Register this type as a subclass of its new bases
let weakref_type = super::PyWeak::static_type();
for base in zelf.bases.read().iter() {
base.subclasses.write().push(
zelf.as_object()
.downgrade_with_weakref_typ_opt(None, weakref_type.to_owned())
.unwrap(),
);
}
Ok(())
}
update_mro_recursively(zelf, vm)?;
// Invalidate inline caches
zelf.modified();
// TODO: do any old slots need to be cleaned up first?
zelf.init_slots(&vm.ctx);
// Register this type as a subclass of its new bases
let weakref_type = super::PyWeak::static_type();
for base in zelf.bases.read().iter() {
base.subclasses.write().push(
zelf.as_object()
.downgrade_with_weakref_typ_opt(None, weakref_type.to_owned())
.unwrap(),
);
}
})?;
Ok(())
}
@@ -1457,20 +1550,31 @@ impl PyType {
)));
}
let mut attrs = self.attributes.write();
// First try __annotate__, in case that's been set explicitly
if let Some(annotate) = attrs.get(identifier!(vm, __annotate__)).cloned() {
let annotate_key = identifier!(vm, __annotate__);
let annotate_func_key = identifier!(vm, __annotate_func__);
let attrs = self.attributes.read();
if let Some(annotate) = attrs.get(annotate_key).cloned() {
return Ok(annotate);
}
// Then try __annotate_func__
if let Some(annotate) = attrs.get(identifier!(vm, __annotate_func__)).cloned() {
// TODO: Apply descriptor tp_descr_get if needed
if let Some(annotate) = attrs.get(annotate_func_key).cloned() {
return Ok(annotate);
}
// Set __annotate_func__ = None and return None
drop(attrs);
let none = vm.ctx.none();
attrs.insert(identifier!(vm, __annotate_func__), none.clone());
Ok(none)
let (result, _prev) = Self::with_type_lock(vm, || {
let mut attrs = self.attributes.write();
if let Some(annotate) = attrs.get(annotate_key).cloned() {
return (annotate, None);
}
if let Some(annotate) = attrs.get(annotate_func_key).cloned() {
return (annotate, None);
}
self.modified_inner();
let prev = attrs.insert(annotate_func_key, none.clone());
(none, prev)
});
Ok(result)
}
#[pygetset(setter)]
@@ -1493,20 +1597,27 @@ impl PyType {
return Err(vm.new_type_error("__annotate__ must be callable or None"));
}
let mut attrs = self.attributes.write();
// Clear cached annotations only when setting to a new callable
if !vm.is_none(&value) {
attrs.swap_remove(identifier!(vm, __annotations_cache__));
}
attrs.insert(identifier!(vm, __annotate_func__), value.clone());
let _prev_values = Self::with_type_lock(vm, || {
self.modified_inner();
let mut attrs = self.attributes.write();
let removed = if !vm.is_none(&value) {
attrs.swap_remove(identifier!(vm, __annotations_cache__))
} else {
None
};
let prev = attrs.insert(identifier!(vm, __annotate_func__), value);
(removed, prev)
});
Ok(())
}
#[pygetset]
fn __annotations__(&self, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
let annotations_key = identifier!(vm, __annotations__);
let annotations_cache_key = identifier!(vm, __annotations_cache__);
let attrs = self.attributes.read();
if let Some(annotations) = attrs.get(identifier!(vm, __annotations__)).cloned() {
if let Some(annotations) = attrs.get(annotations_key).cloned() {
// Ignore the __annotations__ descriptor stored on type itself.
if !annotations.class().is(vm.ctx.types.getset_type) {
if vm.is_none(&annotations)
@@ -1521,8 +1632,7 @@ impl PyType {
)));
}
}
// Then try __annotations_cache__
if let Some(annotations) = attrs.get(identifier!(vm, __annotations_cache__)).cloned() {
if let Some(annotations) = attrs.get(annotations_cache_key).cloned() {
if vm.is_none(&annotations)
|| annotations.class().is(vm.ctx.types.dict_type)
|| self.slots.flags.has_feature(PyTypeFlags::HEAPTYPE)
@@ -1559,11 +1669,21 @@ impl PyType {
vm.ctx.new_dict().into()
};
// Cache the result in __annotations_cache__
self.attributes
.write()
.insert(identifier!(vm, __annotations_cache__), annotations.clone());
Ok(annotations)
let (result, _prev) = Self::with_type_lock(vm, || {
let mut attrs = self.attributes.write();
if let Some(existing) = attrs.get(annotations_key).cloned()
&& !existing.class().is(vm.ctx.types.getset_type)
{
return (existing, None);
}
if let Some(existing) = attrs.get(annotations_cache_key).cloned() {
return (existing, None);
}
self.modified_inner();
let prev = attrs.insert(annotations_cache_key, annotations.clone());
(annotations, prev)
});
Ok(result)
}
#[pygetset(setter)]
@@ -1579,43 +1699,43 @@ impl PyType {
)));
}
let mut attrs = self.attributes.write();
let has_annotations = attrs.contains_key(identifier!(vm, __annotations__));
let _prev_values = Self::with_type_lock(vm, || {
self.modified_inner();
let mut attrs = self.attributes.write();
let has_annotations = attrs.contains_key(identifier!(vm, __annotations__));
match value {
crate::function::PySetterValue::Assign(value) => {
// SET path: store the value (including None)
let key = if has_annotations {
identifier!(vm, __annotations__)
} else {
identifier!(vm, __annotations_cache__)
};
attrs.insert(key, value);
if has_annotations {
attrs.swap_remove(identifier!(vm, __annotations_cache__));
let mut prev = Vec::new();
match value {
crate::function::PySetterValue::Assign(value) => {
let key = if has_annotations {
identifier!(vm, __annotations__)
} else {
identifier!(vm, __annotations_cache__)
};
prev.extend(attrs.insert(key, value));
if has_annotations {
prev.extend(attrs.swap_remove(identifier!(vm, __annotations_cache__)));
}
}
crate::function::PySetterValue::Delete => {
let removed = if has_annotations {
attrs.swap_remove(identifier!(vm, __annotations__))
} else {
attrs.swap_remove(identifier!(vm, __annotations_cache__))
};
if removed.is_none() {
return Err(vm.new_attribute_error("__annotations__"));
}
prev.extend(removed);
if has_annotations {
prev.extend(attrs.swap_remove(identifier!(vm, __annotations_cache__)));
}
}
}
crate::function::PySetterValue::Delete => {
// DELETE path: remove the key
let removed = if has_annotations {
attrs
.swap_remove(identifier!(vm, __annotations__))
.is_some()
} else {
attrs
.swap_remove(identifier!(vm, __annotations_cache__))
.is_some()
};
if !removed {
return Err(vm.new_attribute_error("__annotations__"));
}
if has_annotations {
attrs.swap_remove(identifier!(vm, __annotations_cache__));
}
}
}
attrs.swap_remove(identifier!(vm, __annotate_func__));
attrs.swap_remove(identifier!(vm, __annotate__));
prev.extend(attrs.swap_remove(identifier!(vm, __annotate_func__)));
prev.extend(attrs.swap_remove(identifier!(vm, __annotate__)));
Ok(prev)
})?;
Ok(())
}
@@ -1648,9 +1768,13 @@ impl PyType {
// Setter for the type's `__module__` attribute.
//
// `check_set_special_type_attr` runs first and its error, if any, is
// propagated with `?`. On success `__firstlineno__` is removed from the
// attribute map and `value` is stored under `__module__`.
//
// NOTE(review): this span is taken from a diff rendering whose +/- markers
// were stripped, so it appears to contain BOTH the old body and its
// replacement. Only one of the two variants below belongs in the real file;
// confirm against the hunk header before using this text as source.
#[pygetset(setter)]
fn set___module__(&self, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> {
self.check_set_special_type_attr(identifier!(vm, __module__), vm)?;
// Variant 1 (presumably the pre-change lines): mutate the attribute map
// directly under its write guard, discarding whatever values were replaced.
let mut attributes = self.attributes.write();
attributes.swap_remove(identifier!(vm, __firstlineno__));
attributes.insert(identifier!(vm, __module__), value);
// Variant 2 (presumably the post-change lines): perform the same two map
// operations inside `with_type_lock`, after calling `modified_inner()`.
// The closure returns the removed/replaced values and they are bound to
// `_prev_values`, so they are dropped only after `with_type_lock` returns.
// Per the commit message, dropping them inside the lock could trigger
// weakref callbacks that re-acquire the non-reentrant type mutex.
let _prev_values = Self::with_type_lock(vm, || {
self.modified_inner();
let mut attributes = self.attributes.write();
let removed = attributes.swap_remove(identifier!(vm, __firstlineno__));
let prev = attributes.insert(identifier!(vm, __module__), value);
(removed, prev)
});
Ok(())
}
@@ -1772,24 +1896,26 @@ impl PyType {
value: PySetterValue<PyTupleRef>,
vm: &VirtualMachine,
) -> PyResult<()> {
let key = identifier!(vm, __type_params__);
match value {
PySetterValue::Assign(ref val) => {
let key = identifier!(vm, __type_params__);
PySetterValue::Assign(val) => {
self.check_set_special_type_attr(key, vm)?;
self.modified();
self.attributes.write().insert(key, val.clone().into());
let _prev_value = Self::with_type_lock(vm, || {
self.modified_inner();
self.attributes.write().insert(key, val.into())
});
}
PySetterValue::Delete => {
// For delete, we still need to check if the type is immutable
if self.slots.flags.has_feature(PyTypeFlags::IMMUTABLETYPE) {
return Err(vm.new_type_error(format!(
"cannot delete '__type_params__' attribute of immutable type '{}'",
self.slot_name()
)));
}
let key = identifier!(vm, __type_params__);
self.modified();
self.attributes.write().shift_remove(&key);
let _prev_value = Self::with_type_lock(vm, || {
self.modified_inner();
self.attributes.write().shift_remove(&key)
});
}
}
Ok(())
@@ -1868,16 +1994,14 @@ impl Constructor for PyType {
};
let qualname = dict
.get_item_opt(identifier!(vm, __qualname__), vm)?
.pop_item(identifier!(vm, __qualname__).as_object(), vm)?
.map(|obj| downcast_qualname(obj, vm))
.transpose()?
.unwrap_or_else(|| {
// If __qualname__ is not provided, we can use the name as default
name.clone().into_wtf8()
});
let mut attributes = dict.to_attributes(vm);
attributes.shift_remove(identifier!(vm, __qualname__));
// Check __doc__ for surrogates - raises UnicodeEncodeError during type creation
if let Some(doc) = attributes.get(identifier!(vm, __doc__))
@@ -2135,29 +2259,15 @@ impl Constructor for PyType {
}
}
{
let mut attrs = typ.attributes.write();
if let Some(cell) = attrs.get(identifier!(vm, __classcell__)) {
let cell = PyCellRef::try_from_object(vm, cell.clone()).map_err(|_| {
vm.new_type_error(format!(
"__classcell__ must be a nonlocal cell, not {}",
cell.class().name()
))
})?;
cell.set(Some(typ.clone().into()));
attrs.shift_remove(identifier!(vm, __classcell__));
}
if let Some(cell) = attrs.get(identifier!(vm, __classdictcell__)) {
let cell = PyCellRef::try_from_object(vm, cell.clone()).map_err(|_| {
vm.new_type_error(format!(
"__classdictcell__ must be a nonlocal cell, not {}",
cell.class().name()
))
})?;
cell.set(Some(dict.clone().into()));
attrs.shift_remove(identifier!(vm, __classdictcell__));
}
}
if let Some(cell) = typ.attributes.write().get(identifier!(vm, __classcell__)) {
let cell = PyCellRef::try_from_object(vm, cell.clone()).map_err(|_| {
vm.new_type_error(format!(
"__classcell__ must be a nonlocal cell, not {}",
cell.class().name()
))
})?;
cell.set(Some(typ.clone().into()));
};
// All *classes* should have a dict. Exceptions are *instances* of
// classes that define __slots__ and instances of built-in classes
@@ -2413,10 +2523,12 @@ impl Py<PyType> {
// Check if we can set this special type attribute
self.check_set_special_type_attr(identifier!(vm, __doc__), vm)?;
// Set the __doc__ in the type's dict
self.attributes
.write()
.insert(identifier!(vm, __doc__), value);
let _prev_value = PyType::with_type_lock(vm, || {
self.modified_inner();
self.attributes
.write()
.insert(identifier!(vm, __doc__), value)
});
Ok(())
}
@@ -2478,23 +2590,29 @@ impl SetAttr for PyType {
}
let assign = value.is_assign();
// Invalidate inline caches before modifying attributes.
// This ensures other threads see the version invalidation before
// any attribute changes, preventing use-after-free of cached descriptors.
zelf.modified();
// Drop old value OUTSIDE the type lock to avoid deadlock:
// dropping may trigger weakref callbacks → method calls →
// LOAD_ATTR specialization → version_for_specialization → type lock.
let _prev_value = Self::with_type_lock(vm, || {
// Invalidate inline caches before modifying attributes.
// This ensures other threads see the version invalidation before
// any attribute changes, preventing use-after-free of cached descriptors.
zelf.modified_inner();
if let PySetterValue::Assign(value) = value {
zelf.attributes.write().insert(attr_name, value);
} else {
let prev_value = zelf.attributes.write().shift_remove(attr_name); // TODO: swap_remove applicable?
if prev_value.is_none() {
return Err(vm.new_attribute_error(format!(
"type object '{}' has no attribute '{}'",
zelf.name(),
attr_name,
)));
if let PySetterValue::Assign(value) = value {
Ok(zelf.attributes.write().insert(attr_name, value))
} else {
let prev_value = zelf.attributes.write().shift_remove(attr_name); // TODO: swap_remove applicable?
if prev_value.is_none() {
return Err(vm.new_attribute_error(format!(
"type object '{}' has no attribute '{}'",
zelf.name(),
attr_name,
)));
}
Ok(prev_value)
}
}
})?;
if attr_name.as_wtf8().starts_with("__") && attr_name.as_wtf8().ends_with("__") {
if assign {

View File

@@ -237,18 +237,6 @@ impl PyBytesInner {
vm.new_overflow_error("bytes object is too large to make repr")
}
// Issues a `bytes_warning`-category warning carrying `message`, but only when
// the VM's `bytes_warning` setting is greater than zero; otherwise it is a
// no-op. Any error returned by `_warnings::warn` is propagated to the caller;
// on every other path the function returns `Ok(())`.
//
// NOTE(review): the enclosing hunk header (`-237,18 +237,6`) suggests this
// helper is on the removed side of the diff — confirm before relying on it.
pub(crate) fn warn_on_str(message: &'static str, vm: &VirtualMachine) -> PyResult<()> {
if vm.state.config.settings.bytes_warning > 0 {
crate::stdlib::_warnings::warn(
vm.ctx.exceptions.bytes_warning,
message.to_owned(),
// presumably the stack level passed to the warnings machinery — TODO confirm
1,
vm,
)?;
}
Ok(())
}
pub fn repr_with_name(&self, class_name: &str, vm: &VirtualMachine) -> PyResult<String> {
const DECORATION_LEN: isize = 2 + 3; // 2 for (), 3 for b"" => bytearray(b"")
let escape = crate::literal::escape::AsciiEscape::new_repr(&self.elements);

View File

@@ -2397,22 +2397,6 @@ pub(super) mod types {
.downcast::<crate::builtins::PyTuple>()
{
let location_tup_len = location_tuple.len();
match location_tup_len {
4 | 6 => {}
5 => {
return Err(vm.new_type_error(
"end_offset must be provided when end_lineno is provided".to_owned(),
));
}
_ => {
return Err(vm.new_type_error(format!(
"function takes exactly 4 or 6 arguments ({} given)",
location_tup_len
)));
}
}
for (i, &attr) in [
"filename",
"lineno",

File diff suppressed because it is too large Load Diff

View File

@@ -457,20 +457,12 @@ impl GcState {
}
// Step 3: Subtract internal references
// Pre-compute referent pointers once per object so that both step 3
// (subtract refs) and step 4 (BFS reachability) see the same snapshot
// of each object's children. Without this, a dict whose write lock is
// held during one traversal but not the other can yield inconsistent
// results, causing live objects to be incorrectly collected.
let mut referents_map: std::collections::HashMap<GcPtr, Vec<NonNull<PyObject>>> =
std::collections::HashMap::new();
for &ptr in &collecting {
let obj = unsafe { ptr.0.as_ref() };
if obj.strong_count() == 0 {
continue;
}
let referent_ptrs = unsafe { obj.gc_get_referent_ptrs() };
referents_map.insert(ptr, referent_ptrs.clone());
for child_ptr in referent_ptrs {
let gc_ptr = GcPtr(child_ptr);
if collecting.contains(&gc_ptr)
@@ -495,13 +487,7 @@ impl GcState {
while let Some(ptr) = worklist.pop() {
let obj = unsafe { ptr.0.as_ref() };
if obj.is_gc_tracked() {
// Reuse the pre-computed referent pointers from step 3.
// For objects that were skipped in step 3 (strong_count was 0),
// compute them now as a fallback.
let referent_ptrs = referents_map
.get(&ptr)
.cloned()
.unwrap_or_else(|| unsafe { obj.gc_get_referent_ptrs() });
let referent_ptrs = unsafe { obj.gc_get_referent_ptrs() };
for child_ptr in referent_ptrs {
let gc_ptr = GcPtr(child_ptr);
if collecting.contains(&gc_ptr) && reachable.insert(gc_ptr) {

Some files were not shown because too many files have changed in this diff Show More