From ea1f72e92dc8399d2faea73ac59feda3bf6d7fc2 Mon Sep 17 00:00:00 2001
From: Daniel Chiquito <daniel.chiquito@gmail.com>
Date: Fri, 9 Feb 2024 16:12:47 -0500
Subject: [PATCH] Replace rust-cpython with pyo3 in benches

Update the actual benchmark harnesses.

Because the internal APIs previously used are no longer available, I
opted to use `compile` and `exec` from within the CPython context to
compile and execute code. There's probably more overhead to that than
the internal API had, but that overhead should be consistent per
benchmark.

If anyone cares about hyperoptimizing benchmarks then they can optimize
the harness as well.
---
 benches/execution.rs       |  84 ++++++++---------------
 benches/microbenchmarks.rs | 133 +++++++++++++++----------------------
 2 files changed, 82 insertions(+), 135 deletions(-)
diff --git a/benches/execution.rs b/benches/execution.rs
index 14fadfc2a..de81cd080 100644
--- a/benches/execution.rs
+++ b/benches/execution.rs
@@ -1,30 +1,32 @@
 use criterion::measurement::WallTime;
 use criterion::{
-    criterion_group, criterion_main, Bencher, BenchmarkGroup, BenchmarkId, Criterion, Throughput,
+    black_box, criterion_group, criterion_main, Bencher, BenchmarkGroup, BenchmarkId, Criterion,
+    Throughput,
 };
 use rustpython_compiler::Mode;
 use rustpython_parser::ast;
 use rustpython_parser::Parse;
-use rustpython_vm::{Interpreter, PyResult};
+use rustpython_vm::{Interpreter, PyResult, Settings};
 use std::collections::HashMap;
 use std::path::Path;
 
 fn bench_cpython_code(b: &mut Bencher, source: &str) {
-    let gil = cpython::Python::acquire_gil();
-    let python = gil.python();
-
-    b.iter(|| {
-        let res: cpython::PyResult<()> = python.run(source, None, None);
-        if let Err(e) = res {
-            e.print(python);
-            panic!("Error running source")
-        }
-    });
+    pyo3::Python::with_gil(|py| {
+        b.iter(|| {
+            let module =
+                pyo3::types::PyModule::from_code(py, source, "", "").expect("Error running source");
+            black_box(module);
+        })
+    })
 }
 
 fn bench_rustpy_code(b: &mut Bencher, name: &str, source: &str) {
     // NOTE: Take long time.
-    Interpreter::without_stdlib(Default::default()).enter(|vm| {
+    let mut settings = Settings::default();
+    settings.path_list.push("Lib/".to_string());
+    settings.dont_write_bytecode = true;
+    settings.no_user_site = true;
+    Interpreter::without_stdlib(settings).enter(|vm| {
         // Note: bench_cpython is both compiling and executing the code.
         // As such we compile the code in the benchmark loop as well.
         b.iter(|| {
@@ -36,16 +38,12 @@ fn bench_rustpy_code(b: &mut Bencher, name: &str, source: &str) {
     })
 }
 
-pub fn benchmark_file_execution(
-    group: &mut BenchmarkGroup<WallTime>,
-    name: &str,
-    contents: &String,
-) {
+pub fn benchmark_file_execution(group: &mut BenchmarkGroup<WallTime>, name: &str, contents: &str) {
     group.bench_function(BenchmarkId::new(name, "cpython"), |b| {
-        bench_cpython_code(b, &contents)
+        bench_cpython_code(b, contents)
     });
     group.bench_function(BenchmarkId::new(name, "rustpython"), |b| {
-        bench_rustpy_code(b, name, &contents)
+        bench_rustpy_code(b, name, contents)
     });
 }
 
@@ -55,44 +53,20 @@ pub fn benchmark_file_parsing(group: &mut BenchmarkGroup<WallTime>, name: &str,
         b.iter(|| ast::Suite::parse(contents, name).unwrap())
     });
     group.bench_function(BenchmarkId::new("cpython", name), |b| {
-        let gil = cpython::Python::acquire_gil();
-        let py = gil.python();
-
-        let code = std::ffi::CString::new(contents).unwrap();
-        let fname = cpython::PyString::new(py, name);
-
-        b.iter(|| parse_program_cpython(py, &code, &fname))
+        pyo3::Python::with_gil(|py| {
+            let builtins =
+                pyo3::types::PyModule::import(py, "builtins").expect("Failed to import builtins");
+            let compile = builtins.getattr("compile").expect("no compile in builtins");
+            b.iter(|| {
+                let x = compile
+                    .call1((contents, name, "exec"))
+                    .expect("Failed to parse code");
+                black_box(x);
+            })
+        })
     });
 }
 
-fn parse_program_cpython(
-    py: cpython::Python<'_>,
-    code: &std::ffi::CStr,
-    fname: &cpython::PyString,
-) {
-    extern "C" {
-        fn PyArena_New() -> *mut python3_sys::PyArena;
-        fn PyArena_Free(arena: *mut python3_sys::PyArena);
-    }
-    use cpython::PythonObject;
-    let fname = fname.as_object();
-    unsafe {
-        let arena = PyArena_New();
-        assert!(!arena.is_null());
-        let ret = python3_sys::PyParser_ASTFromStringObject(
-            code.as_ptr() as _,
-            fname.as_ptr(),
-            python3_sys::Py_file_input,
-            std::ptr::null_mut(),
-            arena,
-        );
-        if ret.is_null() {
-            cpython::PyErr::fetch(py).print(py);
-        }
-        PyArena_Free(arena);
-    }
-}
-
 pub fn benchmark_pystone(group: &mut BenchmarkGroup<WallTime>, contents: String) {
     // Default is 50_000. This takes a while, so reduce it to 30k.
     for idx in (10_000..=30_000).step_by(10_000) {
diff --git a/benches/microbenchmarks.rs b/benches/microbenchmarks.rs
index c30d86722..befdc63fd 100644
--- a/benches/microbenchmarks.rs
+++ b/benches/microbenchmarks.rs
@@ -5,7 +5,7 @@ use criterion::{
 use rustpython_compiler::Mode;
 use rustpython_vm::{AsObject, Interpreter, PyResult, Settings};
 use std::{
-    ffi, fs, io,
+    fs, io,
     path::{Path, PathBuf},
 };
 
@@ -36,95 +36,68 @@ pub struct MicroBenchmark {
 }
 
 fn bench_cpython_code(group: &mut BenchmarkGroup<WallTime>, bench: &MicroBenchmark) {
-    let gil = cpython::Python::acquire_gil();
-    let py = gil.python();
+    pyo3::Python::with_gil(|py| {
+        let setup_name = format!("{}_setup", bench.name);
+        let setup_code = cpy_compile_code(py, &bench.setup, &setup_name).unwrap();
 
-    let setup_code = ffi::CString::new(&*bench.setup).unwrap();
-    let setup_name = ffi::CString::new(format!("{}_setup", bench.name)).unwrap();
-    let setup_code = cpy_compile_code(py, &setup_code, &setup_name).unwrap();
+        let code = cpy_compile_code(py, &bench.code, &bench.name).unwrap();
 
-    let code = ffi::CString::new(&*bench.code).unwrap();
-    let name = ffi::CString::new(&*bench.name).unwrap();
-    let code = cpy_compile_code(py, &code, &name).unwrap();
+        // Grab the exec function in advance so we don't have lookups in the hot code
+        let builtins =
+            pyo3::types::PyModule::import(py, "builtins").expect("Failed to import builtins");
+        let exec = builtins.getattr("exec").expect("no exec in builtins");
 
-    let bench_func = |(globals, locals): &mut (cpython::PyDict, cpython::PyDict)| {
-        let res = cpy_run_code(py, &code, globals, locals);
-        if let Err(e) = res {
-            e.print(py);
-            panic!("Error running microbenchmark")
-        }
-    };
+        let bench_func = |(globals, locals): &mut (&pyo3::types::PyDict, &pyo3::types::PyDict)| {
+            let res = exec.call((code, &*globals, &*locals), None);
+            if let Err(e) = res {
+                e.print(py);
+                panic!("Error running microbenchmark")
+            }
+        };
 
-    let bench_setup = |iterations| {
-        let globals = cpython::PyDict::new(py);
-        // setup the __builtins__ attribute - no other way to do this (other than manually) as far
-        // as I can tell
-        let _ = py.run("", Some(&globals), None);
-        let locals = cpython::PyDict::new(py);
-        if let Some(idx) = iterations {
-            globals.set_item(py, "ITERATIONS", idx).unwrap();
-        }
+        let bench_setup = |iterations| {
+            let globals = pyo3::types::PyDict::new(py);
+            let locals = pyo3::types::PyDict::new(py);
+            if let Some(idx) = iterations {
+                globals.set_item("ITERATIONS", idx).unwrap();
+            }
 
-        let res = cpy_run_code(py, &setup_code, &globals, &locals);
-        if let Err(e) = res {
-            e.print(py);
-            panic!("Error running microbenchmark setup code")
-        }
-        (globals, locals)
-    };
+            let res = exec.call((setup_code, &globals, &locals), None);
+            if let Err(e) = res {
+                e.print(py);
+                panic!("Error running microbenchmark setup code")
+            }
+            (globals, locals)
+        };
 
-    if bench.iterate {
-        for idx in (100..=1_000).step_by(200) {
-            group.throughput(Throughput::Elements(idx as u64));
-            group.bench_with_input(BenchmarkId::new("cpython", &bench.name), &idx, |b, idx| {
-                b.iter_batched_ref(
-                    || bench_setup(Some(*idx)),
-                    bench_func,
-                    BatchSize::LargeInput,
-                );
+        if bench.iterate {
+            for idx in (100..=1_000).step_by(200) {
+                group.throughput(Throughput::Elements(idx as u64));
+                group.bench_with_input(BenchmarkId::new("cpython", &bench.name), &idx, |b, idx| {
+                    b.iter_batched_ref(
+                        || bench_setup(Some(*idx)),
+                        bench_func,
+                        BatchSize::LargeInput,
+                    );
+                });
+            }
+        } else {
+            group.bench_function(BenchmarkId::new("cpython", &bench.name), move |b| {
+                b.iter_batched_ref(|| bench_setup(None), bench_func, BatchSize::LargeInput);
             });
         }
-    } else {
-        group.bench_function(BenchmarkId::new("cpython", &bench.name), move |b| {
-            b.iter_batched_ref(|| bench_setup(None), bench_func, BatchSize::LargeInput);
-        });
-    }
+    })
 }
 
-unsafe fn cpy_res(
-    py: cpython::Python<'_>,
-    x: *mut python3_sys::PyObject,
-) -> cpython::PyResult<cpython::PyObject> {
-    cpython::PyObject::from_owned_ptr_opt(py, x).ok_or_else(|| cpython::PyErr::fetch(py))
-}
-
-fn cpy_compile_code(
-    py: cpython::Python<'_>,
-    s: &ffi::CStr,
-    fname: &ffi::CStr,
-) -> cpython::PyResult<cpython::PyObject> {
-    unsafe {
-        let res =
-            python3_sys::Py_CompileString(s.as_ptr(), fname.as_ptr(), python3_sys::Py_file_input);
-        cpy_res(py, res)
-    }
-}
-
-fn cpy_run_code(
-    py: cpython::Python<'_>,
-    code: &cpython::PyObject,
-    locals: &cpython::PyDict,
-    globals: &cpython::PyDict,
-) -> cpython::PyResult<cpython::PyObject> {
-    use cpython::PythonObject;
-    unsafe {
-        let res = python3_sys::PyEval_EvalCode(
-            code.as_ptr(),
-            locals.as_object().as_ptr(),
-            globals.as_object().as_ptr(),
-        );
-        cpy_res(py, res)
-    }
+fn cpy_compile_code<'a>(
+    py: pyo3::Python<'a>,
+    code: &str,
+    name: &str,
+) -> pyo3::PyResult<&'a pyo3::types::PyCode> {
+    let builtins =
+        pyo3::types::PyModule::import(py, "builtins").expect("Failed to import builtins");
+    let compile = builtins.getattr("compile").expect("no compile in builtins");
+    compile.call1((code, name, "exec"))?.extract()
 }
 
 fn bench_rustpy_code(group: &mut BenchmarkGroup<WallTime>, bench: &MicroBenchmark) {