mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
Added _csv module and implemented reader function.
This commit is contained in:
47
Cargo.lock
generated
47
Cargo.lock
generated
@@ -179,6 +179,17 @@ dependencies = [
|
||||
"byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bstr"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-automata 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.100 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "build_const"
|
||||
version = "0.2.1"
|
||||
@@ -305,6 +316,26 @@ dependencies = [
|
||||
"subtle 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "csv"
|
||||
version = "1.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"bstr 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"csv-core 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.100 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "csv-core"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "diff"
|
||||
version = "0.1.11"
|
||||
@@ -668,6 +699,9 @@ dependencies = [
|
||||
name = "memchr"
|
||||
version = "2.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
@@ -1041,6 +1075,14 @@ dependencies = [
|
||||
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.5.6"
|
||||
@@ -1170,6 +1212,7 @@ dependencies = [
|
||||
"chrono 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"exitcode 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"flame 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@@ -2036,6 +2079,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
"checksum blake2b_simd 0.5.8 (registry+https://github.com/rust-lang/crates.io-index)" = "5850aeee1552f495dd0250014cf64b82b7c8879a89d83b33bbdace2cc4f63182"
|
||||
"checksum block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b"
|
||||
"checksum block-padding 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "6d4dc3af3ee2e12f3e5d224e5e1e3d73668abbeb69e566d361f7d5563a4fdf09"
|
||||
"checksum bstr 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8d6c2c5b58ab920a4f5aeaaca34b4488074e8cc7596af94e6f8c6ff247c60245"
|
||||
"checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39"
|
||||
"checksum bumpalo 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ad807f2fc2bf185eeb98ff3a901bd46dc5ad58163d0fa4577ba0d25674d71708"
|
||||
"checksum byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7"
|
||||
@@ -2053,6 +2097,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
"checksum crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "677d453a17e8bd2b913fa38e8b9cf04bcdbb5be790aa294f2389661d72036015"
|
||||
"checksum crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6"
|
||||
"checksum crypto-mac 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4434400df11d95d556bac068ddfedd482915eb18fe8bea89bc80b6e4b1c179e5"
|
||||
"checksum csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "37519ccdfd73a75821cac9319d4fce15a81b9fcf75f951df5b9988aa3a0af87d"
|
||||
"checksum csv-core 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9b5cadb6b25c77aeff80ba701712494213f4a8418fcda2ee11b6560c3ad0bf4c"
|
||||
"checksum diff 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "3c2b69f912779fbb121ceb775d74d51e915af17aaebc38d28a592843a2dd0a3a"
|
||||
"checksum digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
|
||||
"checksum dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3"
|
||||
@@ -2142,6 +2188,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
"checksum redox_users 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ecedbca3bf205f8d8f5c2b44d83cd0690e39ee84b951ed649e9f1841132b66d"
|
||||
"checksum regex 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9329abc99e39129fcceabd24cf5d85b4671ef7c29c50e972bc5afe32438ec384"
|
||||
"checksum regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dc220bd33bdce8f093101afe22a037b8eb0e5af33592e6a9caafff0d4cb81cbd"
|
||||
"checksum regex-automata 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "92b73c2a1770c255c240eaa4ee600df1704a38dc3feaa6e949e7fcd4f8dc09f9"
|
||||
"checksum regex-syntax 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7d707a4fa2637f2dca2ef9fd02225ec7661fe01a53623c1e6515b6916511f7a7"
|
||||
"checksum regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "11a7e20d1cce64ef2fed88b66d347f88bd9babb82845b2b858f3edbf59a4f716"
|
||||
"checksum rust-argon2 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ca4eaef519b494d1f2848fc602d18816fed808a981aedf4f1f00ceb7c9d32cf"
|
||||
|
||||
@@ -64,6 +64,7 @@ libc = "0.2"
|
||||
nix = "0.14.1"
|
||||
wtf8 = "0.0.3"
|
||||
arr_macro = "0.1.2"
|
||||
csv = "1.1.1"
|
||||
|
||||
flame = { version = "0.2", optional = true }
|
||||
flamer = { version = "0.3", optional = true }
|
||||
|
||||
168
vm/src/stdlib/csv.rs
Normal file
168
vm/src/stdlib/csv.rs
Normal file
@@ -0,0 +1,168 @@
|
||||
use std::cell::RefCell;
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
|
||||
use csv as rust_csv;
|
||||
use itertools::join;
|
||||
|
||||
use crate::obj::objiter;
|
||||
use crate::obj::objstr::PyString;
|
||||
use crate::obj::objtype::PyClassRef;
|
||||
use crate::pyobject::{IntoPyObject, TryFromObject, TypeProtocol};
|
||||
use crate::pyobject::{PyClassImpl, PyIterable, PyObjectRef, PyRef, PyResult, PyValue};
|
||||
use crate::types::create_type;
|
||||
use crate::VirtualMachine;
|
||||
|
||||
/// Quoting modes exported by the `_csv` module as integer constants.
///
/// The discriminants are spelled out so the value behind each exported
/// constant is visible at a glance; they match the implicit numbering
/// (0, 1, 2, 3) of the declaration order.
#[repr(i32)]
pub enum QuoteStyle {
    /// Exported as `QUOTE_MINIMAL`.
    QuoteMinimal = 0,
    /// Exported as `QUOTE_ALL`.
    QuoteAll = 1,
    /// Exported as `QUOTE_NONNUMERIC`.
    QuoteNonnumeric = 2,
    /// Exported as `QUOTE_NONE`.
    QuoteNone = 3,
}
|
||||
|
||||
pub fn build_reader(iterable: PyIterable<PyObjectRef>, vm: &VirtualMachine) -> PyResult {
|
||||
Reader::new(iterable).into_ref(vm).into_pyobject(vm)
|
||||
}
|
||||
|
||||
fn into_strings(iterable: &PyIterable<PyObjectRef>, vm: &VirtualMachine) -> PyResult<Vec<String>> {
|
||||
iterable
|
||||
.iter(vm)?
|
||||
.map(|py_obj_ref| {
|
||||
match_class!(match py_obj_ref? {
|
||||
py_str @ PyString => Ok(py_str.as_str().trim().to_owned()),
|
||||
obj => {
|
||||
let msg = format!(
|
||||
"iterator should return strings, not {} (did you open the file in text mode?)",
|
||||
obj.class().name
|
||||
);
|
||||
Err(vm.new_type_error(msg))
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect::<PyResult<Vec<String>>>()
|
||||
}
|
||||
|
||||
type MemIO = std::io::Cursor<Vec<u8>>;
|
||||
|
||||
#[allow(dead_code)]
|
||||
enum ReadState {
|
||||
PyIter(PyIterable<PyObjectRef>),
|
||||
CsvIter(rust_csv::StringRecordsIntoIter<MemIO>),
|
||||
}
|
||||
|
||||
impl ReadState {
|
||||
fn new(iter: PyIterable) -> Self {
|
||||
ReadState::PyIter(iter)
|
||||
}
|
||||
|
||||
fn cast_to_reader(&mut self, vm: &VirtualMachine) -> PyResult<()> {
|
||||
if let ReadState::PyIter(ref iterable) = self {
|
||||
let lines = into_strings(iterable, vm)?;
|
||||
let contents = join(lines, "\n");
|
||||
|
||||
let bytes = Vec::from(contents.as_bytes());
|
||||
let reader = MemIO::new(bytes);
|
||||
|
||||
let csv_iter = rust_csv::ReaderBuilder::new()
|
||||
.has_headers(false)
|
||||
.from_reader(reader)
|
||||
.into_records();
|
||||
|
||||
*self = ReadState::CsvIter(csv_iter);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(name = "Reader")]
|
||||
struct Reader {
|
||||
state: RefCell<ReadState>,
|
||||
}
|
||||
|
||||
impl Debug for Reader {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
write!(f, "_csv.reader")
|
||||
}
|
||||
}
|
||||
|
||||
impl PyValue for Reader {
|
||||
fn class(vm: &VirtualMachine) -> PyClassRef {
|
||||
vm.class("_csv", "Reader")
|
||||
}
|
||||
}
|
||||
|
||||
impl Reader {
|
||||
fn new(iter: PyIterable<PyObjectRef>) -> Self {
|
||||
let state = RefCell::new(ReadState::new(iter));
|
||||
Reader { state }
|
||||
}
|
||||
}
|
||||
|
||||
#[pyimpl]
|
||||
impl Reader {
|
||||
#[pymethod(name = "__iter__")]
|
||||
fn iter(this: PyRef<Self>, vm: &VirtualMachine) -> PyResult {
|
||||
this.state.borrow_mut().cast_to_reader(vm)?;
|
||||
this.into_pyobject(vm)
|
||||
}
|
||||
|
||||
#[pymethod(name = "__next__")]
|
||||
fn next(&self, vm: &VirtualMachine) -> PyResult {
|
||||
let mut state = self.state.borrow_mut();
|
||||
state.cast_to_reader(vm)?;
|
||||
|
||||
if let ReadState::CsvIter(ref mut reader) = &mut *state {
|
||||
if let Some(row) = reader.next() {
|
||||
match row {
|
||||
Ok(records) => {
|
||||
let iter = records
|
||||
.into_iter()
|
||||
.map(|bytes| bytes.into_pyobject(vm))
|
||||
.collect::<PyResult<Vec<_>>>()?;
|
||||
Ok(vm.ctx.new_list(iter))
|
||||
}
|
||||
Err(_) => {
|
||||
let msg = String::from("Decode Error");
|
||||
let decode_error = vm.new_unicode_decode_error(msg);
|
||||
Err(decode_error)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Err(objiter::new_stop_iteration(vm))
|
||||
}
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn csv_reader(fp: PyObjectRef, vm: &VirtualMachine) -> PyResult {
|
||||
if let Ok(iterable) = PyIterable::<PyObjectRef>::try_from_object(vm, fp) {
|
||||
build_reader(iterable, vm)
|
||||
} else {
|
||||
Err(vm.new_type_error("argument 1 must be an iterator".to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
|
||||
let ctx = &vm.ctx;
|
||||
|
||||
let reader_type = Reader::make_class(ctx);
|
||||
|
||||
let error = create_type(
|
||||
"Error",
|
||||
&ctx.types.type_type,
|
||||
&ctx.exceptions.exception_type,
|
||||
);
|
||||
|
||||
py_module!(vm, "_csv", {
|
||||
"reader" => ctx.new_rustfunc(csv_reader),
|
||||
"Reader" => reader_type,
|
||||
"Error" => error,
|
||||
// constants
|
||||
"QUOTE_MINIMAL" => ctx.new_int(QuoteStyle::QuoteMinimal as i32),
|
||||
"QUOTE_ALL" => ctx.new_int(QuoteStyle::QuoteAll as i32),
|
||||
"QUOTE_NONNUMERIC" => ctx.new_int(QuoteStyle::QuoteNonnumeric as i32),
|
||||
"QUOTE_NONE" => ctx.new_int(QuoteStyle::QuoteNone as i32),
|
||||
})
|
||||
}
|
||||
@@ -4,6 +4,7 @@ mod ast;
|
||||
mod binascii;
|
||||
mod codecs;
|
||||
mod collections;
|
||||
mod csv;
|
||||
mod dis;
|
||||
mod errno;
|
||||
mod functools;
|
||||
@@ -60,6 +61,7 @@ pub fn get_module_inits() -> HashMap<String, StdlibInitFunc> {
|
||||
"dis".to_string() => Box::new(dis::make_module),
|
||||
"_codecs".to_string() => Box::new(codecs::make_module),
|
||||
"_collections".to_string() => Box::new(collections::make_module),
|
||||
"_csv".to_string() => Box::new(csv::make_module),
|
||||
"_functools".to_string() => Box::new(functools::make_module),
|
||||
"errno".to_string() => Box::new(errno::make_module),
|
||||
"hashlib".to_string() => Box::new(hashlib::make_module),
|
||||
|
||||
Reference in New Issue
Block a user