From 6f7d76f74b8280cbd277b996bf9f54a6728fc28a Mon Sep 17 00:00:00 2001 From: kluid Date: Wed, 9 Oct 2019 12:27:41 +0900 Subject: [PATCH] Added _csv module and implemented reader function. --- Cargo.lock | 47 ++++++++++++ vm/Cargo.toml | 1 + vm/src/stdlib/csv.rs | 168 +++++++++++++++++++++++++++++++++++++++++++ vm/src/stdlib/mod.rs | 2 + 4 files changed, 218 insertions(+) create mode 100644 vm/src/stdlib/csv.rs diff --git a/Cargo.lock b/Cargo.lock index d78a65543..1d57f57fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,6 +179,17 @@ dependencies = [ "byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "bstr" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-automata 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.100 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "build_const" version = "0.2.1" @@ -305,6 +316,26 @@ dependencies = [ "subtle 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "csv" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bstr 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "csv-core 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", + "ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.100 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "csv-core" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "diff" version = 
"0.1.11" @@ -668,6 +699,9 @@ dependencies = [ name = "memchr" version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "miniz_oxide" @@ -1041,6 +1075,14 @@ dependencies = [ "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "regex-automata" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "regex-syntax" version = "0.5.6" @@ -1170,6 +1212,7 @@ dependencies = [ "chrono 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)", "crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "exitcode 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "flame 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2036,6 +2079,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum blake2b_simd 0.5.8 (registry+https://github.com/rust-lang/crates.io-index)" = "5850aeee1552f495dd0250014cf64b82b7c8879a89d83b33bbdace2cc4f63182" "checksum block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b" "checksum block-padding 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "6d4dc3af3ee2e12f3e5d224e5e1e3d73668abbeb69e566d361f7d5563a4fdf09" +"checksum bstr 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8d6c2c5b58ab920a4f5aeaaca34b4488074e8cc7596af94e6f8c6ff247c60245" "checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39" "checksum bumpalo 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ad807f2fc2bf185eeb98ff3a901bd46dc5ad58163d0fa4577ba0d25674d71708" "checksum byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" @@ -2053,6 +2097,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "677d453a17e8bd2b913fa38e8b9cf04bcdbb5be790aa294f2389661d72036015" "checksum crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6" "checksum crypto-mac 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4434400df11d95d556bac068ddfedd482915eb18fe8bea89bc80b6e4b1c179e5" +"checksum csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "37519ccdfd73a75821cac9319d4fce15a81b9fcf75f951df5b9988aa3a0af87d" +"checksum csv-core 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9b5cadb6b25c77aeff80ba701712494213f4a8418fcda2ee11b6560c3ad0bf4c" "checksum diff 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "3c2b69f912779fbb121ceb775d74d51e915af17aaebc38d28a592843a2dd0a3a" "checksum digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5" "checksum dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3" @@ -2142,6 +2188,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum redox_users 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ecedbca3bf205f8d8f5c2b44d83cd0690e39ee84b951ed649e9f1841132b66d" "checksum regex 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = 
"9329abc99e39129fcceabd24cf5d85b4671ef7c29c50e972bc5afe32438ec384" "checksum regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dc220bd33bdce8f093101afe22a037b8eb0e5af33592e6a9caafff0d4cb81cbd" +"checksum regex-automata 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "92b73c2a1770c255c240eaa4ee600df1704a38dc3feaa6e949e7fcd4f8dc09f9" "checksum regex-syntax 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7d707a4fa2637f2dca2ef9fd02225ec7661fe01a53623c1e6515b6916511f7a7" "checksum regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "11a7e20d1cce64ef2fed88b66d347f88bd9babb82845b2b858f3edbf59a4f716" "checksum rust-argon2 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ca4eaef519b494d1f2848fc602d18816fed808a981aedf4f1f00ceb7c9d32cf" diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 2ded048f5..352107fe5 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -64,6 +64,7 @@ libc = "0.2" nix = "0.14.1" wtf8 = "0.0.3" arr_macro = "0.1.2" +csv = "1.1.1" flame = { version = "0.2", optional = true } flamer = { version = "0.3", optional = true } diff --git a/vm/src/stdlib/csv.rs b/vm/src/stdlib/csv.rs new file mode 100644 index 000000000..c00fb8890 --- /dev/null +++ b/vm/src/stdlib/csv.rs @@ -0,0 +1,168 @@ +use std::cell::RefCell; +use std::fmt::{self, Debug, Formatter}; + +use csv as rust_csv; +use itertools::join; + +use crate::obj::objiter; +use crate::obj::objstr::PyString; +use crate::obj::objtype::PyClassRef; +use crate::pyobject::{IntoPyObject, TryFromObject, TypeProtocol}; +use crate::pyobject::{PyClassImpl, PyIterable, PyObjectRef, PyRef, PyResult, PyValue}; +use crate::types::create_type; +use crate::VirtualMachine; + +#[repr(i32)] +pub enum QuoteStyle { + QuoteMinimal, + QuoteAll, + QuoteNonnumeric, + QuoteNone, +} + +pub fn build_reader(iterable: PyIterable, vm: &VirtualMachine) -> PyResult { + Reader::new(iterable).into_ref(vm).into_pyobject(vm) +} + +fn 
into_strings(iterable: &PyIterable, vm: &VirtualMachine) -> PyResult<Vec<String>> { + iterable + .iter(vm)? + .map(|py_obj_ref| { + match_class!(match py_obj_ref? { + py_str @ PyString => Ok(py_str.as_str().trim().to_owned()), + obj => { + let msg = format!( + "iterator should return strings, not {} (did you open the file in text mode?)", + obj.class().name + ); + Err(vm.new_type_error(msg)) + } + }) + }) + .collect::<PyResult<Vec<String>>>() +} + +type MemIO = std::io::Cursor<Vec<u8>>; + +#[allow(dead_code)] +enum ReadState { + PyIter(PyIterable), + CsvIter(rust_csv::StringRecordsIntoIter<MemIO>), +} + +impl ReadState { + fn new(iter: PyIterable) -> Self { + ReadState::PyIter(iter) + } + + fn cast_to_reader(&mut self, vm: &VirtualMachine) -> PyResult<()> { + if let ReadState::PyIter(ref iterable) = self { + let lines = into_strings(iterable, vm)?; + let contents = join(lines, "\n"); + + let bytes = Vec::from(contents.as_bytes()); + let reader = MemIO::new(bytes); + + let csv_iter = rust_csv::ReaderBuilder::new() + .has_headers(false) + .from_reader(reader) + .into_records(); + + *self = ReadState::CsvIter(csv_iter); + } + Ok(()) + } +} + +#[pyclass(name = "Reader")] +struct Reader { + state: RefCell<ReadState>, +} + +impl Debug for Reader { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "_csv.reader") + } +} + +impl PyValue for Reader { + fn class(vm: &VirtualMachine) -> PyClassRef { + vm.class("_csv", "Reader") + } +} + +impl Reader { + fn new(iter: PyIterable) -> Self { + let state = RefCell::new(ReadState::new(iter)); + Reader { state } + } +} + +#[pyimpl] +impl Reader { + #[pymethod(name = "__iter__")] + fn iter(this: PyRef<Self>, vm: &VirtualMachine) -> PyResult { + this.state.borrow_mut().cast_to_reader(vm)?; + this.into_pyobject(vm) + } + + #[pymethod(name = "__next__")] + fn next(&self, vm: &VirtualMachine) -> PyResult { + let mut state = self.state.borrow_mut(); + state.cast_to_reader(vm)?; + + if let ReadState::CsvIter(ref mut reader) = &mut *state { + if let Some(row) = reader.next() { + match row {
+ Ok(records) => { + let iter = records + .into_iter() + .map(|bytes| bytes.into_pyobject(vm)) + .collect::<PyResult<Vec<_>>>()?; + Ok(vm.ctx.new_list(iter)) + } + Err(_) => { + let msg = String::from("Decode Error"); + let decode_error = vm.new_unicode_decode_error(msg); + Err(decode_error) + } + } + } else { + Err(objiter::new_stop_iteration(vm)) + } + } else { + unreachable!() + } + } +} + +fn csv_reader(fp: PyObjectRef, vm: &VirtualMachine) -> PyResult { + if let Ok(iterable) = PyIterable::<PyObjectRef>::try_from_object(vm, fp) { + build_reader(iterable, vm) + } else { + Err(vm.new_type_error("argument 1 must be an iterator".to_string())) + } +} + +pub fn make_module(vm: &VirtualMachine) -> PyObjectRef { + let ctx = &vm.ctx; + + let reader_type = Reader::make_class(ctx); + + let error = create_type( + "Error", + &ctx.types.type_type, + &ctx.exceptions.exception_type, + ); + + py_module!(vm, "_csv", { + "reader" => ctx.new_rustfunc(csv_reader), + "Reader" => reader_type, + "Error" => error, + // constants + "QUOTE_MINIMAL" => ctx.new_int(QuoteStyle::QuoteMinimal as i32), + "QUOTE_ALL" => ctx.new_int(QuoteStyle::QuoteAll as i32), + "QUOTE_NONNUMERIC" => ctx.new_int(QuoteStyle::QuoteNonnumeric as i32), + "QUOTE_NONE" => ctx.new_int(QuoteStyle::QuoteNone as i32), + }) +} diff --git a/vm/src/stdlib/mod.rs b/vm/src/stdlib/mod.rs index 1d3b0afce..140345af7 100644 --- a/vm/src/stdlib/mod.rs +++ b/vm/src/stdlib/mod.rs @@ -4,6 +4,7 @@ mod ast; mod binascii; mod codecs; mod collections; +mod csv; mod dis; mod errno; mod functools; @@ -60,6 +61,7 @@ pub fn get_module_inits() -> HashMap { "dis".to_string() => Box::new(dis::make_module), "_codecs".to_string() => Box::new(codecs::make_module), "_collections".to_string() => Box::new(collections::make_module), + "_csv".to_string() => Box::new(csv::make_module), "_functools".to_string() => Box::new(functools::make_module), "errno".to_string() => Box::new(errno::make_module), "hashlib".to_string() => Box::new(hashlib::make_module),