Added _csv module and implemented reader function.

This commit is contained in:
kluid
2019-10-09 12:27:41 +09:00
parent ad8a182a4c
commit 6f7d76f74b
4 changed files with 218 additions and 0 deletions

47
Cargo.lock generated
View File

@@ -179,6 +179,17 @@ dependencies = [
"byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "bstr"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-automata 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.100 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "build_const"
version = "0.2.1"
@@ -305,6 +316,26 @@ dependencies = [
"subtle 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "csv"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bstr 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
"csv-core 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.100 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "csv-core"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "diff"
version = "0.1.11"
@@ -668,6 +699,9 @@ dependencies = [
name = "memchr"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "miniz_oxide"
@@ -1041,6 +1075,14 @@ dependencies = [
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-automata"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.5.6"
@@ -1170,6 +1212,7 @@ dependencies = [
"chrono 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)",
"crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"exitcode 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"flame 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -2036,6 +2079,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum blake2b_simd 0.5.8 (registry+https://github.com/rust-lang/crates.io-index)" = "5850aeee1552f495dd0250014cf64b82b7c8879a89d83b33bbdace2cc4f63182"
"checksum block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b"
"checksum block-padding 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "6d4dc3af3ee2e12f3e5d224e5e1e3d73668abbeb69e566d361f7d5563a4fdf09"
"checksum bstr 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8d6c2c5b58ab920a4f5aeaaca34b4488074e8cc7596af94e6f8c6ff247c60245"
"checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39"
"checksum bumpalo 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ad807f2fc2bf185eeb98ff3a901bd46dc5ad58163d0fa4577ba0d25674d71708"
"checksum byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7"
@@ -2053,6 +2097,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "677d453a17e8bd2b913fa38e8b9cf04bcdbb5be790aa294f2389661d72036015"
"checksum crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6"
"checksum crypto-mac 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4434400df11d95d556bac068ddfedd482915eb18fe8bea89bc80b6e4b1c179e5"
"checksum csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "37519ccdfd73a75821cac9319d4fce15a81b9fcf75f951df5b9988aa3a0af87d"
"checksum csv-core 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9b5cadb6b25c77aeff80ba701712494213f4a8418fcda2ee11b6560c3ad0bf4c"
"checksum diff 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "3c2b69f912779fbb121ceb775d74d51e915af17aaebc38d28a592843a2dd0a3a"
"checksum digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
"checksum dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3"
@@ -2142,6 +2188,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum redox_users 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ecedbca3bf205f8d8f5c2b44d83cd0690e39ee84b951ed649e9f1841132b66d"
"checksum regex 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9329abc99e39129fcceabd24cf5d85b4671ef7c29c50e972bc5afe32438ec384"
"checksum regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dc220bd33bdce8f093101afe22a037b8eb0e5af33592e6a9caafff0d4cb81cbd"
"checksum regex-automata 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "92b73c2a1770c255c240eaa4ee600df1704a38dc3feaa6e949e7fcd4f8dc09f9"
"checksum regex-syntax 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7d707a4fa2637f2dca2ef9fd02225ec7661fe01a53623c1e6515b6916511f7a7"
"checksum regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "11a7e20d1cce64ef2fed88b66d347f88bd9babb82845b2b858f3edbf59a4f716"
"checksum rust-argon2 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ca4eaef519b494d1f2848fc602d18816fed808a981aedf4f1f00ceb7c9d32cf"

View File

@@ -64,6 +64,7 @@ libc = "0.2"
nix = "0.14.1"
wtf8 = "0.0.3"
arr_macro = "0.1.2"
csv = "1.1.1"
flame = { version = "0.2", optional = true }
flamer = { version = "0.3", optional = true }

168
vm/src/stdlib/csv.rs Normal file
View File

@@ -0,0 +1,168 @@
use std::cell::RefCell;
use std::fmt::{self, Debug, Formatter};
use csv as rust_csv;
use itertools::join;
use crate::obj::objiter;
use crate::obj::objstr::PyString;
use crate::obj::objtype::PyClassRef;
use crate::pyobject::{IntoPyObject, TryFromObject, TypeProtocol};
use crate::pyobject::{PyClassImpl, PyIterable, PyObjectRef, PyRef, PyResult, PyValue};
use crate::types::create_type;
use crate::VirtualMachine;
/// Quoting modes exported by the `_csv` module as integer constants.
///
/// The discriminants are written out explicitly because `make_module` casts
/// each variant to `i32` and exposes the result to Python code.
#[repr(i32)]
pub enum QuoteStyle {
    /// Exposed as `QUOTE_MINIMAL`.
    QuoteMinimal = 0,
    /// Exposed as `QUOTE_ALL`.
    QuoteAll = 1,
    /// Exposed as `QUOTE_NONNUMERIC`.
    QuoteNonnumeric = 2,
    /// Exposed as `QUOTE_NONE`.
    QuoteNone = 3,
}
/// Wraps `iterable` in a new `Reader` and hands it back as a Python object.
pub fn build_reader(iterable: PyIterable<PyObjectRef>, vm: &VirtualMachine) -> PyResult {
    let reader = Reader::new(iterable);
    let reader_ref = reader.into_ref(vm);
    reader_ref.into_pyobject(vm)
}
/// Drains `iterable` and returns each yielded line as an owned `String`.
///
/// Only the trailing line terminator (`\r` / `\n`) is removed from each line.
/// Leading and trailing spaces or tabs are field content and must reach the
/// CSV parser untouched; stripping them would silently alter the first and
/// last field of every row.
///
/// # Errors
///
/// Propagates any error raised while iterating, and returns a `TypeError`
/// as soon as the iterable yields something that is not a `str`.
fn into_strings(iterable: &PyIterable<PyObjectRef>, vm: &VirtualMachine) -> PyResult<Vec<String>> {
    iterable
        .iter(vm)?
        .map(|py_obj_ref| {
            match_class!(match py_obj_ref? {
                py_str @ PyString => Ok(py_str
                    .as_str()
                    .trim_end_matches(|c: char| c == '\r' || c == '\n')
                    .to_owned()),
                obj => {
                    let msg = format!(
                        "iterator should return strings, not {} (did you open the file in text mode?)",
                        obj.class().name
                    );
                    Err(vm.new_type_error(msg))
                }
            })
        })
        .collect::<PyResult<Vec<String>>>()
}
/// In-memory byte buffer used as the input source for the Rust `csv` reader.
type MemIO = std::io::Cursor<Vec<u8>>;
/// Where a `Reader` currently gets its data from.
// NOTE(review): the reason dead code is allowed here is not visible in this file; confirm it is still needed.
#[allow(dead_code)]
enum ReadState {
/// The Python iterable supplied by the caller, not yet converted.
PyIter(PyIterable<PyObjectRef>),
/// Record iterator from the Rust `csv` crate over the buffered input.
CsvIter(rust_csv::StringRecordsIntoIter<MemIO>),
}
impl ReadState {
    /// Starts in the `PyIter` state, holding the caller's iterable unread.
    fn new(iter: PyIterable) -> Self {
        ReadState::PyIter(iter)
    }

    /// Switches from `PyIter` to `CsvIter`; does nothing if already switched.
    ///
    /// All lines of the Python iterable are collected, joined with `\n`, and
    /// handed to the Rust `csv` crate as one in-memory buffer.
    ///
    /// # Errors
    ///
    /// Returns whatever error `into_strings` produces while reading the
    /// iterable; in that case the state is left as `PyIter`.
    fn cast_to_reader(&mut self, vm: &VirtualMachine) -> PyResult<()> {
        if let ReadState::PyIter(ref iterable) = self {
            let lines = into_strings(iterable, vm)?;
            // `into_bytes` takes over the joined string's buffer instead of copying it.
            let bytes = join(lines, "\n").into_bytes();
            let csv_iter = rust_csv::ReaderBuilder::new()
                .has_headers(false)
                // The crate rejects records whose field count differs from the
                // first record unless `flexible` is enabled; Python's csv reader
                // accepts such ragged rows, so they must not become errors here.
                .flexible(true)
                .from_reader(MemIO::new(bytes))
                .into_records();
            *self = ReadState::CsvIter(csv_iter);
        }
        Ok(())
    }
}
// Object handed to Python by `build_reader`; registered under the name "Reader".
#[pyclass(name = "Reader")]
struct Reader {
// Kept in a `RefCell` because `__next__` receives `&self` yet has to mutate the state.
state: RefCell<ReadState>,
}
impl Debug for Reader {
    /// Formats every reader as the same fixed label; internal state is not shown.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        f.write_str("_csv.reader")
    }
}
impl PyValue for Reader {
    /// Looks up the class object registered as `Reader` in the `_csv` module.
    fn class(vm: &VirtualMachine) -> PyClassRef {
        let module_name = "_csv";
        let class_name = "Reader";
        vm.class(module_name, class_name)
    }
}
impl Reader {
    /// Creates a reader over `iter`; nothing is read from it at this point.
    fn new(iter: PyIterable<PyObjectRef>) -> Self {
        Self {
            state: RefCell::new(ReadState::new(iter)),
        }
    }
}
#[pyimpl]
impl Reader {
    // `__iter__`: makes sure the input has been handed to the CSV parser,
    // then returns the reader itself as the iterator.
    #[pymethod(name = "__iter__")]
    fn iter(this: PyRef<Self>, vm: &VirtualMachine) -> PyResult {
        {
            // Scoped so the borrow ends before `this` is consumed below.
            let mut state = this.state.borrow_mut();
            state.cast_to_reader(vm)?;
        }
        this.into_pyobject(vm)
    }

    // `__next__`: yields the next record as a Python list of strings, or
    // raises StopIteration once the parser has no more records.
    #[pymethod(name = "__next__")]
    fn next(&self, vm: &VirtualMachine) -> PyResult {
        let mut state = self.state.borrow_mut();
        state.cast_to_reader(vm)?;

        // After a successful `cast_to_reader` the state is always `CsvIter`.
        let rows = match &mut *state {
            ReadState::CsvIter(rows) => rows,
            ReadState::PyIter(_) => unreachable!(),
        };

        match rows.next() {
            None => Err(objiter::new_stop_iteration(vm)),
            Some(Err(_)) => {
                let msg = String::from("Decode Error");
                Err(vm.new_unicode_decode_error(msg))
            }
            Some(Ok(record)) => {
                let mut cells = Vec::with_capacity(record.len());
                for field in record.iter() {
                    cells.push(field.into_pyobject(vm)?);
                }
                Ok(vm.ctx.new_list(cells))
            }
        }
    }
}
/// Implementation of `_csv.reader`: builds a `Reader` over `fp`.
///
/// Returns a `TypeError` when `fp` cannot be converted into an iterable.
fn csv_reader(fp: PyObjectRef, vm: &VirtualMachine) -> PyResult {
    match PyIterable::<PyObjectRef>::try_from_object(vm, fp) {
        Ok(iterable) => build_reader(iterable, vm),
        Err(_) => Err(vm.new_type_error("argument 1 must be an iterator".to_string())),
    }
}
pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
let ctx = &vm.ctx;
let reader_type = Reader::make_class(ctx);
let error = create_type(
"Error",
&ctx.types.type_type,
&ctx.exceptions.exception_type,
);
py_module!(vm, "_csv", {
"reader" => ctx.new_rustfunc(csv_reader),
"Reader" => reader_type,
"Error" => error,
// constants
"QUOTE_MINIMAL" => ctx.new_int(QuoteStyle::QuoteMinimal as i32),
"QUOTE_ALL" => ctx.new_int(QuoteStyle::QuoteAll as i32),
"QUOTE_NONNUMERIC" => ctx.new_int(QuoteStyle::QuoteNonnumeric as i32),
"QUOTE_NONE" => ctx.new_int(QuoteStyle::QuoteNone as i32),
})
}

View File

@@ -4,6 +4,7 @@ mod ast;
mod binascii;
mod codecs;
mod collections;
mod csv;
mod dis;
mod errno;
mod functools;
@@ -60,6 +61,7 @@ pub fn get_module_inits() -> HashMap<String, StdlibInitFunc> {
"dis".to_string() => Box::new(dis::make_module),
"_codecs".to_string() => Box::new(codecs::make_module),
"_collections".to_string() => Box::new(collections::make_module),
"_csv".to_string() => Box::new(csv::make_module),
"_functools".to_string() => Box::new(functools::make_module),
"errno".to_string() => Box::new(errno::make_module),
"hashlib".to_string() => Box::new(hashlib::make_module),