mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
Merge pull request #3278 from youknowone/pymodule2
_csv, unicodedata use #[pymodule]
This commit is contained in:
@@ -1,317 +1,327 @@
|
||||
use crate::common::lock::PyMutex;
|
||||
use crate::vm::{
|
||||
builtins::{PyStr, PyStrRef},
|
||||
function::{ArgIterable, ArgumentError, FromArgs, FuncArgs},
|
||||
match_class, named_function,
|
||||
protocol::{PyIter, PyIterReturn},
|
||||
py_module,
|
||||
slots::{IteratorIterable, SlotIterator},
|
||||
types::create_simple_type,
|
||||
PyClassImpl, PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, TypeProtocol,
|
||||
VirtualMachine,
|
||||
};
|
||||
use itertools::{self, Itertools};
|
||||
use std::fmt;
|
||||
use crate::vm::{PyClassImpl, PyObjectRef, VirtualMachine};
|
||||
|
||||
#[repr(i32)]
|
||||
pub enum QuoteStyle {
|
||||
Minimal = 0,
|
||||
All = 1,
|
||||
Nonnumeric = 2,
|
||||
None = 3,
|
||||
pub(crate) fn make_module(vm: &VirtualMachine) -> PyObjectRef {
|
||||
let ctx = &vm.ctx;
|
||||
_csv::Reader::make_class(ctx);
|
||||
_csv::Writer::make_class(ctx);
|
||||
_csv::make_module(vm)
|
||||
}
|
||||
|
||||
struct FormatOptions {
|
||||
delimiter: u8,
|
||||
quotechar: u8,
|
||||
}
|
||||
#[pymodule]
|
||||
mod _csv {
|
||||
use crate::common::lock::PyMutex;
|
||||
use crate::vm::{
|
||||
builtins::{PyStr, PyStrRef, PyTypeRef},
|
||||
function::{ArgIterable, ArgumentError, FromArgs, FuncArgs},
|
||||
match_class,
|
||||
protocol::{PyIter, PyIterReturn},
|
||||
slots::{IteratorIterable, SlotIterator},
|
||||
types::create_simple_type,
|
||||
PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, TypeProtocol, VirtualMachine,
|
||||
};
|
||||
use itertools::{self, Itertools};
|
||||
use std::fmt;
|
||||
|
||||
impl FromArgs for FormatOptions {
|
||||
fn from_args(vm: &VirtualMachine, args: &mut FuncArgs) -> Result<Self, ArgumentError> {
|
||||
let delimiter = if let Some(delimiter) = args.kwargs.remove("delimiter") {
|
||||
PyStrRef::try_from_object(vm, delimiter)?
|
||||
.as_str()
|
||||
.bytes()
|
||||
.exactly_one()
|
||||
.map_err(|_| {
|
||||
let msg = r#""delimiter" must be a 1-character string"#;
|
||||
vm.new_type_error(msg.to_owned())
|
||||
})?
|
||||
} else {
|
||||
b','
|
||||
};
|
||||
#[pyattr]
|
||||
const QUOTE_MINIMAL: i32 = QuoteStyle::Minimal as i32;
|
||||
#[pyattr]
|
||||
const QUOTE_ALL: i32 = QuoteStyle::All as i32;
|
||||
#[pyattr]
|
||||
const QUOTE_NONNUMERIC: i32 = QuoteStyle::Nonnumeric as i32;
|
||||
#[pyattr]
|
||||
const QUOTE_NONE: i32 = QuoteStyle::None as i32;
|
||||
|
||||
let quotechar = if let Some(quotechar) = args.kwargs.remove("quotechar") {
|
||||
PyStrRef::try_from_object(vm, quotechar)?
|
||||
.as_str()
|
||||
.bytes()
|
||||
.exactly_one()
|
||||
.map_err(|_| {
|
||||
let msg = r#""quotechar" must be a 1-character string"#;
|
||||
vm.new_type_error(msg.to_owned())
|
||||
})?
|
||||
} else {
|
||||
b'"'
|
||||
};
|
||||
#[pyattr(name = "Error")]
|
||||
fn error(vm: &VirtualMachine) -> PyTypeRef {
|
||||
create_simple_type("Error", &vm.ctx.exceptions.exception_type)
|
||||
}
|
||||
|
||||
Ok(FormatOptions {
|
||||
delimiter,
|
||||
quotechar,
|
||||
#[pyfunction]
|
||||
fn reader(
|
||||
iter: PyIter,
|
||||
options: FormatOptions,
|
||||
// TODO: handle quote style, etc
|
||||
_rest: FuncArgs,
|
||||
_vm: &VirtualMachine,
|
||||
) -> PyResult<Reader> {
|
||||
Ok(Reader {
|
||||
iter,
|
||||
state: PyMutex::new(ReadState {
|
||||
buffer: vec![0; 1024],
|
||||
output_ends: vec![0; 16],
|
||||
reader: options.to_reader(),
|
||||
}),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl FormatOptions {
|
||||
fn to_reader(&self) -> csv_core::Reader {
|
||||
csv_core::ReaderBuilder::new()
|
||||
.delimiter(self.delimiter)
|
||||
.quote(self.quotechar)
|
||||
.terminator(csv_core::Terminator::CRLF)
|
||||
.build()
|
||||
}
|
||||
fn to_writer(&self) -> csv_core::Writer {
|
||||
csv_core::WriterBuilder::new()
|
||||
.delimiter(self.delimiter)
|
||||
.quote(self.quotechar)
|
||||
.terminator(csv_core::Terminator::CRLF)
|
||||
.build()
|
||||
}
|
||||
}
|
||||
|
||||
struct ReadState {
|
||||
buffer: Vec<u8>,
|
||||
output_ends: Vec<usize>,
|
||||
reader: csv_core::Reader,
|
||||
}
|
||||
|
||||
#[pyclass(module = "_csv", name = "reader")]
|
||||
#[derive(PyValue)]
|
||||
struct Reader {
|
||||
iter: PyIter,
|
||||
state: PyMutex<ReadState>,
|
||||
}
|
||||
|
||||
impl fmt::Debug for Reader {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "_csv.reader")
|
||||
}
|
||||
}
|
||||
|
||||
#[pyimpl(with(SlotIterator))]
|
||||
impl Reader {}
|
||||
impl IteratorIterable for Reader {}
|
||||
impl SlotIterator for Reader {
|
||||
fn next(zelf: &PyRef<Self>, vm: &VirtualMachine) -> PyResult<PyIterReturn> {
|
||||
let string = match zelf.iter.next(vm)? {
|
||||
PyIterReturn::Return(obj) => obj,
|
||||
PyIterReturn::StopIteration(v) => return Ok(PyIterReturn::StopIteration(v)),
|
||||
#[pyfunction]
|
||||
fn writer(
|
||||
file: PyObjectRef,
|
||||
options: FormatOptions,
|
||||
// TODO: handle quote style, etc
|
||||
_rest: FuncArgs,
|
||||
vm: &VirtualMachine,
|
||||
) -> PyResult<Writer> {
|
||||
let write = match vm.get_attribute_opt(file.clone(), "write")? {
|
||||
Some(write_meth) => write_meth,
|
||||
None if vm.is_callable(&file) => file,
|
||||
None => {
|
||||
return Err(vm.new_type_error("argument 1 must have a \"write\" method".to_owned()))
|
||||
}
|
||||
};
|
||||
let string = string.downcast::<PyStr>().map_err(|obj| {
|
||||
vm.new_type_error(format!(
|
||||
|
||||
Ok(Writer {
|
||||
write,
|
||||
state: PyMutex::new(WriteState {
|
||||
buffer: vec![0; 1024],
|
||||
writer: options.to_writer(),
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn resize_buf<T: num_traits::PrimInt>(buf: &mut Vec<T>) {
|
||||
let new_size = buf.len() * 2;
|
||||
buf.resize(new_size, T::zero());
|
||||
}
|
||||
|
||||
/// Quoting modes, with the integer values exported by this module as the
/// `QUOTE_MINIMAL`, `QUOTE_ALL`, `QUOTE_NONNUMERIC` and `QUOTE_NONE` constants.
#[repr(i32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuoteStyle {
    Minimal = 0,
    All = 1,
    Nonnumeric = 2,
    None = 3,
}
|
||||
|
||||
struct FormatOptions {
|
||||
delimiter: u8,
|
||||
quotechar: u8,
|
||||
}
|
||||
|
||||
impl FromArgs for FormatOptions {
|
||||
fn from_args(vm: &VirtualMachine, args: &mut FuncArgs) -> Result<Self, ArgumentError> {
|
||||
let delimiter = if let Some(delimiter) = args.kwargs.remove("delimiter") {
|
||||
PyStrRef::try_from_object(vm, delimiter)?
|
||||
.as_str()
|
||||
.bytes()
|
||||
.exactly_one()
|
||||
.map_err(|_| {
|
||||
let msg = r#""delimiter" must be a 1-character string"#;
|
||||
vm.new_type_error(msg.to_owned())
|
||||
})?
|
||||
} else {
|
||||
b','
|
||||
};
|
||||
|
||||
let quotechar = if let Some(quotechar) = args.kwargs.remove("quotechar") {
|
||||
PyStrRef::try_from_object(vm, quotechar)?
|
||||
.as_str()
|
||||
.bytes()
|
||||
.exactly_one()
|
||||
.map_err(|_| {
|
||||
let msg = r#""quotechar" must be a 1-character string"#;
|
||||
vm.new_type_error(msg.to_owned())
|
||||
})?
|
||||
} else {
|
||||
b'"'
|
||||
};
|
||||
|
||||
Ok(FormatOptions {
|
||||
delimiter,
|
||||
quotechar,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl FormatOptions {
|
||||
fn to_reader(&self) -> csv_core::Reader {
|
||||
csv_core::ReaderBuilder::new()
|
||||
.delimiter(self.delimiter)
|
||||
.quote(self.quotechar)
|
||||
.terminator(csv_core::Terminator::CRLF)
|
||||
.build()
|
||||
}
|
||||
fn to_writer(&self) -> csv_core::Writer {
|
||||
csv_core::WriterBuilder::new()
|
||||
.delimiter(self.delimiter)
|
||||
.quote(self.quotechar)
|
||||
.terminator(csv_core::Terminator::CRLF)
|
||||
.build()
|
||||
}
|
||||
}
|
||||
|
||||
struct ReadState {
|
||||
buffer: Vec<u8>,
|
||||
output_ends: Vec<usize>,
|
||||
reader: csv_core::Reader,
|
||||
}
|
||||
|
||||
#[pyclass(noattr, module = "_csv", name = "reader")]
|
||||
#[derive(PyValue)]
|
||||
pub(super) struct Reader {
|
||||
iter: PyIter,
|
||||
state: PyMutex<ReadState>,
|
||||
}
|
||||
|
||||
impl fmt::Debug for Reader {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "_csv.reader")
|
||||
}
|
||||
}
|
||||
|
||||
#[pyimpl(with(SlotIterator))]
|
||||
impl Reader {}
|
||||
impl IteratorIterable for Reader {}
|
||||
impl SlotIterator for Reader {
|
||||
fn next(zelf: &PyRef<Self>, vm: &VirtualMachine) -> PyResult<PyIterReturn> {
|
||||
let string = match zelf.iter.next(vm)? {
|
||||
PyIterReturn::Return(obj) => obj,
|
||||
PyIterReturn::StopIteration(v) => return Ok(PyIterReturn::StopIteration(v)),
|
||||
};
|
||||
let string = string.downcast::<PyStr>().map_err(|obj| {
|
||||
vm.new_type_error(format!(
|
||||
"iterator should return strings, not {} (the file should be opened in text mode)",
|
||||
obj.class().name()
|
||||
))
|
||||
})?;
|
||||
let input = string.as_str().as_bytes();
|
||||
})?;
|
||||
let input = string.as_str().as_bytes();
|
||||
|
||||
let mut state = zelf.state.lock();
|
||||
let ReadState {
|
||||
buffer,
|
||||
output_ends,
|
||||
reader,
|
||||
} = &mut *state;
|
||||
|
||||
let mut input_offset = 0;
|
||||
let mut output_offset = 0;
|
||||
let mut output_ends_offset = 0;
|
||||
|
||||
loop {
|
||||
let (res, nread, nwritten, nends) = reader.read_record(
|
||||
&input[input_offset..],
|
||||
&mut buffer[output_offset..],
|
||||
&mut output_ends[output_ends_offset..],
|
||||
);
|
||||
input_offset += nread;
|
||||
output_offset += nwritten;
|
||||
output_ends_offset += nends;
|
||||
match res {
|
||||
csv_core::ReadRecordResult::InputEmpty => {}
|
||||
csv_core::ReadRecordResult::OutputFull => resize_buf(buffer),
|
||||
csv_core::ReadRecordResult::OutputEndsFull => resize_buf(output_ends),
|
||||
csv_core::ReadRecordResult::Record => break,
|
||||
csv_core::ReadRecordResult::End => return Ok(PyIterReturn::StopIteration(None)),
|
||||
}
|
||||
}
|
||||
let rest = &input[input_offset..];
|
||||
if !rest.iter().all(|&c| matches!(c, b'\r' | b'\n')) {
|
||||
return Err(vm.new_value_error(
|
||||
"new-line character seen in unquoted field - \
|
||||
do you need to open the file in universal-newline mode?"
|
||||
.to_owned(),
|
||||
));
|
||||
}
|
||||
|
||||
let mut prev_end = 0;
|
||||
let out = output_ends[..output_ends_offset]
|
||||
.iter()
|
||||
.map(|&end| {
|
||||
let range = prev_end..end;
|
||||
prev_end = end;
|
||||
let s = std::str::from_utf8(&buffer[range])
|
||||
// not sure if this is possible - the input was all strings
|
||||
.map_err(|_e| vm.new_unicode_decode_error("csv not utf8".to_owned()))?;
|
||||
Ok(vm.ctx.new_str(s).into())
|
||||
})
|
||||
.collect::<Result<_, _>>()?;
|
||||
Ok(PyIterReturn::Return(vm.ctx.new_list(out).into()))
|
||||
}
|
||||
}
|
||||
|
||||
fn _csv_reader(
|
||||
iter: PyIter,
|
||||
options: FormatOptions,
|
||||
// TODO: handle quote style, etc
|
||||
_rest: FuncArgs,
|
||||
_vm: &VirtualMachine,
|
||||
) -> PyResult<Reader> {
|
||||
Ok(Reader {
|
||||
iter,
|
||||
state: PyMutex::new(ReadState {
|
||||
buffer: vec![0; 1024],
|
||||
output_ends: vec![0; 16],
|
||||
reader: options.to_reader(),
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
struct WriteState {
|
||||
buffer: Vec<u8>,
|
||||
writer: csv_core::Writer,
|
||||
}
|
||||
|
||||
#[pyclass(module = "_csv", name = "writer")]
|
||||
#[derive(PyValue)]
|
||||
struct Writer {
|
||||
write: PyObjectRef,
|
||||
state: PyMutex<WriteState>,
|
||||
}
|
||||
|
||||
impl fmt::Debug for Writer {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "_csv.writer")
|
||||
}
|
||||
}
|
||||
|
||||
#[pyimpl]
|
||||
impl Writer {
|
||||
#[pymethod]
|
||||
fn writerow(&self, row: PyObjectRef, vm: &VirtualMachine) -> PyResult {
|
||||
let mut state = self.state.lock();
|
||||
let WriteState { buffer, writer } = &mut *state;
|
||||
|
||||
let mut buffer_offset = 0;
|
||||
|
||||
macro_rules! handle_res {
|
||||
($x:expr) => {{
|
||||
let (res, nwritten) = $x;
|
||||
buffer_offset += nwritten;
|
||||
match res {
|
||||
csv_core::WriteResult::InputEmpty => break,
|
||||
csv_core::WriteResult::OutputFull => resize_buf(buffer),
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
let row = ArgIterable::try_from_object(vm, row)?;
|
||||
for field in row.iter(vm)? {
|
||||
let field: PyObjectRef = field?;
|
||||
let stringified;
|
||||
let data: &[u8] = match_class!(match field {
|
||||
ref s @ PyStr => s.as_str().as_bytes(),
|
||||
crate::builtins::PyNone => b"",
|
||||
ref obj => {
|
||||
stringified = vm.to_str(obj)?;
|
||||
stringified.as_str().as_bytes()
|
||||
}
|
||||
});
|
||||
let mut state = zelf.state.lock();
|
||||
let ReadState {
|
||||
buffer,
|
||||
output_ends,
|
||||
reader,
|
||||
} = &mut *state;
|
||||
|
||||
let mut input_offset = 0;
|
||||
let mut output_offset = 0;
|
||||
let mut output_ends_offset = 0;
|
||||
|
||||
loop {
|
||||
let (res, nread, nwritten) =
|
||||
writer.field(&data[input_offset..], &mut buffer[buffer_offset..]);
|
||||
let (res, nread, nwritten, nends) = reader.read_record(
|
||||
&input[input_offset..],
|
||||
&mut buffer[output_offset..],
|
||||
&mut output_ends[output_ends_offset..],
|
||||
);
|
||||
input_offset += nread;
|
||||
handle_res!((res, nwritten));
|
||||
output_offset += nwritten;
|
||||
output_ends_offset += nends;
|
||||
match res {
|
||||
csv_core::ReadRecordResult::InputEmpty => {}
|
||||
csv_core::ReadRecordResult::OutputFull => resize_buf(buffer),
|
||||
csv_core::ReadRecordResult::OutputEndsFull => resize_buf(output_ends),
|
||||
csv_core::ReadRecordResult::Record => break,
|
||||
csv_core::ReadRecordResult::End => {
|
||||
return Ok(PyIterReturn::StopIteration(None))
|
||||
}
|
||||
}
|
||||
}
|
||||
let rest = &input[input_offset..];
|
||||
if !rest.iter().all(|&c| matches!(c, b'\r' | b'\n')) {
|
||||
return Err(vm.new_value_error(
|
||||
"new-line character seen in unquoted field - \
|
||||
do you need to open the file in universal-newline mode?"
|
||||
.to_owned(),
|
||||
));
|
||||
}
|
||||
|
||||
let mut prev_end = 0;
|
||||
let out = output_ends[..output_ends_offset]
|
||||
.iter()
|
||||
.map(|&end| {
|
||||
let range = prev_end..end;
|
||||
prev_end = end;
|
||||
let s = std::str::from_utf8(&buffer[range])
|
||||
// not sure if this is possible - the input was all strings
|
||||
.map_err(|_e| vm.new_unicode_decode_error("csv not utf8".to_owned()))?;
|
||||
Ok(vm.ctx.new_str(s).into())
|
||||
})
|
||||
.collect::<Result<_, _>>()?;
|
||||
Ok(PyIterReturn::Return(vm.ctx.new_list(out).into()))
|
||||
}
|
||||
}
|
||||
|
||||
struct WriteState {
|
||||
buffer: Vec<u8>,
|
||||
writer: csv_core::Writer,
|
||||
}
|
||||
|
||||
#[pyclass(noattr, module = "_csv", name = "writer")]
|
||||
#[derive(PyValue)]
|
||||
pub(super) struct Writer {
|
||||
write: PyObjectRef,
|
||||
state: PyMutex<WriteState>,
|
||||
}
|
||||
|
||||
impl fmt::Debug for Writer {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "_csv.writer")
|
||||
}
|
||||
}
|
||||
|
||||
#[pyimpl]
|
||||
impl Writer {
|
||||
#[pymethod]
|
||||
fn writerow(&self, row: PyObjectRef, vm: &VirtualMachine) -> PyResult {
|
||||
let mut state = self.state.lock();
|
||||
let WriteState { buffer, writer } = &mut *state;
|
||||
|
||||
let mut buffer_offset = 0;
|
||||
|
||||
macro_rules! handle_res {
|
||||
($x:expr) => {{
|
||||
let (res, nwritten) = $x;
|
||||
buffer_offset += nwritten;
|
||||
match res {
|
||||
csv_core::WriteResult::InputEmpty => break,
|
||||
csv_core::WriteResult::OutputFull => resize_buf(buffer),
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
let row = ArgIterable::try_from_object(vm, row)?;
|
||||
for field in row.iter(vm)? {
|
||||
let field: PyObjectRef = field?;
|
||||
let stringified;
|
||||
let data: &[u8] = match_class!(match field {
|
||||
ref s @ PyStr => s.as_str().as_bytes(),
|
||||
crate::builtins::PyNone => b"",
|
||||
ref obj => {
|
||||
stringified = vm.to_str(obj)?;
|
||||
stringified.as_str().as_bytes()
|
||||
}
|
||||
});
|
||||
|
||||
let mut input_offset = 0;
|
||||
|
||||
loop {
|
||||
let (res, nread, nwritten) =
|
||||
writer.field(&data[input_offset..], &mut buffer[buffer_offset..]);
|
||||
input_offset += nread;
|
||||
handle_res!((res, nwritten));
|
||||
}
|
||||
|
||||
loop {
|
||||
handle_res!(writer.delimiter(&mut buffer[buffer_offset..]));
|
||||
}
|
||||
}
|
||||
|
||||
loop {
|
||||
handle_res!(writer.delimiter(&mut buffer[buffer_offset..]));
|
||||
handle_res!(writer.terminator(&mut buffer[buffer_offset..]));
|
||||
}
|
||||
|
||||
let s = std::str::from_utf8(&buffer[..buffer_offset])
|
||||
.map_err(|_| vm.new_unicode_decode_error("csv not utf8".to_owned()))?;
|
||||
|
||||
vm.invoke(&self.write, (s.to_owned(),))
|
||||
}
|
||||
|
||||
loop {
|
||||
handle_res!(writer.terminator(&mut buffer[buffer_offset..]));
|
||||
#[pymethod]
|
||||
fn writerows(&self, rows: ArgIterable, vm: &VirtualMachine) -> PyResult<()> {
|
||||
for row in rows.iter(vm)? {
|
||||
self.writerow(row?, vm)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
let s = std::str::from_utf8(&buffer[..buffer_offset])
|
||||
.map_err(|_| vm.new_unicode_decode_error("csv not utf8".to_owned()))?;
|
||||
|
||||
vm.invoke(&self.write, (s.to_owned(),))
|
||||
}
|
||||
|
||||
#[pymethod]
|
||||
fn writerows(&self, rows: ArgIterable, vm: &VirtualMachine) -> PyResult<()> {
|
||||
for row in rows.iter(vm)? {
|
||||
self.writerow(row?, vm)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn _csv_writer(
|
||||
file: PyObjectRef,
|
||||
options: FormatOptions,
|
||||
// TODO: handle quote style, etc
|
||||
_rest: FuncArgs,
|
||||
vm: &VirtualMachine,
|
||||
) -> PyResult<Writer> {
|
||||
let write = match vm.get_attribute_opt(file.clone(), "write")? {
|
||||
Some(write_meth) => write_meth,
|
||||
None if vm.is_callable(&file) => file,
|
||||
None => return Err(vm.new_type_error("argument 1 must have a \"write\" method".to_owned())),
|
||||
};
|
||||
|
||||
Ok(Writer {
|
||||
write,
|
||||
state: PyMutex::new(WriteState {
|
||||
buffer: vec![0; 1024],
|
||||
writer: options.to_writer(),
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn resize_buf<T: num_traits::PrimInt>(buf: &mut Vec<T>) {
|
||||
let new_size = buf.len() * 2;
|
||||
buf.resize(new_size, T::zero());
|
||||
}
|
||||
|
||||
pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
|
||||
let ctx = &vm.ctx;
|
||||
|
||||
Reader::make_class(ctx);
|
||||
Writer::make_class(ctx);
|
||||
|
||||
let error = create_simple_type("Error", &ctx.exceptions.exception_type);
|
||||
|
||||
py_module!(vm, "_csv", {
|
||||
"reader" => named_function!(ctx, _csv, reader),
|
||||
"writer" => named_function!(ctx, _csv, writer),
|
||||
"Error" => error,
|
||||
// constants
|
||||
"QUOTE_MINIMAL" => ctx.new_int(QuoteStyle::Minimal as i32),
|
||||
"QUOTE_ALL" => ctx.new_int(QuoteStyle::All as i32),
|
||||
"QUOTE_NONNUMERIC" => ctx.new_int(QuoteStyle::Nonnumeric as i32),
|
||||
"QUOTE_NONE" => ctx.new_int(QuoteStyle::None as i32),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1,156 +1,160 @@
|
||||
/* Access to the unicode database.
|
||||
See also: https://docs.python.org/3/library/unicodedata.html
|
||||
*/
|
||||
|
||||
use crate::vm::{
|
||||
builtins::PyStrRef, extend_module, function::OptionalArg, py_module, PyClassImpl, PyObject,
|
||||
PyObjectRef, PyResult, PyValue, VirtualMachine,
|
||||
};
|
||||
use itertools::Itertools;
|
||||
use unic_char_property::EnumeratedCharProperty;
|
||||
use unic_normal::StrNormalForm;
|
||||
use unic_ucd_age::{Age, UnicodeVersion, UNICODE_VERSION};
|
||||
use unic_ucd_bidi::BidiClass;
|
||||
use unic_ucd_category::GeneralCategory;
|
||||
use crate::vm::{PyObjectRef, PyValue, VirtualMachine};
|
||||
|
||||
pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
|
||||
let ctx = &vm.ctx;
|
||||
let module = unicodedata::make_module(vm);
|
||||
|
||||
let ucd_class = PyUCD::make_class(ctx);
|
||||
|
||||
let ucd = PyObject::new(PyUCD::default(), ucd_class.clone(), None);
|
||||
|
||||
let ucd_3_2_0 = PyObject::new(
|
||||
PyUCD {
|
||||
unic_version: UnicodeVersion {
|
||||
major: 3,
|
||||
minor: 2,
|
||||
micro: 0,
|
||||
},
|
||||
},
|
||||
ucd_class.clone(),
|
||||
None,
|
||||
);
|
||||
|
||||
let module = py_module!(vm, "unicodedata", {
|
||||
"UCD" => ucd_class,
|
||||
"ucd_3_2_0" => ucd_3_2_0,
|
||||
// we do unidata_version here because the getter tries to do PyUCD::class() before
|
||||
// the module is in the VM
|
||||
"unidata_version" => ctx.new_str(PyUCD::default().unic_version.to_string()),
|
||||
});
|
||||
let ucd = unicodedata::Ucd::new(unic_ucd_age::UNICODE_VERSION).into_ref(vm);
|
||||
|
||||
for attr in ["category", "lookup", "name", "bidirectional", "normalize"]
|
||||
.iter()
|
||||
.copied()
|
||||
{
|
||||
extend_module!(vm, &module, {
|
||||
attr => vm.get_attribute(ucd.clone(), attr).unwrap(),
|
||||
crate::vm::extend_module!(vm, &module, {
|
||||
attr => vm.get_attribute(ucd.clone().into(), attr).unwrap(),
|
||||
});
|
||||
}
|
||||
|
||||
module
|
||||
}
|
||||
|
||||
#[pyclass(module = "unicodedata", name = "UCD")]
|
||||
#[derive(Debug, PyValue)]
|
||||
struct PyUCD {
|
||||
unic_version: UnicodeVersion,
|
||||
}
|
||||
#[pymodule]
|
||||
mod unicodedata {
|
||||
use crate::vm::{
|
||||
builtins::PyStrRef, function::OptionalArg, PyObjectRef, PyRef, PyResult, PyValue,
|
||||
VirtualMachine,
|
||||
};
|
||||
use itertools::Itertools;
|
||||
use unic_char_property::EnumeratedCharProperty;
|
||||
use unic_normal::StrNormalForm;
|
||||
use unic_ucd_age::{Age, UnicodeVersion, UNICODE_VERSION};
|
||||
use unic_ucd_bidi::BidiClass;
|
||||
use unic_ucd_category::GeneralCategory;
|
||||
|
||||
impl Default for PyUCD {
|
||||
#[inline(always)]
|
||||
fn default() -> Self {
|
||||
PyUCD {
|
||||
unic_version: UNICODE_VERSION,
|
||||
#[pyattr]
|
||||
#[pyclass(name = "UCD")]
|
||||
#[derive(Debug, PyValue)]
|
||||
pub(super) struct Ucd {
|
||||
unic_version: UnicodeVersion,
|
||||
}
|
||||
|
||||
impl Ucd {
|
||||
pub fn new(unic_version: UnicodeVersion) -> Self {
|
||||
Self { unic_version }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pyimpl]
|
||||
impl PyUCD {
|
||||
fn check_age(&self, c: char) -> bool {
|
||||
Age::of(c).map_or(false, |age| age.actual() <= self.unic_version)
|
||||
}
|
||||
|
||||
fn extract_char(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult<Option<char>> {
|
||||
let c = character.as_str().chars().exactly_one().map_err(|_| {
|
||||
vm.new_type_error("argument must be an unicode character, not str".to_owned())
|
||||
})?;
|
||||
|
||||
if self.check_age(c) {
|
||||
Ok(Some(c))
|
||||
} else {
|
||||
Ok(None)
|
||||
fn check_age(&self, c: char) -> bool {
|
||||
Age::of(c).map_or(false, |age| age.actual() <= self.unic_version)
|
||||
}
|
||||
}
|
||||
|
||||
#[pymethod]
|
||||
fn category(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult<String> {
|
||||
Ok(self
|
||||
.extract_char(character, vm)?
|
||||
.map_or(GeneralCategory::Unassigned, GeneralCategory::of)
|
||||
.abbr_name()
|
||||
.to_owned())
|
||||
}
|
||||
fn extract_char(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult<Option<char>> {
|
||||
let c = character.as_str().chars().exactly_one().map_err(|_| {
|
||||
vm.new_type_error("argument must be an unicode character, not str".to_owned())
|
||||
})?;
|
||||
|
||||
#[pymethod]
|
||||
fn lookup(&self, name: PyStrRef, vm: &VirtualMachine) -> PyResult<String> {
|
||||
if let Some(character) = unicode_names2::character(name.as_str()) {
|
||||
if self.check_age(character) {
|
||||
return Ok(character.to_string());
|
||||
if self.check_age(c) {
|
||||
Ok(Some(c))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
Err(vm.new_lookup_error(format!("undefined character name '{}'", name)))
|
||||
}
|
||||
|
||||
#[pymethod]
|
||||
fn name(
|
||||
&self,
|
||||
character: PyStrRef,
|
||||
default: OptionalArg<PyObjectRef>,
|
||||
vm: &VirtualMachine,
|
||||
) -> PyResult {
|
||||
let c = self.extract_char(character, vm)?;
|
||||
#[pyimpl]
|
||||
impl Ucd {
|
||||
#[pymethod]
|
||||
fn category(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult<String> {
|
||||
Ok(self
|
||||
.extract_char(character, vm)?
|
||||
.map_or(GeneralCategory::Unassigned, GeneralCategory::of)
|
||||
.abbr_name()
|
||||
.to_owned())
|
||||
}
|
||||
|
||||
if let Some(c) = c {
|
||||
if self.check_age(c) {
|
||||
if let Some(name) = unicode_names2::name(c) {
|
||||
return Ok(vm.ctx.new_str(name.to_string()).into());
|
||||
#[pymethod]
|
||||
fn lookup(&self, name: PyStrRef, vm: &VirtualMachine) -> PyResult<String> {
|
||||
if let Some(character) = unicode_names2::character(name.as_str()) {
|
||||
if self.check_age(character) {
|
||||
return Ok(character.to_string());
|
||||
}
|
||||
}
|
||||
Err(vm.new_lookup_error(format!("undefined character name '{}'", name)))
|
||||
}
|
||||
|
||||
#[pymethod]
|
||||
fn name(
|
||||
&self,
|
||||
character: PyStrRef,
|
||||
default: OptionalArg<PyObjectRef>,
|
||||
vm: &VirtualMachine,
|
||||
) -> PyResult {
|
||||
let c = self.extract_char(character, vm)?;
|
||||
|
||||
if let Some(c) = c {
|
||||
if self.check_age(c) {
|
||||
if let Some(name) = unicode_names2::name(c) {
|
||||
return Ok(vm.ctx.new_str(name.to_string()).into());
|
||||
}
|
||||
}
|
||||
}
|
||||
match default {
|
||||
OptionalArg::Present(obj) => Ok(obj),
|
||||
OptionalArg::Missing => {
|
||||
Err(vm.new_value_error("character name not found!".to_owned()))
|
||||
}
|
||||
}
|
||||
}
|
||||
match default {
|
||||
OptionalArg::Present(obj) => Ok(obj),
|
||||
OptionalArg::Missing => Err(vm.new_value_error("character name not found!".to_owned())),
|
||||
|
||||
#[pymethod]
|
||||
fn bidirectional(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult<String> {
|
||||
let bidi = match self.extract_char(character, vm)? {
|
||||
Some(c) => BidiClass::of(c).abbr_name(),
|
||||
None => "",
|
||||
};
|
||||
Ok(bidi.to_owned())
|
||||
}
|
||||
|
||||
#[pymethod]
|
||||
fn normalize(
|
||||
&self,
|
||||
form: PyStrRef,
|
||||
unistr: PyStrRef,
|
||||
vm: &VirtualMachine,
|
||||
) -> PyResult<String> {
|
||||
let text = unistr.as_str();
|
||||
let normalized_text = match form.as_str() {
|
||||
"NFC" => text.nfc().collect::<String>(),
|
||||
"NFKC" => text.nfkc().collect::<String>(),
|
||||
"NFD" => text.nfd().collect::<String>(),
|
||||
"NFKD" => text.nfkd().collect::<String>(),
|
||||
_ => return Err(vm.new_value_error("invalid normalization form".to_owned())),
|
||||
};
|
||||
|
||||
Ok(normalized_text)
|
||||
}
|
||||
|
||||
#[pyproperty]
|
||||
fn unidata_version(&self) -> String {
|
||||
self.unic_version.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
#[pymethod]
|
||||
fn bidirectional(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult<String> {
|
||||
let bidi = match self.extract_char(character, vm)? {
|
||||
Some(c) => BidiClass::of(c).abbr_name(),
|
||||
None => "",
|
||||
};
|
||||
Ok(bidi.to_owned())
|
||||
#[pyattr]
|
||||
fn ucd_3_2_0(vm: &VirtualMachine) -> PyRef<Ucd> {
|
||||
Ucd {
|
||||
unic_version: UnicodeVersion {
|
||||
major: 3,
|
||||
minor: 2,
|
||||
micro: 0,
|
||||
},
|
||||
}
|
||||
.into_ref(vm)
|
||||
}
|
||||
|
||||
#[pymethod]
|
||||
fn normalize(&self, form: PyStrRef, unistr: PyStrRef, vm: &VirtualMachine) -> PyResult<String> {
|
||||
let text = unistr.as_str();
|
||||
let normalized_text = match form.as_str() {
|
||||
"NFC" => text.nfc().collect::<String>(),
|
||||
"NFKC" => text.nfkc().collect::<String>(),
|
||||
"NFD" => text.nfd().collect::<String>(),
|
||||
"NFKD" => text.nfkd().collect::<String>(),
|
||||
_ => return Err(vm.new_value_error("invalid normalization form".to_owned())),
|
||||
};
|
||||
|
||||
Ok(normalized_text)
|
||||
}
|
||||
|
||||
#[pyproperty]
|
||||
fn unidata_version(&self) -> String {
|
||||
self.unic_version.to_string()
|
||||
#[pyattr]
|
||||
fn unidata_version(_vm: &VirtualMachine) -> String {
|
||||
UNICODE_VERSION.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
use crate::{ItemProtocol, PyClassImpl, PyObjectRef, PyResult, VirtualMachine};
|
||||
use crate::{
|
||||
function::IntoPyObject, ItemProtocol, PyClassImpl, PyObjectRef, PyResult, VirtualMachine,
|
||||
};
|
||||
|
||||
pub(crate) use sys::{MAXSIZE, MULTIARCH};
|
||||
|
||||
@@ -690,6 +692,7 @@ pub(crate) fn init_module(vm: &VirtualMachine, module: &PyObjectRef, builtins: &
|
||||
modules.set_item("sys", module.clone(), vm).unwrap();
|
||||
modules.set_item("builtins", builtins.clone(), vm).unwrap();
|
||||
extend_module!(vm, module, {
|
||||
"__doc__" => sys::DOC.to_owned().into_pyobject(vm),
|
||||
"modules" => modules,
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user