diff --git a/stdlib/src/csv.rs b/stdlib/src/csv.rs index c67828bb3..37bbb5781 100644 --- a/stdlib/src/csv.rs +++ b/stdlib/src/csv.rs @@ -1,317 +1,327 @@ -use crate::common::lock::PyMutex; -use crate::vm::{ - builtins::{PyStr, PyStrRef}, - function::{ArgIterable, ArgumentError, FromArgs, FuncArgs}, - match_class, named_function, - protocol::{PyIter, PyIterReturn}, - py_module, - slots::{IteratorIterable, SlotIterator}, - types::create_simple_type, - PyClassImpl, PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, TypeProtocol, - VirtualMachine, -}; -use itertools::{self, Itertools}; -use std::fmt; +use crate::vm::{PyClassImpl, PyObjectRef, VirtualMachine}; -#[repr(i32)] -pub enum QuoteStyle { - Minimal = 0, - All = 1, - Nonnumeric = 2, - None = 3, +pub(crate) fn make_module(vm: &VirtualMachine) -> PyObjectRef { + let ctx = &vm.ctx; + _csv::Reader::make_class(ctx); + _csv::Writer::make_class(ctx); + _csv::make_module(vm) } -struct FormatOptions { - delimiter: u8, - quotechar: u8, -} +#[pymodule] +mod _csv { + use crate::common::lock::PyMutex; + use crate::vm::{ + builtins::{PyStr, PyStrRef, PyTypeRef}, + function::{ArgIterable, ArgumentError, FromArgs, FuncArgs}, + match_class, + protocol::{PyIter, PyIterReturn}, + slots::{IteratorIterable, SlotIterator}, + types::create_simple_type, + PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, TypeProtocol, VirtualMachine, + }; + use itertools::{self, Itertools}; + use std::fmt; -impl FromArgs for FormatOptions { - fn from_args(vm: &VirtualMachine, args: &mut FuncArgs) -> Result { - let delimiter = if let Some(delimiter) = args.kwargs.remove("delimiter") { - PyStrRef::try_from_object(vm, delimiter)? - .as_str() - .bytes() - .exactly_one() - .map_err(|_| { - let msg = r#""delimiter" must be a 1-character string"#; - vm.new_type_error(msg.to_owned()) - })? - } else { - b',' - }; + #[pyattr] + const QUOTE_MINIMAL: i32 = QuoteStyle::Minimal as i32; + #[pyattr] + const QUOTE_ALL: i32 = QuoteStyle::All as i32; + #[pyattr] + const QUOTE_NONNUMERIC: i32 = QuoteStyle::Nonnumeric as i32; + #[pyattr] + const QUOTE_NONE: i32 = QuoteStyle::None as i32; - let quotechar = if let Some(quotechar) = args.kwargs.remove("quotechar") { - PyStrRef::try_from_object(vm, quotechar)? - .as_str() - .bytes() - .exactly_one() - .map_err(|_| { - let msg = r#""quotechar" must be a 1-character string"#; - vm.new_type_error(msg.to_owned()) - })? - } else { - b'"' - }; + #[pyattr(name = "Error")] + fn error(vm: &VirtualMachine) -> PyTypeRef { + create_simple_type("Error", &vm.ctx.exceptions.exception_type) + } - Ok(FormatOptions { - delimiter, - quotechar, + #[pyfunction] + fn reader( + iter: PyIter, + options: FormatOptions, + // TODO: handle quote style, etc + _rest: FuncArgs, + _vm: &VirtualMachine, + ) -> PyResult { + Ok(Reader { + iter, + state: PyMutex::new(ReadState { + buffer: vec![0; 1024], + output_ends: vec![0; 16], + reader: options.to_reader(), + }), }) } -} -impl FormatOptions { - fn to_reader(&self) -> csv_core::Reader { - csv_core::ReaderBuilder::new() - .delimiter(self.delimiter) - .quote(self.quotechar) - .terminator(csv_core::Terminator::CRLF) - .build() - } - fn to_writer(&self) -> csv_core::Writer { - csv_core::WriterBuilder::new() - .delimiter(self.delimiter) - .quote(self.quotechar) - .terminator(csv_core::Terminator::CRLF) - .build() - } -} - -struct ReadState { - buffer: Vec, - output_ends: Vec, - reader: csv_core::Reader, -} - -#[pyclass(module = "_csv", name = "reader")] -#[derive(PyValue)] -struct Reader { - iter: PyIter, - state: PyMutex, -} - -impl fmt::Debug for Reader { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "_csv.reader") - } -} - -#[pyimpl(with(SlotIterator))] -impl Reader {} -impl IteratorIterable for Reader {} -impl SlotIterator for Reader { - fn next(zelf: &PyRef, vm: &VirtualMachine) -> PyResult { - let string = match zelf.iter.next(vm)? { - PyIterReturn::Return(obj) => obj, - PyIterReturn::StopIteration(v) => return Ok(PyIterReturn::StopIteration(v)), + #[pyfunction] + fn writer( + file: PyObjectRef, + options: FormatOptions, + // TODO: handle quote style, etc + _rest: FuncArgs, + vm: &VirtualMachine, + ) -> PyResult { + let write = match vm.get_attribute_opt(file.clone(), "write")? { + Some(write_meth) => write_meth, + None if vm.is_callable(&file) => file, + None => { + return Err(vm.new_type_error("argument 1 must have a \"write\" method".to_owned())) + } }; - let string = string.downcast::().map_err(|obj| { - vm.new_type_error(format!( + + Ok(Writer { + write, + state: PyMutex::new(WriteState { + buffer: vec![0; 1024], + writer: options.to_writer(), + }), + }) + } + + #[inline] + fn resize_buf(buf: &mut Vec) { + let new_size = buf.len() * 2; + buf.resize(new_size, T::zero()); + } + + #[repr(i32)] + pub enum QuoteStyle { + Minimal = 0, + All = 1, + Nonnumeric = 2, + None = 3, + } + + struct FormatOptions { + delimiter: u8, + quotechar: u8, + } + + impl FromArgs for FormatOptions { + fn from_args(vm: &VirtualMachine, args: &mut FuncArgs) -> Result { + let delimiter = if let Some(delimiter) = args.kwargs.remove("delimiter") { + PyStrRef::try_from_object(vm, delimiter)? + .as_str() + .bytes() + .exactly_one() + .map_err(|_| { + let msg = r#""delimiter" must be a 1-character string"#; + vm.new_type_error(msg.to_owned()) + })? + } else { + b',' + }; + + let quotechar = if let Some(quotechar) = args.kwargs.remove("quotechar") { + PyStrRef::try_from_object(vm, quotechar)? + .as_str() + .bytes() + .exactly_one() + .map_err(|_| { + let msg = r#""quotechar" must be a 1-character string"#; + vm.new_type_error(msg.to_owned()) + })? + } else { + b'"' + }; + + Ok(FormatOptions { + delimiter, + quotechar, + }) + } + } + + impl FormatOptions { + fn to_reader(&self) -> csv_core::Reader { + csv_core::ReaderBuilder::new() + .delimiter(self.delimiter) + .quote(self.quotechar) + .terminator(csv_core::Terminator::CRLF) + .build() + } + fn to_writer(&self) -> csv_core::Writer { + csv_core::WriterBuilder::new() + .delimiter(self.delimiter) + .quote(self.quotechar) + .terminator(csv_core::Terminator::CRLF) + .build() + } + } + + struct ReadState { + buffer: Vec, + output_ends: Vec, + reader: csv_core::Reader, + } + + #[pyclass(noattr, module = "_csv", name = "reader")] + #[derive(PyValue)] + pub(super) struct Reader { + iter: PyIter, + state: PyMutex, + } + + impl fmt::Debug for Reader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "_csv.reader") + } + } + + #[pyimpl(with(SlotIterator))] + impl Reader {} + impl IteratorIterable for Reader {} + impl SlotIterator for Reader { + fn next(zelf: &PyRef, vm: &VirtualMachine) -> PyResult { + let string = match zelf.iter.next(vm)? { + PyIterReturn::Return(obj) => obj, + PyIterReturn::StopIteration(v) => return Ok(PyIterReturn::StopIteration(v)), + }; + let string = string.downcast::().map_err(|obj| { + vm.new_type_error(format!( "iterator should return strings, not {} (the file should be opened in text mode)", obj.class().name() )) - })?; - let input = string.as_str().as_bytes(); + })?; + let input = string.as_str().as_bytes(); - let mut state = zelf.state.lock(); - let ReadState { - buffer, - output_ends, - reader, - } = &mut *state; - - let mut input_offset = 0; - let mut output_offset = 0; - let mut output_ends_offset = 0; - - loop { - let (res, nread, nwritten, nends) = reader.read_record( - &input[input_offset..], - &mut buffer[output_offset..], - &mut output_ends[output_ends_offset..], - ); - input_offset += nread; - output_offset += nwritten; - output_ends_offset += nends; - match res { - csv_core::ReadRecordResult::InputEmpty => {} - csv_core::ReadRecordResult::OutputFull => resize_buf(buffer), - csv_core::ReadRecordResult::OutputEndsFull => resize_buf(output_ends), - csv_core::ReadRecordResult::Record => break, - csv_core::ReadRecordResult::End => return Ok(PyIterReturn::StopIteration(None)), - } - } - let rest = &input[input_offset..]; - if !rest.iter().all(|&c| matches!(c, b'\r' | b'\n')) { - return Err(vm.new_value_error( - "new-line character seen in unquoted field - \ - do you need to open the file in universal-newline mode?" - .to_owned(), - )); - } - - let mut prev_end = 0; - let out = output_ends[..output_ends_offset] - .iter() - .map(|&end| { - let range = prev_end..end; - prev_end = end; - let s = std::str::from_utf8(&buffer[range]) - // not sure if this is possible - the input was all strings - .map_err(|_e| vm.new_unicode_decode_error("csv not utf8".to_owned()))?; - Ok(vm.ctx.new_str(s).into()) - }) - .collect::>()?; - Ok(PyIterReturn::Return(vm.ctx.new_list(out).into())) - } -} - -fn _csv_reader( - iter: PyIter, - options: FormatOptions, - // TODO: handle quote style, etc - _rest: FuncArgs, - _vm: &VirtualMachine, -) -> PyResult { - Ok(Reader { - iter, - state: PyMutex::new(ReadState { - buffer: vec![0; 1024], - output_ends: vec![0; 16], - reader: options.to_reader(), - }), - }) -} - -struct WriteState { - buffer: Vec, - writer: csv_core::Writer, -} - -#[pyclass(module = "_csv", name = "writer")] -#[derive(PyValue)] -struct Writer { - write: PyObjectRef, - state: PyMutex, -} - -impl fmt::Debug for Writer { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "_csv.writer") - } -} - -#[pyimpl] -impl Writer { - #[pymethod] - fn writerow(&self, row: PyObjectRef, vm: &VirtualMachine) -> PyResult { - let mut state = self.state.lock(); - let WriteState { buffer, writer } = &mut *state; - - let mut buffer_offset = 0; - - macro_rules! handle_res { - ($x:expr) => {{ - let (res, nwritten) = $x; - buffer_offset += nwritten; - match res { - csv_core::WriteResult::InputEmpty => break, - csv_core::WriteResult::OutputFull => resize_buf(buffer), - } - }}; - } - - let row = ArgIterable::try_from_object(vm, row)?; - for field in row.iter(vm)? { - let field: PyObjectRef = field?; - let stringified; - let data: &[u8] = match_class!(match field { - ref s @ PyStr => s.as_str().as_bytes(), - crate::builtins::PyNone => b"", - ref obj => { - stringified = vm.to_str(obj)?; - stringified.as_str().as_bytes() - } - }); + let mut state = zelf.state.lock(); + let ReadState { + buffer, + output_ends, + reader, + } = &mut *state; let mut input_offset = 0; + let mut output_offset = 0; + let mut output_ends_offset = 0; loop { - let (res, nread, nwritten) = - writer.field(&data[input_offset..], &mut buffer[buffer_offset..]); + let (res, nread, nwritten, nends) = reader.read_record( + &input[input_offset..], + &mut buffer[output_offset..], + &mut output_ends[output_ends_offset..], + ); input_offset += nread; - handle_res!((res, nwritten)); + output_offset += nwritten; + output_ends_offset += nends; + match res { + csv_core::ReadRecordResult::InputEmpty => {} + csv_core::ReadRecordResult::OutputFull => resize_buf(buffer), + csv_core::ReadRecordResult::OutputEndsFull => resize_buf(output_ends), + csv_core::ReadRecordResult::Record => break, + csv_core::ReadRecordResult::End => { + return Ok(PyIterReturn::StopIteration(None)) + } + } + } + let rest = &input[input_offset..]; + if !rest.iter().all(|&c| matches!(c, b'\r' | b'\n')) { + return Err(vm.new_value_error( + "new-line character seen in unquoted field - \ + do you need to open the file in universal-newline mode?" + .to_owned(), + )); + } + + let mut prev_end = 0; + let out = output_ends[..output_ends_offset] + .iter() + .map(|&end| { + let range = prev_end..end; + prev_end = end; + let s = std::str::from_utf8(&buffer[range]) + // not sure if this is possible - the input was all strings + .map_err(|_e| vm.new_unicode_decode_error("csv not utf8".to_owned()))?; + Ok(vm.ctx.new_str(s).into()) + }) + .collect::>()?; + Ok(PyIterReturn::Return(vm.ctx.new_list(out).into())) + } + } + + struct WriteState { + buffer: Vec, + writer: csv_core::Writer, + } + + #[pyclass(noattr, module = "_csv", name = "writer")] + #[derive(PyValue)] + pub(super) struct Writer { + write: PyObjectRef, + state: PyMutex, + } + + impl fmt::Debug for Writer { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "_csv.writer") + } + } + + #[pyimpl] + impl Writer { + #[pymethod] + fn writerow(&self, row: PyObjectRef, vm: &VirtualMachine) -> PyResult { + let mut state = self.state.lock(); + let WriteState { buffer, writer } = &mut *state; + + let mut buffer_offset = 0; + + macro_rules! handle_res { + ($x:expr) => {{ + let (res, nwritten) = $x; + buffer_offset += nwritten; + match res { + csv_core::WriteResult::InputEmpty => break, + csv_core::WriteResult::OutputFull => resize_buf(buffer), + } + }}; + } + + let row = ArgIterable::try_from_object(vm, row)?; + for field in row.iter(vm)? { + let field: PyObjectRef = field?; + let stringified; + let data: &[u8] = match_class!(match field { + ref s @ PyStr => s.as_str().as_bytes(), + crate::builtins::PyNone => b"", + ref obj => { + stringified = vm.to_str(obj)?; + stringified.as_str().as_bytes() + } + }); + + let mut input_offset = 0; + + loop { + let (res, nread, nwritten) = + writer.field(&data[input_offset..], &mut buffer[buffer_offset..]); + input_offset += nread; + handle_res!((res, nwritten)); + } + + loop { + handle_res!(writer.delimiter(&mut buffer[buffer_offset..])); + } } loop { - handle_res!(writer.delimiter(&mut buffer[buffer_offset..])); + handle_res!(writer.terminator(&mut buffer[buffer_offset..])); } + + let s = std::str::from_utf8(&buffer[..buffer_offset]) + .map_err(|_| vm.new_unicode_decode_error("csv not utf8".to_owned()))?; + + vm.invoke(&self.write, (s.to_owned(),)) } - loop { - handle_res!(writer.terminator(&mut buffer[buffer_offset..])); + #[pymethod] + fn writerows(&self, rows: ArgIterable, vm: &VirtualMachine) -> PyResult<()> { + for row in rows.iter(vm)? { + self.writerow(row?, vm)?; + } + Ok(()) } - - let s = std::str::from_utf8(&buffer[..buffer_offset]) - .map_err(|_| vm.new_unicode_decode_error("csv not utf8".to_owned()))?; - - vm.invoke(&self.write, (s.to_owned(),)) - } - - #[pymethod] - fn writerows(&self, rows: ArgIterable, vm: &VirtualMachine) -> PyResult<()> { - for row in rows.iter(vm)? { - self.writerow(row?, vm)?; - } - Ok(()) } } - -fn _csv_writer( - file: PyObjectRef, - options: FormatOptions, - // TODO: handle quote style, etc - _rest: FuncArgs, - vm: &VirtualMachine, -) -> PyResult { - let write = match vm.get_attribute_opt(file.clone(), "write")? { - Some(write_meth) => write_meth, - None if vm.is_callable(&file) => file, - None => return Err(vm.new_type_error("argument 1 must have a \"write\" method".to_owned())), - }; - - Ok(Writer { - write, - state: PyMutex::new(WriteState { - buffer: vec![0; 1024], - writer: options.to_writer(), - }), - }) -} - -#[inline] -fn resize_buf(buf: &mut Vec) { - let new_size = buf.len() * 2; - buf.resize(new_size, T::zero()); -} - -pub fn make_module(vm: &VirtualMachine) -> PyObjectRef { - let ctx = &vm.ctx; - - Reader::make_class(ctx); - Writer::make_class(ctx); - - let error = create_simple_type("Error", &ctx.exceptions.exception_type); - - py_module!(vm, "_csv", { - "reader" => named_function!(ctx, _csv, reader), - "writer" => named_function!(ctx, _csv, writer), - "Error" => error, - // constants - "QUOTE_MINIMAL" => ctx.new_int(QuoteStyle::Minimal as i32), - "QUOTE_ALL" => ctx.new_int(QuoteStyle::All as i32), - "QUOTE_NONNUMERIC" => ctx.new_int(QuoteStyle::Nonnumeric as i32), - "QUOTE_NONE" => ctx.new_int(QuoteStyle::None as i32), - }) -} diff --git a/stdlib/src/unicodedata.rs b/stdlib/src/unicodedata.rs index 3a0b60a49..ad3604c97 100644 --- a/stdlib/src/unicodedata.rs +++ b/stdlib/src/unicodedata.rs @@ -1,156 +1,160 @@ /* Access to the unicode database. See also: https://docs.python.org/3/library/unicodedata.html */ - -use crate::vm::{ - builtins::PyStrRef, extend_module, function::OptionalArg, py_module, PyClassImpl, PyObject, - PyObjectRef, PyResult, PyValue, VirtualMachine, -}; -use itertools::Itertools; -use unic_char_property::EnumeratedCharProperty; -use unic_normal::StrNormalForm; -use unic_ucd_age::{Age, UnicodeVersion, UNICODE_VERSION}; -use unic_ucd_bidi::BidiClass; -use unic_ucd_category::GeneralCategory; +use crate::vm::{PyObjectRef, PyValue, VirtualMachine}; pub fn make_module(vm: &VirtualMachine) -> PyObjectRef { - let ctx = &vm.ctx; + let module = unicodedata::make_module(vm); - let ucd_class = PyUCD::make_class(ctx); - - let ucd = PyObject::new(PyUCD::default(), ucd_class.clone(), None); - - let ucd_3_2_0 = PyObject::new( - PyUCD { - unic_version: UnicodeVersion { - major: 3, - minor: 2, - micro: 0, - }, - }, - ucd_class.clone(), - None, - ); - - let module = py_module!(vm, "unicodedata", { - "UCD" => ucd_class, - "ucd_3_2_0" => ucd_3_2_0, - // we do unidata_version here because the getter tries to do PyUCD::class() before - // the module is in the VM - "unidata_version" => ctx.new_str(PyUCD::default().unic_version.to_string()), - }); + let ucd = unicodedata::Ucd::new(unic_ucd_age::UNICODE_VERSION).into_ref(vm); for attr in ["category", "lookup", "name", "bidirectional", "normalize"] .iter() .copied() { - extend_module!(vm, &module, { - attr => vm.get_attribute(ucd.clone(), attr).unwrap(), + crate::vm::extend_module!(vm, &module, { + attr => vm.get_attribute(ucd.clone().into(), attr).unwrap(), }); } module } -#[pyclass(module = "unicodedata", name = "UCD")] -#[derive(Debug, PyValue)] -struct PyUCD { - unic_version: UnicodeVersion, -} +#[pymodule] +mod unicodedata { + use crate::vm::{ + builtins::PyStrRef, function::OptionalArg, PyObjectRef, PyRef, PyResult, PyValue, + VirtualMachine, + }; + use itertools::Itertools; + use unic_char_property::EnumeratedCharProperty; + use unic_normal::StrNormalForm; + use unic_ucd_age::{Age, UnicodeVersion, UNICODE_VERSION}; + use unic_ucd_bidi::BidiClass; + use unic_ucd_category::GeneralCategory; -impl Default for PyUCD { - #[inline(always)] - fn default() -> Self { - PyUCD { - unic_version: UNICODE_VERSION, + #[pyattr] + #[pyclass(name = "UCD")] + #[derive(Debug, PyValue)] + pub(super) struct Ucd { + unic_version: UnicodeVersion, + } + + impl Ucd { + pub fn new(unic_version: UnicodeVersion) -> Self { + Self { unic_version } } - } -} -#[pyimpl] -impl PyUCD { - fn check_age(&self, c: char) -> bool { - Age::of(c).map_or(false, |age| age.actual() <= self.unic_version) - } - - fn extract_char(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult> { - let c = character.as_str().chars().exactly_one().map_err(|_| { - vm.new_type_error("argument must be an unicode character, not str".to_owned()) - })?; - - if self.check_age(c) { - Ok(Some(c)) - } else { - Ok(None) + fn check_age(&self, c: char) -> bool { + Age::of(c).map_or(false, |age| age.actual() <= self.unic_version) } - } - #[pymethod] - fn category(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult { - Ok(self - .extract_char(character, vm)? - .map_or(GeneralCategory::Unassigned, GeneralCategory::of) - .abbr_name() - .to_owned()) - } + fn extract_char(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult> { + let c = character.as_str().chars().exactly_one().map_err(|_| { + vm.new_type_error("argument must be an unicode character, not str".to_owned()) + })?; - #[pymethod] - fn lookup(&self, name: PyStrRef, vm: &VirtualMachine) -> PyResult { - if let Some(character) = unicode_names2::character(name.as_str()) { - if self.check_age(character) { - return Ok(character.to_string()); + if self.check_age(c) { + Ok(Some(c)) + } else { + Ok(None) } } - Err(vm.new_lookup_error(format!("undefined character name '{}'", name))) } - #[pymethod] - fn name( - &self, - character: PyStrRef, - default: OptionalArg, - vm: &VirtualMachine, - ) -> PyResult { - let c = self.extract_char(character, vm)?; + #[pyimpl] + impl Ucd { + #[pymethod] + fn category(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult { + Ok(self + .extract_char(character, vm)? + .map_or(GeneralCategory::Unassigned, GeneralCategory::of) + .abbr_name() + .to_owned()) + } - if let Some(c) = c { - if self.check_age(c) { - if let Some(name) = unicode_names2::name(c) { - return Ok(vm.ctx.new_str(name.to_string()).into()); + #[pymethod] + fn lookup(&self, name: PyStrRef, vm: &VirtualMachine) -> PyResult { + if let Some(character) = unicode_names2::character(name.as_str()) { + if self.check_age(character) { + return Ok(character.to_string()); + } + } + Err(vm.new_lookup_error(format!("undefined character name '{}'", name))) + } + + #[pymethod] + fn name( + &self, + character: PyStrRef, + default: OptionalArg, + vm: &VirtualMachine, + ) -> PyResult { + let c = self.extract_char(character, vm)?; + + if let Some(c) = c { + if self.check_age(c) { + if let Some(name) = unicode_names2::name(c) { + return Ok(vm.ctx.new_str(name.to_string()).into()); + } + } + } + match default { + OptionalArg::Present(obj) => Ok(obj), + OptionalArg::Missing => { + Err(vm.new_value_error("character name not found!".to_owned())) } } } - match default { - OptionalArg::Present(obj) => Ok(obj), - OptionalArg::Missing => Err(vm.new_value_error("character name not found!".to_owned())), + + #[pymethod] + fn bidirectional(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult { + let bidi = match self.extract_char(character, vm)? { + Some(c) => BidiClass::of(c).abbr_name(), + None => "", + }; + Ok(bidi.to_owned()) + } + + #[pymethod] + fn normalize( + &self, + form: PyStrRef, + unistr: PyStrRef, + vm: &VirtualMachine, + ) -> PyResult { + let text = unistr.as_str(); + let normalized_text = match form.as_str() { + "NFC" => text.nfc().collect::(), + "NFKC" => text.nfkc().collect::(), + "NFD" => text.nfd().collect::(), + "NFKD" => text.nfkd().collect::(), + _ => return Err(vm.new_value_error("invalid normalization form".to_owned())), + }; + + Ok(normalized_text) + } + + #[pyproperty] + fn unidata_version(&self) -> String { + self.unic_version.to_string() } } - #[pymethod] - fn bidirectional(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult { - let bidi = match self.extract_char(character, vm)? { - Some(c) => BidiClass::of(c).abbr_name(), - None => "", - }; - Ok(bidi.to_owned()) + #[pyattr] + fn ucd_3_2_0(vm: &VirtualMachine) -> PyRef { + Ucd { + unic_version: UnicodeVersion { + major: 3, + minor: 2, + micro: 0, + }, + } + .into_ref(vm) } - #[pymethod] - fn normalize(&self, form: PyStrRef, unistr: PyStrRef, vm: &VirtualMachine) -> PyResult { - let text = unistr.as_str(); - let normalized_text = match form.as_str() { - "NFC" => text.nfc().collect::(), - "NFKC" => text.nfkc().collect::(), - "NFD" => text.nfd().collect::(), - "NFKD" => text.nfkd().collect::(), - _ => return Err(vm.new_value_error("invalid normalization form".to_owned())), - }; - - Ok(normalized_text) - } - - #[pyproperty] - fn unidata_version(&self) -> String { - self.unic_version.to_string() + #[pyattr] + fn unidata_version(_vm: &VirtualMachine) -> String { + UNICODE_VERSION.to_string() } } diff --git a/vm/src/stdlib/sys.rs b/vm/src/stdlib/sys.rs index 9ed01f0cd..9521805d4 100644 --- a/vm/src/stdlib/sys.rs +++ b/vm/src/stdlib/sys.rs @@ -1,4 +1,6 @@ -use crate::{ItemProtocol, PyClassImpl, PyObjectRef, PyResult, VirtualMachine}; +use crate::{ + function::IntoPyObject, ItemProtocol, PyClassImpl, PyObjectRef, PyResult, VirtualMachine, +}; pub(crate) use sys::{MAXSIZE, MULTIARCH}; @@ -690,6 +692,7 @@ pub(crate) fn init_module(vm: &VirtualMachine, module: &PyObjectRef, builtins: & modules.set_item("sys", module.clone(), vm).unwrap(); modules.set_item("builtins", builtins.clone(), vm).unwrap(); extend_module!(vm, module, { + "__doc__" => sys::DOC.to_owned().into_pyobject(vm), "modules" => modules, }); }