Merge pull request #5443 from coolreader18/incremental-newline-decoder

Implement IncrementalNewlineDecoder in Rust
This commit is contained in:
Noa
2024-12-03 16:28:00 -06:00
committed by GitHub
16 changed files with 324 additions and 122 deletions

11
Lib/io.py vendored
View File

@@ -57,9 +57,7 @@ import abc
from _io import (DEFAULT_BUFFER_SIZE, BlockingIOError, UnsupportedOperation,
open, open_code, FileIO, BytesIO, StringIO, BufferedReader,
BufferedWriter, BufferedRWPair, BufferedRandom,
# XXX RUSTPYTHON TODO: IncrementalNewlineDecoder
# IncrementalNewlineDecoder,
text_encoding, TextIOWrapper)
IncrementalNewlineDecoder, text_encoding, TextIOWrapper)
# Pretend this exception was created here.
UnsupportedOperation.__module__ = "io"
@@ -100,10 +98,3 @@ except ImportError:
pass
else:
RawIOBase.register(_WindowsConsoleIO)
# XXX: RUSTPYTHON; borrow IncrementalNewlineDecoder from _pyio
try:
from _pyio import IncrementalNewlineDecoder
except ImportError:
pass

View File

@@ -186,7 +186,6 @@ just fits in two lineS yup!!
the end"""
class TestSFpatches(unittest.TestCase):
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
def test_html_diff(self):
# Check SF patch 914575 for generating HTML differences
f1a = ((patch914575_from1 + '123\n'*10)*3)

View File

@@ -280,8 +280,6 @@ class FileInputTests(BaseTests, unittest.TestCase):
fi.readline()
self.assertTrue(custom_open_hook.invoked, "openhook not invoked")
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_readline(self):
with open(TESTFN, 'wb') as f:
f.write(b'A\nB\r\nC\r')

View File

@@ -84,7 +84,6 @@ class ImportTests(unittest.TestCase):
with self.assertRaises(SyntaxError):
imp.find_module('badsyntax_pep3120', path)
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
def test_issue1267(self):
for mod, encoding, _ in self.test_strings:
fp, filename, info = imp.find_module('module_' + mod,

View File

@@ -701,8 +701,6 @@ class SourceLoaderTestHarness:
class SourceOnlyLoaderTests(SourceLoaderTestHarness):
"""Test importlib.abc.SourceLoader for source-only loading."""
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_get_source(self):
# Verify the source code is returned as a string.
# If an OSError is raised by get_data then raise ImportError.
@@ -761,8 +759,6 @@ class SourceOnlyLoaderTests(SourceLoaderTestHarness):
self.verify_module(module)
self.assertFalse(hasattr(module, '__path__'))
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_get_source_encoding(self):
# Source is considered encoded in UTF-8 by default unless otherwise
# specified by an encoding line.
@@ -882,8 +878,6 @@ class SourceLoaderGetSourceTests:
"""Tests for importlib.abc.SourceLoader.get_source()."""
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_default_encoding(self):
# Should have no problems with UTF-8 text.
name = 'mod'
@@ -893,8 +887,6 @@ class SourceLoaderGetSourceTests:
returned_source = mock.get_source(name)
self.assertEqual(returned_source, source)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_decoded_source(self):
# Decoding should work.
name = 'mod'
@@ -905,8 +897,6 @@ class SourceLoaderGetSourceTests:
returned_source = mock.get_source(name)
self.assertEqual(returned_source, source)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_universal_newlines(self):
# PEP 302 says universal newlines should be used.
name = 'mod'

View File

@@ -36,22 +36,16 @@ class DecodeSourceBytesTests:
source = "string ='ü'"
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_ut8_default(self):
source_bytes = self.source.encode('utf-8')
self.assertEqual(self.util.decode_source(source_bytes), self.source)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_specified_encoding(self):
source = '# coding=latin-1\n' + self.source
source_bytes = source.encode('latin-1')
assert source_bytes != source.encode('utf-8')
self.assertEqual(self.util.decode_source(source_bytes), source)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_universal_newlines(self):
source = '\r\n'.join([self.source, self.source])
source_bytes = source.encode('utf-8')

40
Lib/test/test_io.py vendored
View File

@@ -4121,41 +4121,6 @@ class CTextIOWrapperTest(TextIOWrapperTest):
def test_newlines_input(self):
super().test_newlines_input()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_read_one_by_one(self):
super().test_read_one_by_one()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_read_by_chunk(self):
super().test_read_by_chunk()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_issue1395_1(self):
super().test_issue1395_1()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_issue1395_2(self):
super().test_issue1395_2()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_issue1395_3(self):
super().test_issue1395_3()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_issue1395_4(self):
super().test_issue1395_4()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_issue1395_5(self):
super().test_issue1395_5()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_reconfigure_write_through(self):
@@ -4812,11 +4777,6 @@ class CMiscIOTest(MiscIOTest):
name_of_module = "io", "_io"
extra_exported = "BlockingIOError",
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test___all__(self):
super().test___all__()
def test_readinto_buffer_overflow(self):
# Issue #18025
class BadReader(self.io.BufferedIOBase):

View File

@@ -187,8 +187,6 @@ class LineCacheTests(unittest.TestCase):
self.assertEqual(False, linecache.lazycache(FILENAME, None))
self.assertEqual(lines, linecache.getlines(FILENAME))
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_lazycache_smoke(self):
lines = linecache.getlines(NONEXISTENT_FILENAME, globals())
linecache.clearcache()
@@ -199,8 +197,6 @@ class LineCacheTests(unittest.TestCase):
# globals: this would error if the lazy value wasn't resolved.
self.assertEqual(lines, linecache.getlines(NONEXISTENT_FILENAME))
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_lazycache_provide_after_failed_lookup(self):
linecache.clearcache()
lines = linecache.getlines(NONEXISTENT_FILENAME, globals())
@@ -219,8 +215,6 @@ class LineCacheTests(unittest.TestCase):
self.assertEqual(False, linecache.lazycache('', globals()))
self.assertEqual(False, linecache.lazycache('<foo>', globals()))
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_lazycache_already_cached(self):
linecache.clearcache()
lines = linecache.getlines(NONEXISTENT_FILENAME, globals())

View File

@@ -54,12 +54,10 @@ class PopenTest(unittest.TestCase):
else:
self.assertEqual(os.waitstatus_to_exitcode(status), 42)
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
def test_contextmanager(self):
with os.popen("echo hello") as f:
self.assertEqual(f.read(), "hello\n")
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
def test_iterating(self):
with os.popen("echo hello") as f:
self.assertEqual(list(f), ["hello\n"])

View File

@@ -1056,8 +1056,6 @@ class ProcessTestCase(BaseTestCase):
self.assertEqual(stdout, b"bananasplit")
self.assertEqual(stderr, b"")
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_universal_newlines_and_text(self):
args = [
sys.executable, "-c",
@@ -1097,7 +1095,6 @@ class ProcessTestCase(BaseTestCase):
self.assertEqual(p.stdout.read(),
"line4\nline5\nline6\nline7\nline8")
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
def test_universal_newlines_communicate(self):
# universal newlines through communicate()
p = subprocess.Popen([sys.executable, "-c",
@@ -1149,7 +1146,6 @@ class ProcessTestCase(BaseTestCase):
p.communicate()
self.assertEqual(p.returncode, 0)
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
def test_universal_newlines_communicate_stdin_stdout_stderr(self):
# universal newlines through communicate(), with stdin, stdout, stderr
p = subprocess.Popen([sys.executable, "-c",
@@ -3805,7 +3801,6 @@ class MiscTests(unittest.TestCase):
raise KeyboardInterrupt # Test how __exit__ handles ^C.
self._test_keyboardinterrupt_no_kill(popen_via_context_manager)
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
def test_getoutput(self):
self.assertEqual(subprocess.getoutput('echo xyzzy'), 'xyzzy')
self.assertEqual(subprocess.getstatusoutput('echo xyzzy'),

View File

@@ -1040,8 +1040,6 @@ class MiscTracebackCases(unittest.TestCase):
class TestFrame(unittest.TestCase):
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_basics(self):
linecache.clearcache()
linecache.lazycache("f", globals())
@@ -1059,8 +1057,6 @@ class TestFrame(unittest.TestCase):
self.assertNotEqual(f, object())
self.assertEqual(f, ALWAYS_EQ)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_lazy_lines(self):
linecache.clearcache()
f = traceback.FrameSummary("f", 1, "dummy", lookup_line=False)
@@ -1109,8 +1105,6 @@ class TestStack(unittest.TestCase):
s = traceback.StackSummary.extract(traceback.walk_stack(None), limit=5)
self.assertEqual(len(s), 5)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_extract_stack_lookup_lines(self):
linecache.clearcache()
linecache.updatecache('/foo.py', globals())
@@ -1120,8 +1114,6 @@ class TestStack(unittest.TestCase):
linecache.clearcache()
self.assertEqual(s[0].line, "import sys")
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_extract_stackup_deferred_lookup_lines(self):
linecache.clearcache()
c = test_code('/foo.py', 'method')
@@ -1153,8 +1145,6 @@ class TestStack(unittest.TestCase):
[' File "foo.py", line 1, in fred\n line\n'],
s.format())
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_locals(self):
linecache.updatecache('/foo.py', globals())
c = test_code('/foo.py', 'method')
@@ -1162,8 +1152,6 @@ class TestStack(unittest.TestCase):
s = traceback.StackSummary.extract(iter([(f, 6)]), capture_locals=True)
self.assertEqual(s[0].locals, {'something': '1'})
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_no_locals(self):
linecache.updatecache('/foo.py', globals())
c = test_code('/foo.py', 'method')
@@ -1444,8 +1432,6 @@ class TestTracebackException(unittest.TestCase):
traceback.walk_tb(exc_info[2]), limit=5)
self.assertEqual(expected_stack, exc.stack)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_lookup_lines(self):
linecache.clearcache()
e = Exception("uh oh")
@@ -1457,8 +1443,6 @@ class TestTracebackException(unittest.TestCase):
linecache.updatecache('/foo.py', globals())
self.assertEqual(exc.stack[0].line, "import sys")
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_locals(self):
linecache.updatecache('/foo.py', globals())
e = Exception("uh oh")
@@ -1470,8 +1454,6 @@ class TestTracebackException(unittest.TestCase):
self.assertEqual(
exc.stack[0].locals, {'something': '1', 'other': "'string'"})
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_no_locals(self):
linecache.updatecache('/foo.py', globals())
e = Exception("uh oh")

View File

@@ -92,6 +92,11 @@ impl<R: RawMutex, G: GetThreadId, T: Default> Default for ThreadMutex<R, G, T> {
Self::new(T::default())
}
}
impl<R: RawMutex, G: GetThreadId, T> From<T> for ThreadMutex<R, G, T> {
fn from(val: T) -> Self {
Self::new(val)
}
}
impl<R: RawMutex, G: GetThreadId, T: ?Sized> ThreadMutex<R, G, T> {
pub fn lock(&self) -> Option<ThreadMutexGuard<R, G, T>> {
if self.raw.lock() {

View File

@@ -1,5 +1,6 @@
use proc_macro2::TokenStream;
use quote::{quote, ToTokens};
use syn::ext::IdentExt;
use syn::{
parse_quote, Attribute, Data, DeriveInput, Expr, Field, Ident, Lit, Meta, NestedMeta, Result,
};
@@ -138,7 +139,7 @@ fn generate_field((i, field): (usize, &Field)) -> Result<TokenStream> {
};
let name = field.ident.as_ref();
let name_string = name.map(Ident::to_string);
let name_string = name.map(|ident| ident.unraw().to_string());
if matches!(&name_string, Some(s) if s.starts_with("_phantom")) {
return Ok(quote! {
#name: ::std::marker::PhantomData,

View File

@@ -56,11 +56,30 @@ impl IntoPyTuple for Vec<PyObjectRef> {
}
}
macro_rules! impl_into_pyobj_tuple {
($(($T:ident, $idx:tt)),+) => {
/// Conversion from a borrowed `PyTuple` into a Rust value.
///
/// The conversion is fallible: implementations report failure through the
/// returned `PyResult`.
pub trait FromPyTuple<'a>: Sized {
/// Builds `Self` from `tuple`, using `vm` for any conversions and error creation.
fn from_pytuple(tuple: &'a PyTuple, vm: &VirtualMachine) -> PyResult<Self>;
}
macro_rules! impl_from_into_pytuple {
($($T:ident),+) => {
impl<$($T: ToPyObject),*> IntoPyTuple for ($($T,)*) {
fn into_pytuple(self, vm: &VirtualMachine) -> PyTupleRef {
PyTuple::new_ref(vec![$(self.$idx.to_pyobject(vm)),*], &vm.ctx)
#[allow(non_snake_case)]
let ($($T,)*) = self;
PyTuple::new_ref(vec![$($T.to_pyobject(vm)),*], &vm.ctx)
}
}
// TODO: figure out a way to let PyObjectRef implement TryFromBorrowedObject, and
// have this be a TryFromBorrowedObject bound
impl<'a, $($T: TryFromObject),*> FromPyTuple<'a> for ($($T,)*) {
fn from_pytuple(tuple: &'a PyTuple, vm: &VirtualMachine) -> PyResult<Self> {
#[allow(non_snake_case)]
let &[$(ref $T),+] = tuple.as_slice().try_into().map_err(|_| {
vm.new_type_error(format!("expected tuple with {} elements", impl_from_into_pytuple!(@count $($T)+)))
})?;
Ok(($($T::try_from_object(vm, $T.clone())?,)+))
}
}
@@ -70,15 +89,21 @@ macro_rules! impl_into_pyobj_tuple {
}
}
};
(@count $($T:ident)+) => {
0 $(+ impl_from_into_pytuple!(@discard $T))+
};
(@discard $T:ident) => {
1
};
}
impl_into_pyobj_tuple!((A, 0));
impl_into_pyobj_tuple!((A, 0), (B, 1));
impl_into_pyobj_tuple!((A, 0), (B, 1), (C, 2));
impl_into_pyobj_tuple!((A, 0), (B, 1), (C, 2), (D, 3));
impl_into_pyobj_tuple!((A, 0), (B, 1), (C, 2), (D, 3), (E, 4));
impl_into_pyobj_tuple!((A, 0), (B, 1), (C, 2), (D, 3), (E, 4), (F, 5));
impl_into_pyobj_tuple!((A, 0), (B, 1), (C, 2), (D, 3), (E, 4), (F, 5), (G, 6));
impl_from_into_pytuple!(A);
impl_from_into_pytuple!(A, B);
impl_from_into_pytuple!(A, B, C);
impl_from_into_pytuple!(A, B, C, D);
impl_from_into_pytuple!(A, B, C, D, E);
impl_from_into_pytuple!(A, B, C, D, E, F);
impl_from_into_pytuple!(A, B, C, D, E, F, G);
impl PyTuple {
pub(crate) fn fast_getitem(&self, idx: usize) -> PyObjectRef {
@@ -185,6 +210,10 @@ impl PyTuple {
Self { elements }.into_ref(&vm.ctx)
})
}
/// Converts this tuple into `T` through `T`'s `FromPyTuple` implementation.
///
/// Any error produced by the conversion is returned unchanged.
pub fn extract_tuple<'a, T: FromPyTuple<'a>>(&'a self, vm: &VirtualMachine) -> PyResult<T> {
    <T as FromPyTuple<'a>>::from_pytuple(self, vm)
}
}
#[pyclass(

View File

@@ -120,7 +120,7 @@ mod _io {
use crate::{
builtins::{
PyBaseExceptionRef, PyByteArray, PyBytes, PyBytesRef, PyIntRef, PyMemoryView, PyStr,
PyStrRef, PyType, PyTypeRef,
PyStrRef, PyTuple, PyTupleRef, PyType, PyTypeRef,
},
class::StaticType,
common::lock::{
@@ -148,6 +148,7 @@ mod _io {
use malachite_bigint::{BigInt, BigUint};
use num_traits::ToPrimitive;
use std::{
borrow::Cow,
io::{self, prelude::*, Cursor, SeekFrom},
ops::Range,
};
@@ -2243,7 +2244,9 @@ mod _io {
let has_read1 = vm.get_attribute_opt(buffer.clone(), "read1")?.is_some();
let seekable = vm.call_method(&buffer, "seekable", ())?.try_to_bool(vm)?;
let (encoder, decoder) = Self::find_coder(&buffer, encoding.as_str(), &errors, vm)?;
let newline = args.newline.unwrap_or_default();
let (encoder, decoder) =
Self::find_coder(&buffer, encoding.as_str(), &errors, newline, vm)?;
*data = Some(TextIOData {
buffer,
@@ -2251,7 +2254,7 @@ mod _io {
decoder,
encoding,
errors,
newline: args.newline.unwrap_or_default(),
newline,
line_buffering: args.line_buffering.unwrap_or_default(),
write_through: args.write_through.unwrap_or_default(),
chunk_size: 8192,
@@ -2291,6 +2294,7 @@ mod _io {
buffer: &PyObject,
encoding: &str,
errors: &Py<PyStr>,
newline: Newlines,
vm: &VirtualMachine,
) -> PyResult<(
Option<(PyObjectRef, Option<EncodeFunc>)>,
@@ -2315,10 +2319,17 @@ mod _io {
};
let decoder = if vm.call_method(buffer, "readable", ())?.try_to_bool(vm)? {
let incremental_decoder =
codec.get_incremental_decoder(Some(errors.to_owned()), vm)?;
// TODO: wrap in IncrementalNewlineDecoder if newlines == Universal | Passthrough
Some(incremental_decoder)
let decoder = codec.get_incremental_decoder(Some(errors.to_owned()), vm)?;
if let Newlines::Universal | Newlines::Passthrough = newline {
let args = IncrementalNewlineDecoderArgs {
decoder,
translate: matches!(newline, Newlines::Universal),
errors: None,
};
Some(IncrementalNewlineDecoder::construct_and_init(args, vm)?.into())
} else {
Some(decoder)
}
} else {
None
};
@@ -2333,8 +2344,13 @@ mod _io {
let mut data = self.data.lock().unwrap();
if let Some(data) = data.as_mut() {
if let Some(encoding) = args.encoding {
let (encoder, decoder) =
Self::find_coder(&data.buffer, encoding.as_str(), &data.errors, vm)?;
let (encoder, decoder) = Self::find_coder(
&data.buffer,
encoding.as_str(),
&data.errors,
data.newline,
vm,
)?;
data.encoding = encoding;
data.encoder = encoder;
data.decoder = decoder;
@@ -2370,6 +2386,25 @@ mod _io {
vm.call_method(&textio.buffer, "writable", ())
}
// Getter for the `line_buffering` flag; any error from `lock` is propagated.
#[pygetset]
fn line_buffering(&self, vm: &VirtualMachine) -> PyResult<bool> {
    let textio = self.lock(vm)?;
    Ok(textio.line_buffering)
}
// Getter for the `write_through` flag; any error from `lock` is propagated.
#[pygetset]
fn write_through(&self, vm: &VirtualMachine) -> PyResult<bool> {
    let textio = self.lock(vm)?;
    Ok(textio.write_through)
}
// Getter that forwards to the decoder's `newlines` attribute.
// Yields `None` when there is no decoder, or when the attribute lookup itself yields `None`.
#[pygetset]
fn newlines(&self, vm: &VirtualMachine) -> PyResult<Option<PyObjectRef>> {
    let textio = self.lock(vm)?;
    match &textio.decoder {
        Some(decoder) => vm.get_attribute_opt(decoder.clone(), "newlines"),
        None => Ok(None),
    }
}
#[pygetset(name = "_CHUNK_SIZE")]
fn chunksize(&self, vm: &VirtualMachine) -> PyResult<usize> {
Ok(self.lock(vm)?.chunk_size)
@@ -3122,6 +3157,229 @@ mod _io {
}
}
// Python-visible `IncrementalNewlineDecoder` class.
// All state lives in `data`, which starts out as `None` (via `Default`) and is
// filled in by the `Initializer` implementation; methods that need the state
// raise "I/O operation on uninitialized nldecoder" while it is still `None`.
#[pyattr]
#[pyclass(name)]
#[derive(Debug, PyPayload, Default)]
struct IncrementalNewlineDecoder {
// TODO: Traverse
data: PyThreadMutex<Option<IncrementalNewlineDecoderData>>,
}
/// State of an initialized `IncrementalNewlineDecoder`.
#[derive(Debug)]
struct IncrementalNewlineDecoderData {
/// Wrapped decoder object. When it is Python `None`, `decode` uses its input
/// directly and `getstate`/`setstate`/`reset` skip the delegated call.
decoder: PyObjectRef,
// afaict, this is used for nothing
// errors: PyObjectRef,
/// Set when a trailing `\r` was held back from the output of a non-final
/// `decode` call; it is re-attached at the start of a later call.
pendingcr: bool,
/// When true, `\r` and `\r\n` are rewritten to `\n` in decoded output.
translate: bool,
/// Newline kinds recorded by `decode` since construction or the last `reset`.
seennl: SeenNewline,
}
// Set of newline conventions encountered in decoded text.
// The numeric values matter: the decoder's `newlines` getter matches on `bits()`.
bitflags! {
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
struct SeenNewline: u8 {
// A bare "\n" was seen.
const LF = 1;
// A bare "\r" (not followed by "\n") was seen.
const CR = 2;
// A "\r\n" pair was seen.
const CRLF = 4;
}
}
// Opts in to `DefaultConstructor` without overriding anything; that trait requires
// `Default`, which this type derives.
impl DefaultConstructor for IncrementalNewlineDecoder {}
// Arguments accepted by the `IncrementalNewlineDecoder` initializer.
#[derive(FromArgs)]
struct IncrementalNewlineDecoderArgs {
// Decoder object to wrap (Python `None` is handled specially by the methods).
#[pyarg(any)]
decoder: PyObjectRef,
// Whether "\r" and "\r\n" are translated to "\n" when decoding.
#[pyarg(any)]
translate: bool,
// Optional; the initializer accepts it but does not store or use it.
#[pyarg(any, default)]
errors: Option<PyObjectRef>,
}
impl Initializer for IncrementalNewlineDecoder {
type Args = IncrementalNewlineDecoderArgs;
fn init(zelf: PyRef<Self>, args: Self::Args, vm: &VirtualMachine) -> PyResult<()> {
let _ = args.errors;
let mut data = zelf.lock_opt(vm)?;
*data = Some(IncrementalNewlineDecoderData {
decoder: args.decoder,
translate: args.translate,
pendingcr: false,
seennl: SeenNewline::empty(),
});
Ok(())
}
}
#[pyclass(with(Constructor, Initializer))]
impl IncrementalNewlineDecoder {
fn lock_opt(
&self,
vm: &VirtualMachine,
) -> PyResult<PyThreadMutexGuard<Option<IncrementalNewlineDecoderData>>> {
self.data
.lock()
.ok_or_else(|| vm.new_runtime_error("reentrant call inside nldecoder".to_owned()))
}
fn lock(
&self,
vm: &VirtualMachine,
) -> PyResult<PyMappedThreadMutexGuard<IncrementalNewlineDecoderData>> {
let lock = self.lock_opt(vm)?;
PyThreadMutexGuard::try_map(lock, |x| x.as_mut()).map_err(|_| {
vm.new_value_error("I/O operation on uninitialized nldecoder".to_owned())
})
}
#[pymethod]
fn decode(&self, args: NewlineDecodeArgs, vm: &VirtualMachine) -> PyResult<PyStrRef> {
self.lock(vm)?.decode(args.input, args.r#final, vm)
}
#[pymethod]
fn getstate(&self, vm: &VirtualMachine) -> PyResult<(PyObjectRef, u64)> {
let data = self.lock(vm)?;
let (buffer, flag) = if vm.is_none(&data.decoder) {
(vm.ctx.new_bytes(vec![]).into(), 0)
} else {
vm.call_method(&data.decoder, "getstate", ())?
.try_to_ref::<PyTuple>(vm)?
.extract_tuple::<(PyObjectRef, u64)>(vm)?
};
let flag = (flag << 1) | (data.pendingcr as u64);
Ok((buffer, flag))
}
#[pymethod]
fn setstate(&self, state: PyTupleRef, vm: &VirtualMachine) -> PyResult<()> {
let mut data = self.lock(vm)?;
let (buffer, flag) = state.extract_tuple::<(PyObjectRef, u64)>(vm)?;
data.pendingcr = flag & 1 != 0;
if !vm.is_none(&data.decoder) {
vm.call_method(&data.decoder, "setstate", ((buffer, flag >> 1),))?;
}
Ok(())
}
#[pymethod]
fn reset(&self, vm: &VirtualMachine) -> PyResult<()> {
let mut data = self.lock(vm)?;
data.seennl = SeenNewline::empty();
data.pendingcr = false;
if !vm.is_none(&data.decoder) {
vm.call_method(&data.decoder, "reset", ())?;
}
Ok(())
}
#[pygetset]
fn newlines(&self, vm: &VirtualMachine) -> PyResult {
let data = self.lock(vm)?;
Ok(match data.seennl.bits() {
1 => "\n".to_pyobject(vm),
2 => "\r".to_pyobject(vm),
3 => ("\r", "\n").to_pyobject(vm),
4 => "\r\n".to_pyobject(vm),
5 => ("\n", "\r\n").to_pyobject(vm),
6 => ("\r", "\r\n").to_pyobject(vm),
7 => ("\r", "\n", "\r\n").to_pyobject(vm),
_ => vm.ctx.none(),
})
}
}
// Arguments of `IncrementalNewlineDecoder.decode`.
#[derive(FromArgs)]
struct NewlineDecodeArgs {
// Data to decode; forwarded to the wrapped decoder, or used as-is when the decoder is `None`.
#[pyarg(any)]
input: PyObjectRef,
// Marks the last chunk of input; optional, taking `bool`'s default when omitted.
#[pyarg(any, default)]
r#final: bool,
}
impl IncrementalNewlineDecoderData {
// Decodes one chunk of input, records which newline kinds occur in it and,
// when `translate` is set, rewrites "\r" and "\r\n" to "\n".
//
// `input` is handed to the wrapped decoder's `decode(input, final_)`, or used
// directly when the decoder is Python `None`; the result must convert to a
// `str`. A trailing "\r" of a non-final chunk is held back (`pendingcr`) so
// that a "\r\n" pair split across chunks is still seen as one unit.
//
// Returns the original string object when no rewriting was needed, otherwise
// a newly allocated string. Errors from the wrapped decoder or from the
// conversion to `str` are propagated.
fn decode(
&mut self,
input: PyObjectRef,
final_: bool,
vm: &VirtualMachine,
) -> PyResult<PyStrRef> {
let output = if vm.is_none(&self.decoder) {
input
} else {
vm.call_method(&self.decoder, "decode", (input, final_))?
};
let orig_output: PyStrRef = output.try_into_value(vm)?;
// this being Cow::Owned means we need to allocate a new string
let mut output = Cow::Borrowed(orig_output.as_str());
// Re-attach the "\r" held back by an earlier call, once there is text to
// attach it to or no further input will arrive.
if self.pendingcr && (final_ || !output.is_empty()) {
output = ["\r", &*output].concat().into();
self.pendingcr = false;
}
// More input may follow: hold back a trailing "\r" until the next call.
if !final_ {
if let Some(s) = output.strip_suffix('\r') {
output = s.to_owned().into();
self.pendingcr = true;
}
}
if output.is_empty() {
return Ok(vm.ctx.empty_str.to_owned());
}
// Fast path: only "\n" (or nothing) has been seen so far and this chunk has
// no "\r", so nothing needs translating; at most LF has to be recorded.
if (self.seennl == SeenNewline::LF || self.seennl.is_empty()) && !output.contains('\r')
{
if self.seennl.is_empty() && output.contains('\n') {
self.seennl.insert(SeenNewline::LF);
}
} else if !self.translate {
// No translation: only record newline kinds, stopping early once all
// three kinds have been seen.
let mut matches = output.match_indices(['\r', '\n']);
while !self.seennl.is_all() {
let Some((i, c)) = matches.next() else { break };
match c {
"\n" => self.seennl.insert(SeenNewline::LF),
// if c isn't \n, it can only be \r
_ if output[i + 1..].starts_with('\n') => {
// consume the "\n" half of the pair so it is not also counted as LF
matches.next();
self.seennl.insert(SeenNewline::CRLF);
}
_ => self.seennl.insert(SeenNewline::CR),
}
}
} else {
// Translation: copy the text into `new_string`, replacing every "\r" or
// "\r\n" with "\n" and recording each newline kind along the way.
let mut chunks = output.match_indices(['\r', '\n']);
let mut new_string = String::with_capacity(output.len());
// byte offset in `output` up to which text has already been copied or skipped
let mut last_modification_index = 0;
while let Some((cr_index, chunk)) = chunks.next() {
if chunk == "\r" {
// skip copying the CR
let mut next_chunk_index = cr_index + 1;
if output[cr_index + 1..].starts_with('\n') {
chunks.next();
self.seennl.insert(SeenNewline::CRLF);
// skip the LF too
next_chunk_index += 1;
} else {
self.seennl.insert(SeenNewline::CR);
}
new_string.push_str(&output[last_modification_index..cr_index]);
new_string.push('\n');
last_modification_index = next_chunk_index;
} else {
// a bare "\n" stays in place and is copied with the surrounding text later
self.seennl.insert(SeenNewline::LF);
}
}
// copy whatever follows the last replaced newline
new_string.push_str(&output[last_modification_index..]);
output = new_string.into();
}
// Reuse the decoder's string object unless the text was modified above.
Ok(match output {
Cow::Borrowed(_) => orig_output,
Cow::Owned(s) => vm.ctx.new_str(s),
})
}
}
#[pyattr]
#[pyclass(name = "StringIO", base = "_TextIOBase")]
#[derive(Debug, PyPayload)]

View File

@@ -782,7 +782,16 @@ pub trait Constructor: PyPayload {
fn py_new(cls: PyTypeRef, args: Self::Args, vm: &VirtualMachine) -> PyResult;
}
pub trait DefaultConstructor: PyPayload + Default {}
/// Trait for payload types whose instances start out as their `Default` value.
pub trait DefaultConstructor: PyPayload + Default {
    /// Creates a default instance, runs the type's `Initializer::init` on it
    /// with `args`, and returns the initialized object.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `init`.
    fn construct_and_init(args: Self::Args, vm: &VirtualMachine) -> PyResult<PyRef<Self>>
    where
        Self: Initializer,
    {
        let instance = Self::default().into_ref(&vm.ctx);
        let handle = instance.clone();
        Self::init(handle, args, vm)?;
        Ok(instance)
    }
}
/// For types that cannot be instantiated through Python code.
#[pyclass]