diff --git a/.cspell.dict/cpython.txt b/.cspell.dict/cpython.txt index 00fd7f024..c0d007a90 100644 --- a/.cspell.dict/cpython.txt +++ b/.cspell.dict/cpython.txt @@ -12,6 +12,7 @@ baseinfo basetype binop boolop +BUFMAX BUILDSTDLIB bxor byteswap @@ -33,6 +34,8 @@ cmpop codedepth constevaluator CODEUNIT +CONIN +CONOUT CONVFUNC convparam copyslot @@ -105,6 +108,7 @@ metavars miscompiles mult multibytecodec +nameobj nameop nconsts newargs @@ -160,6 +164,7 @@ SETREF setresult setslice SLOTDEFINED +SMALLBUF SOABI SSLEOF stackdepth @@ -174,6 +179,7 @@ subscr sval swappedbytes templatelib +testconsole ticketer tmptype tok_oldval @@ -199,6 +205,7 @@ wbits weakreflist weakrefobject webpki +winconsoleio withitem withs xstat diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index ac57e71c4..4f195cf04 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -508,10 +508,6 @@ class PyAutoFileTests(AutoFileTests, unittest.TestCase): FileIO = _pyio.FileIO modulename = '_pyio' - @unittest.expectedFailureIfWindows('TODO: RUSTPYTHON; _blksize mismatch on Windows') - def testBlksize(self): - return super().testBlksize() - class OtherFileTests: diff --git a/Lib/test/test_winconsoleio.py b/Lib/test/test_winconsoleio.py new file mode 100644 index 000000000..1bae884ed --- /dev/null +++ b/Lib/test/test_winconsoleio.py @@ -0,0 +1,244 @@ +'''Tests for WindowsConsoleIO +''' + +import io +import os +import sys +import tempfile +import unittest +from test.support import os_helper, requires_resource + +if sys.platform != 'win32': + raise unittest.SkipTest("test only relevant on win32") + +from _testconsole import write_input + +ConIO = io._WindowsConsoleIO + +class WindowsConsoleIOTests(unittest.TestCase): + def test_abc(self): + self.assertIsSubclass(ConIO, io.RawIOBase) + self.assertNotIsSubclass(ConIO, io.BufferedIOBase) + self.assertNotIsSubclass(ConIO, io.TextIOBase) + + def test_open_fd(self): + self.assertRaisesRegex(ValueError, + "negative file descriptor", ConIO, -1) + + with tempfile.TemporaryFile() as tmpfile: + fd = tmpfile.fileno() + # Windows 10: "Cannot open non-console file" + # Earlier: "Cannot open console output buffer for reading" + self.assertRaisesRegex(ValueError, + "Cannot open (console|non-console file)", ConIO, fd) + + try: + f = ConIO(0) + except ValueError: + # cannot open console because it's not a real console + pass + else: + self.assertTrue(f.readable()) + self.assertFalse(f.writable()) + self.assertEqual(0, f.fileno()) + f.close() # multiple close should not crash + f.close() + with self.assertWarns(RuntimeWarning): + with ConIO(False): + pass + + try: + f = ConIO(1, 'w') + except ValueError: + # cannot open console because it's not a real console + pass + else: + self.assertFalse(f.readable()) + self.assertTrue(f.writable()) + self.assertEqual(1, f.fileno()) + f.close() + f.close() + with self.assertWarns(RuntimeWarning): + with ConIO(False): + pass + + try: + f = ConIO(2, 'w') + except ValueError: + # cannot open console because it's not a real console + pass + else: + self.assertFalse(f.readable()) + self.assertTrue(f.writable()) + self.assertEqual(2, f.fileno()) + f.close() + f.close() + + def test_open_name(self): + self.assertRaises(ValueError, ConIO, sys.executable) + + f = ConIO("CON") + self.assertTrue(f.readable()) + self.assertFalse(f.writable()) + self.assertIsNotNone(f.fileno()) + f.close() # multiple close should not crash + f.close() + + f = ConIO('CONIN$') + self.assertTrue(f.readable()) + self.assertFalse(f.writable()) + self.assertIsNotNone(f.fileno()) + f.close() + f.close() + + f = ConIO('CONOUT$', 'w') + self.assertFalse(f.readable()) + self.assertTrue(f.writable()) + self.assertIsNotNone(f.fileno()) + f.close() + f.close() + + # bpo-45354: Windows 11 changed MS-DOS device name handling + if sys.getwindowsversion()[:3] < (10, 0, 22000): + f = open('C:/con', 'rb', buffering=0) + self.assertIsInstance(f, ConIO) + f.close() + + def test_subclass_repr(self): + class TestSubclass(ConIO): + pass + + f = TestSubclass("CON") + with f: + self.assertIn(TestSubclass.__name__, repr(f)) + + self.assertIn(TestSubclass.__name__, repr(f)) + + @unittest.skipIf(sys.getwindowsversion()[:2] <= (6, 1), + "test does not work on Windows 7 and earlier") + def test_conin_conout_names(self): + f = open(r'\\.\conin$', 'rb', buffering=0) + self.assertIsInstance(f, ConIO) + f.close() + + f = open('//?/conout$', 'wb', buffering=0) + self.assertIsInstance(f, ConIO) + f.close() + + def test_conout_path(self): + temp_path = tempfile.mkdtemp() + self.addCleanup(os_helper.rmtree, temp_path) + + conout_path = os.path.join(temp_path, 'CONOUT$') + + with open(conout_path, 'wb', buffering=0) as f: + # bpo-45354: Windows 11 changed MS-DOS device name handling + if (6, 1) < sys.getwindowsversion()[:3] < (10, 0, 22000): + self.assertIsInstance(f, ConIO) + else: + self.assertNotIsInstance(f, ConIO) + + def test_write_empty_data(self): + with ConIO('CONOUT$', 'w') as f: + self.assertEqual(f.write(b''), 0) + + @requires_resource('console') + def test_write(self): + testcases = [] + with ConIO('CONOUT$', 'w') as f: + for a in [ + b'', + b'abc', + b'\xc2\xa7\xe2\x98\x83\xf0\x9f\x90\x8d', + b'\xff'*10, + ]: + for b in b'\xc2\xa7', b'\xe2\x98\x83', b'\xf0\x9f\x90\x8d': + testcases.append(a + b) + for i in range(1, len(b)): + data = a + b[:i] + testcases.append(data + b'z') + testcases.append(data + b'\xff') + # incomplete multibyte sequence + with self.subTest(data=data): + self.assertEqual(f.write(data), len(a)) + for data in testcases: + with self.subTest(data=data): + self.assertEqual(f.write(data), len(data)) + + def assertStdinRoundTrip(self, text): + stdin = open('CONIN$', 'r') + old_stdin = sys.stdin + try: + sys.stdin = stdin + write_input( + stdin.buffer.raw, + (text + '\r\n').encode('utf-16-le', 'surrogatepass') + ) + actual = input() + finally: + sys.stdin = old_stdin + self.assertEqual(actual, text) + + @requires_resource('console') + def test_input(self): + # ASCII + self.assertStdinRoundTrip('abc123') + # Non-ASCII + self.assertStdinRoundTrip('ϼўТλФЙ') + # Combining characters + self.assertStdinRoundTrip('A͏B ﬖ̳AA̝') + + # bpo-38325 + @unittest.skipIf(True, "Handling Non-BMP characters is broken") + def test_input_nonbmp(self): + # Non-BMP + self.assertStdinRoundTrip('\U00100000\U0010ffff\U0010fffd') + + @requires_resource('console') + def test_partial_reads(self): + # Test that reading less than 1 full character works when stdin + # contains multibyte UTF-8 sequences + source = 'ϼўТλФЙ\r\n'.encode('utf-16-le') + expected = 'ϼўТλФЙ\r\n'.encode('utf-8') + for read_count in range(1, 16): + with open('CONIN$', 'rb', buffering=0) as stdin: + write_input(stdin, source) + + actual = b'' + while not actual.endswith(b'\n'): + b = stdin.read(read_count) + actual += b + + self.assertEqual(actual, expected, 'stdin.read({})'.format(read_count)) + + # bpo-38325 + @unittest.skipIf(True, "Handling Non-BMP characters is broken") + def test_partial_surrogate_reads(self): + # Test that reading less than 1 full character works when stdin + # contains surrogate pairs that cannot be decoded to UTF-8 without + # reading an extra character. + source = '\U00101FFF\U00101001\r\n'.encode('utf-16-le') + expected = '\U00101FFF\U00101001\r\n'.encode('utf-8') + for read_count in range(1, 16): + with open('CONIN$', 'rb', buffering=0) as stdin: + write_input(stdin, source) + + actual = b'' + while not actual.endswith(b'\n'): + b = stdin.read(read_count) + actual += b + + self.assertEqual(actual, expected, 'stdin.read({})'.format(read_count)) + + @requires_resource('console') + def test_ctrl_z(self): + with open('CONIN$', 'rb', buffering=0) as stdin: + source = '\xC4\x1A\r\n'.encode('utf-16-le') + expected = '\xC4'.encode('utf-8') + write_input(stdin, source) + a, b = stdin.read(1), stdin.readall() + self.assertEqual(expected[0:1], a) + self.assertEqual(expected[1:], b) + +if __name__ == "__main__": + unittest.main() diff --git a/crates/vm/src/stdlib/io.rs b/crates/vm/src/stdlib/io.rs index 3fc8b305a..768df4cb7 100644 --- a/crates/vm/src/stdlib/io.rs +++ b/crates/vm/src/stdlib/io.rs @@ -29,6 +29,8 @@ fn file_closed(file: &PyObject, vm: &VirtualMachine) -> PyResult { file.get_attr("closed", vm)?.try_to_bool(vm) } +const DEFAULT_BUFFER_SIZE: usize = 128 * 1024; + /// iobase_finalize in Modules/_io/iobase.c fn iobase_finalize(zelf: &PyObject, vm: &VirtualMachine) { // If `closed` doesn't exist or can't be evaluated as bool, then the @@ -244,7 +246,7 @@ mod _io { } #[pyattr] - const DEFAULT_BUFFER_SIZE: usize = 8 * 1024; + const DEFAULT_BUFFER_SIZE: usize = super::DEFAULT_BUFFER_SIZE; pub(super) fn seekfrom( vm: &VirtualMachine, @@ -5252,7 +5254,7 @@ mod fileio { closefd: AtomicCell::new(true), mode: AtomicCell::new(Mode::empty()), seekable: AtomicCell::new(None), - blksize: AtomicCell::new(8 * 1024), // DEFAULT_BUFFER_SIZE + blksize: AtomicCell::new(super::DEFAULT_BUFFER_SIZE as _), finalizing: AtomicCell::new(false), } } @@ -5828,7 +5830,10 @@ mod winconsoleio { let Ok(name) = path_or_fd.str(vm) else { return '\0'; }; - let name_str = name.as_str(); + let Some(name_str) = name.to_str() else { + // Surrogate strings can't be console device names + return '\0'; + }; if name_str.eq_ignore_ascii_case("CONIN$") { return 'r'; @@ -5928,7 +5933,7 @@ mod winconsoleio { writable: AtomicCell::new(false), closefd: AtomicCell::new(false), finalizing: AtomicCell::new(false), - blksize: AtomicCell::new(8 * 1024), + blksize: AtomicCell::new(super::DEFAULT_BUFFER_SIZE as _), buf: PyMutex::new([0u8; SMALLBUF]), } } @@ -6131,7 +6136,7 @@ mod winconsoleio { return Err(vm.new_value_error("Cannot open console output buffer for reading")); } - zelf.blksize.store(8 * 1024); + zelf.blksize.store(super::DEFAULT_BUFFER_SIZE as _); *zelf.buf.lock() = [0u8; SMALLBUF]; zelf.as_object().set_attr("name", nameobj, vm)?;