Merge pull request #5563 from coolreader18/fix-zlib-tests

Fix a bunch of zlib tests & update gzip.py to Python 3.13
This commit is contained in:
Jeong, YunWon
2025-02-26 15:47:52 +09:00
committed by GitHub
5 changed files with 666 additions and 380 deletions

130
Lib/gzip.py vendored
View File

@@ -15,12 +15,16 @@ __all__ = ["BadGzipFile", "GzipFile", "open", "compress", "decompress"]
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
READ, WRITE = 1, 2
READ = 'rb'
WRITE = 'wb'
_COMPRESS_LEVEL_FAST = 1
_COMPRESS_LEVEL_TRADEOFF = 6
_COMPRESS_LEVEL_BEST = 9
READ_BUFFER_SIZE = 128 * 1024
_WRITE_BUFFER_SIZE = 4 * io.DEFAULT_BUFFER_SIZE
def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST,
encoding=None, errors=None, newline=None):
@@ -118,6 +122,21 @@ class BadGzipFile(OSError):
"""Exception raised in some cases for invalid gzip files."""
class _WriteBufferStream(io.RawIOBase):
"""Minimal object to pass WriteBuffer flushes into GzipFile"""
def __init__(self, gzip_file):
self.gzip_file = gzip_file
def write(self, data):
return self.gzip_file._write_raw(data)
def seekable(self):
return False
def writable(self):
return True
class GzipFile(_compression.BaseStream):
"""The GzipFile class simulates most of the methods of a file object with
the exception of the truncate() method.
@@ -160,9 +179,10 @@ class GzipFile(_compression.BaseStream):
and 9 is slowest and produces the most compression. 0 is no compression
at all. The default is 9.
The mtime argument is an optional numeric timestamp to be written
to the last modification time field in the stream when compressing.
If omitted or None, the current time is used.
The optional mtime argument is the timestamp requested by gzip. The time
is in Unix format, i.e., seconds since 00:00:00 UTC, January 1, 1970.
If mtime is omitted or None, the current time is used. Use mtime = 0
to generate a compressed stream that does not depend on creation time.
"""
@@ -182,6 +202,7 @@ class GzipFile(_compression.BaseStream):
if mode is None:
mode = getattr(fileobj, 'mode', 'rb')
if mode.startswith('r'):
self.mode = READ
raw = _GzipReader(fileobj)
@@ -204,6 +225,9 @@ class GzipFile(_compression.BaseStream):
zlib.DEF_MEM_LEVEL,
0)
self._write_mtime = mtime
self._buffer_size = _WRITE_BUFFER_SIZE
self._buffer = io.BufferedWriter(_WriteBufferStream(self),
buffer_size=self._buffer_size)
else:
raise ValueError("Invalid mode: {!r}".format(mode))
@@ -212,14 +236,6 @@ class GzipFile(_compression.BaseStream):
if self.mode == WRITE:
self._write_gzip_header(compresslevel)
@property
def filename(self):
import warnings
warnings.warn("use the name attribute", DeprecationWarning, 2)
if self.mode == WRITE and self.name[-3:] != ".gz":
return self.name + ".gz"
return self.name
@property
def mtime(self):
"""Last modification time read from stream, or None"""
@@ -237,6 +253,11 @@ class GzipFile(_compression.BaseStream):
self.bufsize = 0
self.offset = 0 # Current file offset for seek(), tell(), etc
def tell(self):
self._check_not_closed()
self._buffer.flush()
return super().tell()
def _write_gzip_header(self, compresslevel):
self.fileobj.write(b'\037\213') # magic header
self.fileobj.write(b'\010') # compression method
@@ -278,6 +299,10 @@ class GzipFile(_compression.BaseStream):
if self.fileobj is None:
raise ValueError("write() on closed GzipFile object")
return self._buffer.write(data)
def _write_raw(self, data):
# Called by our self._buffer underlying WriteBufferStream.
if isinstance(data, (bytes, bytearray)):
length = len(data)
else:
@@ -326,11 +351,11 @@ class GzipFile(_compression.BaseStream):
def close(self):
fileobj = self.fileobj
if fileobj is None:
if fileobj is None or self._buffer.closed:
return
self.fileobj = None
try:
if self.mode == WRITE:
self._buffer.flush()
fileobj.write(self.compress.flush())
write32u(fileobj, self.crc)
# self.size may exceed 2 GiB, or even 4 GiB
@@ -338,6 +363,7 @@ class GzipFile(_compression.BaseStream):
elif self.mode == READ:
self._buffer.close()
finally:
self.fileobj = None
myfileobj = self.myfileobj
if myfileobj:
self.myfileobj = None
@@ -346,6 +372,7 @@ class GzipFile(_compression.BaseStream):
def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
self._check_not_closed()
if self.mode == WRITE:
self._buffer.flush()
# Ensure the compressor's buffer is flushed
self.fileobj.write(self.compress.flush(zlib_mode))
self.fileobj.flush()
@@ -376,6 +403,9 @@ class GzipFile(_compression.BaseStream):
def seek(self, offset, whence=io.SEEK_SET):
if self.mode == WRITE:
self._check_not_closed()
# Flush buffer to ensure validity of self.offset
self._buffer.flush()
if whence != io.SEEK_SET:
if whence == io.SEEK_CUR:
offset = self.offset + offset
@@ -384,10 +414,10 @@ class GzipFile(_compression.BaseStream):
if offset < self.offset:
raise OSError('Negative seek in write mode')
count = offset - self.offset
chunk = b'\0' * 1024
for i in range(count // 1024):
chunk = b'\0' * self._buffer_size
for i in range(count // self._buffer_size):
self.write(chunk)
self.write(b'\0' * (count % 1024))
self.write(b'\0' * (count % self._buffer_size))
elif self.mode == READ:
self._check_not_closed()
return self._buffer.seek(offset, whence)
@@ -454,7 +484,7 @@ def _read_gzip_header(fp):
class _GzipReader(_compression.DecompressReader):
def __init__(self, fp):
super().__init__(_PaddedFile(fp), zlib.decompressobj,
super().__init__(_PaddedFile(fp), zlib._ZlibDecompressor,
wbits=-zlib.MAX_WBITS)
# Set flag indicating start of a new member
self._new_member = True
@@ -502,12 +532,13 @@ class _GzipReader(_compression.DecompressReader):
self._new_member = False
# Read a chunk of data from the file
buf = self._fp.read(io.DEFAULT_BUFFER_SIZE)
if self._decompressor.needs_input:
buf = self._fp.read(READ_BUFFER_SIZE)
uncompress = self._decompressor.decompress(buf, size)
else:
uncompress = self._decompressor.decompress(b"", size)
uncompress = self._decompressor.decompress(buf, size)
if self._decompressor.unconsumed_tail != b"":
self._fp.prepend(self._decompressor.unconsumed_tail)
elif self._decompressor.unused_data != b"":
if self._decompressor.unused_data != b"":
# Prepend the already read bytes to the fileobj so they can
# be seen by _read_eof() and _read_gzip_header()
self._fp.prepend(self._decompressor.unused_data)
@@ -518,14 +549,11 @@ class _GzipReader(_compression.DecompressReader):
raise EOFError("Compressed file ended before the "
"end-of-stream marker was reached")
self._add_read_data( uncompress )
self._crc = zlib.crc32(uncompress, self._crc)
self._stream_size += len(uncompress)
self._pos += len(uncompress)
return uncompress
def _add_read_data(self, data):
self._crc = zlib.crc32(data, self._crc)
self._stream_size = self._stream_size + len(data)
def _read_eof(self):
# We've read to the end of the file
# We check that the computed CRC and size of the
@@ -552,43 +580,21 @@ class _GzipReader(_compression.DecompressReader):
self._new_member = True
def _create_simple_gzip_header(compresslevel: int,
mtime = None) -> bytes:
"""
Write a simple gzip header with no extra fields.
:param compresslevel: Compresslevel used to determine the xfl bytes.
:param mtime: The mtime (must support conversion to a 32-bit integer).
:return: A bytes object representing the gzip header.
"""
if mtime is None:
mtime = time.time()
if compresslevel == _COMPRESS_LEVEL_BEST:
xfl = 2
elif compresslevel == _COMPRESS_LEVEL_FAST:
xfl = 4
else:
xfl = 0
# Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra
# fields added to header), mtime, xfl and os (255 for unknown OS).
return struct.pack("<BBBBLBB", 0x1f, 0x8b, 8, 0, int(mtime), xfl, 255)
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=0):
"""Compress data in one shot and return the compressed string.
compresslevel sets the compression level in range of 0-9.
mtime can be used to set the modification time. The modification time is
set to the current time by default.
mtime can be used to set the modification time.
The modification time is set to 0 by default, for reproducibility.
"""
if mtime == 0:
# Use zlib as it creates the header with 0 mtime by default.
# This is faster and with less overhead.
return zlib.compress(data, level=compresslevel, wbits=31)
header = _create_simple_gzip_header(compresslevel, mtime)
trailer = struct.pack("<LL", zlib.crc32(data), (len(data) & 0xffffffff))
# Wbits=-15 creates a raw deflate block.
return (header + zlib.compress(data, level=compresslevel, wbits=-15) +
trailer)
# Wbits=31 automatically includes a gzip header and trailer.
gzip_data = zlib.compress(data, level=compresslevel, wbits=31)
if mtime is None:
mtime = time.time()
# Reuse gzip header created by zlib, replace mtime and OS byte for
# consistency.
header = struct.pack("<4sLBB", gzip_data, int(mtime), gzip_data[8], 255)
return header + gzip_data[10:]
def decompress(data):
@@ -655,7 +661,7 @@ def main():
f = builtins.open(arg, "rb")
g = open(arg + ".gz", "wb")
while True:
chunk = f.read(io.DEFAULT_BUFFER_SIZE)
chunk = f.read(READ_BUFFER_SIZE)
if not chunk:
break
g.write(chunk)

270
Lib/test/test_gzip.py vendored
View File

@@ -5,7 +5,6 @@ import array
import functools
import io
import os
import pathlib
import struct
import sys
import unittest
@@ -16,6 +15,7 @@ from test.support import _4G, bigmemtest, requires_subprocess
from test.support.script_helper import assert_python_ok, assert_python_failure
gzip = import_helper.import_module('gzip')
zlib = import_helper.import_module('zlib')
data1 = b""" int length=DEFAULTALLOC, err = Z_OK;
PyObject *RetVal;
@@ -78,16 +78,18 @@ class TestGzip(BaseTest):
f.close()
def test_write_read_with_pathlike_file(self):
filename = pathlib.Path(self.filename)
filename = os_helper.FakePath(self.filename)
with gzip.GzipFile(filename, 'w') as f:
f.write(data1 * 50)
self.assertIsInstance(f.name, str)
self.assertEqual(f.name, self.filename)
with gzip.GzipFile(filename, 'a') as f:
f.write(data1)
with gzip.GzipFile(filename) as f:
d = f.read()
self.assertEqual(d, data1 * 51)
self.assertIsInstance(f.name, str)
self.assertEqual(f.name, self.filename)
# The following test_write_xy methods test that write accepts
# the corresponding bytes-like object type as input
@@ -471,15 +473,122 @@ class TestGzip(BaseTest):
with io.TextIOWrapper(f, encoding="ascii") as t:
self.assertEqual(t.readlines(), lines)
def test_fileobj_with_name(self):
with open(self.filename, "xb") as raw:
with gzip.GzipFile(fileobj=raw, mode="x") as f:
f.write(b'one')
self.assertEqual(f.name, raw.name)
self.assertEqual(f.fileno(), raw.fileno())
self.assertEqual(f.mode, gzip.WRITE)
self.assertIs(f.readable(), False)
self.assertIs(f.writable(), True)
self.assertIs(f.seekable(), True)
self.assertIs(f.closed, False)
self.assertIs(f.closed, True)
self.assertEqual(f.name, raw.name)
self.assertRaises(AttributeError, f.fileno)
self.assertEqual(f.mode, gzip.WRITE)
self.assertIs(f.readable(), False)
self.assertIs(f.writable(), True)
self.assertIs(f.seekable(), True)
with open(self.filename, "wb") as raw:
with gzip.GzipFile(fileobj=raw, mode="w") as f:
f.write(b'two')
self.assertEqual(f.name, raw.name)
self.assertEqual(f.fileno(), raw.fileno())
self.assertEqual(f.mode, gzip.WRITE)
self.assertIs(f.readable(), False)
self.assertIs(f.writable(), True)
self.assertIs(f.seekable(), True)
self.assertIs(f.closed, False)
self.assertIs(f.closed, True)
self.assertEqual(f.name, raw.name)
self.assertRaises(AttributeError, f.fileno)
self.assertEqual(f.mode, gzip.WRITE)
self.assertIs(f.readable(), False)
self.assertIs(f.writable(), True)
self.assertIs(f.seekable(), True)
with open(self.filename, "ab") as raw:
with gzip.GzipFile(fileobj=raw, mode="a") as f:
f.write(b'three')
self.assertEqual(f.name, raw.name)
self.assertEqual(f.fileno(), raw.fileno())
self.assertEqual(f.mode, gzip.WRITE)
self.assertIs(f.readable(), False)
self.assertIs(f.writable(), True)
self.assertIs(f.seekable(), True)
self.assertIs(f.closed, False)
self.assertIs(f.closed, True)
self.assertEqual(f.name, raw.name)
self.assertRaises(AttributeError, f.fileno)
self.assertEqual(f.mode, gzip.WRITE)
self.assertIs(f.readable(), False)
self.assertIs(f.writable(), True)
self.assertIs(f.seekable(), True)
with open(self.filename, "rb") as raw:
with gzip.GzipFile(fileobj=raw, mode="r") as f:
self.assertEqual(f.read(), b'twothree')
self.assertEqual(f.name, raw.name)
self.assertEqual(f.fileno(), raw.fileno())
self.assertEqual(f.mode, gzip.READ)
self.assertIs(f.readable(), True)
self.assertIs(f.writable(), False)
self.assertIs(f.seekable(), True)
self.assertIs(f.closed, False)
self.assertIs(f.closed, True)
self.assertEqual(f.name, raw.name)
self.assertRaises(AttributeError, f.fileno)
self.assertEqual(f.mode, gzip.READ)
self.assertIs(f.readable(), True)
self.assertIs(f.writable(), False)
self.assertIs(f.seekable(), True)
def test_fileobj_from_fdopen(self):
# Issue #13781: Opening a GzipFile for writing fails when using a
# fileobj created with os.fdopen().
fd = os.open(self.filename, os.O_WRONLY | os.O_CREAT)
with os.fdopen(fd, "wb") as f:
with gzip.GzipFile(fileobj=f, mode="w") as g:
pass
fd = os.open(self.filename, os.O_WRONLY | os.O_CREAT | os.O_EXCL)
with os.fdopen(fd, "xb") as raw:
with gzip.GzipFile(fileobj=raw, mode="x") as f:
f.write(b'one')
self.assertEqual(f.name, '')
self.assertEqual(f.fileno(), raw.fileno())
self.assertIs(f.closed, True)
self.assertEqual(f.name, '')
self.assertRaises(AttributeError, f.fileno)
fd = os.open(self.filename, os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
with os.fdopen(fd, "wb") as raw:
with gzip.GzipFile(fileobj=raw, mode="w") as f:
f.write(b'two')
self.assertEqual(f.name, '')
self.assertEqual(f.fileno(), raw.fileno())
self.assertEqual(f.name, '')
self.assertRaises(AttributeError, f.fileno)
fd = os.open(self.filename, os.O_WRONLY | os.O_CREAT | os.O_APPEND)
with os.fdopen(fd, "ab") as raw:
with gzip.GzipFile(fileobj=raw, mode="a") as f:
f.write(b'three')
self.assertEqual(f.name, '')
self.assertEqual(f.fileno(), raw.fileno())
self.assertEqual(f.name, '')
self.assertRaises(AttributeError, f.fileno)
fd = os.open(self.filename, os.O_RDONLY)
with os.fdopen(fd, "rb") as raw:
with gzip.GzipFile(fileobj=raw, mode="r") as f:
self.assertEqual(f.read(), b'twothree')
self.assertEqual(f.name, '')
self.assertEqual(f.fileno(), raw.fileno())
self.assertEqual(f.name, '')
self.assertRaises(AttributeError, f.fileno)
def test_fileobj_mode(self):
self.assertEqual(gzip.READ, 'rb')
self.assertEqual(gzip.WRITE, 'wb')
gzip.GzipFile(self.filename, "wb").close()
with open(self.filename, "r+b") as f:
with gzip.GzipFile(fileobj=f, mode='r') as g:
@@ -507,17 +616,69 @@ class TestGzip(BaseTest):
def test_bytes_filename(self):
str_filename = self.filename
try:
bytes_filename = str_filename.encode("ascii")
except UnicodeEncodeError:
self.skipTest("Temporary file name needs to be ASCII")
bytes_filename = os.fsencode(str_filename)
with gzip.GzipFile(bytes_filename, "wb") as f:
f.write(data1 * 50)
self.assertEqual(f.name, bytes_filename)
with gzip.GzipFile(bytes_filename, "rb") as f:
self.assertEqual(f.read(), data1 * 50)
self.assertEqual(f.name, bytes_filename)
# Sanity check that we are actually operating on the right file.
with gzip.GzipFile(str_filename, "rb") as f:
self.assertEqual(f.read(), data1 * 50)
self.assertEqual(f.name, str_filename)
def test_fileobj_without_name(self):
bio = io.BytesIO()
with gzip.GzipFile(fileobj=bio, mode='wb') as f:
f.write(data1 * 50)
self.assertEqual(f.name, '')
self.assertRaises(io.UnsupportedOperation, f.fileno)
self.assertEqual(f.mode, gzip.WRITE)
self.assertIs(f.readable(), False)
self.assertIs(f.writable(), True)
self.assertIs(f.seekable(), True)
self.assertIs(f.closed, False)
self.assertIs(f.closed, True)
self.assertEqual(f.name, '')
self.assertRaises(AttributeError, f.fileno)
self.assertEqual(f.mode, gzip.WRITE)
self.assertIs(f.readable(), False)
self.assertIs(f.writable(), True)
self.assertIs(f.seekable(), True)
bio.seek(0)
with gzip.GzipFile(fileobj=bio, mode='rb') as f:
self.assertEqual(f.read(), data1 * 50)
self.assertEqual(f.name, '')
self.assertRaises(io.UnsupportedOperation, f.fileno)
self.assertEqual(f.mode, gzip.READ)
self.assertIs(f.readable(), True)
self.assertIs(f.writable(), False)
self.assertIs(f.seekable(), True)
self.assertIs(f.closed, False)
self.assertIs(f.closed, True)
self.assertEqual(f.name, '')
self.assertRaises(AttributeError, f.fileno)
self.assertEqual(f.mode, gzip.READ)
self.assertIs(f.readable(), True)
self.assertIs(f.writable(), False)
self.assertIs(f.seekable(), True)
def test_fileobj_and_filename(self):
filename2 = self.filename + 'new'
with (open(self.filename, 'wb') as fileobj,
gzip.GzipFile(fileobj=fileobj, filename=filename2, mode='wb') as f):
f.write(data1 * 50)
self.assertEqual(f.name, filename2)
with (open(self.filename, 'rb') as fileobj,
gzip.GzipFile(fileobj=fileobj, filename=filename2, mode='rb') as f):
self.assertEqual(f.read(), data1 * 50)
self.assertEqual(f.name, filename2)
# Sanity check that we are actually operating on the right file.
with gzip.GzipFile(self.filename, 'rb') as f:
self.assertEqual(f.read(), data1 * 50)
self.assertEqual(f.name, self.filename)
def test_decompress_limited(self):
"""Decompressed data buffering should be limited"""
@@ -552,8 +713,18 @@ class TestGzip(BaseTest):
f.read(1) # to set mtime attribute
self.assertEqual(f.mtime, mtime)
def test_compress_mtime_default(self):
# test for gh-125260
datac = gzip.compress(data1, mtime=0)
datac2 = gzip.compress(data1)
self.assertEqual(datac, datac2)
datac3 = gzip.compress(data1, mtime=None)
self.assertNotEqual(datac, datac3)
with gzip.GzipFile(fileobj=io.BytesIO(datac3), mode="rb") as f:
f.read(1) # to set mtime attribute
self.assertGreater(f.mtime, 1)
def test_compress_correct_level(self):
# gzip.compress calls with mtime == 0 take a different code path.
for mtime in (0, 42):
with self.subTest(mtime=mtime):
nocompress = gzip.compress(data1, compresslevel=0, mtime=mtime)
@@ -561,6 +732,17 @@ class TestGzip(BaseTest):
self.assertIn(data1, nocompress)
self.assertNotIn(data1, yescompress)
def test_issue112346(self):
# The OS byte should be 255, this should not change between Python versions.
for mtime in (0, 42):
with self.subTest(mtime=mtime):
compress = gzip.compress(data1, compresslevel=1, mtime=mtime)
self.assertEqual(
struct.unpack("<IxB", compress[4:10]),
(mtime, 255),
"Gzip header does not properly set either mtime or OS byte."
)
def test_decompress(self):
for data in (data1, data2):
buf = io.BytesIO()
@@ -616,6 +798,66 @@ class TestGzip(BaseTest):
self.assertEqual(f.write(q), LENGTH)
self.assertEqual(f.tell(), LENGTH)
def test_flush_flushes_compressor(self):
# See issue GH-105808.
b = io.BytesIO()
message = b"important message here."
with gzip.GzipFile(fileobj=b, mode='w') as f:
f.write(message)
f.flush()
partial_data = b.getvalue()
full_data = b.getvalue()
self.assertEqual(gzip.decompress(full_data), message)
# The partial data should contain the gzip header and the complete
# message, but not the end-of-stream markers (so we can't just
# decompress it directly).
with self.assertRaises(EOFError):
gzip.decompress(partial_data)
d = zlib.decompressobj(wbits=-zlib.MAX_WBITS)
f = io.BytesIO(partial_data)
gzip._read_gzip_header(f)
read_message = d.decompress(f.read())
self.assertEqual(read_message, message)
def test_flush_modes(self):
# Make sure the argument to flush is properly passed to the
# zlib.compressobj; see issue GH-105808.
class FakeCompressor:
def __init__(self):
self.modes = []
def compress(self, data):
return b''
def flush(self, mode=-1):
self.modes.append(mode)
return b''
b = io.BytesIO()
fc = FakeCompressor()
with gzip.GzipFile(fileobj=b, mode='w') as f:
f.compress = fc
f.flush()
f.flush(50)
f.flush(zlib_mode=100)
# The implicit close will also flush the compressor.
expected_modes = [
zlib.Z_SYNC_FLUSH,
50,
100,
-1,
]
self.assertEqual(fc.modes, expected_modes)
def test_write_seek_write(self):
# Make sure that offset is up-to-date before seeking
# See issue GH-108111
b = io.BytesIO()
message = b"important message here."
with gzip.GzipFile(fileobj=b, mode='w') as f:
f.write(message)
f.seek(len(message))
f.write(message)
data = b.getvalue()
self.assertEqual(gzip.decompress(data), message * 2)
class TestOpen(BaseTest):
def test_binary_modes(self):
@@ -646,13 +888,16 @@ class TestOpen(BaseTest):
self.assertEqual(file_data, uncompressed)
def test_pathlike_file(self):
filename = pathlib.Path(self.filename)
filename = os_helper.FakePath(self.filename)
with gzip.open(filename, "wb") as f:
f.write(data1 * 50)
self.assertEqual(f.name, self.filename)
with gzip.open(filename, "ab") as f:
f.write(data1)
self.assertEqual(f.name, self.filename)
with gzip.open(filename) as f:
self.assertEqual(f.read(), data1 * 51)
self.assertEqual(f.name, self.filename)
def test_implicit_binary_modes(self):
# Test implicit binary modes (no "b" or "t" in mode string).
@@ -682,7 +927,6 @@ class TestOpen(BaseTest):
file_data = gzip.decompress(f.read())
self.assertEqual(file_data, uncompressed)
@unittest.skipIf(sys.platform == 'win32', "TODO: RUSTPYTHON, windows text mode")
def test_text_modes(self):
uncompressed = data1.decode("ascii") * 50
uncompressed_raw = uncompressed.replace("\n", os.linesep)

52
Lib/test/test_zlib.py vendored
View File

@@ -20,18 +20,18 @@ requires_Decompress_copy = unittest.skipUnless(
'requires Decompress.copy()')
# def _zlib_runtime_version_tuple(zlib_version=zlib.ZLIB_RUNTIME_VERSION):
# # Register "1.2.3" as "1.2.3.0"
# # or "1.2.0-linux","1.2.0.f","1.2.0.f-linux"
# v = zlib_version.split('-', 1)[0].split('.')
# if len(v) < 4:
# v.append('0')
# elif not v[-1].isnumeric():
# v[-1] = '0'
# return tuple(map(int, v))
#
#
# ZLIB_RUNTIME_VERSION_TUPLE = _zlib_runtime_version_tuple()
def _zlib_runtime_version_tuple(zlib_version=zlib.ZLIB_RUNTIME_VERSION):
# Register "1.2.3" as "1.2.3.0"
# or "1.2.0-linux","1.2.0.f","1.2.0.f-linux"
v = zlib_version.split('-', 1)[0].split('.')
if len(v) < 4:
v.append('0')
elif not v[-1].isnumeric():
v[-1] = '0'
return tuple(map(int, v))
ZLIB_RUNTIME_VERSION_TUPLE = _zlib_runtime_version_tuple()
# bpo-46623: When a hardware accelerator is used (currently only on s390x),
@@ -66,8 +66,6 @@ HW_ACCELERATED = is_s390x
class VersionTestCase(unittest.TestCase):
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_library_version(self):
# Test that the major version of the actual library in use matches the
# major version that we were compiled against. We can't guarantee that
@@ -282,8 +280,6 @@ class CompressTestCase(BaseCompressTestCase, unittest.TestCase):
class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
# TODO: RUSTPYTHON
@unittest.expectedFailure
# Test compression object
def test_pair(self):
# straightforward compress/decompress objects
@@ -307,8 +303,6 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
self.assertIsInstance(dco.unconsumed_tail, bytes)
self.assertIsInstance(dco.unused_data, bytes)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_keywords(self):
level = 2
method = zlib.DEFLATED
@@ -466,8 +460,6 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
def test_decompressmaxlenflush(self):
self.test_decompressmaxlen(flush=True)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_maxlenmisc(self):
# Misc tests of max_length
dco = zlib.decompressobj()
@@ -498,7 +490,7 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
ddata += dco.decompress(dco.unconsumed_tail)
self.assertEqual(dco.unconsumed_tail, b"")
# TODO: RUSTPYTHON
# TODO: RUSTPYTHON: Z_BLOCK support in flate2
@unittest.expectedFailure
def test_flushes(self):
# Test flush() with the various options, using all the
@@ -560,8 +552,6 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
dco = zlib.decompressobj()
self.assertEqual(dco.flush(), b"") # Returns nothing
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_dictionary(self):
h = HAMLET_SCENE
# Build a simulated dictionary out of the words in HAMLET.
@@ -578,8 +568,6 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
dco = zlib.decompressobj()
self.assertRaises(zlib.error, dco.decompress, cd)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_dictionary_streaming(self):
# This simulates the reuse of a compressor object for compressing
# several separate data streams.
@@ -652,8 +640,6 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
self.assertEqual(dco.unconsumed_tail, b'')
self.assertEqual(dco.unused_data, remainder)
# TODO: RUSTPYTHON
@unittest.expectedFailure
# issue27164
def test_decompress_raw_with_dictionary(self):
zdict = b'abcdefghijklmnopqrstuvwxyz'
@@ -829,7 +815,7 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
finally:
comp = uncomp = data = None
# TODO: RUSTPYTHON
# TODO: RUSTPYTHON: wbits=0 support in flate2
@unittest.expectedFailure
def test_wbits(self):
# wbits=0 only supported since zlib v1.2.3.5
@@ -997,8 +983,6 @@ class ZlibDecompressorTest(unittest.TestCase):
self.assertEqual(text, self.TEXT)
self.assertEqual(zlibd.unused_data, unused_data)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def testEOFError(self):
zlibd = zlib._ZlibDecompressor()
text = zlibd.decompress(self.DATA)
@@ -1029,8 +1013,6 @@ class ZlibDecompressorTest(unittest.TestCase):
with self.assertRaises(TypeError):
pickle.dumps(zlib._ZlibDecompressor(), proto)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def testDecompressorChunksMaxsize(self):
zlibd = zlib._ZlibDecompressor()
max_length = 100
@@ -1062,8 +1044,6 @@ class ZlibDecompressorTest(unittest.TestCase):
self.assertEqual(out, self.BIG_TEXT)
self.assertEqual(zlibd.unused_data, b"")
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_decompressor_inputbuf_1(self):
# Test reusing input buffer after moving existing
# contents to beginning
@@ -1086,8 +1066,6 @@ class ZlibDecompressorTest(unittest.TestCase):
out.append(zlibd.decompress(self.DATA[105:]))
self.assertEqual(b''.join(out), self.TEXT)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_decompressor_inputbuf_2(self):
# Test reusing input buffer by appending data at the
# end right away
@@ -1109,8 +1087,6 @@ class ZlibDecompressorTest(unittest.TestCase):
out.append(zlibd.decompress(self.DATA[300:]))
self.assertEqual(b''.join(out), self.TEXT)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_decompressor_inputbuf_3(self):
# Test reusing input buffer after extending it

View File

@@ -728,7 +728,7 @@ impl ModuleItem for AttributeItem {
(
quote_spanned! { ident.span() => {
#let_obj
for name in [(#(#names,)*)] {
for name in [#(#names),*] {
vm.__module_set_attr(module, vm.ctx.intern_str(name), obj.clone()).unwrap();
}
}},

View File

@@ -5,7 +5,7 @@ pub(crate) use zlib::make_module;
#[pymodule]
mod zlib {
use crate::vm::{
builtins::{PyBaseExceptionRef, PyBytes, PyBytesRef, PyIntRef, PyTypeRef},
builtins::{PyBaseExceptionRef, PyBytesRef, PyIntRef, PyTypeRef},
common::lock::PyMutex,
convert::TryFromBorrowedObject,
function::{ArgBytesLike, ArgPrimitiveIndex, ArgSize, OptionalArg},
@@ -13,7 +13,6 @@ mod zlib {
PyObject, PyPayload, PyResult, VirtualMachine,
};
use adler32::RollingAdler32 as Adler32;
use crossbeam_utils::atomic::AtomicCell;
use flate2::{
write::ZlibEncoder, Compress, Compression, Decompress, FlushCompress, FlushDecompress,
Status,
@@ -27,6 +26,17 @@ mod zlib {
Z_NO_COMPRESSION, Z_NO_FLUSH, Z_PARTIAL_FLUSH, Z_RLE, Z_SYNC_FLUSH, Z_TREES,
};
// we're statically linking libz-rs, so the compile-time and runtime
// versions will always be the same
#[pyattr(name = "ZLIB_RUNTIME_VERSION")]
#[pyattr]
const ZLIB_VERSION: &str = unsafe {
match std::ffi::CStr::from_ptr(libz_sys::zlibVersion()).to_str() {
Ok(s) => s,
Err(_) => unreachable!(),
}
};
// copied from zlibmodule.c (commit 530f506ac91338)
#[pyattr]
const MAX_WBITS: i8 = 15;
@@ -80,15 +90,10 @@ mod zlib {
} = args;
let level = level.ok_or_else(|| new_zlib_error("Bad compression level", vm))?;
let encoded_bytes = if args.wbits.value == MAX_WBITS {
let mut encoder = ZlibEncoder::new(Vec::new(), level);
data.with_ref(|input_bytes| encoder.write_all(input_bytes).unwrap());
encoder.finish().unwrap()
} else {
let mut inner = CompressInner::new(InitOptions::new(wbits.value, vm)?.compress(level));
data.with_ref(|input_bytes| inner.compress(input_bytes, vm))?;
inner.flush(vm)?
};
let compress = InitOptions::new(wbits.value, vm)?.compress(level);
let mut encoder = ZlibEncoder::new_with_compress(Vec::new(), compress);
data.with_ref(|input_bytes| encoder.write_all(input_bytes).unwrap());
let encoded_bytes = encoder.finish().unwrap();
Ok(vm.ctx.new_bytes(encoded_bytes))
}
@@ -109,6 +114,11 @@ mod zlib {
let header = wbits > 0;
let wbits = wbits.unsigned_abs();
match wbits {
// TODO: wbits = 0 should be a valid option:
// > windowBits can also be zero to request that inflate use the window size in
// > the zlib header of the compressed stream.
// but flate2 doesn't expose it
// 0 => ...
9..=15 => Ok(InitOptions::Standard { header, wbits }),
25..=31 => Ok(InitOptions::Gzip { wbits: wbits - 16 }),
_ => Err(vm.new_value_error("Invalid initialization option".to_owned())),
@@ -131,29 +141,81 @@ mod zlib {
}
}
#[derive(Clone)]
struct Chunker<'a> {
data1: &'a [u8],
data2: &'a [u8],
}
impl<'a> Chunker<'a> {
fn new(data: &'a [u8]) -> Self {
Self {
data1: data,
data2: &[],
}
}
fn chain(data1: &'a [u8], data2: &'a [u8]) -> Self {
if data1.is_empty() {
Self {
data1: data2,
data2: &[],
}
} else {
Self { data1, data2 }
}
}
fn len(&self) -> usize {
self.data1.len() + self.data2.len()
}
fn is_empty(&self) -> bool {
self.data1.is_empty()
}
fn to_vec(&self) -> Vec<u8> {
[self.data1, self.data2].concat()
}
fn chunk(&self) -> &'a [u8] {
self.data1.get(..CHUNKSIZE).unwrap_or(self.data1)
}
fn advance(&mut self, consumed: usize) {
self.data1 = &self.data1[consumed..];
if self.data1.is_empty() {
self.data1 = std::mem::take(&mut self.data2);
}
}
}
fn _decompress(
mut data: &[u8],
data: &[u8],
d: &mut Decompress,
bufsize: usize,
max_length: Option<usize>,
is_flush: bool,
zdict: Option<&ArgBytesLike>,
vm: &VirtualMachine,
) -> PyResult<(Vec<u8>, bool)> {
let mut data = Chunker::new(data);
_decompress_chunks(&mut data, d, bufsize, max_length, is_flush, zdict, vm)
}
fn _decompress_chunks(
data: &mut Chunker<'_>,
d: &mut Decompress,
bufsize: usize,
max_length: Option<usize>,
is_flush: bool,
zdict: Option<&ArgBytesLike>,
vm: &VirtualMachine,
) -> PyResult<(Vec<u8>, bool)> {
if data.is_empty() {
return Ok((Vec::new(), true));
}
let max_length = max_length.unwrap_or(usize::MAX);
let mut buf = Vec::new();
loop {
let final_chunk = data.len() <= CHUNKSIZE;
let chunk = if final_chunk {
data
} else {
&data[..CHUNKSIZE]
};
// if this is the final chunk, finish it
'outer: loop {
let chunk = data.chunk();
let flush = if is_flush {
if final_chunk {
// if this is the final chunk, finish it
if chunk.len() == data.len() {
FlushDecompress::Finish
} else {
FlushDecompress::None
@@ -162,34 +224,43 @@ mod zlib {
FlushDecompress::Sync
};
loop {
let additional = if let Some(max_length) = max_length {
std::cmp::min(bufsize, max_length - buf.capacity())
} else {
bufsize
};
let additional = std::cmp::min(bufsize, max_length - buf.capacity());
if additional == 0 {
return Ok((buf, false));
}
buf.reserve_exact(additional);
let prev_in = d.total_in();
let status = d
.decompress_vec(chunk, &mut buf, flush)
.map_err(|_| new_zlib_error("invalid input data", vm))?;
let res = d.decompress_vec(chunk, &mut buf, flush);
let consumed = d.total_in() - prev_in;
data = &data[consumed as usize..];
let stream_end = status == Status::StreamEnd;
if stream_end || data.is_empty() {
// we've reached the end of the stream, we're done
buf.shrink_to_fit();
return Ok((buf, stream_end));
} else if !chunk.is_empty() && consumed == 0 {
// we're gonna need a bigger buffer
continue;
} else {
// next chunk
break;
}
data.advance(consumed as usize);
match res {
Ok(status) => {
let stream_end = status == Status::StreamEnd;
if stream_end || data.is_empty() {
// we've reached the end of the stream, we're done
buf.shrink_to_fit();
return Ok((buf, stream_end));
} else if !chunk.is_empty() && consumed == 0 {
// we're gonna need a bigger buffer
continue;
} else {
// next chunk
continue 'outer;
}
}
Err(e) => {
let Some(zdict) = e.needs_dictionary().and(zdict) else {
return Err(new_zlib_error(&e.to_string(), vm));
};
d.set_dictionary(&zdict.borrow_buf())
.map_err(|_| new_zlib_error("failed to set dictionary", vm))?;
// now try the next chunk
continue 'outer;
}
};
}
}
}
@@ -214,7 +285,8 @@ mod zlib {
} = args;
data.with_ref(|data| {
let mut d = InitOptions::new(wbits.value, vm)?.decompress();
let (buf, stream_end) = _decompress(data, &mut d, bufsize.value, None, false, vm)?;
let (buf, stream_end) =
_decompress(data, &mut d, bufsize.value, None, false, None, vm)?;
if !stream_end {
return Err(new_zlib_error(
"Error -5 while decompressing data: incomplete or truncated stream",
@@ -230,101 +302,116 @@ mod zlib {
#[pyarg(any, default = "ArgPrimitiveIndex { value: MAX_WBITS }")]
wbits: ArgPrimitiveIndex<i8>,
#[pyarg(any, optional)]
_zdict: OptionalArg<ArgBytesLike>,
zdict: OptionalArg<ArgBytesLike>,
}
#[pyfunction]
fn decompressobj(args: DecompressobjArgs, vm: &VirtualMachine) -> PyResult<PyDecompress> {
#[allow(unused_mut)]
let mut decompress = InitOptions::new(args.wbits.value, vm)?.decompress();
if let OptionalArg::Present(_dict) = args._zdict {
// FIXME: always fails
// dict.with_ref(|d| decompress.set_dictionary(d));
let zdict = args.zdict.into_option();
if let Some(dict) = &zdict {
if args.wbits.value < 0 {
dict.with_ref(|d| decompress.set_dictionary(d))
.map_err(|_| new_zlib_error("failed to set dictionary", vm))?;
}
}
let inner = PyDecompressInner {
decompress: Some(decompress),
eof: false,
zdict,
unused_data: vm.ctx.empty_bytes.clone(),
unconsumed_tail: vm.ctx.empty_bytes.clone(),
};
Ok(PyDecompress {
decompress: PyMutex::new(decompress),
eof: AtomicCell::new(false),
unused_data: PyMutex::new(PyBytes::from(vec![]).into_ref(&vm.ctx)),
unconsumed_tail: PyMutex::new(PyBytes::from(vec![]).into_ref(&vm.ctx)),
inner: PyMutex::new(inner),
})
}
#[derive(Debug)]
struct PyDecompressInner {
decompress: Option<Decompress>,
zdict: Option<ArgBytesLike>,
eof: bool,
unused_data: PyBytesRef,
unconsumed_tail: PyBytesRef,
}
#[pyattr]
#[pyclass(name = "Decompress")]
#[derive(Debug, PyPayload)]
struct PyDecompress {
decompress: PyMutex<Decompress>,
eof: AtomicCell<bool>,
unused_data: PyMutex<PyBytesRef>,
unconsumed_tail: PyMutex<PyBytesRef>,
inner: PyMutex<PyDecompressInner>,
}
#[pyclass]
impl PyDecompress {
#[pygetset]
fn eof(&self) -> bool {
self.eof.load()
self.inner.lock().eof
}
#[pygetset]
fn unused_data(&self) -> PyBytesRef {
self.unused_data.lock().clone()
self.inner.lock().unused_data.clone()
}
#[pygetset]
fn unconsumed_tail(&self) -> PyBytesRef {
self.unconsumed_tail.lock().clone()
self.inner.lock().unconsumed_tail.clone()
}
fn save_unused_input(
&self,
d: &Decompress,
fn decompress_inner(
inner: &mut PyDecompressInner,
data: &[u8],
stream_end: bool,
orig_in: u64,
bufsize: usize,
max_length: Option<usize>,
is_flush: bool,
vm: &VirtualMachine,
) {
let leftover = &data[(d.total_in() - orig_in) as usize..];
) -> PyResult<(PyResult<Vec<u8>>, bool)> {
let Some(d) = &mut inner.decompress else {
return Err(new_zlib_error(USE_AFTER_FINISH_ERR, vm));
};
if stream_end && !leftover.is_empty() {
let mut unused_data = self.unused_data.lock();
let unused: Vec<_> = unused_data
.as_bytes()
.iter()
.chain(leftover)
.copied()
.collect();
*unused_data = vm.ctx.new_pyref(unused);
let zdict = if is_flush { None } else { inner.zdict.as_ref() };
let prev_in = d.total_in();
let (ret, stream_end) =
match _decompress(data, d, bufsize, max_length, is_flush, zdict, vm) {
Ok((buf, stream_end)) => (Ok(buf), stream_end),
Err(err) => (Err(err), false),
};
let consumed = (d.total_in() - prev_in) as usize;
// save unused input
let unconsumed = &data[consumed..];
if !unconsumed.is_empty() {
if stream_end {
let unused = [inner.unused_data.as_bytes(), unconsumed].concat();
inner.unused_data = vm.ctx.new_pyref(unused);
} else {
inner.unconsumed_tail = vm.ctx.new_bytes(unconsumed.to_vec());
}
} else if !inner.unconsumed_tail.is_empty() {
inner.unconsumed_tail = vm.ctx.empty_bytes.clone();
}
Ok((ret, stream_end))
}
#[pymethod]
fn decompress(&self, args: DecompressArgs, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
let max_length = args.max_length.value;
let max_length: usize = args
.max_length
.map_or(0, |x| x.value)
.try_into()
.map_err(|_| vm.new_value_error("must be non-negative".to_owned()))?;
let max_length = (max_length != 0).then_some(max_length);
let data = args.data.borrow_buf();
let data = &*data;
let data = &*args.data.borrow_buf();
let mut d = self.decompress.lock();
let orig_in = d.total_in();
let inner = &mut *self.inner.lock();
let (ret, stream_end) =
match _decompress(data, &mut d, DEF_BUF_SIZE, max_length, false, vm) {
Ok((buf, true)) => {
self.eof.store(true);
(Ok(buf), true)
}
Ok((buf, false)) => (Ok(buf), false),
Err(err) => (Err(err), false),
};
self.save_unused_input(&d, data, stream_end, orig_in, vm);
Self::decompress_inner(inner, data, DEF_BUF_SIZE, max_length, false, vm)?;
let leftover = if stream_end {
b""
} else {
&data[(d.total_in() - orig_in) as usize..]
};
let mut unconsumed_tail = self.unconsumed_tail.lock();
if !leftover.is_empty() || !unconsumed_tail.is_empty() {
*unconsumed_tail = PyBytes::from(leftover.to_owned()).into_ref(&vm.ctx);
}
inner.eof |= stream_end;
ret
}
@@ -332,36 +419,22 @@ mod zlib {
#[pymethod]
fn flush(&self, length: OptionalArg<ArgSize>, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
let length = match length {
OptionalArg::Present(l) => {
let l: isize = l.into();
if l <= 0 {
return Err(
vm.new_value_error("length must be greater than zero".to_owned())
);
} else {
l as usize
}
OptionalArg::Present(ArgSize { value }) if value <= 0 => {
return Err(vm.new_value_error("length must be greater than zero".to_owned()))
}
OptionalArg::Present(ArgSize { value }) => value as usize,
OptionalArg::Missing => DEF_BUF_SIZE,
};
let mut data = self.unconsumed_tail.lock();
let mut d = self.decompress.lock();
let inner = &mut *self.inner.lock();
let data = std::mem::replace(&mut inner.unconsumed_tail, vm.ctx.empty_bytes.clone());
let orig_in = d.total_in();
let (ret, _) = Self::decompress_inner(inner, &data, length, None, true, vm)?;
let (ret, stream_end) = match _decompress(&data, &mut d, length, None, true, vm) {
Ok((buf, stream_end)) => (Ok(buf), stream_end),
Err(err) => (Err(err), false),
};
self.save_unused_input(&d, &data, stream_end, orig_in, vm);
if inner.eof {
inner.decompress = None;
}
*data = PyBytes::from(Vec::new()).into_ref(&vm.ctx);
// TODO: drop the inner decompressor, somehow
// if stream_end {
//
// }
ret
}
}
@@ -370,11 +443,8 @@ mod zlib {
struct DecompressArgs {
#[pyarg(positional)]
data: ArgBytesLike,
#[pyarg(
any,
default = "rustpython_vm::function::ArgPrimitiveIndex { value: 0 }"
)]
max_length: ArgPrimitiveIndex<usize>,
#[pyarg(any, optional)]
max_length: OptionalArg<ArgSize>,
}
#[derive(FromArgs)]
@@ -384,13 +454,13 @@ mod zlib {
level: Level,
// only DEFLATED is valid right now, it's w/e
#[pyarg(any, default = "DEFLATED")]
_method: i32,
method: i32,
#[pyarg(any, default = "ArgPrimitiveIndex { value: MAX_WBITS }")]
wbits: ArgPrimitiveIndex<i8>,
#[pyarg(any, name = "_memLevel", default = "DEF_MEM_LEVEL")]
_mem_level: u8,
#[pyarg(any, name = "memLevel", default = "DEF_MEM_LEVEL")]
mem_level: u8,
#[pyarg(any, default = "Z_DEFAULT_STRATEGY")]
_strategy: i32,
strategy: i32,
#[pyarg(any, optional)]
zdict: Option<ArgBytesLike>,
}
@@ -417,8 +487,7 @@ mod zlib {
#[derive(Debug)]
struct CompressInner {
compress: Compress,
unconsumed: Vec<u8>,
compress: Option<Compress>,
}
#[pyattr]
@@ -436,10 +505,17 @@ mod zlib {
data.with_ref(|b| inner.compress(b, vm))
}
// TODO: mode argument isn't used
#[pymethod]
fn flush(&self, _mode: OptionalArg<i32>, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
self.inner.lock().flush(vm)
fn flush(&self, mode: OptionalArg<i32>, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
let mode = match mode.unwrap_or(Z_FINISH) {
Z_NO_FLUSH => return Ok(vec![]),
Z_PARTIAL_FLUSH => FlushCompress::Partial,
Z_SYNC_FLUSH => FlushCompress::Sync,
Z_FULL_FLUSH => FlushCompress::Full,
Z_FINISH => FlushCompress::Finish,
_ => return Err(new_zlib_error("invalid mode", vm)),
};
self.inner.lock().flush(mode, vm)
}
// TODO: This is an optional feature of Compress
@@ -456,59 +532,55 @@ mod zlib {
impl CompressInner {
fn new(compress: Compress) -> Self {
Self {
compress,
unconsumed: Vec::new(),
compress: Some(compress),
}
}
fn get_compress(&mut self, vm: &VirtualMachine) -> PyResult<&mut Compress> {
self.compress
.as_mut()
.ok_or_else(|| new_zlib_error(USE_AFTER_FINISH_ERR, vm))
}
fn compress(&mut self, data: &[u8], vm: &VirtualMachine) -> PyResult<Vec<u8>> {
let orig_in = self.compress.total_in() as usize;
let mut cur_in = 0;
let unconsumed = std::mem::take(&mut self.unconsumed);
let c = self.get_compress(vm)?;
let mut buf = Vec::new();
'outer: for chunk in unconsumed.chunks(CHUNKSIZE).chain(data.chunks(CHUNKSIZE)) {
while cur_in < chunk.len() {
for mut chunk in data.chunks(CHUNKSIZE) {
while !chunk.is_empty() {
buf.reserve(DEF_BUF_SIZE);
let status = self
.compress
.compress_vec(&chunk[cur_in..], &mut buf, FlushCompress::None)
.map_err(|_| {
self.unconsumed.extend_from_slice(&data[cur_in..]);
new_zlib_error("error while compressing", vm)
})?;
cur_in = (self.compress.total_in() as usize) - orig_in;
match status {
Status::Ok => continue,
Status::StreamEnd => break 'outer,
_ => break,
}
let prev_in = c.total_in();
c.compress_vec(chunk, &mut buf, FlushCompress::None)
.map_err(|_| new_zlib_error("error while compressing", vm))?;
let consumed = c.total_in() - prev_in;
chunk = &chunk[consumed as usize..];
}
}
self.unconsumed.extend_from_slice(&data[cur_in..]);
buf.shrink_to_fit();
Ok(buf)
}
// TODO: flush mode (FlushDecompress) parameter
fn flush(&mut self, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
let data = std::mem::take(&mut self.unconsumed);
let mut data_it = data.chunks(CHUNKSIZE);
fn flush(&mut self, mode: FlushCompress, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
let c = self.get_compress(vm)?;
let mut buf = Vec::new();
loop {
let chunk = data_it.next().unwrap_or(&[]);
let status = loop {
if buf.len() == buf.capacity() {
buf.reserve(DEF_BUF_SIZE);
}
let status = self
.compress
.compress_vec(chunk, &mut buf, FlushCompress::Finish)
let status = c
.compress_vec(&[], &mut buf, mode)
.map_err(|_| new_zlib_error("error while compressing", vm))?;
match status {
Status::StreamEnd => break,
_ => continue,
if buf.len() != buf.capacity() {
break status;
}
};
match status {
Status::Ok | Status::BufError => {}
Status::StreamEnd if mode == FlushCompress::Finish => self.compress = None,
Status::StreamEnd => return Err(new_zlib_error("unexpected eof", vm)),
}
buf.shrink_to_fit();
@@ -520,6 +592,8 @@ mod zlib {
vm.new_exception_msg(vm.class("zlib", "error"), message.to_owned())
}
const USE_AFTER_FINISH_ERR: &str = "Error -2: inconsistent stream state";
struct Level(Option<flate2::Compression>);
impl Level {
@@ -551,133 +625,119 @@ mod zlib {
#[pyattr]
#[pyclass(name = "_ZlibDecompressor")]
#[derive(Debug, PyPayload)]
pub struct ZlibDecompressor {
decompress: PyMutex<Decompress>,
unused_data: PyMutex<PyBytesRef>,
unconsumed_tail: PyMutex<PyBytesRef>,
struct ZlibDecompressor {
inner: PyMutex<ZlibDecompressorInner>,
}
#[derive(Debug)]
struct ZlibDecompressorInner {
decompress: Decompress,
unused_data: PyBytesRef,
input_buffer: Vec<u8>,
zdict: Option<ArgBytesLike>,
eof: bool,
needs_input: bool,
}
impl Constructor for ZlibDecompressor {
type Args = ();
type Args = DecompressobjArgs;
fn py_new(cls: PyTypeRef, _args: Self::Args, vm: &VirtualMachine) -> PyResult {
let decompress = Decompress::new(true);
let zlib_decompressor = Self {
decompress: PyMutex::new(decompress),
unused_data: PyMutex::new(PyBytes::from(vec![]).into_ref(&vm.ctx)),
unconsumed_tail: PyMutex::new(PyBytes::from(vec![]).into_ref(&vm.ctx)),
fn py_new(cls: PyTypeRef, args: Self::Args, vm: &VirtualMachine) -> PyResult {
let mut decompress = InitOptions::new(args.wbits.value, vm)?.decompress();
let zdict = args.zdict.into_option();
if let Some(dict) = &zdict {
if args.wbits.value < 0 {
dict.with_ref(|d| decompress.set_dictionary(d))
.map_err(|_| new_zlib_error("failed to set dictionary", vm))?;
}
}
let inner = ZlibDecompressorInner {
decompress,
unused_data: vm.ctx.empty_bytes.clone(),
input_buffer: Vec::new(),
zdict,
eof: false,
needs_input: true,
};
zlib_decompressor
.into_ref_with_type(vm, cls)
.map(Into::into)
Self {
inner: PyMutex::new(inner),
}
.into_ref_with_type(vm, cls)
.map(Into::into)
}
}
#[pyclass(with(Constructor))]
impl ZlibDecompressor {
#[pygetset]
fn unused_data(&self) -> PyBytesRef {
self.unused_data.lock().clone()
fn eof(&self) -> bool {
self.inner.lock().eof
}
#[pygetset]
fn unconsumed_tail(&self) -> PyBytesRef {
self.unconsumed_tail.lock().clone()
fn unused_data(&self) -> PyBytesRef {
self.inner.lock().unused_data.clone()
}
fn save_unused_input(
&self,
d: &Decompress,
data: &[u8],
stream_end: bool,
orig_in: u64,
vm: &VirtualMachine,
) {
let leftover = &data[(d.total_in() - orig_in) as usize..];
if stream_end && !leftover.is_empty() {
let mut unused_data = self.unused_data.lock();
let unused: Vec<_> = unused_data
.as_bytes()
.iter()
.chain(leftover)
.copied()
.collect();
*unused_data = vm.ctx.new_pyref(unused);
}
#[pygetset]
fn needs_input(&self) -> bool {
self.inner.lock().needs_input
}
#[pymethod]
fn decompress(&self, args: PyBytesRef, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
// let max_length = args.max_length.value;
// let max_length = (max_length != 0).then_some(max_length);
let max_length = None;
let data = args.as_bytes();
fn decompress(&self, args: DecompressArgs, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
let max_length = args
.max_length
.into_option()
.and_then(|ArgSize { value }| usize::try_from(value).ok());
let data = &*args.data.borrow_buf();
let mut d = self.decompress.lock();
let orig_in = d.total_in();
let inner = &mut *self.inner.lock();
if inner.eof {
return Err(vm.new_eof_error("End of stream already reached".to_owned()));
}
let input_buffer = &mut inner.input_buffer;
let d = &mut inner.decompress;
let mut chunks = Chunker::chain(input_buffer, data);
let zdict = inner.zdict.as_ref();
let bufsize = DEF_BUF_SIZE;
let prev_len = chunks.len();
let (ret, stream_end) =
match _decompress(data, &mut d, DEF_BUF_SIZE, max_length, false, vm) {
Ok((buf, true)) => {
// Eof is true
(Ok(buf), true)
}
Ok((buf, false)) => (Ok(buf), false),
match _decompress_chunks(&mut chunks, d, bufsize, max_length, false, zdict, vm) {
Ok((buf, stream_end)) => (Ok(buf), stream_end),
Err(err) => (Err(err), false),
};
self.save_unused_input(&d, data, stream_end, orig_in, vm);
let consumed = prev_len - chunks.len();
let leftover = if stream_end {
b""
inner.eof |= stream_end;
if inner.eof {
inner.needs_input = false;
if !chunks.is_empty() {
inner.unused_data = vm.ctx.new_bytes(chunks.to_vec());
}
} else if chunks.is_empty() {
input_buffer.clear();
inner.needs_input = true;
} else {
&data[(d.total_in() - orig_in) as usize..]
};
let mut unconsumed_tail = self.unconsumed_tail.lock();
if !leftover.is_empty() || !unconsumed_tail.is_empty() {
*unconsumed_tail = PyBytes::from(leftover.to_owned()).into_ref(&vm.ctx);
inner.needs_input = false;
if let Some(n_consumed_from_data) = consumed.checked_sub(input_buffer.len()) {
input_buffer.clear();
input_buffer.extend_from_slice(&data[n_consumed_from_data..]);
} else {
input_buffer.drain(..consumed);
input_buffer.extend_from_slice(data);
}
}
ret
}
#[pymethod]
fn flush(&self, length: OptionalArg<ArgSize>, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
let length = match length {
OptionalArg::Present(l) => {
let l: isize = l.into();
if l <= 0 {
return Err(
vm.new_value_error("length must be greater than zero".to_owned())
);
} else {
l as usize
}
}
OptionalArg::Missing => DEF_BUF_SIZE,
};
let mut data = self.unconsumed_tail.lock();
let mut d = self.decompress.lock();
let orig_in = d.total_in();
let (ret, stream_end) = match _decompress(&data, &mut d, length, None, true, vm) {
Ok((buf, stream_end)) => (Ok(buf), stream_end),
Err(err) => (Err(err), false),
};
self.save_unused_input(&d, &data, stream_end, orig_in, vm);
*data = PyBytes::from(Vec::new()).into_ref(&vm.ctx);
// TODO: drop the inner decompressor, somehow
// if stream_end {
//
// }
ret
}
// TODO: Wait for getstate pyslot to be fixed
// #[pyslot]
// fn getstate(zelf: &PyObject, vm: &VirtualMachine) -> PyResult<PyObject> {