Update tarfile from CPython 3.10.6

This commit is contained in:
CPython Developers
2022-08-14 23:34:09 +09:00
committed by Jeong YunWon
parent be1636a947
commit c05e9cd52d
2 changed files with 238 additions and 22 deletions

10
Lib/tarfile.py vendored
View File

@@ -1163,6 +1163,11 @@ class TarInfo(object):
# header information.
self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
# Remove redundant slashes from directories. This is to be consistent
# with frombuf().
if self.isdir():
self.name = self.name.rstrip("/")
return self
def _proc_gnulong(self, tarfile):
@@ -1185,6 +1190,11 @@ class TarInfo(object):
elif self.type == GNUTYPE_LONGLINK:
next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
# Remove redundant slashes from directories. This is to be consistent
# with frombuf().
if next.isdir():
next.name = next.name.removesuffix("/")
return next
def _proc_sparse(self, tarfile):

View File

@@ -220,6 +220,26 @@ class UstarReadTest(ReadTest, unittest.TestCase):
def test_issue14160(self):
self._test_fileobj_link("symtype2", "ustar/regtype")
def test_add_dir_getmember(self):
# bpo-21987
self.add_dir_and_getmember('bar')
self.add_dir_and_getmember('a'*101)
def add_dir_and_getmember(self, name):
    # Archive a freshly created directory called `name` in USTAR format,
    # then check that looking the member up as `name` and as `name + '/'`
    # yields equal TarInfo objects.
    with os_helper.temp_cwd():
        with tarfile.open(tmpname, 'w') as archive:
            archive.format = tarfile.USTAR_FORMAT
            try:
                os.mkdir(name)
                archive.add(name)
            finally:
                # The directory only has to exist while it is being added.
                os.rmdir(name)
        with tarfile.open(tmpname) as archive:
            without_slash = archive.getmember(name)
            with_slash = archive.getmember(name + '/')
            self.assertEqual(without_slash, with_slash)
class GzipUstarReadTest(GzipTest, UstarReadTest):
    """UstarReadTest combined with the GzipTest mixin; adds nothing of its own."""
@@ -330,6 +350,38 @@ class LzmaListTest(LzmaTest, ListTest):
class CommonReadTest(ReadTest):
def test_is_tarfile_erroneous(self):
    # tarfile.is_tarfile() must answer False for non-archives, whichever
    # way the input is handed over.

    # Create (or truncate) tmpname so that it is an empty file.
    open(tmpname, "wb").close()

    is_tarfile = tarfile.is_tarfile

    # is_tarfile works on filenames
    self.assertFalse(is_tarfile(tmpname))

    # is_tarfile works on path-like objects
    self.assertFalse(is_tarfile(pathlib.Path(tmpname)))

    # is_tarfile works on file objects
    with open(tmpname, "rb") as stream:
        self.assertFalse(is_tarfile(stream))

    # is_tarfile works on file-like objects
    self.assertFalse(is_tarfile(io.BytesIO(b"invalid")))
def test_is_tarfile_valid(self):
    # tarfile.is_tarfile() must answer True for self.tarname, whichever
    # way the archive is handed over.
    archive_path = self.tarname
    is_tarfile = tarfile.is_tarfile

    # is_tarfile works on filenames
    self.assertTrue(is_tarfile(archive_path))

    # is_tarfile works on path-like objects
    self.assertTrue(is_tarfile(pathlib.Path(archive_path)))

    # is_tarfile works on file objects
    with open(archive_path, "rb") as stream:
        self.assertTrue(is_tarfile(stream))

    # is_tarfile works on file-like objects
    with open(archive_path, "rb") as stream:
        in_memory = io.BytesIO(stream.read())
        self.assertTrue(is_tarfile(in_memory))
def test_empty_tarfile(self):
# Test for issue6123: Allow opening empty archives.
# This test checks if tarfile.open() is able to open an empty tar
@@ -365,7 +417,7 @@ class CommonReadTest(ReadTest):
def test_ignore_zeros(self):
# Test TarFile's ignore_zeros option.
# generate 512 pseudorandom bytes
data = Random(0).getrandbits(512*8).to_bytes(512, 'big')
data = Random(0).randbytes(512)
for char in (b'\0', b'a'):
# Test if EOFHeaderError ('\0') and InvalidHeaderError ('a')
# are ignored correctly.
@@ -682,6 +734,16 @@ class MiscReadTestBase(CommonReadTest):
self.assertEqual(m1.offset, m2.offset)
self.assertEqual(m1.get_info(), m2.get_info())
@unittest.skipIf(zlib is None, "requires zlib")
def test_zlib_error_does_not_leak(self):
    # bpo-39039: tarfile.open allowed zlib exceptions to bubble up when
    # parsing certain types of invalid data
    #
    # Make every TarInfo.fromtarfile call raise zlib.error and require that
    # tarfile.open reports it as tarfile.ReadError.
    failing_fromtarfile = unittest.mock.patch(
        "tarfile.TarInfo.fromtarfile", side_effect=zlib.error)
    with failing_fromtarfile, self.assertRaises(tarfile.ReadError):
        tarfile.open(self.tarname)
class MiscReadTest(MiscReadTestBase, unittest.TestCase):
test_fail_comp = None
@@ -968,11 +1030,26 @@ class LongnameTest:
"iso8859-1", "strict")
self.assertEqual(tarinfo.type, self.longnametype)
def test_longname_directory(self):
    # Test reading a longlink directory. Issue #47231.
    longdir = 'a' * 101 + '/'
    with os_helper.temp_cwd():
        with tarfile.open(tmpname, 'w') as archive:
            archive.format = self.format
            try:
                os.mkdir(longdir)
                archive.add(longdir)
            finally:
                os.rmdir(longdir)
        with tarfile.open(tmpname) as archive:
            # The member has to be found both with and without the
            # trailing slash.
            for lookup in (longdir, longdir.removesuffix('/')):
                self.assertIsNotNone(archive.getmember(lookup))
class GNUReadTest(LongnameTest, ReadTest, unittest.TestCase):
subdir = "gnu"
longnametype = tarfile.GNUTYPE_LONGNAME
format = tarfile.GNU_FORMAT
# Since 3.2 tarfile is supposed to accurately restore sparse members and
# produce files with holes. This is what we actually want to test here.
@@ -1048,6 +1125,7 @@ class PaxReadTest(LongnameTest, ReadTest, unittest.TestCase):
subdir = "pax"
longnametype = tarfile.XHDTYPE
format = tarfile.PAX_FORMAT
def test_pax_global_headers(self):
tar = tarfile.open(tarname, encoding="iso8859-1")
@@ -1600,6 +1678,52 @@ class GNUWriteTest(unittest.TestCase):
("longlnk/" * 127) + "longlink_")
class DeviceHeaderTest(WriteTestBase, unittest.TestCase):
    # Checks that the devmajor/devminor header fields are written for
    # device members only and left as NUL bytes for regular files.

    prefix = "w:"

    def test_headers_written_only_for_device_files(self):
        # Regression test for bpo-18819.
        # NOTE(review): tempdir is created and removed here but not otherwise
        # referenced in this method; kept so the side effects stay unchanged.
        tempdir = os.path.join(TEMPDIR, "device_header_test")
        os.mkdir(tempdir)
        try:
            # Write one block-device member and one regular-file member.
            with tarfile.open(tmpname, self.mode) as tar:
                input_blk = tarfile.TarInfo(name="my_block_device")
                input_reg = tarfile.TarInfo(name="my_regular_file")
                input_blk.type = tarfile.BLKTYPE
                input_reg.type = tarfile.REGTYPE
                tar.addfile(input_blk)
                tar.addfile(input_reg)

            # devmajor and devminor should be *interpreted* as 0 in both...
            with tarfile.open(tmpname, "r") as tar:
                output_blk = tar.getmember("my_block_device")
                output_reg = tar.getmember("my_regular_file")
            self.assertEqual(output_blk.devmajor, 0)
            self.assertEqual(output_blk.devminor, 0)
            self.assertEqual(output_reg.devmajor, 0)
            self.assertEqual(output_reg.devminor, 0)

            # ...but the fields should not actually be set on regular files:
            with open(tmpname, "rb") as infile:
                buf = infile.read()
            # Raw header bytes of each member: from the start of its header
            # up to the start of its data.
            buf_blk = buf[output_blk.offset:output_blk.offset_data]
            buf_reg = buf[output_reg.offset:output_reg.offset_data]
            # See `struct posixheader` in GNU docs for byte offsets:
            # <https://www.gnu.org/software/tar/manual/html_node/Standard.html>
            device_headers = slice(329, 329 + 16)
            self.assertEqual(buf_blk[device_headers], b"0000000\0" * 2)
            self.assertEqual(buf_reg[device_headers], b"\0" * 16)
        finally:
            os_helper.rmtree(tempdir)
class CreateTest(WriteTestBase, unittest.TestCase):
prefix = "x:"
@@ -1691,15 +1815,30 @@ class CreateTest(WriteTestBase, unittest.TestCase):
class GzipCreateTest(GzipTest, CreateTest):

    def test_create_with_compresslevel(self):
        # Create the archive with compresslevel=1 ...
        with tarfile.open(tmpname, self.mode, compresslevel=1) as archive:
            archive.add(self.file_path)
        # ... then reopen it as 'r:gz' with the same keyword; opening and
        # closing without an exception is the whole check.
        with tarfile.open(tmpname, 'r:gz', compresslevel=1):
            pass
class Bz2CreateTest(Bz2Test, CreateTest):

    def test_create_with_compresslevel(self):
        # Create the archive with compresslevel=1 ...
        with tarfile.open(tmpname, self.mode, compresslevel=1) as archive:
            archive.add(self.file_path)
        # ... then reopen it as 'r:bz2' with the same keyword; opening and
        # closing without an exception is the whole check.
        with tarfile.open(tmpname, 'r:bz2', compresslevel=1):
            pass
class LzmaCreateTest(LzmaTest, CreateTest):

    # Unlike gz and bz2, xz uses the preset keyword instead of compresslevel.
    # It does not allow for preset to be specified when reading.
    def test_create_with_preset(self):
        # Only the write side is exercised, with preset=1.
        with tarfile.open(tmpname, self.mode, preset=1) as archive:
            archive.add(self.file_path)
class CreateWithXModeTest(CreateTest):
@@ -1840,6 +1979,61 @@ class PaxWriteTest(GNUWriteTest):
finally:
tar.close()
def test_create_pax_header(self):
# The ustar header should contain values that can be
# represented reasonably, even if a better (e.g. higher
# precision) version is set in the pax header.
# Issue #45863
# values that should be kept
t = tarfile.TarInfo()
t.name = "foo"
t.mtime = 1000.1
t.size = 100
t.uid = 123
t.gid = 124
info = t.get_info()
header = t.create_pax_header(info, encoding="iso8859-1")
self.assertEqual(info['name'], "foo")
# mtime should be rounded to nearest second
self.assertIsInstance(info['mtime'], int)
self.assertEqual(info['mtime'], 1000)
self.assertEqual(info['size'], 100)
self.assertEqual(info['uid'], 123)
self.assertEqual(info['gid'], 124)
self.assertEqual(header,
b'././@PaxHeader' + bytes(86) \
+ b'0000000\x000000000\x000000000\x0000000000020\x0000000000000\x00010205\x00 x' \
+ bytes(100) + b'ustar\x0000'+ bytes(247) \
+ b'16 mtime=1000.1\n' + bytes(496) + b'foo' + bytes(97) \
+ b'0000644\x000000173\x000000174\x0000000000144\x0000000001750\x00006516\x00 0' \
+ bytes(100) + b'ustar\x0000' + bytes(247))
# values that should be changed
t = tarfile.TarInfo()
t.name = "foo\u3374" # can't be represented in ascii
t.mtime = 10**10 # too big
t.size = 10**10 # too big
t.uid = 8**8 # too big
t.gid = 8**8+1 # too big
info = t.get_info()
header = t.create_pax_header(info, encoding="iso8859-1")
# name is kept as-is in info but should be added to pax header
self.assertEqual(info['name'], "foo\u3374")
self.assertEqual(info['mtime'], 0)
self.assertEqual(info['size'], 0)
self.assertEqual(info['uid'], 0)
self.assertEqual(info['gid'], 0)
self.assertEqual(header,
b'././@PaxHeader' + bytes(86) \
+ b'0000000\x000000000\x000000000\x0000000000130\x0000000000000\x00010207\x00 x' \
+ bytes(100) + b'ustar\x0000' + bytes(247) \
+ b'15 path=foo\xe3\x8d\xb4\n16 uid=16777216\n' \
+ b'16 gid=16777217\n20 size=10000000000\n' \
+ b'21 mtime=10000000000\n'+ bytes(424) + b'foo?' + bytes(96) \
+ b'0000644\x000000000\x000000000\x0000000000000\x0000000000000\x00006540\x00 0' \
+ bytes(100) + b'ustar\x0000' + bytes(247))
class UnicodeTest:
@@ -2274,23 +2468,32 @@ class MiscTest(unittest.TestCase):
tarfile.itn(0x10000000000, 6, tarfile.GNU_FORMAT)
def test__all__(self):
    # Names handed to support.check__all__ as not_exported when it checks
    # the tarfile module. The set is built once; the earlier duplicate
    # assignment with identical contents was a dead store.
    not_exported = {
        'version', 'grp', 'pwd', 'symlink_exception', 'NUL', 'BLOCKSIZE',
        'RECORDSIZE', 'GNU_MAGIC', 'POSIX_MAGIC', 'LENGTH_NAME',
        'LENGTH_LINK', 'LENGTH_PREFIX', 'REGTYPE', 'AREGTYPE', 'LNKTYPE',
        'SYMTYPE', 'CHRTYPE', 'BLKTYPE', 'DIRTYPE', 'FIFOTYPE', 'CONTTYPE',
        'GNUTYPE_LONGNAME', 'GNUTYPE_LONGLINK', 'GNUTYPE_SPARSE',
        'XHDTYPE', 'XGLTYPE', 'SOLARIS_XHDTYPE', 'SUPPORTED_TYPES',
        'REGULAR_TYPES', 'GNU_TYPES', 'PAX_FIELDS', 'PAX_NAME_FIELDS',
        'PAX_NUMBER_FIELDS', 'stn', 'nts', 'nti', 'itn', 'calc_chksums',
        'copyfileobj', 'filemode', 'EmptyHeaderError',
        'TruncatedHeaderError', 'EOFHeaderError', 'InvalidHeaderError',
        'SubsequentHeaderError', 'ExFileObject', 'main'}
    support.check__all__(self, tarfile, not_exported=not_exported)
def test_useful_error_message_when_modules_missing(self):
    # With TarFile.xzopen patched to fail with CompressionError,
    # tarfile.open() must raise ReadError whose message lists that
    # failure under "method xz".
    fname = os.path.join(os.path.dirname(__file__), 'testtar.tar.xz')
    # This must be a single exception instance. A trailing comma here would
    # make it a one-element tuple, which Mock treats as an iterable of
    # results that is exhausted after the first call.
    error = tarfile.CompressionError('lzma module is not available')
    with self.assertRaises(tarfile.ReadError) as excinfo:
        with unittest.mock.patch.object(tarfile.TarFile, 'xzopen', side_effect=error):
            tarfile.open(fname)

    self.assertIn(
        "\n- method xz: CompressionError('lzma module is not available')\n",
        str(excinfo.exception),
    )
class CommandLineTest(unittest.TestCase):
@@ -2330,7 +2533,8 @@ class CommandLineTest(unittest.TestCase):
def test_test_command_verbose(self):
    # Run the "-t" command with each verbose flag on every archive in
    # testtarnames and check the confirmation line in the output.
    for tar_name in testtarnames:
        for opt in '-v', '--verbose':
            # Invoke the command once, with PYTHONIOENCODING set; a
            # preceding call without it had its result overwritten unused.
            out = self.tarfilecmd(opt, '-t', tar_name,
                                  PYTHONIOENCODING='utf-8')
            self.assertIn(b'is a tar archive.\n', out)
def test_test_command_invalid_file(self):
@@ -2405,7 +2609,8 @@ class CommandLineTest(unittest.TestCase):
'and-utf8-bom-sig-only.txt')]
for opt in '-v', '--verbose':
try:
out = self.tarfilecmd(opt, '-c', tmpname, *files)
out = self.tarfilecmd(opt, '-c', tmpname, *files,
PYTHONIOENCODING='utf-8')
self.assertIn(b' file created.', out)
with tarfile.open(tmpname) as tar:
tar.getmembers()
@@ -2463,7 +2668,8 @@ class CommandLineTest(unittest.TestCase):
for opt in '-v', '--verbose':
try:
with os_helper.temp_cwd(tarextdir):
out = self.tarfilecmd(opt, '-e', tmpname)
out = self.tarfilecmd(opt, '-e', tmpname,
PYTHONIOENCODING='utf-8')
self.assertIn(b' file is extracted.', out)
finally:
os_helper.rmtree(tarextdir)