# Lib/shelve.py
"""Manage shelves of pickled objects.

A "shelf" is a persistent, dictionary-like object.  The difference
with dbm databases is that the values (not the keys!) in a shelf can
be essentially arbitrary Python objects -- anything that the "pickle"
module can handle.  This includes most class instances, recursive data
types, and objects containing lots of shared sub-objects.  The keys
are ordinary strings.

To summarize the interface (key is a string, data is an arbitrary
object):

        import shelve
        d = shelve.open(filename) # open, with (g)dbm filename -- no suffix

        d[key] = data   # store data at key (overwrites old data if
                        # using an existing key)
        data = d[key]   # retrieve a COPY of the data at key (raise
                        # KeyError if no such key) -- NOTE that this
                        # access returns a *copy* of the entry!
        del d[key]      # delete data stored at key (raises KeyError
                        # if no such key)
        flag = key in d # true if the key exists
        list = d.keys() # a list of all existing keys (slow!)

        d.close()       # close it

Dependent on the implementation, closing a persistent dictionary may
or may not be necessary to flush changes to disk.

Normally, d[key] returns a COPY of the entry.  This needs care when
mutable entries are mutated: for example, if d[key] is a list,
        d[key].append(anitem)
does NOT modify the entry d[key] itself, as stored in the persistent
mapping -- it only modifies the copy, which is then immediately
discarded, so that the append has NO effect whatsoever.  To append an
item to d[key] in a way that will affect the persistent mapping, use:
        data = d[key]
        data.append(anitem)
        d[key] = data

To avoid the problem with mutable entries, you may pass the keyword
argument writeback=True in the call to shelve.open.  When you use:
        d = shelve.open(filename, writeback=True)
then d keeps a cache of all entries you access, and writes them all back
to the persistent mapping when you call d.close().  This ensures that
such usage as d[key].append(anitem) works as intended.

However, using keyword argument writeback=True may consume vast amounts
of memory for the cache, and it may make d.close() very slow, if you
access many of d's entries after opening it in this way: d has no way to
check which of the entries you access are mutable and/or which ones you
actually mutate, so it must cache, and write back at close, all of the
entries that you access.  You can call d.sync() to write back all the
entries in the cache, and empty the cache (d.sync() also synchronizes
the persistent dictionary on disk, if feasible).
"""

from pickle import Pickler, Unpickler
from io import BytesIO

import collections.abc

__all__ = ["Shelf", "BsdDbShelf", "DbfilenameShelf", "open"]


class _ClosedDict(collections.abc.MutableMapping):
    'Marker for a closed dict.  Access attempts raise a ValueError.'

    def closed(self, *args):
        raise ValueError('invalid operation on closed shelf')
    __iter__ = __len__ = __getitem__ = __setitem__ = __delitem__ = keys = closed

    def __repr__(self):
        # A non-empty marker makes a closed shelf recognisable when debugging.
        return '<Closed Dictionary>'


class Shelf(collections.abc.MutableMapping):
    """Base class for shelf implementations.

    This is initialized with a dictionary-like object.
    See the module's __doc__ string for an overview of the interface.

    Keys are str on the Shelf side and are encoded with *keyencoding*
    before they reach the backing mapping; values are stored pickled.
    """

    def __init__(self, dict, protocol=None, writeback=False,
                 keyencoding="utf-8"):
        """Wrap the mapping *dict*.

        protocol    -- pickle protocol used for stored values (3 if None).
        writeback   -- when true, cache every accessed entry and write the
                       cache back on sync()/close().
        keyencoding -- encoding applied to str keys for the backing mapping.
        """
        self.dict = dict
        if protocol is None:
            protocol = 3
        self._protocol = protocol
        self.writeback = writeback
        self.cache = {}
        self.keyencoding = keyencoding

    def __iter__(self):
        for k in self.dict.keys():
            yield k.decode(self.keyencoding)

    def __len__(self):
        return len(self.dict)

    def __contains__(self, key):
        return key.encode(self.keyencoding) in self.dict

    def get(self, key, default=None):
        """Return the value stored at *key*, or *default* if it is absent."""
        if key.encode(self.keyencoding) in self.dict:
            return self[key]
        return default

    def __getitem__(self, key):
        try:
            value = self.cache[key]
        except KeyError:
            # SECURITY NOTE: values are unpickled, so the backing store must
            # come from a trusted source -- unpickling can run arbitrary code.
            f = BytesIO(self.dict[key.encode(self.keyencoding)])
            value = Unpickler(f).load()
            if self.writeback:
                self.cache[key] = value
        return value

    def __setitem__(self, key, value):
        if self.writeback:
            self.cache[key] = value
        f = BytesIO()
        p = Pickler(f, self._protocol)
        p.dump(value)
        self.dict[key.encode(self.keyencoding)] = f.getvalue()

    def __delitem__(self, key):
        del self.dict[key.encode(self.keyencoding)]
        try:
            del self.cache[key]
        except KeyError:
            pass

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def close(self):
        """Write back cached entries, close the backing mapping if it has a
        close() method, and mark the shelf closed.

        Closing an already-closed shelf is harmless.
        """
        if self.dict is None:
            return
        try:
            self.sync()
            try:
                self.dict.close()
            except AttributeError:
                pass
        finally:
            # Catch errors that may happen when close is called from __del__
            # because CPython is in interpreter shutdown.  The bare except is
            # deliberate: this is best-effort cleanup and must never raise.
            try:
                self.dict = _ClosedDict()
            except:
                self.dict = None

    def __del__(self):
        if not hasattr(self, 'writeback'):
            # __init__ didn't succeed, so don't bother closing
            # see http://bugs.python.org/issue1339007 for details
            return
        self.close()

    def sync(self):
        """Write every cached entry back, empty the cache, and call the
        backing mapping's sync() method if it has one.

        If writing an entry fails, the exception propagates, the cache is
        kept (so nothing is forgotten) and writeback mode stays enabled.
        """
        if self.writeback and self.cache:
            # Temporarily disable writeback so __setitem__ does not touch
            # the cache while it is being iterated.
            self.writeback = False
            try:
                for key, entry in self.cache.items():
                    self[key] = entry
            finally:
                # Restore the flag even when pickling an entry raises;
                # otherwise the shelf would silently stop caching.
                self.writeback = True
            self.cache = {}
        if hasattr(self.dict, 'sync'):
            self.dict.sync()


class BsdDbShelf(Shelf):
    """Shelf implementation using the "BSD" db interface.

    This adds methods first(), next(), previous(), last() and
    set_location() that have no counterpart in [g]dbm databases.

    The actual database must be opened using one of the "bsddb"
    modules "open" routines (i.e. bsddb.hashopen, bsddb.btopen or
    bsddb.rnopen) and passed to the constructor.

    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, dict, protocol=None, writeback=False,
                 keyencoding="utf-8"):
        Shelf.__init__(self, dict, protocol, writeback, keyencoding)

    def _load_item(self, item):
        # Turn a raw (encoded key, pickled value) pair from the database
        # into a (str key, object) pair.
        (key, value) = item
        f = BytesIO(value)
        return (key.decode(self.keyencoding), Unpickler(f).load())

    def set_location(self, key):
        # Shelf stores keys encoded, so a str key has to be encoded the same
        # way before it is handed to the database; bytes keys pass through
        # unchanged for callers that already encode.
        if isinstance(key, str):
            key = key.encode(self.keyencoding)
        return self._load_item(self.dict.set_location(key))

    def next(self):
        return self._load_item(next(self.dict))

    def previous(self):
        return self._load_item(self.dict.previous())

    def first(self):
        return self._load_item(self.dict.first())

    def last(self):
        return self._load_item(self.dict.last())


class DbfilenameShelf(Shelf):
    """Shelf implementation using the "dbm" generic dbm interface.

    This is initialized with the filename for the dbm database.
    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, filename, flag='c', protocol=None, writeback=False):
        # Imported lazily so that merely importing this module does not
        # import dbm.
        import dbm
        Shelf.__init__(self, dbm.open(filename, flag), protocol, writeback)


def open(filename, flag='c', protocol=None, writeback=False):
    """Open a persistent dictionary for reading and writing.

    The filename parameter is the base filename for the underlying
    database.  As a side-effect, an extension may be added to the
    filename and more than one file may be created.  The optional flag
    parameter has the same interpretation as the flag parameter of
    dbm.open(). The optional protocol parameter specifies the
    version of the pickle protocol.

    See the module's __doc__ string for an overview of the interface.
    """

    return DbfilenameShelf(filename, flag, protocol, writeback)
# Lib/test/test_shelve.py
import unittest
import shelve
import glob
from test import support
from test import mapping_tests
from collections.abc import MutableMapping
from test.test_dbm import dbm_iterator


def L1(s):
    """Decode the bytes key *s* as latin-1."""
    return s.decode("latin-1")


class byteskeydict(MutableMapping):
    "Mapping that supports bytes keys"

    def __init__(self, d=None):
        # Keys are held internally as latin-1 decoded str.  The optional
        # *d* seeds the mapping from such a dict (used by copy()); it is
        # copied so that two instances never share storage.
        self.d = {} if d is None else dict(d)

    def __getitem__(self, key):
        return self.d[L1(key)]

    def __setitem__(self, key, value):
        self.d[L1(key)] = value

    def __delitem__(self, key):
        del self.d[L1(key)]

    def __len__(self):
        return len(self.d)

    def iterkeys(self):
        for k in self.d.keys():
            yield k.encode("latin-1")

    __iter__ = iterkeys

    def keys(self):
        return list(self.iterkeys())

    def copy(self):
        return byteskeydict(self.d)


class TestCase(unittest.TestCase):

    fn = "shelftemp.db"

    def tearDown(self):
        # Database backends may add their own suffixes to the filename.
        for f in glob.glob(self.fn+"*"):
            support.unlink(f)

    def test_close(self):
        d1 = {}
        s = shelve.Shelf(d1, protocol=2, writeback=False)
        s['key1'] = [1,2,3,4]
        self.assertEqual(s['key1'], [1,2,3,4])
        self.assertEqual(len(s), 1)
        s.close()
        self.assertRaises(ValueError, len, s)
        # Closed shelf should not find a key
        with self.assertRaises(ValueError):
            s['key1']

    def test_ascii_file_shelf(self):
        s = shelve.open(self.fn, protocol=0)
        try:
            s['key1'] = (1,2,3,4)
            self.assertEqual(s['key1'], (1,2,3,4))
        finally:
            s.close()

    def test_binary_file_shelf(self):
        s = shelve.open(self.fn, protocol=1)
        try:
            s['key1'] = (1,2,3,4)
            self.assertEqual(s['key1'], (1,2,3,4))
        finally:
            s.close()

    def test_proto2_file_shelf(self):
        s = shelve.open(self.fn, protocol=2)
        try:
            s['key1'] = (1,2,3,4)
            self.assertEqual(s['key1'], (1,2,3,4))
        finally:
            s.close()

    def test_in_memory_shelf(self):
        d1 = byteskeydict()
        with shelve.Shelf(d1, protocol=0) as s:
            s['key1'] = (1,2,3,4)
            self.assertEqual(s['key1'], (1,2,3,4))
        d2 = byteskeydict()
        with shelve.Shelf(d2, protocol=1) as s:
            s['key1'] = (1,2,3,4)
            self.assertEqual(s['key1'], (1,2,3,4))

        self.assertEqual(len(d1), 1)
        self.assertEqual(len(d2), 1)
        # Different pickle protocols must give different stored bytes.
        self.assertNotEqual(d1.items(), d2.items())

    def test_mutable_entry(self):
        d1 = byteskeydict()
        with shelve.Shelf(d1, protocol=2, writeback=False) as s:
            s['key1'] = [1,2,3,4]
            self.assertEqual(s['key1'], [1,2,3,4])
            # Without writeback the append only changes a throwaway copy.
            s['key1'].append(5)
            self.assertEqual(s['key1'], [1,2,3,4])

        d2 = byteskeydict()
        with shelve.Shelf(d2, protocol=2, writeback=True) as s:
            s['key1'] = [1,2,3,4]
            self.assertEqual(s['key1'], [1,2,3,4])
            s['key1'].append(5)
            self.assertEqual(s['key1'], [1,2,3,4,5])

        self.assertEqual(len(d1), 1)
        self.assertEqual(len(d2), 1)

    def test_keyencoding(self):
        d = {}
        key = 'Pöp'
        # the default keyencoding is utf-8
        shelve.Shelf(d)[key] = [1]
        self.assertIn(key.encode('utf-8'), d)
        # but a different one can be given
        shelve.Shelf(d, keyencoding='latin-1')[key] = [1]
        self.assertIn(key.encode('latin-1'), d)
        # with all consequences
        s = shelve.Shelf(d, keyencoding='ascii')
        self.assertRaises(UnicodeEncodeError, s.__setitem__, key, [1])

    def test_writeback_also_writes_immediately(self):
        # Issue 5754
        d = {}
        key = 'key'
        encodedkey = key.encode('utf-8')
        with shelve.Shelf(d, writeback=True) as s:
            s[key] = [1]
            p1 = d[encodedkey]  # Will give a KeyError if backing store not updated
            s['key'].append(2)
        p2 = d[encodedkey]
        self.assertNotEqual(p1, p2)  # Write creates new object in store

    def test_with(self):
        d1 = {}
        with shelve.Shelf(d1, protocol=2, writeback=False) as s:
            s['key1'] = [1,2,3,4]
            self.assertEqual(s['key1'], [1,2,3,4])
            self.assertEqual(len(s), 1)
        self.assertRaises(ValueError, len, s)
        # Closed shelf should not find a key
        with self.assertRaises(ValueError):
            s['key1']

    def test_default_protocol(self):
        with shelve.Shelf({}) as s:
            self.assertEqual(s._protocol, 3)


class TestShelveBase(mapping_tests.BasicTestMappingProtocol):
    # Subclasses supply _args (keyword arguments for the shelf) and _in_mem
    # (True: back the shelf with byteskeydict; False: with a dbm file).
    fn = "shelftemp.db"
    counter = 0
    def __init__(self, *args, **kw):
        self._db = []
        mapping_tests.BasicTestMappingProtocol.__init__(self, *args, **kw)
    type2test = shelve.Shelf
    def _reference(self):
        return {"key1":"value1", "key2":2, "key3":(1,2,3)}
    def _empty_mapping(self):
        if self._in_mem:
            x= shelve.Shelf(byteskeydict(), **self._args)
        else:
            # A fresh filename per mapping keeps file-backed shelves apart.
            self.counter+=1
            x= shelve.open(self.fn+str(self.counter), **self._args)
        self._db.append(x)
        return x
    def tearDown(self):
        for db in self._db:
            db.close()
        self._db = []
        if not self._in_mem:
            for f in glob.glob(self.fn+"*"):
                support.unlink(f)

class TestAsciiFileShelve(TestShelveBase):
    _args={'protocol':0}
    _in_mem = False
class TestBinaryFileShelve(TestShelveBase):
    _args={'protocol':1}
    _in_mem = False
class TestProto2FileShelve(TestShelveBase):
    _args={'protocol':2}
    _in_mem = False
class TestAsciiMemShelve(TestShelveBase):
    _args={'protocol':0}
    _in_mem = True
class TestBinaryMemShelve(TestShelveBase):
    _args={'protocol':1}
    _in_mem = True
class TestProto2MemShelve(TestShelveBase):
    _args={'protocol':2}
    _in_mem = True

def test_main():
    # The loop variable is unused on purpose: the whole suite is run once
    # per item yielded by dbm_iterator().
    # NOTE(review): presumably dbm_iterator() switches the default dbm
    # backend on each iteration -- confirm against test.test_dbm.
    for module in dbm_iterator():
        support.run_unittest(
            TestAsciiFileShelve,
            TestBinaryFileShelve,
            TestProto2FileShelve,
            TestAsciiMemShelve,
            TestBinaryMemShelve,
            TestProto2MemShelve,
            TestCase
        )

if __name__ == "__main__":
    test_main()