| 1 | """A dumb and slow but simple dbm clone.
|
|---|
| 2 |
|
|---|
| 3 | For database spam, spam.dir contains the index (a text file),
|
|---|
| 4 | spam.bak *may* contain a backup of the index (also a text file),
|
|---|
| 5 | while spam.dat contains the data (a binary file).
|
|---|
| 6 |
|
|---|
| 7 | XXX TO DO:
|
|---|
| 8 |
|
|---|
| 9 | - seems to contain a bug when updating...
|
|---|
| 10 |
|
|---|
| 11 | - reclaim free space (currently, space once occupied by deleted or expanded
|
|---|
| 12 | items is never reused)
|
|---|
| 13 |
|
|---|
| 14 | - support concurrent access (currently, if two processes take turns making
|
|---|
| 15 | updates, they can mess up the index)
|
|---|
| 16 |
|
|---|
| 17 | - support efficient access to large databases (currently, the whole index
|
|---|
| 18 | is read when the database is opened, and some updates rewrite the whole index)
|
|---|
| 19 |
|
|---|
| 20 | - support opening for read-only (flag = 'm')
|
|---|
| 21 |
|
|---|
| 22 | """
|
|---|
| 23 |
|
|---|
| 24 | import os as _os
|
|---|
| 25 | import __builtin__
|
|---|
| 26 | import UserDict
|
|---|
| 27 |
|
|---|
# Keep a reference to the builtin open(): this module defines its own
# open() function further down, which shadows the builtin name at module
# level.
_open = __builtin__.open

# Alignment unit (in bytes) for the data file: every stored value begins
# at an offset that is a multiple of _BLOCKSIZE.
_BLOCKSIZE = 512

# Exception exported under the name "error".
error = IOError  # For anydbm
| 33 |
|
|---|
class _Database(UserDict.DictMixin):
    """Dictionary-like database stored in a directory file and a data file.

    Keys and values must both be plain strings.  The in-memory index maps
    each key to a (pos, siz) pair locating its value inside the data
    file; the directory file is the on-disk copy of that index and is
    only guaranteed to be up to date after _commit() (alias sync()) or
    close() has run.
    """

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _open = _open   # for _commit()

    def __init__(self, filebasename, mode):
        """Open (creating if necessary) the database named filebasename.

        filebasename is the path without extension; the 'dir', 'dat' and
        'bak' extensions are appended to it.  mode is the permission
        mode passed to chmod() for files this object creates or rewrites.
        """
        self._mode = mode

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + _os.extsep + 'dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + _os.extsep + 'dat'
        self._bakfile = filebasename + _os.extsep + 'bak'

        # The index is an in-memory dict, mirroring the directory file.
        # It stays None until _update() has loaded it successfully, so
        # that a failed __init__ never leads to a _commit() from __del__.
        self._index = None  # maps keys to (pos, siz) pairs

        # Mod by Jack: create data file if needed
        try:
            f = _open(self._datfile, 'r')
        except IOError:
            f = _open(self._datfile, 'w')
            try:
                self._chmod(self._datfile)
            finally:
                f.close()
        else:
            f.close()
        self._update()

    # Read directory file into the in-memory index dict.
    def _update(self):
        """Load the directory file into self._index.

        A missing or unreadable directory file yields an empty index.
        The index is built in a local dict and only published once every
        line has been parsed: if parsing fails, self._index keeps its
        previous value, so a later _commit() (e.g. from __del__) cannot
        overwrite the directory file with a truncated index.
        """
        index = {}
        try:
            f = _open(self._dirfile)
        except IOError:
            pass
        else:
            try:
                for line in f:
                    line = line.rstrip()
                    # NOTE(security): eval() executes whatever expression
                    # the directory file contains.  Only open databases
                    # from trusted sources; ast.literal_eval() is a safer
                    # parser where the supported Python versions allow it.
                    key, pos_and_siz_pair = eval(line)
                    index[key] = pos_and_siz_pair
            finally:
                f.close()
        self._index = index

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        """Rewrite the directory file from the in-memory index.

        Does nothing when the index is None (never loaded, or already
        closed).
        """
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None:
            return  # nothing to do

        try:
            self._os.unlink(self._bakfile)
        except self._os.error:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except self._os.error:
            pass

        f = self._open(self._dirfile, 'w')
        try:
            self._chmod(self._dirfile)
            for key, pos_and_siz_pair in self._index.iteritems():
                f.write("%r, %r\n" % (key, pos_and_siz_pair))
        finally:
            f.close()

    sync = _commit

    def __getitem__(self, key):
        """Return the value stored under key; raise KeyError if absent."""
        pos, siz = self._index[key]  # may raise KeyError
        f = _open(self._datfile, 'rb')
        try:
            f.seek(pos)
            dat = f.read(siz)
        finally:
            f.close()
        return dat

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        """Append val at the next aligned offset; return (pos, len(val))."""
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(0, 2)
            pos = int(f.tell())
            # Round the end-of-file offset up to a multiple of _BLOCKSIZE.
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write('\0' * (npos - pos))
            pos = npos
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        """Overwrite the data file at offset pos with val.

        Returns (pos, len(val)).
        """
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(pos)
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        """Record a new key in the index and append it to the directory file."""
        self._index[key] = pos_and_siz_pair
        f = _open(self._dirfile, 'a')
        try:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key, pos_and_siz_pair))
        finally:
            f.close()

    def __setitem__(self, key, val):
        """Store val under key.

        Raises TypeError unless both key and val are plain strings.
        """
        if not type(key) == type('') == type(val):
            raise TypeError("keys and values must be strings")
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    def __delitem__(self, key):
        """Remove key from the index and rewrite the directory file.

        Raises KeyError if key is absent.
        """
        # The blocks used by the associated value are lost.
        del self._index[key]
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        """Return a list of all keys in the index."""
        return self._index.keys()

    def has_key(self, key):
        """Return true if key is in the index."""
        return key in self._index

    def __contains__(self, key):
        """Support the "key in db" test."""
        return key in self._index

    def iterkeys(self):
        """Return an iterator over the keys in the index."""
        return self._index.iterkeys()
    __iter__ = iterkeys

    def __len__(self):
        """Return the number of keys in the index."""
        return len(self._index)

    def close(self):
        """Commit the index to disk and drop the index and file names.

        Calling close() again afterwards is harmless: _commit() does
        nothing once the index has been reset to None.
        """
        self._commit()
        self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        """Apply self._mode to file on platforms where os.chmod exists."""
        if hasattr(self._os, 'chmod'):
            self._os.chmod(file, self._mode)
| 223 |
|
|---|
| 224 |
|
|---|
| 225 | def open(file, flag=None, mode=0666):
|
|---|
| 226 | """Open the database file, filename, and return corresponding object.
|
|---|
| 227 |
|
|---|
| 228 | The flag argument, used to control how the database is opened in the
|
|---|
| 229 | other DBM implementations, is ignored in the dumbdbm module; the
|
|---|
| 230 | database is always opened for update, and will be created if it does
|
|---|
| 231 | not exist.
|
|---|
| 232 |
|
|---|
| 233 | The optional mode argument is the UNIX mode of the file, used only when
|
|---|
| 234 | the database has to be created. It defaults to octal code 0666 (and
|
|---|
| 235 | will be modified by the prevailing umask).
|
|---|
| 236 |
|
|---|
| 237 | """
|
|---|
| 238 | # flag argument is currently ignored
|
|---|
| 239 |
|
|---|
| 240 | # Modify mode depending on the umask
|
|---|
| 241 | try:
|
|---|
| 242 | um = _os.umask(0)
|
|---|
| 243 | _os.umask(um)
|
|---|
| 244 | except AttributeError:
|
|---|
| 245 | pass
|
|---|
| 246 | else:
|
|---|
| 247 | # Turn off any bits that are set in the umask
|
|---|
| 248 | mode = mode & (~um)
|
|---|
| 249 |
|
|---|
| 250 | return _Database(file, mode)
|
|---|