Context Navigation

dbtables.py

Visit:

Last change on this file was 3225, checked in by bird, 18 years ago
Python 2.5
File size: 25.0 KB

Line
1	#-----------------------------------------------------------------------
2	#
3	# Copyright (C) 2000, 2001 by Autonomous Zone Industries
4	# Copyright (C) 2002 Gregory P. Smith
5	#
6	# License: This is free software. You may use this software for any
7	# purpose including modification/redistribution, so long as
8	# this header remains intact and that you do not claim any
9	# rights of ownership or authorship of this software. This
10	# software has been tested, but no warranty is expressed or
11	# implied.
12	#
13	# -- Gregory P. Smith <greg@electricrain.com>
14
15	# This provides a simple database table interface built on top of
16	# the Python BerkeleyDB 3 interface.
17	#
18	_cvsid = '$Id: dbtables.py 46858 2006-06-11 08:35:14Z neal.norwitz $'
19
20	import re
21	import sys
22	import copy
23	import xdrlib
24	import random
25	from types import ListType, StringType
26	import cPickle as pickle
27
28	try:
29	# For Pythons w/distutils pybsddb
30	from bsddb3.db import *
31	except ImportError:
32	# For Python 2.3
33	from bsddb.db import *
34
# XXX(nnorwitz): is this correct?  DBIncompleteError is conditional in _bsddb.c
try:
    DBIncompleteError
except NameError:
    # The star-import above did not provide the name; define a stand-in so
    # that ``except DBIncompleteError`` clauses in this module remain valid.
    class DBIncompleteError(Exception):
        """Placeholder used when the bsddb module does not export it."""
41
42	class TableDBError(StandardError):
43	pass
class TableAlreadyExists(TableDBError):
    """Raised when a table is created under a name that is already in use."""
46
47
class Cond:
    """Condition that accepts every value; base class of the other conditions."""

    def __call__(self, s):
        # The argument is deliberately ignored: everything matches.
        return 1
52
class ExactCond(Cond):
    """Condition that accepts only values equal to ``strtomatch``."""

    def __init__(self, strtomatch):
        self.strtomatch = strtomatch

    def __call__(self, s):
        expected = self.strtomatch
        return s == expected
59
class PrefixCond(Cond):
    """Condition that accepts values beginning with ``prefix``."""

    def __init__(self, prefix):
        self.prefix = prefix

    def __call__(self, s):
        wanted = self.prefix
        # Compare the leading slice of s (same length as the prefix).
        head = s[:len(wanted)]
        return head == wanted
66
class PostfixCond(Cond):
    """Condition that accepts values ending with ``postfix``.

    An empty postfix matches every value, mirroring how PrefixCond treats an
    empty prefix.
    """

    def __init__(self, postfix):
        self.postfix = postfix

    def __call__(self, s):
        postfix = self.postfix
        if not postfix:
            # s[-0:] is the *whole* string, so the slice comparison below
            # would wrongly reject every non-empty value.
            return True
        return s[-len(postfix):] == postfix
73
class LikeCond(Cond):
    """
    Acts as a function that will match using an SQL 'LIKE' style
    string.  % signs are wild cards; every other character is matched
    literally.  Matching is case insensitive unless other re_flags are given.

    likestr  - the LIKE pattern.
    re_flags - flags handed to re.compile() (default: re.IGNORECASE).

    The translated regular expression body is kept in self.likestr and the
    compiled pattern in self.re.  Calling the instance returns the result of
    self.re.match(s): a match object on success, None otherwise.
    """
    def __init__(self, likestr, re_flags=re.IGNORECASE):
        # Escape each literal segment with re.escape() so that *all* regex
        # metacharacters (including \ ^ $ | { }) lose their special meaning,
        # then join the segments with the wildcard that replaces '%'.
        literal_parts = [re.escape(part) for part in likestr.split('%')]
        self.likestr = '.*'.join(literal_parts)
        self.re = re.compile('^' + self.likestr + '$', re_flags)

    def __call__(self, s):
        return self.re.match(s)
90
#
# Keys under which database-wide metadata is stored.
#
# Key holding the pickled list of the tables in this db.
_table_names_key = '__TABLE_NAMES__'
# Suffix: table_name + this key holds the pickled list of columns.
_columns = '._COLUMNS__'
96
def _columns_key(table):
    """Return the key under which the column list of *table* is stored."""
    key = table + _columns
    return key
99
#
# Markers used to build the keys of a table's own records.
#
# table + this + column + this + rowid is the key of one stored value.
_data = '._DATA_.'
# table + this + rowid + this is the key of the per-row marker entry
# (the entry carries no data; it only reserves the rowid).
_rowid = '._ROWID_.'
# Length in bytes of the unique rowid strings.
_rowid_str_len = 8
107
def _data_key(table, col, rowid):
    """Return the key of the value stored for (table, col, rowid)."""
    column_prefix = table + _data + col + _data
    return column_prefix + rowid
110
def _search_col_data_key(table, col):
    """Return the key prefix shared by every value of column *col* in *table*."""
    table_prefix = table + _data
    return table_prefix + col + _data
113
def _search_all_data_key(table):
    """Return the key prefix shared by every data entry of *table*."""
    prefix = table + _data
    return prefix
116
def _rowid_key(table, rowid):
    """Return the key of the marker entry that reserves *rowid* in *table*."""
    prefix = table + _rowid
    return prefix + rowid + _rowid
119
def _search_rowid_key(table):
    """Return the key prefix shared by every rowid marker entry of *table*."""
    prefix = table + _rowid
    return prefix
122
def contains_metastrings(s):
    """Report whether *s* contains any of the reserved metadata strings.

    Returns 1 when one of the markers used to build database keys occurs
    anywhere in s (such a name would interfere with dbtables operation),
    and 0 otherwise.
    """
    reserved = (_table_names_key, _columns, _data, _rowid)
    for marker in reserved:
        if s.find(marker) != -1:
            return 1
    return 0
135
136
137	class bsdTableDB :
138	def __init__(self, filename, dbhome, create=0, truncate=0, mode=0600,
139	recover=0, dbflags=0):
140	"""bsdTableDB(filename, dbhome, create=0, truncate=0, mode=0600)
141
142	Open database name in the dbhome BerkeleyDB directory.
143	Use keyword arguments when calling this constructor.
144	"""
145	self.db = None
146	myflags = DB_THREAD
147	if create:
148	myflags \|= DB_CREATE
149	flagsforenv = (DB_INIT_MPOOL \| DB_INIT_LOCK \| DB_INIT_LOG \|
150	DB_INIT_TXN \| dbflags)
151	# DB_AUTO_COMMIT isn't a valid flag for env.open()
152	try:
153	dbflags \|= DB_AUTO_COMMIT
154	except AttributeError:
155	pass
156	if recover:
157	flagsforenv = flagsforenv \| DB_RECOVER
158	self.env = DBEnv()
159	# enable auto deadlock avoidance
160	self.env.set_lk_detect(DB_LOCK_DEFAULT)
161	self.env.open(dbhome, myflags \| flagsforenv)
162	if truncate:
163	myflags \|= DB_TRUNCATE
164	self.db = DB(self.env)
165	# this code relies on DBCursor.set* methods to raise exceptions
166	# rather than returning None
167	self.db.set_get_returns_none(1)
168	# allow duplicate entries [warning: be careful w/ metadata]
169	self.db.set_flags(DB_DUP)
170	self.db.open(filename, DB_BTREE, dbflags \| myflags, mode)
171	self.dbfilename = filename
172	# Initialize the table names list if this is a new database
173	txn = self.env.txn_begin()
174	try:
175	if not self.db.has_key(_table_names_key, txn):
176	self.db.put(_table_names_key, pickle.dumps([], 1), txn=txn)
177	# Yes, bare except
178	except:
179	txn.abort()
180	raise
181	else:
182	txn.commit()
183	# TODO verify more of the database's metadata?
184	self.__tablecolumns = {}
185
186	def __del__(self):
187	self.close()
188
189	def close(self):
190	if self.db is not None:
191	self.db.close()
192	self.db = None
193	if self.env is not None:
194	self.env.close()
195	self.env = None
196
197	def checkpoint(self, mins=0):
198	try:
199	self.env.txn_checkpoint(mins)
200	except DBIncompleteError:
201	pass
202
203	def sync(self):
204	try:
205	self.db.sync()
206	except DBIncompleteError:
207	pass
208
209	def _db_print(self) :
210	"""Print the database to stdout for debugging"""
211	print "****** Printing raw database for debugging ******"
212	cur = self.db.cursor()
213	try:
214	key, data = cur.first()
215	while 1:
216	print repr({key: data})
217	next = cur.next()
218	if next:
219	key, data = next
220	else:
221	cur.close()
222	return
223	except DBNotFoundError:
224	cur.close()
225
226
227	def CreateTable(self, table, columns):
228	"""CreateTable(table, columns) - Create a new table in the database.
229
230	raises TableDBError if it already exists or for other DB errors.
231	"""
232	assert isinstance(columns, ListType)
233	txn = None
234	try:
235	# checking sanity of the table and column names here on
236	# table creation will prevent problems elsewhere.
237	if contains_metastrings(table):
238	raise ValueError(
239	"bad table name: contains reserved metastrings")
240	for column in columns :
241	if contains_metastrings(column):
242	raise ValueError(
243	"bad column name: contains reserved metastrings")
244
245	columnlist_key = _columns_key(table)
246	if self.db.has_key(columnlist_key):
247	raise TableAlreadyExists, "table already exists"
248
249	txn = self.env.txn_begin()
250	# store the table's column info
251	self.db.put(columnlist_key, pickle.dumps(columns, 1), txn=txn)
252
253	# add the table name to the tablelist
254	tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn,
255	flags=DB_RMW))
256	tablelist.append(table)
257	# delete 1st, in case we opened with DB_DUP
258	self.db.delete(_table_names_key, txn)
259	self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
260
261	txn.commit()
262	txn = None
263	except DBError, dberror:
264	if txn:
265	txn.abort()
266	raise TableDBError, dberror[1]
267
268
269	def ListTableColumns(self, table):
270	"""Return a list of columns in the given table.
271	[] if the table doesn't exist.
272	"""
273	assert isinstance(table, StringType)
274	if contains_metastrings(table):
275	raise ValueError, "bad table name: contains reserved metastrings"
276
277	columnlist_key = _columns_key(table)
278	if not self.db.has_key(columnlist_key):
279	return []
280	pickledcolumnlist = self.db.get(columnlist_key)
281	if pickledcolumnlist:
282	return pickle.loads(pickledcolumnlist)
283	else:
284	return []
285
286	def ListTables(self):
287	"""Return a list of tables in this database."""
288	pickledtablelist = self.db.get(_table_names_key)
289	if pickledtablelist:
290	return pickle.loads(pickledtablelist)
291	else:
292	return []
293
294	def CreateOrExtendTable(self, table, columns):
295	"""CreateOrExtendTable(table, columns)
296
297	Create a new table in the database.
298
299	If a table of this name already exists, extend it to have any
300	additional columns present in the given list as well as
301	all of its current columns.
302	"""
303	assert isinstance(columns, ListType)
304	try:
305	self.CreateTable(table, columns)
306	except TableAlreadyExists:
307	# the table already existed, add any new columns
308	txn = None
309	try:
310	columnlist_key = _columns_key(table)
311	txn = self.env.txn_begin()
312
313	# load the current column list
314	oldcolumnlist = pickle.loads(
315	self.db.get(columnlist_key, txn=txn, flags=DB_RMW))
316	# create a hash table for fast lookups of column names in the
317	# loop below
318	oldcolumnhash = {}
319	for c in oldcolumnlist:
320	oldcolumnhash[c] = c
321
322	# create a new column list containing both the old and new
323	# column names
324	newcolumnlist = copy.copy(oldcolumnlist)
325	for c in columns:
326	if not oldcolumnhash.has_key(c):
327	newcolumnlist.append(c)
328
329	# store the table's new extended column list
330	if newcolumnlist != oldcolumnlist :
331	# delete the old one first since we opened with DB_DUP
332	self.db.delete(columnlist_key, txn)
333	self.db.put(columnlist_key,
334	pickle.dumps(newcolumnlist, 1),
335	txn=txn)
336
337	txn.commit()
338	txn = None
339
340	self.__load_column_info(table)
341	except DBError, dberror:
342	if txn:
343	txn.abort()
344	raise TableDBError, dberror[1]
345
346
347	def __load_column_info(self, table) :
348	"""initialize the self.__tablecolumns dict"""
349	# check the column names
350	try:
351	tcolpickles = self.db.get(_columns_key(table))
352	except DBNotFoundError:
353	raise TableDBError, "unknown table: %r" % (table,)
354	if not tcolpickles:
355	raise TableDBError, "unknown table: %r" % (table,)
356	self.__tablecolumns[table] = pickle.loads(tcolpickles)
357
358	def __new_rowid(self, table, txn) :
359	"""Create a new unique row identifier"""
360	unique = 0
361	while not unique:
362	# Generate a random 64-bit row ID string
363	# (note: this code has <64 bits of randomness
364	# but it's plenty for our database id needs!)
365	p = xdrlib.Packer()
366	p.pack_int(int(random.random()*2147483647))
367	p.pack_int(int(random.random()*2147483647))
368	newid = p.get_buffer()
369
370	# Guarantee uniqueness by adding this key to the database
371	try:
372	self.db.put(_rowid_key(table, newid), None, txn=txn,
373	flags=DB_NOOVERWRITE)
374	except DBKeyExistError:
375	pass
376	else:
377	unique = 1
378
379	return newid
380
381
382	def Insert(self, table, rowdict) :
383	"""Insert(table, datadict) - Insert a new row into the table
384	using the keys+values from rowdict as the column values.
385	"""
386	txn = None
387	try:
388	if not self.db.has_key(_columns_key(table)):
389	raise TableDBError, "unknown table"
390
391	# check the validity of each column name
392	if not self.__tablecolumns.has_key(table):
393	self.__load_column_info(table)
394	for column in rowdict.keys() :
395	if not self.__tablecolumns[table].count(column):
396	raise TableDBError, "unknown column: %r" % (column,)
397
398	# get a unique row identifier for this row
399	txn = self.env.txn_begin()
400	rowid = self.__new_rowid(table, txn=txn)
401
402	# insert the row values into the table database
403	for column, dataitem in rowdict.items():
404	# store the value
405	self.db.put(_data_key(table, column, rowid), dataitem, txn=txn)
406
407	txn.commit()
408	txn = None
409
410	except DBError, dberror:
411	# WIBNI we could just abort the txn and re-raise the exception?
412	# But no, because TableDBError is not related to DBError via
413	# inheritance, so it would be backwards incompatible. Do the next
414	# best thing.
415	info = sys.exc_info()
416	if txn:
417	txn.abort()
418	self.db.delete(_rowid_key(table, rowid))
419	raise TableDBError, dberror[1], info[2]
420
421
422	def Modify(self, table, conditions={}, mappings={}):
423	"""Modify(table, conditions={}, mappings={}) - Modify items in rows matching 'conditions' using mapping functions in 'mappings'
424
425	* table - the table name
426	* conditions - a dictionary keyed on column names containing
427	a condition callable expecting the data string as an
428	argument and returning a boolean.
429	* mappings - a dictionary keyed on column names containing a
430	condition callable expecting the data string as an argument and
431	returning the new string for that column.
432	"""
433	try:
434	matching_rowids = self.__Select(table, [], conditions)
435
436	# modify only requested columns
437	columns = mappings.keys()
438	for rowid in matching_rowids.keys():
439	txn = None
440	try:
441	for column in columns:
442	txn = self.env.txn_begin()
443	# modify the requested column
444	try:
445	dataitem = self.db.get(
446	_data_key(table, column, rowid),
447	txn)
448	self.db.delete(
449	_data_key(table, column, rowid),
450	txn)
451	except DBNotFoundError:
452	# XXXXXXX row key somehow didn't exist, assume no
453	# error
454	dataitem = None
455	dataitem = mappings[column](dataitem)
456	if dataitem <> None:
457	self.db.put(
458	_data_key(table, column, rowid),
459	dataitem, txn=txn)
460	txn.commit()
461	txn = None
462
463	# catch all exceptions here since we call unknown callables
464	except:
465	if txn:
466	txn.abort()
467	raise
468
469	except DBError, dberror:
470	raise TableDBError, dberror[1]
471
472	def Delete(self, table, conditions={}):
473	"""Delete(table, conditions) - Delete items matching the given
474	conditions from the table.
475
476	* conditions - a dictionary keyed on column names containing
477	condition functions expecting the data string as an
478	argument and returning a boolean.
479	"""
480	try:
481	matching_rowids = self.__Select(table, [], conditions)
482
483	# delete row data from all columns
484	columns = self.__tablecolumns[table]
485	for rowid in matching_rowids.keys():
486	txn = None
487	try:
488	txn = self.env.txn_begin()
489	for column in columns:
490	# delete the data key
491	try:
492	self.db.delete(_data_key(table, column, rowid),
493	txn)
494	except DBNotFoundError:
495	# XXXXXXX column may not exist, assume no error
496	pass
497
498	try:
499	self.db.delete(_rowid_key(table, rowid), txn)
500	except DBNotFoundError:
501	# XXXXXXX row key somehow didn't exist, assume no error
502	pass
503	txn.commit()
504	txn = None
505	except DBError, dberror:
506	if txn:
507	txn.abort()
508	raise
509	except DBError, dberror:
510	raise TableDBError, dberror[1]
511
512
513	def Select(self, table, columns, conditions={}):
514	"""Select(table, columns, conditions) - retrieve specific row data
515	Returns a list of row column->value mapping dictionaries.
516
517	* columns - a list of which column data to return. If
518	columns is None, all columns will be returned.
519	* conditions - a dictionary keyed on column names
520	containing callable conditions expecting the data string as an
521	argument and returning a boolean.
522	"""
523	try:
524	if not self.__tablecolumns.has_key(table):
525	self.__load_column_info(table)
526	if columns is None:
527	columns = self.__tablecolumns[table]
528	matching_rowids = self.__Select(table, columns, conditions)
529	except DBError, dberror:
530	raise TableDBError, dberror[1]
531	# return the matches as a list of dictionaries
532	return matching_rowids.values()
533
534
535	def __Select(self, table, columns, conditions):
536	"""__Select() - Used to implement Select and Delete (above)
537	Returns a dictionary keyed on rowids containing dicts
538	holding the row data for columns listed in the columns param
539	that match the given conditions.
540	* conditions is a dictionary keyed on column names
541	containing callable conditions expecting the data string as an
542	argument and returning a boolean.
543	"""
544	# check the validity of each column name
545	if not self.__tablecolumns.has_key(table):
546	self.__load_column_info(table)
547	if columns is None:
548	columns = self.tablecolumns[table]
549	for column in (columns + conditions.keys()):
550	if not self.__tablecolumns[table].count(column):
551	raise TableDBError, "unknown column: %r" % (column,)
552
553	# keyed on rows that match so far, containings dicts keyed on
554	# column names containing the data for that row and column.
555	matching_rowids = {}
556	# keys are rowids that do not match
557	rejected_rowids = {}
558
559	# attempt to sort the conditions in such a way as to minimize full
560	# column lookups
561	def cmp_conditions(atuple, btuple):
562	a = atuple[1]
563	b = btuple[1]
564	if type(a) is type(b):
565	if isinstance(a, PrefixCond) and isinstance(b, PrefixCond):
566	# longest prefix first
567	return cmp(len(b.prefix), len(a.prefix))
568	if isinstance(a, LikeCond) and isinstance(b, LikeCond):
569	# longest likestr first
570	return cmp(len(b.likestr), len(a.likestr))
571	return 0
572	if isinstance(a, ExactCond):
573	return -1
574	if isinstance(b, ExactCond):
575	return 1
576	if isinstance(a, PrefixCond):
577	return -1
578	if isinstance(b, PrefixCond):
579	return 1
580	# leave all unknown condition callables alone as equals
581	return 0
582
583	conditionlist = conditions.items()
584	conditionlist.sort(cmp_conditions)
585
586	# Apply conditions to column data to find what we want
587	cur = self.db.cursor()
588	column_num = -1
589	for column, condition in conditionlist:
590	column_num = column_num + 1
591	searchkey = _search_col_data_key(table, column)
592	# speedup: don't linear search columns within loop
593	if column in columns:
594	savethiscolumndata = 1 # save the data for return
595	else:
596	savethiscolumndata = 0 # data only used for selection
597
598	try:
599	key, data = cur.set_range(searchkey)
600	while key[:len(searchkey)] == searchkey:
601	# extract the rowid from the key
602	rowid = key[-_rowid_str_len:]
603
604	if not rejected_rowids.has_key(rowid):
605	# if no condition was specified or the condition
606	# succeeds, add row to our match list.
607	if not condition or condition(data):
608	if not matching_rowids.has_key(rowid):
609	matching_rowids[rowid] = {}
610	if savethiscolumndata:
611	matching_rowids[rowid][column] = data
612	else:
613	if matching_rowids.has_key(rowid):
614	del matching_rowids[rowid]
615	rejected_rowids[rowid] = rowid
616
617	key, data = cur.next()
618
619	except DBError, dberror:
620	if dberror[0] != DB_NOTFOUND:
621	raise
622	continue
623
624	cur.close()
625
626	# we're done selecting rows, garbage collect the reject list
627	del rejected_rowids
628
629	# extract any remaining desired column data from the
630	# database for the matching rows.
631	if len(columns) > 0:
632	for rowid, rowdata in matching_rowids.items():
633	for column in columns:
634	if rowdata.has_key(column):
635	continue
636	try:
637	rowdata[column] = self.db.get(
638	_data_key(table, column, rowid))
639	except DBError, dberror:
640	if dberror[0] != DB_NOTFOUND:
641	raise
642	rowdata[column] = None
643
644	# return the matches
645	return matching_rowids
646
647
648	def Drop(self, table):
649	"""Remove an entire table from the database"""
650	txn = None
651	try:
652	txn = self.env.txn_begin()
653
654	# delete the column list
655	self.db.delete(_columns_key(table), txn)
656
657	cur = self.db.cursor(txn)
658
659	# delete all keys containing this tables column and row info
660	table_key = _search_all_data_key(table)
661	while 1:
662	try:
663	key, data = cur.set_range(table_key)
664	except DBNotFoundError:
665	break
666	# only delete items in this table
667	if key[:len(table_key)] != table_key:
668	break
669	cur.delete()
670
671	# delete all rowids used by this table
672	table_key = _search_rowid_key(table)
673	while 1:
674	try:
675	key, data = cur.set_range(table_key)
676	except DBNotFoundError:
677	break
678	# only delete items in this table
679	if key[:len(table_key)] != table_key:
680	break
681	cur.delete()
682
683	cur.close()
684
685	# delete the tablename from the table name list
686	tablelist = pickle.loads(
687	self.db.get(_table_names_key, txn=txn, flags=DB_RMW))
688	try:
689	tablelist.remove(table)
690	except ValueError:
691	# hmm, it wasn't there, oh well, that's what we want.
692	pass
693	# delete 1st, incase we opened with DB_DUP
694	self.db.delete(_table_names_key, txn)
695	self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
696
697	txn.commit()
698	txn = None
699
700	if self.__tablecolumns.has_key(table):
701	del self.__tablecolumns[table]
702
703	except DBError, dberror:
704	if txn:
705	txn.abort()
706	raise TableDBError, dberror[1]

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/essentials/dev-lang/python/Lib/bsddb/dbtables.py

Download in other formats: