#
# littletable.py
#

__doc__ = """\

C{littletable} - a Python module to give ORM-like access to a collection of objects

The C{littletable} module provides low-overhead, schema-less, in-memory database access to a
collection of user objects. C{littletable} provides a L{DataObject} class for ad hoc creation
of semi-immutable objects that can be stored in a C{littletable} L{Table}.

In addition to basic ORM-style insert/remove/query/delete access to the contents of a
Table, C{littletable} offers:
 - simple indexing for improved retrieval performance, with optional enforcement of key uniqueness
 - access to objects using indexed attributes
 - simplified joins using '+' operator syntax between annotated Tables
 - the result of any query or join is a new first-class C{littletable} Table

C{littletable} Tables do not require an upfront schema definition, but simply work off of the
attributes in the stored values, and those referenced in any query parameters.

Here is a simple C{littletable} data storage/retrieval example::

    from littletable import Table, DataObject

    customers = Table('customers')
    customers.create_index("id", unique=True)
    customers.insert(DataObject(id="0010", name="George Jetson"))
    customers.insert(DataObject(id="0020", name="Wile E. Coyote"))
    customers.insert(DataObject(id="0030", name="Jonny Quest"))

    catalog = Table('catalog')
    catalog.create_index("sku", unique=True)
    catalog.insert(DataObject(sku="ANVIL-001", descr="1000lb anvil", unitofmeas="EA", unitprice=100))
    catalog.insert(DataObject(sku="BRDSD-001", descr="Bird seed", unitofmeas="LB", unitprice=3))
    catalog.insert(DataObject(sku="MAGNT-001", descr="Magnet", unitofmeas="EA", unitprice=8))
    catalog.insert(DataObject(sku="MAGLS-001", descr="Magnifying glass", unitofmeas="EA", unitprice=12))
    print(catalog.by.sku["ANVIL-001"].descr)

    wishitems = Table('wishitems')
    wishitems.create_index("custid")
    wishitems.create_index("sku")
    wishitems.insert(DataObject(custid="0020", sku="ANVIL-001"))
    wishitems.insert(DataObject(custid="0020", sku="BRDSD-001"))
    wishitems.insert(DataObject(custid="0020", sku="MAGNT-001"))
    wishitems.insert(DataObject(custid="0030", sku="MAGNT-001"))
    wishitems.insert(DataObject(custid="0030", sku="MAGLS-001"))

    # print a particular customer name
    # (unique indexes will return a single item; non-unique
    # indexes will return a list of all matching items)
    print(customers.by.id["0030"].name)

    # print all items sold by the pound
    for item in catalog.query(unitofmeas="LB"):
        print(item.sku, item.descr)

    # print all items that cost more than 10
    for item in catalog.where(lambda o: o.unitprice > 10):
        print(item.sku, item.descr, item.unitprice)

    # join tables to create queryable wishlists collection
    wishlists = customers.join_on("id") + wishitems.join_on("custid") + catalog.join_on("sku")

    # print all wishlist items with price > 10
    bigticketitems = wishlists().where(lambda ob: ob.unitprice > 10)
    for item in bigticketitems:
        print(item)

    # list all wishlist items in descending order by price
    for item in wishlists().sort("unitprice desc"):
        print(item)
"""

__version__ = "0.9"
__versionTime__ = "27 Jun 2016 10:02"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"

import sys
PY_3 = sys.version_info[0] == 3

from collections import defaultdict, deque, namedtuple
from itertools import groupby, islice, starmap, repeat
if not PY_3:
    from itertools import ifilter as filter

from operator import attrgetter
import csv
import json
import re

# zero-length deque used as a fast "consume this generator" sink;
# do_all(gen) runs a generator purely for its side effects
_consumer = deque(maxlen=0)
do_all = _consumer.extend

try:
    from itertools import product
except ImportError:
    # itertools.product is not available (pre-2.6 Python) - provide a simple substitute
    def product(*seqs):
        tupleseqs = [[(x,) for x in s] for s in seqs]
        def _product(*seqs):
            if len(seqs) == 1:
                for x in seqs[0]:
                    yield x
            else:
                for x in seqs[0]:
                    for p in _product(*seqs[1:]):
                        yield x+p
        for p in _product(*tupleseqs):
            yield p

try:
    # Python 2 - basestring is defined
    basestring
except NameError:
    # Python 3 - use str in place of basestring
    basestring = str

__all__ = ["DataObject", "Table", "JoinTerm", "PivotTable"]

def _object_attrnames(obj):
    if hasattr(obj, "__dict__"):
        # an ordinary Python object - use its __dict__
        return obj.__dict__.keys()
    elif isinstance(obj, tuple) and hasattr(obj, "_fields"):
        # a namedtuple - use its _fields
        return obj._fields
    elif hasattr(obj, "__slots__"):
        return obj.__slots__
    else:
        raise ValueError("object with unknown attributes")

def _to_json(obj):
    if hasattr(obj, "__dict__"):
        # an ordinary Python object - serialize its __dict__
        return json.dumps(obj.__dict__)
    elif isinstance(obj, tuple) and hasattr(obj, "_fields"):
        # a namedtuple - serialize its fields as a dict
        return json.dumps(dict(zip(obj._fields, obj)))
    elif hasattr(obj, "__slots__"):
        return json.dumps({k:v for k,v in zip(obj.__slots__,
                                              (getattr(obj,a) for a in obj.__slots__))})
    else:
        raise ValueError("object with unknown attributes")

class DataObject(object):
    """A generic semi-mutable object for storing data values in a table. Attributes
    can be set by passing in named arguments in the constructor, or by setting them
    as C{object.attribute = value}. New attributes can be added at any time, but updates
    to existing attributes are ignored. Table joins are returned as a Table of DataObjects."""
    def __init__(self, **kwargs):
        if kwargs:
            self.__dict__.update(kwargs)
    def __repr__(self):
        return repr(self.__dict__)
    def __setattr__(self, attr, val):
        # attributes are write-once: new attributes are accepted, updates to
        # existing attributes are silently ignored (use Table.add_field to
        # recompute attribute values)
        if attr not in self.__dict__:
            super(DataObject, self).__setattr__(attr, val)
    def __getitem__(self, k):
        if hasattr(self, k):
            return getattr(self, k)
        else:
            raise KeyError("object has no such attribute " + k)
    def __eq__(self, other):
        return self.__dict__ == other.__dict__

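# Illustrative usage sketch (not from the original module) of DataObject's
# write-once behavior as reconstructed above; Table.add_field is the supported
# way to replace an existing attribute value:
#
#   row = DataObject(sku="ANVIL-001", unitprice=100)
#   row.descr = "1000lb anvil"    # new attribute - stored
#   row.unitprice = 120           # update of an existing attribute - has no effect
#   print(row["unitprice"])       # dict-style access via __getitem__ -> 100
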
class _ObjIndex(object):
    # non-unique index: maps each attribute value to a list of matching objects
    def __init__(self, attr):
        self.attr = attr
        self.obs = defaultdict(list)
        self.is_unique = False
    def __setitem__(self, k, v):
        self.obs[k].append(v)
    def __getitem__(self, k):
        return self.obs.get(k,[])
    def __len__(self):
        return len(self.obs)
    def __iter__(self):
        return iter(self.obs)
    def keys(self):
        return sorted(filter(None, self.obs.keys()))
    def items(self):
        return self.obs.items()
    def remove(self, obj):
        try:
            k = getattr(obj, self.attr)
            self.obs[k].remove(obj)
        except (ValueError,AttributeError,KeyError):
            pass
    def __contains__(self, key):
        return key in self.obs
    def copy_template(self):
        return self.__class__(self.attr)

class _UniqueObjIndex(_ObjIndex):
    # unique index: maps each attribute value to a single object
    def __init__(self, attr, accept_none=False):
        self.attr = attr
        self.obs = {}
        self.is_unique = True
        self.accept_none = accept_none
        self.none_values = set()
    def __setitem__(self, k, v):
        if k:
            if k not in self.obs:
                self.obs[k] = v
            else:
                raise KeyError("duplicate key value %s" % k)
        else:
            self.none_values.add(v)
    def __getitem__(self, k):
        if k:
            return [self.obs.get(k)] if k in self.obs else []
        else:
            return list(self.none_values)
    def __contains__(self, k):
        if k:
            return k in self.obs
        else:
            return self.accept_none and self.none_values
    def keys(self):
        return sorted(self.obs.keys()) + ([None,] if self.none_values else [])
    def items(self):
        return [(k,[v]) for k,v in self.obs.items()]
    def remove(self, obj):
        k = getattr(obj, self.attr)
        if k:
            if k in self.obs:
                del self.obs[k]
        else:
            self.none_values.discard(obj)

class _ObjIndexWrapper(object):
    # provides dict-like access to a non-unique index, returning matches as a new Table
    def __init__(self, ind):
        self._index = ind
    def __getattr__(self, attr):
        return getattr(self._index, attr)
    def __getitem__(self, k):
        ret = Table()
        if k in self._index:
            ret.insert_many(self._index[k])
        return ret
    def __contains__(self, k):
        return k in self._index


class _UniqueObjIndexWrapper(object):
    # provides dict-like access to a unique index, returning the single matching object
    def __init__(self, ind):
        self._index = ind
    def __getattr__(self, attr):
        return getattr(self._index, attr)
    def __contains__(self, k):
        return k in self._index
    def __getitem__(self, k):
        if k:
            return self._index[k][0]
        else:
            ret = Table()
            if k in self._index:
                ret.insert_many(self._index[k])
            return ret


class _IndexAccessor(object):
    def __init__(self, table):
        self.table = table

    def __getattr__(self, attr):
        """A quick way to query for matching records using their indexed attributes. The attribute
        name is used to locate the index, and returns a wrapper on the index. This wrapper provides
        dict-like access to the underlying records in the table, as in::

           employees.by.socsecnum["000-00-0000"]
           customers.by.zipcode["12345"]

        (C{'by'} is added as a pseudo-attribute on tables, to help indicate that the indexed attributes
        are not attributes of the table, but of items in the table.)

        The behavior differs slightly for unique and non-unique indexes:
          - if the index is unique, then retrieving a matching object will return just the object;
            if there is no matching object, C{KeyError} is raised (making a table with a unique
            index behave very much like a Python dict)
          - if the index is non-unique, then all matching objects will be returned in a new Table,
            just as if a regular query had been performed; if no objects match the key value, an empty
            Table is returned and no exception is raised.

        If there is no index defined for the given attribute, then C{AttributeError} is raised.
        """
        if attr in self.table._indexes:
            ret = self.table._indexes[attr]
            if isinstance(ret, _UniqueObjIndex):
                ret = _UniqueObjIndexWrapper(ret)
            if isinstance(ret, _ObjIndex):
                ret = _ObjIndexWrapper(ret)
            return ret
        raise AttributeError("Table '%s' has no index '%s'" % (self.table.table_name, attr))


class Table(object):
    """Table is the main class in C{littletable}, for representing a collection of DataObjects or
    user-defined objects with publicly accessible attributes or properties. Tables can be:
     - created, with an optional name, using the standard Python L{C{Table() constructor}<__init__>}
     - indexed, with multiple indexes, with unique or non-unique values, see L{create_index}
     - queried, specifying values to exactly match in the desired records, see L{where}
     - filtered (using L{where}), using a simple predicate function to match desired records;
       useful for selecting using inequalities or compound conditions
     - accessed directly for keyed values, using C{table.indexattribute[key]} - see L{__getattr__}
     - joined, using L{join_on} to identify the attribute to be used for joining with another table,
       and L{join} or operator '+' to perform the actual join
     - pivoted, using L{pivot} to create a nested structure of sub-tables grouping objects
       by attribute values
     - grouped, using L{groupby} to create a summary table of computed values, grouped by a key
       attribute
     - L{imported<csv_import>}/L{exported<csv_export>} to CSV-format files
    Queries and joins return their results as new Table objects, so that queries and joins can
    be easily performed as a succession of operations.
    """
    def __init__(self, table_name=''):
        """Create a new, empty Table.
        @param table_name: name for Table
        @type table_name: string (optional)
        """
        self(table_name)
        self.obs = []
        self._indexes = {}
        self._uniqueIndexes = []
        self.by = _IndexAccessor(self)

    def __len__(self):
        """Return the number of objects in the Table."""
        return len(self.obs)

    def __iter__(self):
        """Create an iterator over the objects in the Table."""
        return iter(self.obs)

    def __getitem__(self, i):
        """Provides direct indexed/sliced access to the Table's underlying list of objects."""
        if isinstance(i, slice):
            ret = self.copy_template()
            ret.insert_many(self.obs[i])
            return ret
        else:
            return self.obs[i]

    def __getattr__(self, attr):
        """A quick way to query for matching records using their indexed attributes. The attribute
        name is used to locate the index, and returns a wrapper on the index. This wrapper provides
        dict-like access to the underlying records in the table, as in::

           employees.by.socsecnum["000-00-0000"]
           customers.by.zipcode["12345"]

        (C{'by'} is added as a pseudo-attribute on tables, to help indicate that the indexed attributes
        are not attributes of the table, but of items in the table.)

        The behavior differs slightly for unique and non-unique indexes:
          - if the index is unique, then retrieving a matching object will return just the object;
            if there is no matching object, C{KeyError} is raised (making a table with a unique
            index behave very much like a Python dict)
          - if the index is non-unique, then all matching objects will be returned in a new Table,
            just as if a regular query had been performed; if no objects match the key value, an empty
            Table is returned and no exception is raised.

        If there is no index defined for the given attribute, then C{AttributeError} is raised.
        """
        if attr in self._indexes:
            ret = self._indexes[attr]
            if isinstance(ret, _UniqueObjIndex):
                ret = _UniqueObjIndexWrapper(ret)
            if isinstance(ret, _ObjIndex):
                ret = _ObjIndexWrapper(ret)
            return ret
        raise AttributeError("Table '%s' has no index '%s'" % (self.table_name, attr))

    def __bool__(self):
        return bool(self.obs)

    __nonzero__ = __bool__

    def __add__(self, other):
        """Support UNION of 2 tables using "+" operator."""
        if isinstance(other, JoinTerm):
            # other is a JoinTerm - defer to its __radd__ to build the join expression
            return other + self
        elif isinstance(other, Table):
            # other is another Table - return the union of the two
            return self.union(other)
        else:
            # other is presumed to be a sequence of objects - add them to a copy of this table
            return self.clone().insert_many(other)

    def __iadd__(self, other):
        """Support UNION of 2 tables using "+=" operator."""
        return self.insert_many(other)

    def union(self, other):
        """Return a new Table containing the contents of this table and another."""
        return self.clone().insert_many(other.obs)

    def __call__(self, table_name):
        """A simple way to assign a name to a table, such as those
        dynamically created by joins and queries.
        @param table_name: name for Table
        @type table_name: string
        """
        self.table_name = table_name
        return self

    def copy_template(self, name=None):
        """Create an empty copy of the current table, with copies of all
        index definitions.
        """
        ret = Table(self.table_name)

        # copy the index definitions (not their contents) into the new table
        ret._indexes.update(dict((k,v.copy_template()) for k,v in self._indexes.items()))
        if name is not None:
            ret(name)
        return ret

    def clone(self, name=None):
        """Create a full copy of the current table, including table contents
        and index definitions.
        """
        ret = self.copy_template()
        ret.insert_many(self.obs)
        if name is not None:
            ret(name)
        return ret

    def create_index(self, attr, unique=False, accept_none=False):
        """Create a new index on a given attribute.
        If C{unique} is True and records are found in the table with duplicate
        attribute values, the index is deleted and C{KeyError} is raised.

        If the table already has an index on the given attribute, then no
        action is taken and no exception is raised.
        @param attr: the attribute to be used for indexed access and joins
        @type attr: string
        @param unique: flag indicating whether the indexed field values are
            expected to be unique across table entries
        @type unique: boolean
        @param accept_none: flag indicating whether None is an acceptable
            unique key value for this attribute
        @type accept_none: boolean
        """
        if attr in self._indexes:
            return self

        if unique:
            self._indexes[attr] = _UniqueObjIndex(attr, accept_none)
            self._uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique]
        else:
            self._indexes[attr] = _ObjIndex(attr)
            accept_none = True
        ind = self._indexes[attr]
        try:
            for obj in self.obs:
                if hasattr(obj, attr):
                    obval = getattr(obj, attr) or None
                else:
                    obval = None
                if obval or accept_none:
                    ind[obval] = obj
                else:
                    raise KeyError("None is not an allowed key")
            return self

        except KeyError:
            # indexing failed - remove the partially built index before re-raising
            del self._indexes[attr]
            self._uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique]
            raise
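
    # Usage sketch (illustrative, not part of the original source): unique vs.
    # non-unique indexes on a hypothetical table of catalog items.
    #
    #   catalog = Table('catalog')
    #   catalog.create_index("sku", unique=True)   # one record per sku
    #   catalog.create_index("category")           # many records per category
    #   catalog.insert(DataObject(sku="ANVIL-001", category="hardware"))
    #   catalog.insert(DataObject(sku="ANVIL-001", category="hardware"))  # raises KeyError (duplicate sku)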

    def delete_index(self, attr):
        """Deletes an index from the Table. Can be used to drop and rebuild an index,
        or to convert a non-unique index to a unique index, or vice versa.
        @param attr: name of an indexed attribute
        @type attr: string
        """
        if attr in self._indexes:
            del self._indexes[attr]
            self._uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique]

    def insert(self, obj):
        """Insert a new object into this Table.
        @param obj: any Python object
        Objects can be constructed using the defined DataObject type, or they can
        be any Python object that does not use the Python C{__slots__} feature; C{littletable}
        introspects the object's C{__dict__} or C{_fields} attributes to obtain join and
        index attributes and values.

        If the table contains a unique index, and the record to be inserted would add
        a duplicate value for the indexed attribute, then C{KeyError} is raised, and the
        object is not inserted.

        If the table has no unique indexes, then it is possible to insert duplicate
        objects into the table.
        """

        # verify that the new object does not violate any unique index constraints
        # before inserting it or updating any index
        uniqueIndexes = self._uniqueIndexes
        if any((getattr(obj, ind.attr, None) is None and not ind.accept_none)
                or (
                    hasattr(obj, ind.attr) and getattr(obj, ind.attr) in ind
                )
                for ind in uniqueIndexes):
            # determine which constraint was violated, and raise a descriptive KeyError
            for ind in uniqueIndexes:
                if (getattr(obj, ind.attr, None) is None and not ind.accept_none):
                    raise KeyError("unique key cannot be None or blank for index %s" % ind.attr, obj)
                if getattr(obj, ind.attr) in ind:
                    raise KeyError("duplicate unique key value '%s' for index %s" % (getattr(obj,ind.attr), ind.attr), obj)

        self.obs.append(obj)
        for attr, ind in self._indexes.items():
            obval = getattr(obj, attr)
            ind[obval] = obj
        return self

    def insert_many(self, it):
        """Inserts a collection of objects into the table."""

        # use do_all to consume the generator of insert() calls
        do_all(self.insert(ob) for ob in it)
        return self

    def remove(self, ob):
        """Removes an object from the table. If object is not in the table, then
        no action is taken and no exception is raised."""

        # remove from indexes first (index remove() silently ignores missing objects)
        do_all(ind.remove(ob) for attr, ind in self._indexes.items())

        # remove from the table's list of objects
        try:
            self.obs.remove(ob)
        except ValueError:
            pass

    def remove_many(self, it):
        """Removes a collection of objects from the table."""

        # use do_all to consume the generator of remove() calls
        do_all(self.remove(ob) for ob in it)

    def _query_attr_sort_fn(self, attr_val):
        """Used to order query criteria by most selective key first (fewest matching records)."""
        attr, v = attr_val
        if attr in self._indexes:
            idx = self._indexes[attr]
            if v in idx:
                return len(idx[v])
            else:
                return 0
        else:
            # unindexed attributes are the least selective - evaluate them last
            return 1e9

    def where(self, wherefn=None, **kwargs):
        """
        Retrieves matching objects from the table, based on given
        named parameters. If multiple named parameters are given, then
        only objects that satisfy all of the query criteria will be returned.

        Special named args:
         - C{_orderby="attr,..."} - (Deprecated) resulting table should sort content objects
            by the C{attr}s given in a comma-separated string; to sort in
            descending order, reference the attribute as C{attr desc}.

         - C{_limit} - maximum number of records to return

        @param wherefn: a method or lambda that returns a boolean result, as in::

           lambda ob : ob.unitprice > 10

        @type wherefn: callable(object) returning boolean

        @param kwargs: attributes for selecting records, given as additional
          named arguments of the form C{attrname="attrvalue"}.

        @return: a new Table containing the matching objects
        """
        # extract the special "_"-prefixed flags from the query arguments
        flags = dict((k,v) for k,v in kwargs.items() if k.startswith("_"))
        for f in flags:
            del kwargs[f]

        if kwargs:
            # order query criteria in ascending order of number of matching items
            # for each individual given attribute; this will minimize the number
            # of filtering steps done when searching on multiple attributes at
            # the same time
            kwargs = kwargs.items()
            if len(kwargs) > 1 and len(self.obs) > 100:
                kwargs = sorted(kwargs, key=self._query_attr_sort_fn)

            ret = self
            for k, v in kwargs:
                newret = ret.copy_template()
                if k in ret._indexes:
                    newret.insert_many(ret._indexes[k][v])
                else:
                    newret.insert_many( r for r in ret.obs
                                        if hasattr(r,k) and getattr(r,k) == v )
                ret = newret
        else:
            ret = self.clone()

        # apply the special _orderby and _limit flags to the result before
        # any wherefn filtering
        if flags:
            if '_orderby' in flags:
                ret.sort(flags['_orderby'])
            if '_limit' in flags:
                del ret.obs[flags['_limit']:]

        if wherefn is not None:
            newret = ret.copy_template()
            newret.insert_many(filter(wherefn, ret.obs))
            ret = newret

        return ret

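    # Usage sketch (illustrative, not part of the original source): combining
    # keyword criteria, a predicate function, and the _limit flag on a
    # hypothetical catalog table.
    #
    #   lb_items = catalog.where(unitofmeas="LB")
    #   pricey = catalog.where(lambda ob: ob.unitprice > 10)
    #   first_two_ea = catalog.where(unitofmeas="EA", _limit=2)
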
    def delete(self, **kwargs):
        """Deletes matching objects from the table, based on given
        named parameters. If multiple named parameters are given, then
        only objects that satisfy all of the query criteria will be removed.
        @param kwargs: attributes for selecting records, given as additional
           named arguments of the form C{attrname="attrvalue"}.
        @return: the number of objects removed from the table
        """
        if not kwargs:
            return 0

        affected = self.where(**kwargs)
        self.remove_many(affected)
        return len(affected)

    def sort(self, key, reverse=False):
        """Sort Table in place, using given fields as sort key.
        @param key: if this is a string, it is a comma-separated list of field names,
           optionally followed by 'desc' to indicate descending sort instead of the
           default ascending sort; if a list or tuple, it is a list or tuple of field names
           or field names with ' desc' appended; if it is a function, then it is the
           function to be used as the sort key function
        @return: self
        """
        if isinstance(key, (basestring, list, tuple)):
            if isinstance(key, basestring):
                attrdefs = [s.strip() for s in key.split(',')]

                # build (attr, order) pairs, defaulting to ascending order, and
                # reverse the list so the least significant key is applied first
                attr_orders = [(a.split()+['asc',])[:2] for a in attrdefs][::-1]
            else:
                # key is already a sequence of (attr, order) definitions
                attr_orders = key
            attrs = [attr for attr, order in attr_orders]

            # special optimization if all sorts are ascending or all are descending
            if all(order == 'asc' for attr, order in attr_orders):
                self.obs.sort(key=attrgetter(*attrs), reverse=reverse)
            elif all(order == 'desc' for attr, order in attr_orders):
                self.obs.sort(key=attrgetter(*attrs), reverse=not reverse)
            else:
                # mix of ascending and descending sorts: rely on the stability of
                # Python's sort, and sort on each attribute in turn, least
                # significant first
                do_all(self.obs.sort(key=attrgetter(attr), reverse=(order == "desc"))
                       for attr, order in attr_orders)
        else:
            # key is a function to be used directly as the sort key
            keyfn = key
            self.obs.sort(key=keyfn, reverse=reverse)
        return self

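    # Usage sketch (illustrative, not part of the original source): string sort
    # keys versus an arbitrary key function, on a hypothetical catalog table.
    #
    #   catalog.sort("unitprice desc")               # descending by price
    #   catalog.sort("sku")                          # ascending by sku
    #   catalog.sort(key=lambda ob: len(ob.descr))   # arbitrary key function
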
    def select(self, fields, **exprs):
        """
        Create a new table containing a subset of attributes, with optionally
        newly-added fields computed from each rec in the original table.

        Special kwargs:
         - C{_unique=True} - (Deprecated) only return a set of unique rows

        @param fields: list of strings, or single space-delimited string, listing
           attribute names to be included in the output
        @type fields: list, or space-delimited string
        @param exprs: one or more named callable arguments, to compute additional fields using the given function
        @type exprs: C{name=callable}, callable takes the record as an argument, and returns the new attribute value
        If a string is passed as a callable, this string will be used using string formatting, given the record
        as a source of interpolation values. For instance, C{fullName = '%(lastName)s, %(firstName)s'}

        """
        if isinstance(fields, basestring):
            fields = fields.split()

        unique = exprs.pop('_unique', False)

        def _makeStringCallable(expr):
            # a string expr is used as a '%'-style format, interpolated from the record
            if isinstance(expr, basestring):
                return lambda rec: expr % rec
            else:
                return expr

        exprs = dict((k, _makeStringCallable(v)) for k, v in exprs.items())

        raw_tuples = []
        for rec in self.obs:
            attrvalues = tuple(getattr(rec, fieldname, None) for fieldname in fields)
            if exprs:
                attrvalues += tuple(expr(rec) for expr in exprs.values())
            raw_tuples.append(attrvalues)

        if unique:
            raw_tuples = list(set(raw_tuples))

        allNames = tuple(fields) + tuple(exprs.keys())
        return Table().insert_many(DataObject(**dict(zip(allNames, outtuple))) for outtuple in raw_tuples)

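    # Usage sketch (illustrative, not part of the original source): project a
    # subset of attributes from a hypothetical catalog table and add a computed
    # field and a string-interpolated field.
    #
    #   brief = catalog.select("sku unitprice",
    #                          discounted=lambda ob: ob.unitprice * 0.9,
    #                          label="%(sku)s @ %(unitprice)s")
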

    def join(self, other, attrlist=None, auto_create_indexes=True, **kwargs):
        """
        Join the objects of one table with the objects of another, based on the given
        matching attributes in the named arguments. The attrlist specifies the attributes to
        be copied from the source tables - if omitted, all attributes will be copied. Entries
        in the attrlist may be single attribute names, or if there are duplicate names in both
        tables, then a C{(table,attributename)} tuple can be given to disambiguate which
        attribute is desired. A C{(table,attributename,alias)} tuple can also be passed, to
        rename an attribute from a source table.

        This method may be called directly, or can be constructed using the L{join_on} method and
        the '+' operator. Using this syntax, the join is specified using C{table.join_on("xyz")}
        to create a JoinTerm containing both table and joining attribute. Multiple JoinTerm
        or tables can be added to construct a compound join expression. When complete, the
        join expression gets executed by calling the resulting join definition,
        using C{join_expression([attrlist])}.

        @param other: other table to join to
        @param attrlist: list of attributes to be copied to the new joined table; if
            none provided, all attributes of both tables will be used (taken from the first
            object in each table)
        @type attrlist: string, or list of strings or C{(table,attribute[,alias])} tuples
            (list may contain both strings and tuples)
        @param kwargs: attributes to join on, given as additional named arguments
            of the form C{table1attr="table2attr"}, or a dict mapping attribute names.
        @returns: a new Table containing the joined data as new DataObjects
        """
        if not kwargs:
            raise TypeError("must specify at least one join attribute as a named argument")
        thiscol, othercol = next(iter(kwargs.items()))

        retname = ("(%s:%s^%s:%s)" %
                   (self.table_name, thiscol, other.table_name, othercol))

        # if either table is empty, the join is empty - just return an empty
        # table with the computed name
        if not (self.obs and other.obs):
            return Table(retname)

        if isinstance(attrlist, basestring):
            attrlist = re.split(r'[,\s]+', attrlist)

        # expand the attribute list into full (table, column, alias) tuples
        thisnames = set(_object_attrnames(self.obs[0]))
        othernames = set(_object_attrnames(other.obs[0]))
        fullcols = []
        if attrlist is not None:
            for col in attrlist:
                if isinstance(col, tuple):
                    # assume col contains at least (table, colname); fill in the
                    # alias with the colname if no alias was given
                    fullcols.append((col + (col[1],))[:3])
                else:
                    if col in thisnames:
                        fullcols.append( (self, col, col) )
                    elif col in othernames:
                        fullcols.append( (other, col, col) )
                    else:
                        raise ValueError("join attribute not found: " + col)
        else:
            fullcols = [(self,n,n) for n in thisnames]
            fullcols += [(other,n,n) for n in othernames]

        thiscols = list(filter(lambda o: o[0] is self, fullcols))
        othercols = list(filter(lambda o: o[0] is other, fullcols))

        if auto_create_indexes:
            if thiscol not in self._indexes:
                self.create_index(thiscol)
            if othercol not in other._indexes:
                other.create_index(othercol)

        thiscolindex = othercolindex = None
        if thiscol in self._indexes:
            thiscolindex = self._indexes[thiscol]
        else:
            raise ValueError("indexed attribute required for join: "+thiscol)
        if othercol in other._indexes:
            othercolindex = other._indexes[othercol]
        else:
            raise ValueError("indexed attribute required for join: "+othercol)

        # use the table with fewer keys to drive the join
        if len(thiscolindex) < len(othercolindex):
            shortindex, longindex = (thiscolindex, othercolindex)
            swap = False
        else:
            shortindex, longindex = (othercolindex, thiscolindex)
            swap = True

        # find matching rows: for each key in the shorter index, look up the
        # same key in the longer index; keys that appear in only one index
        # contribute no rows to the result (an inner join). The swap flag
        # restores the (thisrows, otherrows) ordering so that the attribute
        # copying below can always treat the first element as coming from self
        # and the second as coming from other.
        matchingrows = list((longindex[key], rows) if swap else (rows, longindex[key])
                            for key, rows in shortindex.items())

        joinrows = []
        for thisrows, otherrows in matchingrows:
            for trow, orow in product(thisrows, otherrows):
                retobj = DataObject()

                # copy the selected attributes from the row of this table (applying any alias)
                do_all(setattr(retobj, a, getattr(trow, c)) for _, c, a in thiscols)

                # copy the selected attributes from the row of the other table (applying any alias)
                do_all(setattr(retobj, a, getattr(orow, c)) for _, c, a in othercols)
                joinrows.append(retobj)

        ret = Table(retname)
        for tbl, collist in zip([self, other], [thiscols, othercols]):
            for _, c, a in collist:
                if c in tbl._indexes:
                    ret.create_index(a)
        ret.insert_many(joinrows)
        return ret

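    # Usage sketch (illustrative, not part of the original source): a direct
    # join() call and the equivalent join_on()/'+' expression, selecting and
    # renaming attributes with (table, attribute, alias) tuples, for the
    # hypothetical customers/wishitems tables from the module docstring.
    #
    #   custwish = customers.join(wishitems, id="custid")
    #   custwish = (customers.join_on("id") + wishitems.join_on("custid"))(
    #                   ["name", "sku", (customers, "id", "custid")])
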
    def join_on(self, attr):
        """Creates a JoinTerm in preparation for joining with another table, to
        indicate what attribute should be used in the join. Only indexed attributes
        may be used in a join.
        @param attr: attribute name to join from this table (may be different
          from the attribute name in the table being joined to)
        @type attr: string
        @returns: L{JoinTerm}"""
        if attr not in self._indexes:
            raise ValueError("can only join on indexed attributes")
        return JoinTerm(self, attr)

    def pivot(self, attrlist):
        """Pivots the data using the given attributes, returning a L{PivotTable}.
        @param attrlist: list of attributes to be used to construct the pivot table
        @type attrlist: list of strings, or string of space-delimited attribute names
        """
        if isinstance(attrlist, basestring):
            attrlist = attrlist.split()
        if all(a in self._indexes for a in attrlist):
            return PivotTable(self, [], attrlist)
        else:
            raise ValueError("pivot can only be called using indexed attributes")

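    # Usage sketch (illustrative, not part of the original source): pivot a
    # hypothetical catalog table on an indexed attribute and report counts.
    #
    #   catalog.create_index("unitofmeas")
    #   by_uom = catalog.pivot("unitofmeas")
    #   by_uom.dump_counts()     # tabular count of items per unit of measure
    #   by_uom["EA"]             # sub-table of items sold by the each
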
    def _import(self, source, encoding, transforms=None, reader=csv.DictReader):
        close_on_exit = False
        if isinstance(source, basestring):
            if PY_3:
                source = open(source, encoding=encoding)
            else:
                source = open(source)
            close_on_exit = True
        try:
            csvdata = reader(source)
            self.insert_many(DataObject(**s) for s in csvdata)
            if transforms:
                for attr, fn in transforms.items():
                    default = None
                    if isinstance(fn, tuple):
                        fn, default = fn
                    # bind attr and fn as defaults so each transform keeps its own values
                    objfn = lambda obj, attr=attr, fn=fn: fn(getattr(obj, attr))
                    self.add_field(attr, objfn, default)
        finally:
            if close_on_exit:
                source.close()

    def csv_import(self, csv_source, encoding='UTF-8', transforms=None):
        """Imports the contents of a CSV-formatted file into this table.
        @param csv_source: CSV file - if a string is given, the file with that name will be
            opened, read, and closed; if a file object is given, then that object
            will be read as-is, and left for the caller to be closed.
        @type csv_source: string or file
        @param transforms: dict of functions by attribute name; if given, each
            attribute will be transformed using the corresponding transform; if there is no
            matching transform, the attribute will be read as a string (default); the
            transform function can also be defined as a (function, default-value) tuple; if
            there is an Exception raised by the transform function, then the attribute will
            be set to the given default value
        @type transforms: dict (optional)
        """
        return self._import(csv_source, encoding, transforms)

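    # Usage sketch (illustrative, not part of the original source): import a CSV
    # file (hypothetical "catalog.csv") and convert the unitprice column to int,
    # falling back to 0 when a value fails to parse.
    #
    #   catalog = Table("catalog")
    #   catalog.csv_import("catalog.csv", transforms={"unitprice": (int, 0)})
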
    def _xsv_import(self, xsv_source, encoding='UTF-8', transforms=None, splitstr="\t"):
        xsv_reader = lambda src: csv.DictReader(src, delimiter=splitstr)
        return self._import(xsv_source, encoding, transforms=transforms, reader=xsv_reader)

    def tsv_import(self, xsv_source, transforms=None):
        """Imports the contents of a tab-separated data file into this table.
        @param xsv_source: tab-separated data file - if a string is given, the file with that name will be
            opened, read, and closed; if a file object is given, then that object
            will be read as-is, and left for the caller to be closed.
        @type xsv_source: string or file
        @param transforms: dict of functions by attribute name; if given, each
            attribute will be transformed using the corresponding transform; if there is no
            matching transform, the attribute will be read as a string (default); the
            transform function can also be defined as a (function, default-value) tuple; if
            there is an Exception raised by the transform function, then the attribute will
            be set to the given default value
        @type transforms: dict (optional)
        """
        return self._xsv_import(xsv_source, transforms=transforms, splitstr="\t")

    def csv_export(self, csv_dest, fieldnames=None):
        """Exports the contents of the table to a CSV-formatted file.
        @param csv_dest: CSV file - if a string is given, the file with that name will be
            opened, written, and closed; if a file object is given, then that object
            will be written as-is, and left for the caller to be closed.
        @type csv_dest: string or file
        @param fieldnames: attribute names to be exported; can be given as a single
            string with space-delimited names, or as a list of attribute names
        """
        close_on_exit = False
        if isinstance(csv_dest, basestring):
            if PY_3:
                csv_dest = open(csv_dest, 'w', newline='')
            else:
                csv_dest = open(csv_dest, 'wb')
            close_on_exit = True
        try:
            if fieldnames is None:
                fieldnames = list(_object_attrnames(self.obs[0]))
            if isinstance(fieldnames, basestring):
                fieldnames = fieldnames.split()

            csv_dest.write(','.join(fieldnames) + '\n')
            csvout = csv.DictWriter(csv_dest, fieldnames, extrasaction='ignore')
            if hasattr(self.obs[0], "__dict__"):
                # ordinary objects - write their __dict__'s directly
                do_all(csvout.writerow(o.__dict__) for o in self.obs)
            else:
                # namedtuples and other objects without a __dict__ - build a
                # field dict for each object before writing it out
                do_all(csvout.writerow(dict(starmap(lambda obj, fld: (fld, getattr(obj, fld)),
                                                    zip(repeat(o), fieldnames)))) for o in self.obs)
        finally:
            if close_on_exit:
                csv_dest.close()

    def json_import(self, source, encoding='UTF-8', transforms=None):
        """Imports the contents of a JSON data file into this table.
        @param source: JSON data file - if a string is given, the file with that name will be
            opened, read, and closed; if a file object is given, then that object
            will be read as-is, and left for the caller to be closed.
        @type source: string or file
        @param transforms: dict of functions by attribute name; if given, each
            attribute will be transformed using the corresponding transform; if there is no
            matching transform, the attribute will be read as a string (default); the
            transform function can also be defined as a (function, default-value) tuple; if
            there is an Exception raised by the transform function, then the attribute will
            be set to the given default value
        @type transforms: dict (optional)
        """
        class _JsonFileReader(object):
            def __init__(self, src):
                self.source = src
            def __iter__(self):
                # accumulate lines until they parse as a complete JSON object,
                # so that records may span multiple lines
                current = ''
                for line in self.source:
                    if current:
                        current += ' '
                    current += line
                    try:
                        yield json.loads(current)
                        current = ''
                    except Exception:
                        pass
        return self._import(source, encoding, transforms=transforms, reader=_JsonFileReader)

    def json_export(self, dest, fieldnames=None):
        """Exports the contents of the table to a JSON-formatted file.
        @param dest: output file - if a string is given, the file with that name will be
            opened, written, and closed; if a file object is given, then that object
            will be written as-is, and left for the caller to be closed.
        @type dest: string or file
        @param fieldnames: attribute names to be exported; can be given as a single
            string with space-delimited names, or as a list of attribute names
        """
        close_on_exit = False
        if isinstance(dest, basestring):
            dest = open(dest, 'w')
            close_on_exit = True
        try:
            if isinstance(fieldnames, basestring):
                fieldnames = fieldnames.split()

            if fieldnames is None:
                do_all(
                    dest.write(_to_json(o) + '\n') for o in self.obs
                )
            else:
                do_all(
                    dest.write(json.dumps({f: getattr(o, f) for f in fieldnames}) + '\n')
                    for o in self.obs
                )
        finally:
            if close_on_exit:
                dest.close()

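    # Usage sketch (illustrative, not part of the original source): round-trip a
    # table through a hypothetical "catalog.json" file containing one JSON object
    # per line.
    #
    #   catalog.json_export("catalog.json")
    #   restored = Table("catalog")
    #   restored.json_import("catalog.json", transforms={"unitprice": (int, 0)})
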
    def add_field(self, attrname, fn, default=None):
        """Computes a new attribute for each object in table, or replaces an
        existing attribute in each record with a computed value
        @param attrname: attribute to compute for each object
        @type attrname: string
        @param fn: function used to compute new attribute value, based on
          other values in the object, as in::

              lambda ob : ob.commission_pct/100.0 * ob.gross_sales

        @type fn: function(obj) returns value
        @param default: value to use if an exception is raised while trying
          to evaluate fn
        """

        def _addFieldToRec(rec, fn=fn, default=default):
            try:
                val = fn(rec)
            except Exception:
                val = default
            if isinstance(rec, DataObject):
                # bypass DataObject's write-once __setattr__ so that existing
                # attributes can be replaced with the computed value
                object.__setattr__(rec, attrname, val)
            else:
                setattr(rec, attrname, val)
        do_all(_addFieldToRec(r) for r in self)
        return self

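    # Usage sketch (illustrative, not part of the original source): compute a new
    # attribute for every record in a hypothetical catalog table; records whose
    # computation raises fall back to the default value (None here).
    #
    #   catalog.add_field("price_per_lb",
    #                     lambda ob: ob.unitprice if ob.unitofmeas == "LB" else None)
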
    def addfield(self, attrname, fn, default=None):
        # alternate spelling for add_field()
        return self.add_field(attrname, fn, default)

    def groupby(self, keyexpr, **outexprs):
        """simple prototype of group by, with support for expressions in the group-by clause
        and outputs
        @param keyexpr: grouping field and optional expression for computing the key value;
             if a string is passed, it is a space-delimited list of attribute names used to
             build the grouping key; if a tuple is passed, it is a (attribute-name, key-function)
             pair, where the key-function computes the key value from each record
        @type keyexpr: string or tuple
        @param outexprs: named arguments describing one or more summary values to
            compute per key
        @type outexprs: callable, taking a sequence of objects as input and returning
            a single summary value
        """
        if isinstance(keyexpr, basestring):
            keyattrs = keyexpr.split()
            keyfn = lambda o: tuple(getattr(o, k) for k in keyattrs)

        elif isinstance(keyexpr, tuple):
            keyattrs = (keyexpr[0],)
            keyfn = keyexpr[1]

        groupedobs = defaultdict(list)

        # bucket the objects of this table by their key value
        do_all(groupedobs[keyfn(ob)].append(ob) for ob in self.obs)

        tbl = Table()

        # create indexes on the key attributes in the summary table
        do_all(tbl.create_index(k, unique=(len(keyattrs) == 1)) for k in keyattrs)
        for key, recs in sorted(groupedobs.items()):
            groupobj = DataObject(**dict(zip(keyattrs, key)))

            # compute each summary value for this group and add it to the group object
            do_all(setattr(groupobj, subkey, expr(recs))
                   for subkey, expr in outexprs.items())
            tbl.insert(groupobj)
        return tbl

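    # Usage sketch (illustrative, not part of the original source): group a
    # hypothetical catalog table by unit of measure, counting items and totaling
    # price per group.
    #
    #   summary = catalog.groupby("unitofmeas",
    #                             count=len,
    #                             total=lambda recs: sum(r.unitprice for r in recs))
    #   for row in summary:
    #       print(row.unitofmeas, row.count, row.total)
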
    def unique(self):
        """Returns a new Table of objects, with duplicate records removed
        (only the first occurrence of each distinct record is kept).
        """
        ret = self.copy_template()
        seen = set()
        for rec in self:
            reckey = tuple(rec.__dict__.values())
            if reckey not in seen:
                seen.add(reckey)
                ret.insert(rec)
        return ret


class PivotTable(Table):
    """Enhanced Table containing pivot results from calling table.pivot().
    """
    def __init__(self, parent, attr_val_path, attrlist):
        """PivotTable initializer - do not create these directly, use
        L{Table.pivot}.
        """
        super(PivotTable, self).__init__()
        self._attr_path = attr_val_path[:]
        self._pivot_attrs = attrlist[:]
        self._subtable_dict = {}

        # the top-level pivot table holds all of the parent's objects;
        # sub-tables hold only the objects matching their key path
        self._indexes.update(dict((k,v.copy_template()) for k,v in parent._indexes.items()))
        if not attr_val_path:
            self.insert_many(parent.obs)
        else:
            attr, val = attr_val_path[-1]
            self.insert_many(parent.where(**{attr:val}))
            parent._subtable_dict[val] = self

        if len(attrlist) > 0:
            this_attr = attrlist[0]
            sub_attrlist = attrlist[1:]
            ind = parent._indexes[this_attr]
            self.subtables = [ PivotTable(self,
                                          attr_val_path + [(this_attr,k)],
                                          sub_attrlist) for k in sorted(ind.keys()) ]
        else:
            self.subtables = []

    def __getitem__(self, val):
        if self._subtable_dict:
            return self._subtable_dict[val]
        else:
            return super(PivotTable, self).__getitem__(val)

    def keys(self):
        return sorted(self._subtable_dict.keys())

    def items(self):
        return sorted(self._subtable_dict.items())

    def values(self):
        return [self._subtable_dict[k] for k in self.keys()]

    def pivot_key(self):
        """Return the set of attribute-value pairs that define the contents of this
        table within the original source table.
        """
        return self._attr_path

    def pivot_key_str(self):
        """Return the pivot_key as a displayable string.
        """
        return '/'.join("%s:%s" % (attr, key) for attr, key in self._attr_path)

    def has_subtables(self):
        """Return whether this table has further subtables.
        """
        return bool(self.subtables)

    def dump(self, out=sys.stdout, row_fn=repr, limit=-1, indent=0):
        """Dump out the contents of this table in a nested listing.
        @param out: output stream to write to
        @param row_fn: function to call to display individual rows
        @param limit: number of records to show at deepest level of pivot (-1=show all)
        @param indent: current nesting level
        """
        NL = '\n'
        if indent:
            out.write(" "*indent + self.pivot_key_str())
        else:
            out.write("Pivot: %s" % ','.join(self._pivot_attrs))
        out.write(NL)
        if self.has_subtables():
            # recursively dump each non-empty subtable, indented one more level
            do_all(sub.dump(out, row_fn, limit, indent+1) for sub in self.subtables if sub)
        else:
            if limit >= 0:
                showslice = slice(0, limit)
            else:
                showslice = slice(None, None)

            # dump the (possibly limited) rows at this deepest level of the pivot
            do_all(out.write(" "*(indent+1) + row_fn(r) + NL)
                   for r in self.obs[showslice])
        out.flush()

    def dump_counts(self, out=sys.stdout, count_fn=len, colwidth=10):
        """Dump out the summary counts of entries in this pivot table as a tabular listing.
        @param out: output stream to write to
        """
        if len(self._pivot_attrs) == 1:
            out.write("Pivot: %s\n" % ','.join(self._pivot_attrs))
            maxkeylen = max(len(str(k)) for k in self.keys())
            maxvallen = colwidth
            keytally = {}
            for k, sub in self.items():
                sub_v = count_fn(sub)
                maxvallen = max(maxvallen, len(str(sub_v)))
                keytally[k] = sub_v
            for k, sub in self.items():
                out.write("%-*.*s " % (maxkeylen, maxkeylen, k))
                out.write("%*s\n" % (maxvallen, keytally[k]))
        elif len(self._pivot_attrs) == 2:
            out.write("Pivot: %s\n" % ','.join(self._pivot_attrs))
            maxkeylen = max(max(len(str(k)) for k in self.keys()), 5)
            maxvallen = max(max(len(str(k)) for k in self.subtables[0].keys()), colwidth)
            keytally = dict((k, 0) for k in self.subtables[0].keys())
            out.write("%*s " % (maxkeylen, ''))
            out.write(' '.join("%*.*s" % (maxvallen, maxvallen, k) for k in self.subtables[0].keys()))
            out.write(' %*s\n' % (maxvallen, 'Total'))
            for k, sub in self.items():
                out.write("%-*.*s " % (maxkeylen, maxkeylen, k))
                for kk, ssub in sub.items():
                    ssub_v = count_fn(ssub)
                    out.write("%*d " % (maxvallen, ssub_v))
                    keytally[kk] += ssub_v
                    maxvallen = max(maxvallen, len(str(ssub_v)))
                sub_v = count_fn(sub)
                maxvallen = max(maxvallen, len(str(sub_v)))
                out.write("%*d\n" % (maxvallen, sub_v))
            out.write('%-*.*s ' % (maxkeylen, maxkeylen, "Total"))
            out.write(' '.join("%*d" % (maxvallen, tally) for k, tally in sorted(keytally.items())))
            out.write(" %*d\n" % (maxvallen, sum(tally for k, tally in keytally.items())))
        else:
            raise ValueError("can only dump summary counts for 1 or 2-attribute pivots")

    def summary_counts(self, fn=None, col=None, summarycolname=None):
        """Dump out the summary counts of this pivot table as a Table.
        """
        if summarycolname is None:
            summarycolname = col
        ret = Table()
        topattr = self._pivot_attrs[0]

        # index the summary table on each pivot attribute
        do_all(ret.create_index(attr) for attr in self._pivot_attrs)
        if len(self._pivot_attrs) == 1:
            for sub in self.subtables:
                subattr, subval = sub._attr_path[-1]
                attrdict = {subattr: subval}
                if fn is None:
                    attrdict['Count'] = len(sub)
                else:
                    attrdict[summarycolname] = fn(s[col] for s in sub)
                ret.insert(DataObject(**attrdict))
        elif len(self._pivot_attrs) == 2:
            for sub in self.subtables:
                for ssub in sub.subtables:
                    attrdict = dict(ssub._attr_path)
                    if fn is None:
                        attrdict['Count'] = len(ssub)
                    else:
                        attrdict[summarycolname] = fn(s[col] for s in ssub)
                    ret.insert(DataObject(**attrdict))
        elif len(self._pivot_attrs) == 3:
            for sub in self.subtables:
                for ssub in sub.subtables:
                    for sssub in ssub.subtables:
                        attrdict = dict(sssub._attr_path)
                        if fn is None:
                            attrdict['Count'] = len(sssub)
                        else:
                            attrdict[summarycolname] = fn(s[col] for s in sssub)
                        ret.insert(DataObject(**attrdict))
        else:
            raise ValueError("can only summarize counts for 1, 2 or 3-attribute pivots")
        return ret


class JoinTerm(object):
    """Temporary object created while composing a join across tables using
    L{Table.join_on} and '+' addition. JoinTerm's are usually created by
    calling join_on on a Table object, as in::

        customers.join_on("id") + orders.join_on("custid")

    This join expression would set up the join relationship
    equivalent to::

        customers.join(orders, id="custid")

    If tables are being joined on attributes that have the same name in
    both tables, then a join expression could be created by adding a
    JoinTerm of one table directly to the other table::

        customers.join_on("custid") + orders

    Once the join expression is composed, the actual join is performed
    using function call notation::

        customerorders = customers.join_on("custid") + orders
        for custord in customerorders():
            print(custord)

    When calling the join expression, you can optionally specify a
    list of attributes as defined in L{Table.join}.
    """
    def __init__(self, sourceTable, joinfield):
        self.sourcetable = sourceTable
        self.joinfield = joinfield
        self.jointo = None

    def __add__(self, other):
        if isinstance(other, Table):
            other = other.join_on(self.joinfield)
        if isinstance(other, JoinTerm):
            if self.jointo is None:
                if other.jointo is None:
                    self.jointo = other
                else:
                    self.jointo = other()
                return self
            else:
                if other.jointo is None:
                    return self() + other
                else:
                    return self() + other()
        raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)

    def __radd__(self, other):
        if isinstance(other, Table):
            return other.join_on(self.joinfield) + self
        raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)

    def __call__(self, attrs=None):
        if self.jointo:
            other = self.jointo
            if isinstance(other, Table):
                other = other.join_on(self.joinfield)
            ret = self.sourcetable.join(other.sourcetable, attrs,
                                        **{self.joinfield: other.joinfield})
            return ret
        else:
            return self.sourcetable.query()



if __name__ == "__main__":

    # use simplejson if available, otherwise fall back to the standard json module
    from functools import partial
    try:
        import simplejson as json
        json_dumps = partial(json.dumps, indent=' ')
    except ImportError:
        import json
        json_dumps = partial(json.dumps, indent=2)

    # some simple demo data - a list of weather stations
    rawdata = """\
Phoenix:AZ:85001:KPHX
Phoenix:AZ:85001:KPHY
Phoenix:AZ:85001:KPHA
Dallas:TX:75201:KDFW""".splitlines()

    # a table of stations, with a unique index on the station call letters
    stations = Table()
    stations.create_index("stn", unique=True)

    fields = "city state zip stn".split()
    for d in rawdata:
        ob = DataObject()
        for k, v in zip(fields, d.split(':')):
            setattr(ob, k, v.strip())
        stations.insert(ob)

    # try a few queries by attribute value, including the _orderby flag
    for queryargs in [
            dict(city="Phoenix"),
            dict(city="Phoenix", stn="KPHX"),
            dict(stn="KPHA", city="Phoenix"),
            dict(state="TX"),
            dict(city="New York"),
            dict(city="Phoenix", _orderby="stn"),
            dict(city="Phoenix", _orderby="stn desc"),
            ]:
        print(queryargs)
        result = stations.where(**queryargs)
        print(len(result))
        for r in result: print(r)
        print('')

    # where() with no arguments returns all records
    print(list(stations.where()))
    print('')

    amfm = Table()
    amfm.create_index("stn", unique=True)
    amfm.insert(DataObject(stn="KPHY", band="AM"))
    amfm.insert(DataObject(stn="KPHX", band="FM"))
    amfm.insert(DataObject(stn="KPHA", band="FM"))
    amfm.insert(DataObject(stn="KDFW", band="FM"))
    print(amfm.by.stn["KPHY"])
    print(amfm.by.stn["KPHY"].band)

    try:
        amfm.insert(DataObject(stn="KPHA", band="AM"))
    except KeyError:
        print("duplicate key not allowed")

    print('')
    for rec in (stations.join_on("stn") + amfm.join_on("stn")
                )(["stn", "city", (amfm,"band","AMFM"),
                   (stations,"state","st")]).sort("AMFM"):
        print(repr(rec))

    print('')
    for rec in (stations.join_on("stn") + amfm.join_on("stn")
                )(["stn", "city", (amfm,"band"), (stations,"state","st")]):
        print(json_dumps(rec.__dict__))

    print('')
    for rec in (stations.join_on("stn") + amfm.join_on("stn"))():
        print(json_dumps(rec.__dict__))

    print('')
    stations.create_index("state")
    for az_stn in stations.by.state['AZ']:
        print(az_stn)

    print('')
    pivot = stations.pivot("state")
    pivot.dump_counts()

    print('')
    amfm.create_index("band")
    pivot = (stations.join_on("stn") + amfm)().pivot("state band")
    pivot.dump_counts()
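
    # Additional illustrative demo (not part of the original script): summarize
    # the joined station/band data with groupby(), counting stations per state.
    print('')
    joined = (stations.join_on("stn") + amfm)()
    for rec in joined.groupby("state", count=len):
        print(rec.state, rec.count)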