Module littletable
[frames | no frames]

Source Code for Module littletable

   1  # 
   2  # 
   3  # littletable.py 
   4  #  
   5  # littletable is a simple in-memory database for ad-hoc or user-defined objects, 
   6  # supporting simple query and join operations - useful for ORM-like access 
   7  # to a collection of data objects, without dealing with SQL 
   8  # 
   9  # 
  10  # Copyright (c) 2010-2016  Paul T. McGuire 
  11  # 
  12  # Permission is hereby granted, free of charge, to any person obtaining 
  13  # a copy of this software and associated documentation files (the 
  14  # "Software"), to deal in the Software without restriction, including 
  15  # without limitation the rights to use, copy, modify, merge, publish, 
  16  # distribute, sublicense, and/or sell copies of the Software, and to 
  17  # permit persons to whom the Software is furnished to do so, subject to 
  18  # the following conditions: 
  19  # 
  20  # The above copyright notice and this permission notice shall be 
  21  # included in all copies or substantial portions of the Software. 
  22  # 
  23  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  24  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  25  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  26  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  27  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  28  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  29  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  30  # 
  31   
  32  __doc__ = """\ 
  33   
  34  C{littletable} - a Python module to give ORM-like access to a collection of objects 
  35   
  36  The C{littletable} module provides a low-overhead, schema-less, in-memory database access to a  
  37  collection of user objects.  C{littletable} provides a L{DataObject} class for ad hoc creation 
  38  of semi-immutable objects that can be stored in a C{littletable} L{Table}. 
  39   
  40  In addition to basic ORM-style insert/remove/query/delete access to the contents of a  
  41  Table, C{littletable} offers: 
  42   - simple indexing for improved retrieval performance, and optional enforcing key uniqueness 
  43   - access to objects using indexed attributes 
  44   - simplified joins using '+' operator syntax between annotated Tables 
  45   - the result of any query or join is a new first-class C{littletable} Table 
  46   
  47  C{littletable} Tables do not require an upfront schema definition, but simply work off of the 
  48  attributes in the stored values, and those referenced in any query parameters. 
  49   
  50  Here is a simple C{littletable} data storage/retrieval example:: 
  51   
  52      from littletable import Table, DataObject 
  53   
  54      customers = Table('customers') 
  55      customers.create_index("id", unique=True) 
  56      customers.insert(DataObject(id="0010", name="George Jetson")) 
  57      customers.insert(DataObject(id="0020", name="Wile E. Coyote")) 
  58      customers.insert(DataObject(id="0030", name="Jonny Quest")) 
  59   
  60      catalog = Table('catalog') 
  61      catalog.create_index("sku", unique=True) 
  62      catalog.insert(DataObject(sku="ANVIL-001", descr="1000lb anvil", unitofmeas="EA",unitprice=100)) 
  63      catalog.insert(DataObject(sku="BRDSD-001", descr="Bird seed", unitofmeas="LB",unitprice=3)) 
  64      catalog.insert(DataObject(sku="MAGNT-001", descr="Magnet", unitofmeas="EA",unitprice=8)) 
  65      catalog.insert(DataObject(sku="MAGLS-001", descr="Magnifying glass", unitofmeas="EA",unitprice=12)) 
  66      print(catalog.by.sku["ANVIL-001"].descr) 
  67   
  68      wishitems = Table('wishitems') 
  69      wishitems.create_index("custid") 
  70      wishitems.create_index("sku") 
  71      wishitems.insert(DataObject(custid="0020", sku="ANVIL-001")) 
  72      wishitems.insert(DataObject(custid="0020", sku="BRDSD-001")) 
  73      wishitems.insert(DataObject(custid="0020", sku="MAGNT-001")) 
  74      wishitems.insert(DataObject(custid="0030", sku="MAGNT-001")) 
  75      wishitems.insert(DataObject(custid="0030", sku="MAGLS-001")) 
  76   
  77      # print a particular customer name  
  78      # (unique indexes will return a single item; non-unique 
  79      # indexes will return a list of all matching items) 
  80      print(customers.by.id["0030"].name) 
  81   
  82      # print all items sold by the pound 
  83      for item in catalog.query(unitofmeas="LB"): 
  84          print(item.sku, item.descr) 
  85   
  86      # print all items that cost more than 10 
  87      for item in catalog.where(lambda o : o.unitprice>10): 
  88          print(item.sku, item.descr, item.unitprice) 
  89   
  90      # join tables to create queryable wishlists collection 
  91      wishlists = customers.join_on("id") + wishitems.join_on("custid") + catalog.join_on("sku") 
  92   
  93      # print all wishlist items with price > 10 
  94      bigticketitems = wishlists().where(lambda ob : ob.unitprice > 10) 
  95      for item in bigticketitems: 
  96          print(item) 
  97   
  98      # list all wishlist items in descending order by price 
  99      for item in wishlists().sort("unitprice desc"): 
 100          print(item) 
 101  """ 
 102   
 103  __version__ = "0.9" 
 104  __versionTime__ = "27 Jun 2016 10:02" 
 105  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
 106   
 107  import sys 
 108  PY_3 = sys.version_info[0] == 3 
 109   
 110  from collections import defaultdict, deque, namedtuple 
 111  from itertools import groupby,islice,starmap,repeat 
 112  if not PY_3: 
 113      from itertools import ifilter as filter 
 114       
 115  from operator import attrgetter 
 116  import csv 
 117  import json 
 118  _consumer = deque(maxlen=0) 
 119  do_all = _consumer.extend 
 120   
 121  try: 
 122      from itertools import product 
 123  except ImportError: 
124 - def product(*seqs):
125 tupleseqs = [[(x,) for x in s] for s in seqs] 126 def _product(*seqs): 127 if len(seqs) == 1: 128 for x in seqs[0]: 129 yield x 130 else: 131 for x in seqs[0]: 132 for p in _product(*seqs[1:]): 133 yield x+p
134 for p in _product(*tupleseqs): 135 yield p 136 137 try: 138 t = basestring 139 except NameError: 140 basestring = str 141 142 __all__ = ["DataObject", "Table", "JoinTerm", "PivotTable"] 143
144 -def _object_attrnames(obj):
145 if hasattr(obj, "__dict__"): 146 # normal object 147 return obj.__dict__.keys() 148 elif isinstance(obj, tuple) and hasattr(obj, "_fields"): 149 # namedtuple 150 return obj._fields 151 elif hasattr(obj, "__slots__"): 152 return obj.__slots__ 153 else: 154 raise ValueError("object with unknown attributes")
155
156 -def _to_json(obj):
157 if hasattr(obj, "__dict__"): 158 # normal object 159 return json.dumps(obj.__dict__) 160 elif isinstance(obj, tuple) and hasattr(obj, "_fields"): 161 # namedtuple 162 return json.dumps(dict(zip(obj._fields, obj))) 163 elif hasattr(obj, "__slots__"): 164 return json.dumps({k:v for k,v in zip(obj.__slots__, 165 (getattr(obj,a) for a in obj.__slots__))}) 166 else: 167 raise ValueError("object with unknown attributes")
168
class DataObject(object):
    """A generic semi-mutable object for storing data values in a table. Attributes
       can be set by passing in named arguments in the constructor, or by setting them
       as C{object.attribute = value}. New attributes can be added any time, but updates
       are ignored.  Table joins are returned as a Table of DataObjects."""

    def __init__(self, **kwargs):
        """Create a DataObject whose attributes are the given named arguments."""
        if kwargs:
            self.__dict__.update(kwargs)

    def __repr__(self):
        return repr(self.__dict__)

    def __setattr__(self, attr, val):
        # make all attributes write-once: assigning to an existing
        # attribute is silently ignored
        if attr not in self.__dict__:
            super(DataObject, self).__setattr__(attr, val)

    def __getitem__(self, k):
        """Dict-style read access to an attribute (enables C{'%(name)s' % obj}).
        @raise KeyError: if there is no attribute named C{k}
        """
        try:
            return getattr(self, k)
        except (AttributeError, TypeError):
            # TypeError covers non-string keys, which getattr() rejects
            raise KeyError("object has no such attribute %s" % (k,))

    def __eq__(self, other):
        """Two objects are equal when their attribute dicts are equal."""
        try:
            return self.__dict__ == other.__dict__
        except AttributeError:
            # other has no __dict__ (int, str, None, ...): let Python fall
            # back to its default comparison instead of raising
            return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
190
191 -class _ObjIndex(object):
192 - def __init__(self, attr):
193 self.attr = attr 194 self.obs = defaultdict(list) 195 self.is_unique = False
196 - def __setitem__(self, k, v):
197 self.obs[k].append(v)
198 - def __getitem__(self, k):
199 return self.obs.get(k,[])
200 - def __len__(self):
201 return len(self.obs)
202 - def __iter__(self):
203 return iter(self.obs)
204 - def keys(self):
205 return sorted(filter(None, self.obs.keys()))
206 - def items(self):
207 return self.obs.items()
208 - def remove(self, obj):
209 try: 210 k = getattr(obj, self.attr) 211 self.obs[k].remove(obj) 212 except (ValueError,AttributeError,KeyError): 213 pass
214 - def __contains__(self, key):
215 return key in self.obs
216 - def copy_template(self):
217 return self.__class__(self.attr)
218
class _UniqueObjIndex(_ObjIndex):
    """Unique index: maps each truthy attribute value to a single object.
    Objects whose key is None/falsy are kept apart in C{none_values}."""

    def __init__(self, attr, accept_none=False):
        self.attr = attr
        self.obs = {}
        self.is_unique = True
        self.accept_none = accept_none
        # a list, not a set: records that define __eq__ without __hash__
        # (such as DataObject on Python 3) are unhashable
        self.none_values = []

    def __setitem__(self, k, v):
        if k:
            if k in self.obs:
                raise KeyError("duplicate key value %s" % (k,))
            self.obs[k] = v
        elif not any(v is held for held in self.none_values):
            self.none_values.append(v)

    def __getitem__(self, k):
        if k:
            return [self.obs.get(k)] if k in self.obs else []
        return list(self.none_values)

    def __contains__(self, k):
        if k:
            return k in self.obs
        return bool(self.accept_none and self.none_values)

    def keys(self):
        return sorted(self.obs.keys()) + ([None] if self.none_values else [])

    def items(self):
        return [(k, [v]) for k, v in self.obs.items()]

    def remove(self, obj):
        """Remove C{obj} from the index."""
        # default to None: create_index files objects that lack the
        # attribute under the None key
        k = getattr(obj, self.attr, None)
        if k:
            if k in self.obs:
                del self.obs[k]
        else:
            self.none_values = [held for held in self.none_values
                                if held is not obj]

    def copy_template(self):
        """Return a new, empty unique index on the same attribute, keeping
        the C{accept_none} setting (the inherited version dropped it)."""
        return self.__class__(self.attr, self.accept_none)
255
256 -class _ObjIndexWrapper(object):
257 - def __init__(self, ind):
258 self._index = ind
259 - def __getattr__(self, attr):
260 return getattr(self._index, attr)
261 - def __getitem__(self, k):
262 ret = Table() 263 if k in self._index: 264 ret.insert_many(self._index[k]) 265 return ret
266 - def __contains__(self, k):
267 return k in self._index
268
269 -class _UniqueObjIndexWrapper(object):
270 - def __init__(self, ind):
271 self._index = ind
272 - def __getattr__(self, attr):
273 return getattr(self._index, attr)
274 - def __contains__(self, k):
275 return k in self._index
276 - def __getitem__(self, k):
277 if k: 278 return self._index[k][0] 279 else: 280 ret = Table() 281 if k in self._index: 282 ret.insert_many(self._index[k]) 283 return ret
284
285 -class _IndexAccessor(object):
286 - def __init__(self, table):
287 self.table = table
288
289 - def __getattr__(self, attr):
290 """A quick way to query for matching records using their indexed attributes. The attribute 291 name is used to locate the index, and returns a wrapper on the index. This wrapper provides 292 dict-like access to the underlying records in the table, as in:: 293 294 employees.by.socsecnum["000-00-0000"] 295 customers.by.zipcode["12345"] 296 297 (C{'by'} is added as a pseudo-attribute on tables, to help indicate that the indexed attributes 298 are not attributes of the table, but of items in the table.) 299 300 The behavior differs slightly for unique and non-unique indexes: 301 - if the index is unique, then retrieving a matching object, will return just the object; 302 if there is no matching object, C{KeyError} is raised (making a table with a unique 303 index behave very much like a Python dict) 304 - if the index is non-unique, then all matching objects will be returned in a new Table, 305 just as if a regular query had been performed; if no objects match the key value, an empty 306 Table is returned and no exception is raised. 307 308 If there is no index defined for the given attribute, then C{AttributeError} is raised. 309 """ 310 if attr in self.table._indexes: 311 ret = self.table._indexes[attr] 312 if isinstance(ret, _UniqueObjIndex): 313 ret = _UniqueObjIndexWrapper(ret) 314 if isinstance(ret, _ObjIndex): 315 ret = _ObjIndexWrapper(ret) 316 return ret 317 raise AttributeError("Table '%s' has no index '%s'" % (self.table_name, attr))
318 319
320 -class Table(object):
321 """Table is the main class in C{littletable}, for representing a collection of DataObjects or 322 user-defined objects with publicly accessible attributes or properties. Tables can be: 323 - created, with an optional name, using standard Python L{C{Table() constructor}<__init__>} 324 - indexed, with multiple indexes, with unique or non-unique values, see L{create_index} 325 - queried, specifying values to exact match in the desired records, see L{where} 326 - filtered (using L{where}), using a simple predicate function to match desired records; 327 useful for selecting using inequalities or compound conditions 328 - accessed directly for keyed values, using C{table.indexattribute[key]} - see L{__getattr__} 329 - joined, using L{join_on} to identify attribute to be used for joining with another table, and 330 L{join} or operator '+' to perform the actual join 331 - pivoted, using L{pivot} to create a nested structure of sub-tables grouping objects 332 by attribute values 333 - grouped, using L{groupby} to create a summary table of computed values, grouped by a key 334 attribute 335 - L{imported<csv_import>}/L{exported<csv_export>} to CSV-format files 336 Queries and joins return their results as new Table objects, so that queries and joins can 337 be easily performed as a succession of operations. 338 """
def __init__(self, table_name=''):
    """Set up an empty Table.
    @param table_name: name for Table
    @type table_name: string (optional)
    """
    # calling the table with a name is how the name gets assigned
    self(table_name)
    # stored objects, in insertion order
    self.obs = []
    # index objects keyed by attribute name, plus the unique ones on their own
    self._indexes = {}
    self._uniqueIndexes = []
    # accessor behind the table.by.<attr>[key] lookups
    self.by = _IndexAccessor(self)
349
350 - def __len__(self):
351 """Return the number of objects in the Table.""" 352 return len(self.obs)
353
354 - def __iter__(self):
355 """Create an iterator over the objects in the Table.""" 356 return iter(self.obs)
357
358 - def __getitem__(self, i):
359 """Provides direct indexed/sliced access to the Table's underlying list of objects.""" 360 if isinstance(i, slice): 361 ret = self.copy_template() 362 ret.insert_many(self.obs[i]) 363 return ret 364 else: 365 return self.obs[i]
366
367 - def __getattr__(self, attr):
368 """A quick way to query for matching records using their indexed attributes. The attribute 369 name is used to locate the index, and returns a wrapper on the index. This wrapper provides 370 dict-like access to the underlying records in the table, as in:: 371 372 employees.by.socsecnum["000-00-0000"] 373 customers.by.zipcode["12345"] 374 375 (C{'by'} is added as a pseudo-attribute on tables, to help indicate that the indexed attributes 376 are not attributes of the table, but of items in the table.) 377 378 The behavior differs slightly for unique and non-unique indexes: 379 - if the index is unique, then retrieving a matching object, will return just the object; 380 if there is no matching object, C{KeyError} is raised (making a table with a unique 381 index behave very much like a Python dict) 382 - if the index is non-unique, then all matching objects will be returned in a new Table, 383 just as if a regular query had been performed; if no objects match the key value, an empty 384 Table is returned and no exception is raised. 385 386 If there is no index defined for the given attribute, then C{AttributeError} is raised. 387 """ 388 if attr in self._indexes: 389 ret = self._indexes[attr] 390 if isinstance(ret, _UniqueObjIndex): 391 ret = _UniqueObjIndexWrapper(ret) 392 if isinstance(ret, _ObjIndex): 393 ret = _ObjIndexWrapper(ret) 394 return ret 395 raise AttributeError("Table '%s' has no index '%s'" % (self.table_name, attr))
396
397 - def __bool__(self):
398 return bool(self.obs)
399 400 __nonzero__ = __bool__ 401
def __add__(self, other):
    """Support UNION of 2 tables using "+" operator."""
    if isinstance(other, JoinTerm):
        # adding to a JoinTerm means "join", not "union"; let it drive
        return other + self
    if isinstance(other, Table):
        # Table + Table is a plain union
        return self.union(other)
    # anything else is taken to be a sequence of objects to append to a copy
    return self.clone().insert_many(other)
413
414 - def __iadd__(self, other):
415 """Support UNION of 2 tables using "+=" operator.""" 416 return self.insert_many(other)
417
418 - def union(self, other):
419 return self.clone().insert_many(other.obs)
420
421 - def __call__(self, table_name):
422 """A simple way to assign a name to a table, such as those 423 dynamically created by joins and queries. 424 @param table_name: name for Table 425 @type table_name: string 426 """ 427 self.table_name = table_name 428 return self
429
def copy_template(self, name=None):
    """Build an empty table that has this table's name and an empty copy
    of each of its index definitions.
    @param name: optional new name for the copy
    """
    empty = Table(self.table_name)
    for attr, index in self._indexes.items():
        empty._indexes[attr] = index.copy_template()
    # NOTE(review): only _indexes is filled in; the copy's _uniqueIndexes
    # list is left as created by the constructor - confirm this is intended
    if name is not None:
        empty(name)
    return empty
441
442 - def clone(self, name=None):
443 """Create full copy of the current table, including table contents 444 and index definitions. 445 """ 446 ret = self.copy_template() 447 ret.insert_many(self.obs) 448 if name is not None: 449 ret(name) 450 return ret
451
452 - def create_index(self, attr, unique=False, accept_none=False):
453 """Create a new index on a given attribute. 454 If C{unique} is True and records are found in the table with duplicate 455 attribute values, the index is deleted and C{KeyError} is raised. 456 457 If the table already has an index on the given attribute, then no 458 action is taken and no exception is raised. 459 @param attr: the attribute to be used for indexed access and joins 460 @type attr: string 461 @param unique: flag indicating whether the indexed field values are 462 expected to be unique across table entries 463 @type unique: boolean 464 @param accept_none: flag indicating whether None is an acceptable 465 unique key value for this attribute 466 @type accept_none: boolean 467 """ 468 if attr in self._indexes: 469 return self 470 471 if unique: 472 self._indexes[attr] = _UniqueObjIndex(attr,accept_none) 473 self._uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique] 474 else: 475 self._indexes[attr] = _ObjIndex(attr) 476 accept_none = True 477 ind = self._indexes[attr] 478 try: 479 for obj in self.obs: 480 if hasattr(obj, attr): 481 obval = getattr(obj, attr) or None 482 else: 483 obval = None 484 if obval or accept_none: 485 ind[obval] = obj 486 else: 487 raise KeyError("None is not an allowed key") 488 return self 489 490 except KeyError: 491 del self._indexes[attr] 492 self._uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique] 493 raise
494
495 - def delete_index(self, attr):
496 """Deletes an index from the Table. Can be used to drop and rebuild an index, 497 or to convert a non-unique index to a unique index, or vice versa. 498 @param attr: name of an indexed attribute 499 @type attr: string 500 """ 501 if attr in self._indexes: 502 del self._indexes[attr] 503 self._uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique]
504
505 - def insert(self, obj):
506 """Insert a new object into this Table. 507 @param obj: any Python object 508 Objects can be constructed using the defined DataObject type, or they can 509 be any Python object that does not use the Python C{__slots__} feature; C{littletable} 510 introspect's the object's C{__dict__} or C{_fields} attributes to obtain join and 511 index attributes and values. 512 513 If the table contains a unique index, and the record to be inserted would add 514 a duplicate value for the indexed attribute, then C{KeyError} is raised, and the 515 object is not inserted. 516 517 If the table has no unique indexes, then it is possible to insert duplicate 518 objects into the table. 519 """ 520 521 # verify new object doesn't duplicate any existing unique index values 522 uniqueIndexes = self._uniqueIndexes #[ind for ind in self._indexes.values() if ind.is_unique] 523 if any((getattr(obj, ind.attr, None) is None and not ind.accept_none) 524 or ( 525 hasattr(obj, ind.attr) and getattr(obj, ind.attr) in ind 526 ) 527 for ind in uniqueIndexes): 528 # had a problem, find which one 529 for ind in uniqueIndexes: 530 if (getattr(obj, ind.attr, None) is None and not ind.accept_none): 531 raise KeyError("unique key cannot be None or blank for index %s" % ind.attr, obj) 532 if getattr(obj, ind.attr) in ind: 533 raise KeyError("duplicate unique key value '%s' for index %s" % (getattr(obj,ind.attr), ind.attr), obj) 534 535 self.obs.append(obj) 536 for attr, ind in self._indexes.items(): 537 obval = getattr(obj, attr) 538 ind[obval] = obj 539 return self
540
541 - def insert_many(self, it):
542 """Inserts a collection of objects into the table.""" 543 #~ for ob in it: 544 #~ self.insert(ob) 545 do_all(self.insert(ob) for ob in it) 546 return self
547
548 - def remove(self, ob):
549 """Removes an object from the table. If object is not in the table, then 550 no action is taken and no exception is raised.""" 551 # remove from indexes 552 #~ for attr,ind in self._indexes.items(): 553 #~ ind.remove(ob) 554 do_all(ind.remove(ob) for attr,ind in self._indexes.items()) 555 556 # remove from main object list 557 self.obs.remove(ob)
558
559 - def remove_many(self, it):
560 """Removes a collection of objects from the table.""" 561 #~ for ob in it: 562 #~ self.remove(ob) 563 do_all(self.remove(ob) for ob in it)
564
565 - def _query_attr_sort_fn(self, attr_val):
566 attr,v = attr_val 567 if attr in self._indexes: 568 idx = self._indexes[attr] 569 if v in idx: 570 return len(idx[v]) 571 else: 572 return 0 573 else: 574 return 1e9
575
576 - def where(self, wherefn=None, **kwargs):
577 """ 578 Retrieves matching objects from the table, based on given 579 named parameters. If multiple named parameters are given, then 580 only objects that satisfy all of the query criteria will be returned. 581 582 Special named args: 583 - C{_orderby="attr,..."} - (Deprecated) resulting table should sort content objects 584 by the C{attr}s given in a comma-separated string; to sort in 585 descending order, reference the attribute as C{attr desc}. 586 587 - C{_limit} - maximum number of records to return 588 589 @param wherefn: a method or lambda that returns a boolean result, as in:: 590 591 lambda ob : ob.unitprice > 10 592 593 @type wherefn: callable(object) returning boolean 594 595 @param kwargs: attributes for selecting records, given as additional 596 named arguments of the form C{attrname="attrvalue"}. 597 598 @return: a new Table containing the matching objects 599 """ 600 # extract meta keys 601 flags = dict((k,v) for k,v in kwargs.items() if k.startswith("_")) 602 for f in flags: 603 del kwargs[f] 604 605 if kwargs: 606 # order query criteria in ascending order of number of matching items 607 # for each individual given attribute; this will minimize the number 608 # of filtering records that each subsequent attribute will have to 609 # handle 610 kwargs = kwargs.items() 611 if len(kwargs) > 1 and len(self.obs) > 100: 612 kwargs = sorted(kwargs, key=self._query_attr_sort_fn) 613 614 ret = self 615 for k,v in kwargs: 616 newret = ret.copy_template() 617 if k in ret._indexes: 618 newret.insert_many(ret._indexes[k][v]) 619 else: 620 newret.insert_many( r for r in ret.obs 621 if hasattr(r,k) and getattr(r,k) == v ) 622 ret = newret 623 else: 624 ret = self.clone() 625 626 # apply flags 627 # sort before clip 628 if flags: 629 if '_orderby' in flags: 630 ret.sort(flags['_orderby']) 631 if '_limit' in flags: 632 del ret.obs[flags['_limit']:] 633 634 if wherefn is not None: 635 newret = ret.copy_template() 636 newret.insert_many(filter(wherefn, ret.obs)) 637 
ret = newret 638 639 return ret
640
641 - def delete(self, **kwargs):
642 """Deletes matching objects from the table, based on given 643 named parameters. If multiple named parameters are given, then 644 only objects that satisfy all of the query criteria will be removed. 645 @param kwargs: attributes for selecting records, given as additional 646 named arguments of the form C{attrname="attrvalue"}. 647 @return: the number of objects removed from the table 648 """ 649 if not kwargs: 650 return 0 651 652 affected = self.where(**kwargs) 653 self.remove_many(affected) 654 return len(affected)
655
656 - def sort(self, key, reverse=False):
657 """Sort Table in place, using given fields as sort key. 658 @param key: if this is a string, it is a comma-separated list of field names, 659 optionally followed by 'desc' to indicate descending sort instead of the 660 default ascending sort; if a list or tuple, it is a list or tuple of field names 661 or field names with ' desc' appended; if it is a function, then it is the 662 function to be used as the sort key function 663 @return: self 664 """ 665 if isinstance(key, (basestring,list,tuple)): 666 if isinstance(key, basestring): 667 attrdefs = [s.strip() for s in key.split(',')] 668 # leftmost attr is the most primary sort key, so do succession of 669 # sorts from right to left 670 attr_orders = [(a.split()+['asc',])[:2] for a in attrdefs][::-1] 671 else: 672 # attr definitions were already resolved to a sequence by the caller 673 attr_orders = key 674 attrs = [attr for attr,order in attr_orders] 675 676 #special optimization if all orders are ascending or descending 677 if all(order=='asc' for attr,order in attr_orders): 678 self.obs.sort(key=attrgetter(*attrs), reverse=reverse) 679 elif all(order=='desc' for attr,order in attr_orders): 680 self.obs.sort(key=attrgetter(*attrs), reverse=not reverse) 681 else: 682 # mix of ascending and descending sorts, have to do succession of sorts 683 #~ for attr,order in attr_orders: 684 #~ self.obs.sort(key=lambda ob:getattr(ob,attr), reverse=(order=="desc")) 685 do_all(self.obs.sort(key=attrgetter(attr), reverse=(order=="desc")) 686 for attr,order in attr_orders) 687 else: 688 keyfn = key 689 self.obs.sort(key=keyfn, reverse=reverse) 690 return self
691
def select(self, fields, **exprs):
    """
    Create a new table containing a subset of attributes, with optionally
    newly-added fields computed from each rec in the original table.

    Special kwargs:
     - C{_unique=True} - (Deprecated) only return a set of unique rows

    @param fields: list of strings, or single space-delimited string, listing attribute name to be included in the output
    @type fields: list, or space-delimited string
    @param exprs: one or more named callable arguments, to compute additional fields using the given function
    @type exprs: C{name=callable}, callable takes the record as an argument, and returns the new attribute value
      If a string is passed as a callable, this string will be used using string formatting, given the record
      as a source of interpolation values.  For instance, C{fullName = '%(lastName)s, %(firstName)s'}
    @return: a new Table of DataObjects
    """
    if isinstance(fields, basestring):
        fields = fields.split()

    unique = exprs.pop('_unique', False)

    def _make_string_callable(expr):
        # a string expression is applied as a %-format over the record
        if isinstance(expr, basestring):
            return lambda rec: expr % rec
        return expr

    # .items() is required here: iterating the dict itself yields only keys
    exprs = dict((k, _make_string_callable(v)) for k, v in exprs.items())
    all_names = tuple(fields) + tuple(exprs.keys())
    computed = tuple(exprs.values())

    raw_tuples = []
    for rec in self.obs:
        attrvalues = tuple(getattr(rec, fieldname, None) for fieldname in fields)
        attrvalues += tuple(expr(rec) for expr in computed)
        raw_tuples.append(attrvalues)

    if unique:
        # drop duplicate rows, keeping the first occurrence of each
        seen = set()
        distinct = []
        for row in raw_tuples:
            if row not in seen:
                seen.add(row)
                distinct.append(row)
        raw_tuples = distinct

    return Table().insert_many(DataObject(**dict(zip(all_names, outtuple)))
                               for outtuple in raw_tuples)

def format(self, *fields, **exprs):
    """
    Create a new table with all string formatted attribute values, typically in preparation for
    formatted output.
    @param fields: one or more strings, each string is an attribute name to be included in the output
    @type fields: string (multiple)
    @param exprs: one or more named string arguments, to format the given attribute with a formatting string;
      a plain identifier names the source attribute, a string containing C{{}} or C{{0}} is applied with
      C{str.format} to the record, and any other string is applied with C{%} to the attribute of the same
      name. Non-string values are ignored.
    @type exprs: name=string
    @return: a new Table of DataObjects with string-valued attributes
    """
    import re  # not imported at module level in this file

    select_exprs = {}
    for f in fields:
        # bind f as a default so each lambda keeps its own field name
        select_exprs[f] = lambda r, f=f: str(getattr(r, f, None))

    identifier = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
    for ename, expr in exprs.items():
        if not isinstance(expr, basestring):
            continue
        # expr/ename are bound as defaults: a plain closure would see only
        # the values from the final loop iteration
        if identifier.match(expr):
            select_exprs[ename] = lambda r, expr=expr: str(getattr(r, expr, None))
        elif "{}" in expr or "{0}" in expr:
            select_exprs[ename] = lambda r, expr=expr: expr.format(r)
        else:
            select_exprs[ename] = (lambda r, expr=expr, ename=ename:
                                   expr % getattr(r, ename) if hasattr(r, ename) else "None")

    # every output column is computed, so no raw fields are requested
    return self.select([], **select_exprs)
760
def join(self, other, attrlist=None, auto_create_indexes=True, **kwargs):
    """
    Join the objects of one table with the objects of another, based on the given
    matching attributes in the named arguments.  The attrlist specifies the attributes to
    be copied from the source tables - if omitted, all attributes will be copied.  Entries
    in the attrlist may be single attribute names, or if there are duplicate names in both
    tables, then a C{(table,attributename)} tuple can be given to disambiguate which
    attribute is desired. A C{(table,attributename,alias)} tuple can also be passed, to
    rename an attribute from a source table.

    This method may be called directly, or can be constructed using the L{join_on} method and
    the '+' operator.  Using this syntax, the join is specified using C{table.join_on("xyz")}
    to create a JoinTerm containing both table and joining attribute.  Multiple JoinTerm
    or tables can be added to construct a compound join expression.  When complete, the
    join expression gets executed by calling the resulting join definition,
    using C{join_expression([attrlist])}.

    @param other: other table to join to
    @param attrlist: list of attributes to be copied to the new joined table; if
        none provided, all attributes of both tables will be used (taken from the first
        object in each table)
    @type attrlist: string, or list of strings or C{(table,attribute[,alias])} tuples
        (list may contain both strings and tuples)
    @param auto_create_indexes: if True, missing indexes on the join attributes are
        created on both tables; if False, both attributes must already be indexed
    @param kwargs: attributes to join on, given as additional named arguments
        of the form C{table1attr="table2attr"}, or a dict mapping attribute names.
        Only the first named argument is used.
    @returns: a new Table containing the joined data as new DataObjects
    @raise TypeError: if no join attribute is given
    @raise ValueError: if an C{attrlist} name is in neither table, or a join
        attribute is not indexed
    """
    import re  # not imported at module level in this file

    if not kwargs:
        raise TypeError("must specify at least one join attribute as a named argument")
    thiscol, othercol = next(iter(kwargs.items()))

    retname = ("(%s:%s^%s:%s)" %
               (self.table_name, thiscol, other.table_name, othercol))
    # make sure both tables contain records to join - if not, just return an empty Table
    if not (self.obs and other.obs):
        return Table(retname)

    if isinstance(attrlist, basestring):
        # drop empty pieces produced by leading/trailing separators
        attrlist = [a for a in re.split(r'[,\s]+', attrlist) if a]

    # expand attrlist to full (table, name, alias) tuples
    thisnames = set(_object_attrnames(self.obs[0]))
    othernames = set(_object_attrnames(other.obs[0]))
    fullcols = []
    if attrlist is not None:
        for col in attrlist:
            if isinstance(col, tuple):
                # assume col contains at least (table, colname), fill in alias if missing
                # to be same as colname
                fullcols.append((col + (col[1],))[:3])
            elif col in thisnames:
                fullcols.append((self, col, col))
            elif col in othernames:
                fullcols.append((other, col, col))
            else:
                raise ValueError("join attribute not found: " + col)
    else:
        fullcols = [(self, n, n) for n in thisnames]
        fullcols += [(other, n, n) for n in othernames]

    thiscols = [c for c in fullcols if c[0] is self]
    othercols = [c for c in fullcols if c[0] is other]

    if auto_create_indexes:
        if thiscol not in self._indexes:
            self.create_index(thiscol)
        if othercol not in other._indexes:
            other.create_index(othercol)

    if thiscol not in self._indexes:
        raise ValueError("indexed attribute required for join: " + thiscol)
    if othercol not in other._indexes:
        raise ValueError("indexed attribute required for join: " + othercol)
    thiscolindex = self._indexes[thiscol]
    othercolindex = other._indexes[othercol]

    # use table with fewer keys to drive join
    if len(thiscolindex) < len(othercolindex):
        shortindex, longindex = (thiscolindex, othercolindex)
        swap = False
    else:
        shortindex, longindex = (othercolindex, thiscolindex)
        swap = True

    # find matching rows, always as (rows of self, rows of other); keys
    # absent from the longer index contribute an empty row list
    matchingrows = [(longindex[key], rows) if swap else (rows, longindex[key])
                    for key, rows in shortindex.items()]

    joinrows = []
    for thisrows, otherrows in matchingrows:
        for trow, orow in product(thisrows, otherrows):
            retobj = DataObject()
            for _, c, a in thiscols:
                setattr(retobj, a, getattr(trow, c))
            for _, c, a in othercols:
                setattr(retobj, a, getattr(orow, c))
            joinrows.append(retobj)

    ret = Table(retname)
    for tbl, collist in zip([self, other], [thiscols, othercols]):
        for _, c, a in collist:
            if c in tbl._indexes:
                ret.create_index(a)  # no unique indexes in join results
    ret.insert_many(joinrows)
    return ret
879
def join_on(self, attr):
    """Build a L{JoinTerm} that pairs this table with one of its indexed
    attributes, as the first step in composing a join expression with the
    '+' operator.
    @param attr: name of the attribute in this table to join on (the table
        being joined to may use a different attribute name)
    @type attr: string
    @returns: L{JoinTerm}
    @raise ValueError: if C{attr} is not an indexed attribute of this table
    """
    is_indexed = attr in self._indexes
    if is_indexed:
        return JoinTerm(self, attr)
    raise ValueError("can only join on indexed attributes")
891
def pivot(self, attrlist):
    """Build a L{PivotTable} over this table using the given attributes.
    @param attrlist: attributes to pivot on, outermost first
    @type attrlist: list of strings, or string of space-delimited attribute names
    @returns: L{PivotTable}
    @raise ValueError: if any of the named attributes is not indexed
    """
    names = attrlist.split() if isinstance(attrlist, basestring) else attrlist
    for name in names:
        # every pivot attribute must already have an index on this table
        if name not in self._indexes:
            raise ValueError("pivot can only be called using indexed attributes")
    return PivotTable(self, [], names)
903
def _import(self, source, encoding, transforms=None, reader=csv.DictReader):
    """Shared implementation behind the file import methods: read records
    from C{source}, insert them as L{DataObject}s, then apply any transforms.
    @param source: file name (opened and closed here) or an already-open
        file object (read as-is and left open for the caller)
    @type source: string or file
    @param encoding: text encoding used when opening a file name under Python 3
    @type encoding: string
    @param transforms: dict of functions (or C{(function, default)} tuples)
        by attribute name, applied to each imported record after loading
    @type transforms: dict (optional)
    @param reader: callable that takes the open source and returns an
        iterable of dicts, one per record
    """
    opened_here = isinstance(source, basestring)
    if opened_here:
        source = open(source, encoding=encoding) if PY_3 else open(source)
    try:
        self.insert_many(DataObject(**row) for row in reader(source))
        for attr, fn in (transforms or {}).items():
            if isinstance(fn, tuple):
                fn, default = fn
            else:
                default = None
            # add_field runs the function over the records right away, so
            # the loop variables captured by the lambda are still current
            self.add_field(attr, lambda obj: fn(getattr(obj, attr)), default)
    finally:
        if opened_here:
            source.close()
925
def csv_import(self, csv_source, encoding='UTF-8', transforms=None):
    """Load the records of a CSV-formatted file into this table.
    @param csv_source: CSV file - a string is treated as a file name, which
        is opened, read, and closed; a file object is read as-is and left
        open for the caller to close
    @type csv_source: string or file
    @param encoding: text encoding passed along for opening a named file
    @type encoding: string
    @param transforms: dict of functions by attribute name; each named
        attribute is converted using its function, all other attributes are
        left as the strings that were read; an entry may also be a
        (function, default-value) tuple, in which case the default value is
        used whenever the function raises an Exception
    @type transforms: dict (optional)
    """
    return self._import(csv_source, encoding, transforms=transforms)
941
def _xsv_import(self, xsv_source, transforms=None, splitstr="\t", encoding='UTF-8'):
    """Import a delimiter-separated data file into this table.
    @param xsv_source: file name (opened, read, and closed) or file object
        (read as-is and left open for the caller)
    @type xsv_source: string or file
    @param transforms: dict of functions (or C{(function, default)} tuples)
        by attribute name, applied to the imported records
    @type transforms: dict (optional)
    @param splitstr: field delimiter handed to C{csv.DictReader}
    @type splitstr: string
    @param encoding: text encoding passed along for opening a named file
    @type encoding: string
    """
    def xsv_reader(src):
        return csv.DictReader(src, delimiter=splitstr)

    # _import's second positional parameter is the encoding; transforms must
    # go in its own slot or they are silently taken as the encoding
    return self._import(xsv_source, encoding, transforms, reader=xsv_reader)
945
def tsv_import(self, xsv_source, transforms=None):
    """Load the records of a tab-separated data file into this table.
    @param xsv_source: tab-separated data file - a string is treated as a
        file name, which is opened, read, and closed; a file object is read
        as-is and left open for the caller to close
    @type xsv_source: string or file
    @param transforms: dict of functions by attribute name; each named
        attribute is converted using its function, all other attributes are
        left as the strings that were read; an entry may also be a
        (function, default-value) tuple, in which case the default value is
        used whenever the function raises an Exception
    @type transforms: dict (optional)
    """
    tab = "\t"
    return self._xsv_import(xsv_source, splitstr=tab, transforms=transforms)
961
def csv_export(self, csv_dest, fieldnames=None, encoding='UTF-8'):
    """Exports the contents of the table to a CSV-formatted file.
    @param csv_dest: CSV file - if a string is given, the file with that name will be
        opened, written, and closed; if a file object is given, then that object
        will be written as-is, and left for the caller to be closed.
    @type csv_dest: string or file
    @param fieldnames: attribute names to be exported; can be given as a single
        string with space-delimited names, or as a list of attribute names; if
        omitted, the attribute names of the first record are used (none if
        the table is empty)
    @param encoding: text encoding used when opening a named file under Python 3
    @type encoding: string
    """
    close_on_exit = False
    if isinstance(csv_dest, basestring):
        if PY_3:
            # on Python 3 the csv module writes text, so the file must be
            # opened in text mode; newline='' leaves line endings to the writer
            csv_dest = open(csv_dest, 'w', newline='', encoding=encoding)
        else:
            csv_dest = open(csv_dest, 'wb')
        close_on_exit = True
    try:
        if fieldnames is None:
            # an empty table has no first record to take names from
            fieldnames = list(_object_attrnames(self.obs[0])) if self.obs else []
        elif isinstance(fieldnames, basestring):
            fieldnames = fieldnames.split()

        csv_dest.write(','.join(fieldnames) + '\n')
        csvout = csv.DictWriter(csv_dest, fieldnames, extrasaction='ignore')
        for ob in self.obs:
            if hasattr(ob, "__dict__"):
                csvout.writerow(ob.__dict__)
            else:
                # records without a __dict__ are read attribute by attribute
                csvout.writerow(dict((fld, getattr(ob, fld)) for fld in fieldnames))
    finally:
        if close_on_exit:
            csv_dest.close()
997
def json_import(self, source, transforms=None, encoding='UTF-8'):
    """Imports the contents of a JSON data file into this table.
    @param source: JSON data file - if a string is given, the file with that name will be
        opened, read, and closed; if a file object is given, then that object
        will be read as-is, and left for the caller to be closed.
    @type source: string or file
    @param transforms: dict of functions by attribute name; if given, each
        attribute will be transformed using the corresponding transform; the
        transform function can also be defined as a (function, default-value) tuple; if
        there is an Exception raised by the transform function, then the attribute will
        be set to the given default value
    @type transforms: dict (optional)
    @param encoding: text encoding passed along for opening a named file
    @type encoding: string
    """
    class _JsonFileReader(object):
        """Iterates over the JSON documents in a line-oriented source; a
        document may span several consecutive lines."""
        def __init__(self, src):
            self.source = src

        def __iter__(self):
            current = ''
            for line in self.source:
                if current:
                    current += ' '
                current += line
                try:
                    obj = json.loads(current)
                except ValueError:
                    # not a complete document yet - keep accumulating lines
                    continue
                current = ''
                yield obj

    # _import requires the encoding as its second argument
    return self._import(source, encoding, transforms=transforms, reader=_JsonFileReader)
def json_export(self, dest, fieldnames=None, encoding='UTF-8'):
    """Exports the contents of the table to a JSON-formatted file, one JSON
    document per line.
    @param dest: output file - if a string is given, the file with that name will be
        opened, written, and closed; if a file object is given, then that object
        will be written as-is, and left for the caller to be closed.
    @type dest: string or file
    @param fieldnames: attribute names to be exported; can be given as a single
        string with space-delimited names, or as a list of attribute names; if
        omitted, each record is written in full
    @param encoding: text encoding used when opening a named file under Python 3
    @type encoding: string
    """
    close_on_exit = False
    if isinstance(dest, basestring):
        if PY_3:
            # JSON text is str on Python 3, so the file must be in text mode
            dest = open(dest, 'w', encoding=encoding)
        else:
            dest = open(dest, 'wb')
        close_on_exit = True
    try:
        if isinstance(fieldnames, basestring):
            fieldnames = fieldnames.split()

        for ob in self.obs:
            if fieldnames is None:
                line = _to_json(ob)
            else:
                line = json.dumps({f: getattr(ob, f) for f in fieldnames})
            dest.write(line + '\n')
    finally:
        if close_on_exit:
            dest.close()
1057
def add_field(self, attrname, fn, default=None):
    """Set an attribute on every object in the table to a computed value,
    either adding a new attribute or overwriting an existing one.
    @param attrname: attribute to compute for each object
    @type attrname: string
    @param fn: function used to compute the attribute value from the
        object, as in::

            lambda ob : ob.commission_pct/100.0 * ob.gross_sales

    @type fn: function(obj) returns value
    @param default: value stored when calling fn on an object raises
        an Exception
    @returns: this table
    """
    for rec in self:
        try:
            value = fn(rec)
        except Exception:
            value = default
        if isinstance(rec, DataObject):
            # NOTE(review): presumably bypasses DataObject's own restrictions
            # on attribute assignment - confirm against DataObject
            object.__setattr__(rec, attrname, value)
        else:
            setattr(rec, attrname, value)
    return self
def addfield(self, attrname, fn, default=None):
    """Deprecated spelling of L{add_field}; takes the same arguments and
    returns the same result.
    """
    return self.add_field(attrname, fn, default=default)
1088
def groupby(self, keyexpr, **outexprs):
    """simple prototype of group by, with support for expressions in the group-by clause
    and outputs
    @param keyexpr: grouping field and optional expression for computing the key value;
        if a string is passed, it is split on whitespace into one or more attribute
        names and records are grouped by the tuple of those attribute values; if a
        C{(name, function)} tuple is passed, the function is called on each record
        to compute its key, which is stored under C{name}.
        NOTE: the computed key is zipped against the key attribute names, so a
        key function must return a sequence whose first item is the key value.
    @type keyexpr: string or tuple
    @param outexprs: named arguments describing one or more summary values to
        compute per key
    @type outexprs: callable, taking a sequence of objects as input and returning
        a single summary value
    @returns: a new Table with one record per distinct key, in sorted key order
    @raise TypeError: if keyexpr is neither a string nor a tuple
    """
    if isinstance(keyexpr, basestring):
        keyattrs = keyexpr.split()
        keyfn = lambda o: tuple(getattr(o, k) for k in keyattrs)
    elif isinstance(keyexpr, tuple):
        keyattrs = (keyexpr[0],)
        keyfn = keyexpr[1]
    else:
        raise TypeError("keyexpr must be string or tuple")

    groupedobs = defaultdict(list)
    for ob in self.obs:
        groupedobs[keyfn(ob)].append(ob)

    tbl = Table()
    # a single-attribute key identifies exactly one output record
    unique_key = len(keyattrs) == 1
    for k in keyattrs:
        tbl.create_index(k, unique=unique_key)

    # items() works on both Python 2 and 3 (iteritems() is Python 2 only);
    # keys are distinct, so sorting never has to compare the record lists
    for key, recs in sorted(groupedobs.items()):
        groupobj = DataObject(**dict(zip(keyattrs, key)))
        for subkey, expr in outexprs.items():
            setattr(groupobj, subkey, expr(recs))
        tbl.insert(groupobj)
    return tbl
1125
def run(self):
    """Return this table itself, unchanged.
    @returns: self
    """
    return self
1128
def unique(self):
    """Return a new table holding the first occurrence of each distinct
    record, where two records are the same if they have the same attribute
    names with equal values. Attribute values must be hashable.
    @returns: a new table created with C{copy_template}, containing the
        distinct records in their original order
    """
    ret = self.copy_template()
    seen = set()
    for rec in self:
        try:
            rec_attrs = vars(rec)
        except TypeError:
            # record has no __dict__ (e.g. a class using __slots__)
            rec_attrs = dict((k, getattr(rec, k)) for k in _object_attrnames(rec))
        # key on sorted (name, value) pairs so that the key does not depend
        # on attribute order, and equal values under different names differ
        reckey = tuple(sorted(rec_attrs.items()))
        if reckey not in seen:
            seen.add(reckey)
            ret.insert(rec)
    return ret
1138
class PivotTable(Table):
    """Enhanced Table containing pivot results from calling table.pivot().

    A PivotTable holds the records selected by its pivot key, plus one nested
    PivotTable per value of the next pivot attribute (if any).
    """
    def __init__(self, parent, attr_val_path, attrlist):
        """PivotTable initializer - do not create these directly, use
        L{Table.pivot}.
        @param parent: table (or enclosing PivotTable) supplying the records
            and indexes
        @param attr_val_path: list of C{(attribute, value)} pairs selecting
            this table's records; empty for the top-level pivot
        @param attrlist: remaining attributes to pivot on below this level
        """
        super(PivotTable, self).__init__()
        self._attr_path = attr_val_path[:]
        self._pivot_attrs = attrlist[:]
        self._subtable_dict = {}

        # start from empty copies of the parent's indexes, then load records
        self._indexes.update(dict((k, v.copy_template()) for k, v in parent._indexes.items()))
        if not attr_val_path:
            self.insert_many(parent.obs)
        else:
            attr, val = attr_val_path[-1]
            self.insert_many(parent.where(**{attr: val}))
            # register with the enclosing pivot so it can look us up by value
            parent._subtable_dict[val] = self

        if len(attrlist) > 0:
            this_attr = attrlist[0]
            sub_attrlist = attrlist[1:]
            ind = parent._indexes[this_attr]
            self.subtables = [PivotTable(self,
                                         attr_val_path + [(this_attr, k)],
                                         sub_attrlist) for k in sorted(ind.keys())]
        else:
            self.subtables = []

    def __getitem__(self, val):
        """Return the subtable for pivot value C{val}; on a table with no
        subtables, fall back to the inherited Table item access."""
        if self._subtable_dict:
            return self._subtable_dict[val]
        else:
            return super(PivotTable, self).__getitem__(val)

    def keys(self):
        """Return the sorted pivot values of the immediate subtables."""
        return sorted(self._subtable_dict.keys())

    def items(self):
        """Return C{(pivot value, subtable)} pairs, sorted by pivot value."""
        return sorted(self._subtable_dict.items())

    def values(self):
        """Return the immediate subtables, in the same order as L{keys}."""
        return [self._subtable_dict[k] for k in self.keys()]

    def pivot_key(self):
        """Return the set of attribute-value pairs that define the contents of this
        table within the original source table.
        """
        return self._attr_path

    def pivot_key_str(self):
        """Return the pivot_key as a displayable string.
        """
        return '/'.join("%s:%s" % (attr, key) for attr, key in self._attr_path)

    def has_subtables(self):
        """Return whether this table has further subtables.
        """
        return bool(self.subtables)

    def dump(self, out=sys.stdout, row_fn=repr, limit=-1, indent=0):
        """Dump out the contents of this table in a nested listing.
        @param out: output stream to write to
        @param row_fn: function to call to display individual rows
        @param limit: number of records to show at deepest level of pivot (-1=show all)
        @param indent: current nesting level
        """
        NL = '\n'
        if indent:
            out.write(" "*indent + self.pivot_key_str())
        else:
            out.write("Pivot: %s" % ','.join(self._pivot_attrs))
        out.write(NL)
        if self.has_subtables():
            for sub in self.subtables:
                # empty subtables are left out of the listing
                if sub:
                    sub.dump(out, row_fn, limit, indent+1)
        else:
            if limit >= 0:
                showslice = slice(0, limit)
            else:
                showslice = slice(None, None)
            for r in self.obs[showslice]:
                out.write(" "*(indent+1) + row_fn(r) + NL)
        out.flush()

    def dump_counts(self, out=sys.stdout, count_fn=len, colwidth=10):
        """Dump out the summary counts of entries in this pivot table as a tabular listing.
        @param out: output stream to write to
        @param count_fn: function applied to each subtable to compute the
            value shown for it (default is the number of records)
        @param colwidth: minimum width of the value columns
        @raise ValueError: if the pivot has other than 1 or 2 attributes
        """
        if len(self._pivot_attrs) == 1:
            out.write("Pivot: %s\n" % ','.join(self._pivot_attrs))
            maxkeylen = max(len(str(k)) for k in self.keys())
            maxvallen = colwidth
            keytally = {}
            # first pass computes all values so the column can be sized
            for k, sub in self.items():
                sub_v = count_fn(sub)
                maxvallen = max(maxvallen, len(str(sub_v)))
                keytally[k] = sub_v
            for k in self.keys():
                out.write("%-*.*s " % (maxkeylen, maxkeylen, k))
                out.write("%*s\n" % (maxvallen, keytally[k]))
        elif len(self._pivot_attrs) == 2:
            out.write("Pivot: %s\n" % ','.join(self._pivot_attrs))
            maxkeylen = max(max(len(str(k)) for k in self.keys()), 5)
            maxvallen = max(max(len(str(k)) for k in self.subtables[0].keys()), colwidth)
            keytally = dict((k, 0) for k in self.subtables[0].keys())
            # header row: one column per second-level key, then a total column
            out.write("%*s " % (maxkeylen, ''))
            out.write(' '.join("%*.*s" % (maxvallen, maxvallen, k) for k in self.subtables[0].keys()))
            out.write(' %*s\n' % (maxvallen, 'Total'))
            for k, sub in self.items():
                out.write("%-*.*s " % (maxkeylen, maxkeylen, k))
                for kk, ssub in sub.items():
                    ssub_v = count_fn(ssub)
                    out.write("%*d " % (maxvallen, ssub_v))
                    keytally[kk] += ssub_v
                    maxvallen = max(maxvallen, len(str(ssub_v)))
                sub_v = count_fn(sub)
                maxvallen = max(maxvallen, len(str(sub_v)))
                out.write("%*d\n" % (maxvallen, sub_v))
            # footer row: per-column totals and the grand total
            out.write('%-*.*s ' % (maxkeylen, maxkeylen, "Total"))
            out.write(' '.join("%*d" % (maxvallen, tally) for k, tally in sorted(keytally.items())))
            out.write(" %*d\n" % (maxvallen, sum(tally for k, tally in keytally.items())))
        else:
            raise ValueError("can only dump summary counts for 1 or 2-attribute pivots")

    def summary_counts(self, fn=None, col=None, summarycolname=None):
        """Dump out the summary counts of this pivot table as a Table.
        @param fn: optional function applied to the C{col} values of the
            records in each innermost subtable to compute its summary value;
            if omitted, the number of records is stored under C{'Count'}
        @param col: name of the record value passed to C{fn}
        @param summarycolname: attribute name under which the C{fn} result
            is stored (defaults to C{col})
        @returns: a new Table with one record per innermost subtable
        @raise ValueError: if the pivot has other than 1, 2, or 3 attributes
        """
        if summarycolname is None:
            summarycolname = col
        depth = len(self._pivot_attrs)
        if depth not in (1, 2, 3):
            raise ValueError("can only dump summary counts for 1, 2, or 3-attribute pivots")

        ret = Table()
        for attr in self._pivot_attrs:
            ret.create_index(attr)

        # walk down one level per pivot attribute to reach the innermost
        # subtables, keeping them in nested-iteration order
        leaves = [self]
        for _ in range(depth):
            leaves = [sub for tbl in leaves for sub in tbl.subtables]

        for leaf in leaves:
            if depth == 1:
                # single-attribute pivots report only the last key pair
                attrdict = dict([leaf._attr_path[-1]])
            else:
                attrdict = dict(leaf._attr_path)
            if fn is None:
                attrdict['Count'] = len(leaf)
            else:
                attrdict[summarycolname] = fn(s[col] for s in leaf)
            ret.insert(DataObject(**attrdict))
        return ret
1312
class JoinTerm(object):
    """Temporary object created while composing a join across tables using
    L{Table.join_on} and '+' addition.  JoinTerm's are usually created by
    calling join_on on a Table object, as in::

      customers.join_on("id") + orders.join_on("custid")

    This join expression would set up the join relationship
    equivalent to::

      customers.join(orders, id="custid")

    If tables are being joined on attributes that have the same name in
    both tables, then a join expression could be created by adding a
    JoinTerm of one table directly to the other table::

      customers.join_on("custid") + orders

    Once the join expression is composed, the actual join is performed
    using function call notation::

      customerorders = customers.join_on("custid") + orders
      for custord in customerorders():
          print(custord)

    When calling the join expression, you can optionally specify a
    list of attributes as defined in L{Table.join}.
    """
    def __init__(self, sourceTable, joinfield):
        """
        @param sourceTable: table this term joins from
        @param joinfield: name of the attribute in sourceTable to join on
        """
        self.sourcetable = sourceTable
        self.joinfield = joinfield
        # right-hand side of the join: None, a JoinTerm, or a Table
        self.jointo = None

    def __add__(self, other):
        """Attach C{other} (a Table or JoinTerm) as the right-hand side of
        this join; if this term already has one, perform the pending join
        first and continue from its result.
        @raise ValueError: if other is neither a Table nor a JoinTerm
        """
        if isinstance(other, Table):
            # a bare table is joined on the same attribute name
            other = other.join_on(self.joinfield)
        if isinstance(other, JoinTerm):
            if self.jointo is None:
                if other.jointo is None:
                    self.jointo = other
                else:
                    self.jointo = other()
                return self
            else:
                if other.jointo is None:
                    return self() + other
                else:
                    return self() + other()
        raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)

    def __radd__(self, other):
        """Support C{table + jointerm} by joining the table on this term's
        attribute name.
        @raise ValueError: if other is not a Table
        """
        if isinstance(other, Table):
            return other.join_on(self.joinfield) + self
        raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)

    def __call__(self, attrs=None):
        """Perform the join and return the resulting table.
        @param attrs: optional attribute list, passed to the source table's
            C{join} method
        @returns: the joined table, or the result of the source table's
            C{query()} if nothing has been joined to this term
        """
        # compare against None explicitly: a joined-to Table that happens to
        # be empty is falsy, but must still take part in the join
        if self.jointo is None:
            return self.sourcetable.query()
        other = self.jointo
        if not isinstance(other, JoinTerm):
            # jointo holds a Table; join it on the same attribute name
            other = other.join_on(self.joinfield)
        return self.sourcetable.join(other.sourcetable, attrs,
                                     **{self.joinfield: other.joinfield})

    def join_on(self, col):
        """Perform the pending join, then start a new join term on the
        result using attribute C{col}."""
        return self().join_on(col)
# Demonstration script: builds two small tables and exercises queries,
# unique indexes, joins, and pivots, printing the results.
if __name__ == "__main__":

    # import json in Python 2 or 3 compatible forms
    from functools import partial
    try:
        import simplejson as json
        json_dumps = partial(json.dumps, indent=' ')
    except ImportError:
        import json
        json_dumps = partial(json.dumps, indent=2)


    # colon-delimited sample records: city, state, zip, station id
    rawdata = """\
    Phoenix:AZ:85001:KPHX
    Phoenix:AZ:85001:KPHY
    Phoenix:AZ:85001:KPHA
    Dallas:TX:75201:KDFW""".splitlines()

    # load miniDB
    stations = Table()
    #~ stations.create_index("city")
    stations.create_index("stn", unique=True)

    fields = "city state zip stn".split()
    for d in rawdata:
        ob = DataObject()
        # values are stripped, so whitespace around a field is not significant
        for k,v in zip(fields, d.split(':')):
            setattr(ob,k,v.strip())
        stations.insert(ob)

    # perform some queries and deletes
    for queryargs in [
        dict(city="Phoenix"),
        dict(city="Phoenix", stn="KPHX"),
        dict(stn="KPHA", city="Phoenix"),
        dict(state="TX"),
        dict(city="New York"),
        dict(city="Phoenix", _orderby="stn"),
        dict(city="Phoenix", _orderby="stn desc"),
        ]:
        print(queryargs)
        result = stations.where(**queryargs)
        print(len(result))
        for r in result: print (r)
        print('')
    #~ print stations.delete(city="Phoenix")
    #~ print stations.delete(city="Boston")
    print(list(stations.where()))
    print('')

    # second table, keyed by the same station id, for the join examples
    amfm = Table()
    amfm.create_index("stn", unique=True)
    amfm.insert(DataObject(stn="KPHY", band="AM"))
    amfm.insert(DataObject(stn="KPHX", band="FM"))
    amfm.insert(DataObject(stn="KPHA", band="FM"))
    amfm.insert(DataObject(stn="KDFW", band="FM"))
    print(amfm.by.stn["KPHY"])
    print(amfm.by.stn["KPHY"].band)

    # "KPHA" is already present in the unique "stn" index; the demo expects
    # this insert to be rejected with a KeyError
    try:
        amfm.insert(DataObject(stn="KPHA", band="AM"))
    except KeyError:
        print("duplicate key not allowed")

    # join with an explicit attribute list, renaming attributes via
    # (table, attribute, alias) tuples
    print('')
    for rec in (stations.join_on("stn") + amfm.join_on("stn")
                )(["stn", "city", (amfm,"band","AMFM"),
                   (stations,"state","st")]).sort("AMFM"):
        print(repr(rec))

    # same join, with a (table, attribute) tuple that has no alias
    print('')
    for rec in (stations.join_on("stn") + amfm.join_on("stn")
                )(["stn", "city", (amfm,"band"), (stations,"state","st")]):
        print(json_dumps(rec.__dict__))

    # join with no attribute list given
    print('')
    for rec in (stations.join_on("stn") + amfm.join_on("stn"))():
        print(json_dumps(rec.__dict__))

    # non-unique index lookup
    print('')
    stations.create_index("state")
    for az_stn in stations.by.state['AZ']:
        print(az_stn)

    # single-attribute pivot
    print('')
    pivot = stations.pivot("state")
    pivot.dump_counts()

    # two-attribute pivot over a joined table; both pivot attributes are
    # indexed in their source tables before the join is performed
    print('')
    amfm.create_index("band")
    pivot = (stations.join_on("stn") + amfm)().pivot("state band")
    pivot.dump_counts()