Module csvutils
[hide private]
[frames] | [no frames]

Source Code for Module csvutils

  1  #!/usr/bin/env python 
  2   
  3  ''' 
  4  Transformation utilities for csv (or csv-like) generated rows. 
  5   
  6  The standard csv module is very useful for parsing tabular data in CSV format. 
  7  Typically though, one or more transformations need to be applied to the generated 
  8  rows before being ready to be used; for instance "convert the 3rd column to int, 
  9  the 5th to float and ignore all the rest". This module provides an easy way to 
 10  specify such transformations upfront instead of coding them every time by hand. 
 11   
 12  Two classes are currently available, L{SequenceTransformer} and L{MappingTransformer}, 
 13  that represent each row as a list (like C{csv.reader}) or dict (like C{csv.DictReader}), 
 14  respectively. 
 15   
 16  @requires: Python 2.3 or later. 
 17  ''' 
 18   
 19   
 20  __all__ = ['RowTransformer', 'SequenceTransformer', 'MappingTransformer'] 
 21  __author__ = 'George Sakkis <george.sakkis AT gmail DOT com>' 
 22   
 23  # Python 2.3 support 
 24  try: set 
 25  except NameError: 
 26      from sets import Set as set 
 27   
#======= RowTransformer ================================================

class RowTransformer(object):
    '''Abstract base transformer class.

    Holds the per-column adaptors together with the include/exclude
    configuration. Concrete subclasses decide how a row is represented and
    implement L{__call__} accordingly.
    '''

    def __init__(self, key_adaptors, **kwds):
        '''Specifies the transformations to apply for each row.

        @param key_adaptors: Specifies the adaptor to transform each column.
            A column is identified by some C{key} and C{adaptor} is either a
            callable C{f(x)} or None (equivalent to the identity C{lambda x:x}).
        @type key_adaptors: Sequence of (key,adaptor) pairs

        @keyword default: An adaptor for all columns not specified explicitly in
            C{key_adaptors}.
        @type default: Callable C{f(x)} or C{None}

        @keyword include: The columns to include for each row:
            - If given, only the items at the respective columns are included,
              in the same order. The iterable is copied into a list, so one-shot
              iterators may be passed and the transformer stays reusable.
            - Otherwise if C{default} is given, all columns are included.
            - Otherwise, if neither C{include} nor C{default} are given,
              only the keys specified in C{key_adaptors} are included.
        @type include: Iterable of keys

        @keyword exclude: The columns to exclude for each row. This takes precedence
            over all other options: a column specified in C{exclude} will B{never}
            be included in the transformed rows.
        @type exclude: Iterable of keys

        @raise ValueError: If the same key appears more than once in
            C{key_adaptors}.
        '''
        self._exclude = set(kwds.get('exclude', ()))
        self._key2Adaptor = {}
        explicit_keys = []
        for key, adaptor in key_adaptors:
            if key in self._key2Adaptor:
                raise ValueError('More than one adaptors for column %r' % key)
            explicit_keys.append(key)
            self._key2Adaptor[key] = adaptor
        self._default_adaptor = kwds.get('default', None)
        if 'include' in kwds:
            # Materialize the iterable: it is stored on the instance, so a
            # one-shot iterator would otherwise be exhausted after its first
            # use. Note that an empty result is indistinguishable from the
            # "include all columns" marker used below.
            self._include = list(kwds['include'])
        elif 'default' not in kwds:
            # include only the explicitly specified columns
            self._include = explicit_keys
        else:
            # include all columns
            self._include = []

    def __call__(self, rows):
        '''Transform the given rows by this transformer.

        @param rows: An iterable of rows. The representation of a row is up to
            concrete subclasses to decide.

        @return: An iterator over the transformed rows.

        @raise NotImplementedError: Always; this method is abstract.
        '''
        raise NotImplementedError('Abstract method')


#======= SequenceTransformer ===================================================

class SequenceTransformer(RowTransformer):
    '''A L{RowTransformer} that expects and returns rows as I{sequences}.

    Examples:

    >>> import csv
    >>> rows = list(csv.reader(["1,3.34,4-3.2j,John",
    ...                         "4,4,4,4",
    ...                         "0,-1.1,3.4,None"]))

    >>> # by default, SequenceTransformer returns each row as is
    >>> list(SequenceTransformer()(rows)) == rows
    True

    >>> # transform and return the first two columns only
    >>> for row in SequenceTransformer(int,float)(rows):
    ...    print row
    [1, 3.3399999999999999]
    [4, 4.0]
    [0, -1.1000000000000001]

    >>> # as before, but keep the rest columns too
    >>> for row in SequenceTransformer(int, float, default=None)(rows):
    ...    print row
    [1, 3.3399999999999999, '4-3.2j', 'John']
    [4, 4.0, '4', '4']
    [0, -1.1000000000000001, '3.4', 'None']

    >>> # as before, but in reverse column order
    >>> for row in SequenceTransformer(int, float, default=None,
    ...                                include=reversed(xrange(4)))(rows):
    ...    print row
    ['John', '4-3.2j', 3.3399999999999999, 1]
    ['4', '4', 4.0, 4]
    ['None', '3.4', -1.1000000000000001, 0]

    >>> # transform the second column and leave the rest as is
    >>> for row in SequenceTransformer((1,float), default=None)(rows):
    ...    print row
    ['1', 3.3399999999999999, '4-3.2j', 'John']
    ['4', 4.0, '4', '4']
    ['0', -1.1000000000000001, '3.4', 'None']

    >>> # transform and return the 4th and the 2nd column, in this order
    >>> for row in SequenceTransformer((3,str),(1,float))(rows):
    ...    print row
    ['John', 3.3399999999999999]
    ['4', 4.0]
    ['None', -1.1000000000000001]

    >>> # exclude the 4th column and eval() the rest (XXX: Use eval for trusted data only)
    >>> for row in SequenceTransformer(default=eval, exclude=[3])(rows):
    ...    print row
    [1, 3.3399999999999999, (4-3.2000000000000002j)]
    [4, 4, 4]
    [0, -1.1000000000000001, 3.3999999999999999]
    '''

    def __init__(self, *adaptors, **kwds):
        '''Specifies what transformations to apply to each row.

        @param adaptors: The adaptors for selected columns. The i-th adaptor can be:
            - None: C{row[i]} will be left as is.
            - A callable C{f(x)}: C{row[i]} will be transformed by f to C{f(row[i])}.
            - A pair C{(j,A)}: C{row[j]} will be transformed by adaptor A, where
              A can be C{None} or a callable C{f(x)} as above. C{i} is ignored in
              this case.

        @keyword include: It can be:
            - An iterable of indices: Only the items at the respective columns
              are included (except for those that are also in C{exclude}). The
              iterable is copied into a list, so one-shot iterators are fine.
            - A positive integer N: shortcut for the indices C{0..N-1}.
            An empty C{include} selects all (non-excluded) columns.

        @keyword default,exclude: See L{RowTransformer.__init__}

        @raise ValueError: If a column index is not a non-negative integer.
        '''
        key_adaptors = []
        for i, adaptor in enumerate(adaptors):
            # check if 'adaptor' is actually an (i,adaptor) pair or not
            try:
                i, adaptor = adaptor
            except (TypeError, ValueError):
                # TypeError: not iterable (a callable or None);
                # ValueError: iterable, but not of exactly two items.
                # Either way it is a plain adaptor for positional column i.
                pass
            if not (isinstance(i, int) and i >= 0):
                raise ValueError('Indices must be non-negative integers '
                                 '(%r given)' % i)
            key_adaptors.append((i, adaptor))
        include = kwds.get('include')
        if isinstance(include, int):
            # an integer N is a shortcut for the first N columns
            kwds['include'] = list(range(include))
        elif include is not None:
            # Copy the iterable so that the transformer can be applied more
            # than once even if a one-shot iterator was given.
            kwds['include'] = list(include)
        RowTransformer.__init__(self, key_adaptors, **kwds)

    def __call__(self, rows):
        '''Transform the given rows by this transformer.

        @param rows: An iterable of sequences.
        @return: An iterator over the transformed rows as lists.
        '''
        exclude = self._exclude
        get_adaptor = self._key2Adaptor.get
        default = self._default_adaptor
        if self._include:
            # include selected columns, in the order given by 'include'
            indexed_adaptors = [(j, get_adaptor(j, default))
                                for j in self._include if j not in exclude]
            for row in rows:
                new_row = []
                for j, adaptor in indexed_adaptors:
                    if adaptor is None:
                        new_row.append(row[j])
                    else:
                        new_row.append(adaptor(row[j]))
                yield new_row
        else:
            # include all (non-excluded) columns
            excluded = object()     # sentinel marking an excluded column
            adaptors = []           # per-column adaptor cache, grown lazily
            for row in rows:
                new_row = []
                append = new_row.append
                for i, value in enumerate(row):
                    try:
                        adaptor = adaptors[i]
                    except IndexError:
                        # this will typically be raised only for the first row
                        if i in exclude:
                            adaptor = excluded
                        else:
                            adaptor = get_adaptor(i, default)
                        adaptors.append(adaptor)
                    if adaptor is not excluded:
                        if adaptor is None:
                            append(value)
                        else:
                            append(adaptor(value))
                yield new_row

#======= MappingTransformer ====================================================

class MappingTransformer(RowTransformer):
    '''A L{RowTransformer} that expects and returns rows as I{mappings}.

    Examples:

    >>> import csv
    >>> rows = list(csv.DictReader(["1,3.34,4-3.2j,John",
    ...                             "4,4,4,4",
    ...                             "0,-1.1,3.4,None" ],
    ...                            fieldnames="IFCS"))

    >>> # by default, MappingTransformer returns each row as is
    >>> list(MappingTransformer()(rows)) == rows
    True

    >>> # transform and return the first two columns only
    >>> for row in MappingTransformer({'I':int,'F':float})(rows):
    ...    print row
    {'I': 1, 'F': 3.3399999999999999}
    {'I': 4, 'F': 4.0}
    {'I': 0, 'F': -1.1000000000000001}

    >>> # as before, but keep the rest columns too
    >>> for row in MappingTransformer({'I':int, 'F':float}, default=None)(rows):
    ...    print row
    {'I': 1, 'C': '4-3.2j', 'S': 'John', 'F': 3.3399999999999999}
    {'I': 4, 'C': '4', 'S': '4', 'F': 4.0}
    {'I': 0, 'C': '3.4', 'S': 'None', 'F': -1.1000000000000001}

    >>> # transform the 'F' column and leave the rest as is
    >>> for row in MappingTransformer({'F':float}, default=None)(rows):
    ...    print row
    {'I': '1', 'C': '4-3.2j', 'S': 'John', 'F': 3.3399999999999999}
    {'I': '4', 'C': '4', 'S': '4', 'F': 4.0}
    {'I': '0', 'C': '3.4', 'S': 'None', 'F': -1.1000000000000001}

    >>> # transform and return the 'F' and 'S' columns
    >>> for row in MappingTransformer({'S':str,'F':float})(rows):
    ...    print row
    {'S': 'John', 'F': 3.3399999999999999}
    {'S': '4', 'F': 4.0}
    {'S': 'None', 'F': -1.1000000000000001}

    >>> # exclude the 'S' column and eval() the rest (XXX: Use eval for trusted data only)
    >>> for row in MappingTransformer(default=eval, exclude=['S'])(rows):
    ...    print row
    {'I': 1, 'C': (4-3.2000000000000002j), 'F': 3.3399999999999999}
    {'I': 4, 'C': 4, 'F': 4}
    {'I': 0, 'C': 3.3999999999999999, 'F': -1.1000000000000001}
    '''

    def __init__(self, adaptors=None, **kwds):
        '''Specifies what transformations to apply to each row.

        @param adaptors: A mapping from column names to adaptors. Omitting it
            (or passing C{None}) is the same as passing an empty mapping.
        @keyword include: An iterable of column names; it is copied into a
            list, so one-shot iterators are fine. An empty C{include} selects
            all (non-excluded) columns.
        @keyword default,exclude: See L{RowTransformer.__init__}
        '''
        # None instead of a shared mutable {} as the default value
        if adaptors is None:
            adaptors = {}
        include = kwds.get('include')
        if include is not None:
            # Copy the iterable so that the transformer can be applied more
            # than once even if a one-shot iterator was given.
            kwds['include'] = list(include)
        RowTransformer.__init__(self, adaptors.items(), **kwds)

    def __call__(self, rows):
        '''Transform the given rows by this transformer.

        @param rows: An iterable of mappings.
        @return: An iterator over the transformed rows as dicts.
        '''
        exclude = self._exclude
        get_adaptor = self._key2Adaptor.get
        default = self._default_adaptor
        if self._include:
            # include selected columns
            key_adaptors = [(key, get_adaptor(key, default))
                            for key in self._include if key not in exclude]
            for row in rows:
                new_row = {}
                for key, adaptor in key_adaptors:
                    if adaptor is None:
                        new_row[key] = row[key]
                    else:
                        new_row[key] = adaptor(row[key])
                yield new_row
        else:
            # include all (non-excluded) columns
            excluded = object()     # sentinel marking an excluded column
            key2adaptor = {}        # per-key adaptor cache, filled lazily
            for row in rows:
                new_row = {}
                for key in row:
                    try:
                        adaptor = key2adaptor[key]
                    except KeyError:
                        if key in exclude:
                            adaptor = excluded
                        else:
                            adaptor = get_adaptor(key, default)
                        key2adaptor[key] = adaptor
                    if adaptor is not excluded:
                        if adaptor is None:
                            new_row[key] = row[key]
                        else:
                            new_row[key] = adaptor(row[key])
                yield new_row


if __name__ == '__main__':
    # Run the examples embedded in the class docstrings as doctests.
    # NOTE(review): the examples use Python 2 ``print`` statements and
    # ``xrange``, so they presumably only pass on a Python 2 interpreter.
    import doctest
    doctest.testmod()