Module pyparsing
[frames | no frames]

Source Code for Module pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2015  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  35   
  36      from pyparsing import Word, alphas 
  37   
  38      # define grammar of a greeting 
  39      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  40   
  41      hello = "Hello, World!" 
  42      print (hello, "->", greet.parseString( hello )) 
  43   
  44  The program outputs the following:: 
  45   
  46      Hello, World! -> ['Hello', ',', 'World', '!'] 
  47   
  48  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  49  class names, and the use of '+', '|' and '^' operators. 
  50   
  51  The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 
  52  object with named attributes. 
  53   
  54  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  55   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  56   - quoted strings 
  57   - embedded comments 
  58  """ 
  59   
  60  __version__ = "2.1.2" 
  61  __versionTime__ = "29 Apr 2016 15:10 UTC" 
  62  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  63   
  64  import string 
  65  from weakref import ref as wkref 
  66  import copy 
  67  import sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import collections 
  72  import pprint 
  73  import functools 
  74  import itertools 
  75  import traceback 
  76   
  77  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  78   
  79  __all__ = [ 
  80  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  81  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  82  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  83  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  84  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  85  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',  
  86  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  87  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  88  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  89  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
  90  'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', 
  91  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  92  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  93  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  94  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  95  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  96  'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', 
  97  ] 
  98   
  99  PY_3 = sys.version.startswith('3') 
 100  if PY_3: 
 101      _MAX_INT = sys.maxsize 
 102      basestring = str 
 103      unichr = chr 
 104      _ustr = str 
 105   
 106      # build list of single arg builtins, that can be used as parse actions 
 107      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 108   
 109  else: 
 110      _MAX_INT = sys.maxint 
 111      range = xrange 
112 113 - def _ustr(obj):
114 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 115 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 116 then < returns the unicode object | encodes it with the default encoding | ... >. 117 """ 118 if isinstance(obj,unicode): 119 return obj 120 121 try: 122 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 123 # it won't break any existing code. 124 return str(obj) 125 126 except UnicodeEncodeError: 127 # Else encode it 128 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') 129 xmlcharref = Regex('&#\d+;') 130 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) 131 return xmlcharref.transformString(ret)
132 133 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 134 singleArgBuiltins = [] 135 import __builtin__ 136 for fname in "sum len sorted reversed list tuple set any all min max".split(): 137 try: 138 singleArgBuiltins.append(getattr(__builtin__,fname)) 139 except AttributeError: 140 continue 141 142 _generatorType = type((y for y in range(1)))
def _xml_escape(data):
    """Return C{data} with the characters &, >, <, " and ' replaced by XML named entities."""
    # the ampersand entry comes first so that the '&' introduced by the
    # later substitutions is not escaped a second time
    entity_table = (
        ('&', 'amp'),
        ('>', 'gt'),
        ('<', 'lt'),
        ('"', 'quot'),
        ("'", 'apos'),
    )
    for symbol, entity_name in entity_table:
        data = data.replace(symbol, '&' + entity_name + ';')
    return data
153
class _Constants(object):
    """Bare attribute container; defines no members of its own."""
# Character-set constants: upper- then lower-case ASCII letters, decimal
# digits, hex digits, letters plus digits, a single backslash, and every
# printable character that is not whitespace.
alphas = "".join((string.ascii_uppercase, string.ascii_lowercase))
nums = "0123456789"
hexnums = "".join((nums, "ABCDEF", "abcdef"))
alphanums = "".join((alphas, nums))
_bslash = "\\"
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
class ParseBaseException(Exception):
    """Common base class of the exceptions raised while parsing.

       Besides the stored C{loc}, C{msg}, C{pstr} and C{parserElement} attributes,
       C{lineno}, C{col}/C{column} and C{line} are computed on demand from
       C{loc} and C{pstr}.
    """
    # Performance tuning: a *lot* of these get constructed, so the
    # constructor is kept as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        # called with a single string: that string is the message and
        # there is no input text to report positions against
        self.msg, self.pstr = (pstr, "") if msg is None else (msg, pstr)
        self.parserElement = elem

    def __getattr__( self, aname ):
        """Compute the derived position attributes; supported names are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text (also as C{column})
            - line - returns the line containing the exception text
           Any other name raises C{AttributeError}.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        fields = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % fields

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Return the line of input containing the exception, stripped of surrounding
           whitespace, with C{markerString} inserted at the exception column.
           A false C{markerString} leaves the line unmarked.
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()

    def __dir__(self):
        # advertise the computed attributes ahead of the regular class members
        computed = ["lineno", "col", "line"]
        return computed + dir(type(self))
210
class ParseException(ParseBaseException):
    """Exception raised when a parse expression fails to match.

       Attributes available by name:
        - lineno - the line number of the exception text
        - col - the column number of the exception text
        - line - the line containing the exception text
    """
219
class ParseFatalException(ParseBaseException):
    """User-throwable exception, to be raised when inconsistent parse content
       is found; stops all parsing immediately."""
224
class ParseSyntaxException(ParseFatalException):
    """Same as C{L{ParseFatalException}}, but raised internally when an
       C{L{ErrorStop<And._ErrorStop>}} ('-' operator) signals that parsing must stop
       immediately because a syntax error was found that cannot be backtracked over."""
    def __init__(self, pe):
        # re-package the location, message and element of the exception being wrapped
        source_text, position = pe.pstr, pe.loc
        message, element = pe.msg, pe.parserElement
        super(ParseSyntaxException, self).__init__(
                                    source_text, position, message, element)
232
#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by C{validate()} if the grammar could be improperly recursive

       The trace passed to the constructor is kept as C{parseElementTrace} and
       included in the string form of the exception.
    """
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # wrap the trace in a 1-tuple: a bare tuple on the right of '%' would be
        # unpacked as the format arguments and raise TypeError
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
253
class _ParseResultsWithOffset(object):
    """Pair of two values kept as the tuple C{tup}; index 0 is the first
       constructor argument and index 1 the second, which C{setOffset} replaces."""
    def __init__(self,p1,p2):
        self.tup = p1, p2

    def __getitem__(self,i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self,i):
        # keep the first member, swap in the new second member
        first = self.tup[0]
        self.tup = (first, i)
263
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>})
       """
    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
        # an existing ParseResults is handed back as-is; its __doinit flag is
        # already False, so __init__ will not reset its contents
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                # names recorded here are returned as a list of all matches by __getitem__
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            # skip empty token values (None, '' or [])
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Integer or slice keys index the token list; any other key is looked up
           as a results name and raises C{KeyError} when it is not defined."""
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                # modal name: report only the most recent value
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,(int,slice)):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so a nested result does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name,occurrences in self.__tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        # membership tests results names only, not token values
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return ( not not self.__toklist )
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )
    def iterkeys( self ):
        """Returns all named result keys."""
        if hasattr(self.__tokdict, "iterkeys"):
            return self.__tokdict.iterkeys()
        else:
            return iter(self.__tokdict)

    def itervalues( self ):
        """Returns all named result values."""
        return (self[k] for k in self.iterkeys())

    def iteritems( self ):
        """Returns all named result keys and values as C{(key, value)} pairs."""
        return ((k, self[k]) for k in self.iterkeys())

    if PY_3:
        keys = iterkeys
        values = itervalues
        items = iteritems
    else:
        def keys( self ):
            """Returns all named result keys."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result keys and values as a list of tuples."""
            return list(self.iteritems())

    def haskeys( self ):
        """Since keys() returns an iterator, this method is helpful in bypassing
           code that looks for the existence of any defined results names."""
        return bool(self.__tokdict)

    def pop( self, *args, **kwargs):
        """Removes and returns item at specified index (default=last).
           Supports both list and dict semantics for pop(). If passed no
           argument or an integer argument, it will use list semantics
           and pop tokens from the list of parsed tokens. If passed a
           non-integer argument (most likely a string), it will use dict
           semantics and pop the corresponding value from any defined
           results names. A second default return value argument is
           supported, just as in dict.pop().  The only keyword argument
           accepted is C{default}; any other raises C{TypeError}."""
        if not args:
            args = [-1]
        for k,v in kwargs.items():
            if k == 'default':
                args = (args[0], v)
            else:
                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
        if (isinstance(args[0], int) or
                        len(args) == 1 or
                        args[0] in self):
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given C{defaultValue} or C{None} if no
           C{defaultValue} is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts new element at location index in the list of parsed tokens."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name,occurrences in self.__tokdict.items():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def append( self, item ):
        """Add single element to end of ParseResults list of elements."""
        self.__toklist.append(item)

    def extend( self, itemseq ):
        """Add sequence of elements to end of ParseResults list of elements."""
        if isinstance(itemseq, ParseResults):
            self += itemseq
        else:
            self.__toklist.extend(itemseq)

    def clear( self ):
        """Clear all elements and results names."""
        del self.__toklist[:]
        self.__tokdict.clear()

    def __getattr__( self, name ):
        """Attribute-style access to results names; an undefined name yields C{""}
           instead of raising C{AttributeError}."""
        try:
            return self[name]
        except KeyError:
            return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            # named results of other are re-based past the end of our token list
            offset = len(self.__toklist)
            addoffset = lambda a: offset if a<0 else a+offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        if isinstance(other,int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        else:
            # this may raise a TypeError - so be it
            return other + self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens; nested
           C{ParseResults} become nested lists, other tokens are returned unchanged."""
        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]

    def asDict( self ):
        """Returns the named parse results as a nested dictionary."""
        if PY_3:
            item_fn = self.items
        else:
            item_fn = self.iteritems

        def toItem(obj):
            # nested results with names become dicts, those without become lists
            if isinstance(obj, ParseResults):
                if obj.haskeys():
                    return obj.asDict()
                else:
                    return [toItem(v) for v in obj]
            else:
                return obj

        return dict((k,toItem(v)) for k,v in item_fn())

    def copy( self ):
        """Returns a new copy of a C{ParseResults} object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist)
        # NOTE(review): whitespace in the listing this was taken from is collapsed;
        # confirm the width of this indent unit against the original file
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i,res in enumerate(self.__toklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object 'sub' itself is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views cannot be subscripted on Python 3, so take the
               # first (and only) entry through an iterator
               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
            return next(iter(self.__tokdict.keys()))
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a C{ParseResults}.
           Accepts an optional C{indent} argument so that this string can be embedded
           in a nested display of other data."""
        # NOTE(review): whitespace in the listing this was taken from is collapsed;
        # confirm the ' ' per-depth indent unit against the original file
        out = []
        NL = '\n'
        out.append( indent+_ustr(self.asList()) )
        if self.haskeys():
            items = sorted(self.items())
            for k,v in items:
                if out:
                    out.append(NL)
                out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
                if isinstance(v,ParseResults):
                    if v:
                        out.append( v.dump(indent,depth+1) )
                    else:
                        out.append(_ustr(v))
                else:
                    out.append(_ustr(v))
        elif any(isinstance(vv,ParseResults) for vv in self):
            v = self
            for i,vv in enumerate(v):
                if isinstance(vv,ParseResults):
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
                else:
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """Pretty-printer for parsed results as a list, using the C{pprint} module.
           Accepts additional positional or keyword args as defined for the
           C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
        pprint.pprint(self.asList(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   # the weak reference is resolved to the parent object (or None)
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __getnewargs__(self):
        return self.__toklist, self.__name, self.__asList, self.__modal

    def __dir__(self):
        return (dir(type(self)) + list(self.keys()))
# Register ParseResults as a virtual MutableMapping subclass.  The ABCs live in
# collections.abc on Python 3, and the collections.MutableMapping alias was
# removed in Python 3.10, so prefer the new location and fall back for Python 2.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    _MutableMapping = collections.MutableMapping
_MutableMapping.register(ParseResults)
def col (loc,strg):
    """Return the 1-based column of position C{loc} within C{strg}, treating
       newlines as line separators.  A C{loc} that points at a newline character
       reports column 1.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>}
       for more information on parsing strings containing C{<TAB>}s, and suggested
       methods to maintain a consistent view of the parsed string, the parse
       location, and line and column positions within the parsed string.
    """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # rfind gives -1 when there is no earlier newline, which makes the
    # column come out as loc + 1 on the first line
    preceding_newline = strg.rfind("\n", 0, loc)
    return loc - preceding_newline
725
def lineno(loc,strg):
    """Return the 1-based line number of position C{loc} within C{strg}, treating
       newlines as line separators.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>}
       for more information on parsing strings containing C{<TAB>}s, and suggested
       methods to maintain a consistent view of the parsed string, the parse
       location, and line and column positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
737
def line( loc, strg ):
    """Return the text of the line that contains position C{loc} within C{strg},
       without its newline, treating newlines as line separators.
       """
    # one past the previous newline, or 0 when there is none (rfind gives -1)
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # last line: runs to the end of the string
        return strg[start:]
    return strg[start:end]
747
def _defaultStartDebugAction( instring, loc, expr ):
    """Print a 'Match <expr> at loc <loc>(<line>,<col>)' message for C{expr}
       at position C{loc} of C{instring}."""
    expr_text = _ustr(expr)
    loc_text = _ustr(loc)
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print ("".join(("Match ", expr_text, " at loc ", loc_text, position)))
750
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print a 'Matched <expr> -> <tokens as list>' message; the string and
       location arguments are accepted but not used."""
    expr_text = _ustr(expr)
    tokens_text = str(toks.asList())
    print ("".join(("Matched ", expr_text, " -> ", tokens_text)))
753
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print an 'Exception raised:<exc>' message; only C{exc} is used."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
756
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.
       Accepts any positional arguments and ignores them."""
    return None
760 761 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 762 #~ 'decorator to trim function calls to match the arity of the target' 763 #~ def _trim_arity(func, maxargs=3): 764 #~ if func in singleArgBuiltins: 765 #~ return lambda s,l,t: func(t) 766 #~ limit = 0 767 #~ foundArity = False 768 #~ def wrapper(*args): 769 #~ nonlocal limit,foundArity 770 #~ while 1: 771 #~ try: 772 #~ ret = func(*args[limit:]) 773 #~ foundArity = True 774 #~ return ret 775 #~ except TypeError: 776 #~ if limit == maxargs or foundArity: 777 #~ raise 778 #~ limit += 1 779 #~ continue 780 #~ return wrapper 781 782 # this version is Python 2.x-3.x cross-compatible 783 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Wrap C{func} so it can be called with the full C{(s, loc, toks)} argument
       list even if it accepts fewer arguments.

       A C{func} listed in C{singleArgBuiltins} is wrapped in a lambda that passes
       it only the third argument.  Any other C{func} gets a wrapper that first
       calls it with all arguments and, each time the call itself raises
       C{TypeError}, drops one more leading argument and retries, until more than
       C{maxargs} arguments have been dropped; the C{TypeError} is then re-raised.
       Once a call has succeeded, the number of dropped arguments is kept for
       later calls and any further C{TypeError} is re-raised unchanged.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists, so that wrapper can update them without 'nonlocal'
    limit = [0]
    foundArity = [False]

    # both helpers produce a list of (filename, line number) entries
    # NOTE(review): the [-3] index and the .filename/.lineno attributes rely on the
    # shape of what traceback.extract_stack()/extract_tb() return on the running
    # Python 3 version - confirm for every supported release
    if PY_3:
        def extract_stack():
            frame_summary = traceback.extract_stack()[-3]
            return [(frame_summary.filename, frame_summary.lineno)]
        def extract_tb(tb):
            frames = traceback.extract_tb(tb)
            frame_summary = frames[-1]
            return [(frame_summary.filename, frame_summary.lineno)]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, THE CONSTANT ADDED TO 'this_line[1]' BELOW
    # MUST BE MODIFIED!!!!
    this_line = extract_stack()[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+6)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # the innermost traceback entry is the call line above only
                        # when the call itself failed, not code inside func
                        if not extract_tb(tb)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        del tb

                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise
    return wrapper
835 -class ParserElement(object):
836 """Abstract base level parser element class.""" 837 DEFAULT_WHITE_CHARS = " \n\t\r" 838 verbose_stacktrace = False 839 840 @staticmethod
def setDefaultWhitespaceChars( chars ):
    """Replace the class-wide C{ParserElement.DEFAULT_WHITE_CHARS} setting with C{chars}.
    """
    setattr(ParserElement, "DEFAULT_WHITE_CHARS", chars)
845 846 @staticmethod
def inlineLiteralsUsing(cls):
    """
    Choose the class used to include string literals in a parser; it is stored
    as C{ParserElement.literalStringClass}.
    """
    setattr(ParserElement, "literalStringClass", cls)
852
def __init__( self, savelist=False ):
    """Set every per-element setting to its default; C{savelist} is stored as C{saveAsList}."""
    # actions and results
    self.parseAction = []
    self.failAction = None
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    # whitespace handling
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    # used when checking for left-recursion
    self.mayReturnEmpty = False
    self.keepTabs = False
    self.ignoreExprs = []
    self.debug = False
    self.streamlined = False
    # used to optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError = True
    self.errmsg = ""
    # used to mark results names as modal (report only last) or cumulative (list all)
    self.modalResults = True
    # custom debug actions
    self.debugActions = ( None, None, None )
    self.re = None
    # used to avoid redundant calls to preParse
    self.callPreparse = True
    self.callDuringTry = False
875
def copy( self ):
    """Return a shallow copy of this C{ParserElement} that has its own parse-action
       and ignore-expression lists.  Useful for attaching different parse actions
       to the same parsing pattern."""
    duplicate = copy.copy( self )
    # give the duplicate its own lists so later changes do not leak back
    duplicate.parseAction = self.parseAction[:]
    duplicate.ignoreExprs = self.ignoreExprs[:]
    if self.copyDefaultWhiteChars:
        # pick up the class-wide default whitespace characters
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
885
def setName( self, name ):
    """Give this expression a name for use in debugging, and derive its
       "Expected <name>" error message from it.  Returns C{self}."""
    self.name = name
    self.errmsg = "Expected " + self.name
    # keep an already attached exception object in step with the new message
    if hasattr(self, "exception"):
        self.exception.msg = self.errmsg
    return self
893
def setResultsName( self, name, listAllMatches=False ):
    """Define the name under which matching tokens can be referenced as a nested
       attribute of the returned parse results.
       NOTE: this returns a *copy* of the original C{ParserElement} object, so that
       a basic element, such as an integer, can be defined once and referenced in
       multiple places under different names.

       A C{name} ending in C{"*"} has the asterisk removed and behaves as if
       C{listAllMatches} were true.

       Results names can also be set with the abbreviated syntax
       C{expr("name")} in place of C{expr.setResultsName("name")} -
       see L{I{__call__}<__call__>}.
    """
    named = self.copy()
    collect_all = listAllMatches
    if name.endswith("*"):
        name, collect_all = name[:-1], True
    named.resultsName = name
    named.modalResults = not collect_all
    return named
912
def setBreak(self,breakFlag = True):
    """Arrange for the Python pdb debugger to be entered just before this
    element is parsed.  Pass C{breakFlag} as True to enable, False to
    disable.  Returns C{self}.
    """
    if not breakFlag:
        # restore the wrapped parse method, if a breaker is installed
        if hasattr(self._parse,"_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrappedParse = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrappedParse( instring, loc, doActions, callPreParse )
    # remember the wrapped method so that setBreak(False) can undo this
    breaker._originalParseMethod = wrappedParse
    self._parse = breaker
    return self
930
def setParseAction( self, *fns, **kwargs ):
    """Replace this element's parse actions with the given callables, to be
    run when the element matches successfully.

    A parse action is a callable taking 0-3 arguments, called as
    C{fn(s,loc,toks)}, C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s    = the original string being parsed (see note below)
     - loc  = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    A function that modifies the tokens can return them, and the returned
    value replaces the original tokens.  Otherwise no return value is needed.

    The optional keyword argument C{callDuringTry} (default False) is
    stored on the element as C{callDuringTry}.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>}
    for more information on parsing strings containing C{<TAB>}s, and
    suggested methods to maintain a consistent view of the parsed string,
    the parse location, and line and column positions within the parsed
    string.
    """
    self.parseAction = [ _trim_arity(fn) for fn in fns ]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
951
def addParseAction( self, *fns, **kwargs ):
    """Append the given callables to this expression's list of parse
    actions, keeping those already set.
    See L{I{setParseAction}<setParseAction>} for the callable signature.

    C{callDuringTry} is sticky: once true it stays true, otherwise it takes
    the value of the optional C{callDuringTry} keyword argument.
    """
    wrapped = [ _trim_arity(fn) for fn in fns ]
    self.parseAction += wrapped
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
957
def addCondition(self, *fns, **kwargs):
    """Add boolean predicate functions to this expression's list of parse
    actions.  See L{I{setParseAction}<setParseAction>} for the accepted
    callable signatures.

    Each predicate is called with the usual parse action arguments; when
    it returns a false value, a C{L{ParseException}} is raised at the
    match location.  Optional keyword argument C{message} can be used to
    define a custom message for that exception; C{callDuringTry} is
    handled as in L{I{addParseAction}<addParseAction>}.
    Returns C{self}.
    """
    msg = kwargs.get("message") or "failed user-defined condition"

    def makeConditionAction(condition):
        # A factory gives each parse action its own binding of the
        # predicate.  Closing over the loop variable directly would make
        # every action evaluate the *last* predicate in fns.
        def pa(s,l,t):
            if not bool(condition(s,l,t)):
                raise ParseException(s,l,msg)
            return t
        return pa

    for fn in fns:
        # normalize the predicate's arity once, not on every invocation
        self.parseAction.append(makeConditionAction(_trim_arity(fn)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
def setFailAction( self, fn ):
    """Register the action to run when parsing fails at this expression.

    The fail action C{fn} is a callable taking the arguments
    C{fn(s,loc,expr,err)} where:
     - s = string being parsed
     - loc = location where expression match was attempted and failed
     - expr = the parse expression that failed
     - err = the exception thrown
    Its return value is not used.  It may throw C{L{ParseFatalException}}
    if it is desired to stop parsing immediately.
    Returns C{self}.
    """
    self.failAction = fn
    return self
984
def _skipIgnorables( self, instring, loc ):
    """Advance C{loc} past every run of ignorable expressions.

    Sweeps over C{self.ignoreExprs} repeatedly, letting each expression
    consume as many consecutive matches as it can, until a full sweep
    matches nothing.  Returns the resulting location.
    """
    skippedAny = True
    while skippedAny:
        skippedAny = False
        for ignorable in self.ignoreExprs:
            try:
                # keep matching this ignorable until it fails
                while True:
                    loc,_ = ignorable._parse( instring, loc )
                    skippedAny = True
            except ParseException:
                pass
    return loc
997
def preParse( self, instring, loc ):
    """Return the location at which matching should start: C{loc} advanced
    past any ignorable expressions and then, if C{skipWhitespace} is set,
    past any characters found in C{whiteChars}.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    white = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in white:
        loc += 1
    return loc
1009
def parseImpl( self, instring, loc, doActions=True ):
    """Default matching implementation: consume nothing and return an
    empty token list, i.e. C{(loc, [])}.
    """
    tokens = []
    return loc, tokens
1012
def postParse( self, instring, loc, tokenlist ):
    """Hook applied to the tokens returned by C{parseImpl}; this default
    implementation hands C{tokenlist} back untouched.
    """
    return tokenlist
1015 1016 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element at C{loc} without consulting the packrat cache.

    Steps: optionally call C{preParse}, call C{parseImpl}, pass the tokens
    through C{postParse}, wrap them in a C{ParseResults}, then run the
    parse actions (only when C{doActions} or C{self.callDuringTry} is set).

    Parameters:
     - instring - the string being parsed
     - loc - location at which to attempt the match
     - doActions - passed to C{parseImpl}; also enables the parse actions
     - callPreParse - when False, C{preParse} is skipped

    Returns the tuple C{(loc, retTokens)}: the location after the match
    and the resulting C{ParseResults}.

    An C{IndexError} from C{parseImpl} is converted into a
    C{ParseException} located at the end of the string.  When debugging
    or a fail action is active, the debug exception action and the fail
    action are invoked before a C{ParseBaseException} propagates.
    """
    debugging = ( self.debug ) #and doActions )

    # slow path: the debug/fail hooks need to be called around parseImpl
    if debugging or self.failAction:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        # start of the match, reported to parse actions and debug hooks
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                # reading past the end of the input is reported as a parse failure
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no hooks to call
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # only pay for the IndexError guard when the element may index
        # past the end, or the incoming loc is already at/after the end
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a non-None return value replaces the current results
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            # same loop as above, without the debug exception hook
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1088
def tryParse( self, instring, loc ):
    """Attempt a match at C{loc} with C{doActions=False} and return only
    the location following the match.

    A C{ParseFatalException} is downgraded to a plain C{ParseException}
    located at C{loc}.
    """
    try:
        endLoc = self._parse( instring, loc, doActions=False )[0]
        return endLoc
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
1094
def canParseNext(self, instring, loc):
    """Return True if this element matches at C{loc}, False if the attempt
    raises C{ParseException} or C{IndexError}.
    """
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True
# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing front end for C{_parseNoCache}.

    Results are stored in C{ParserElement._exprArgCache} under the key
    C{(self, instring, loc, callPreParse, doActions)}.  Both successful
    results and raised C{ParseBaseException}s are cached; a cached
    exception is re-raised, and a cached result is returned with a copy
    of its tokens.
    """
    cache = ParserElement._exprArgCache
    key = (self,instring,loc,callPreParse,doActions)
    if key in cache:
        cached = cache[ key ]
        if isinstance(cached, Exception):
            raise cached
        endLoc, toks = cached[0], cached[1]
        # hand out a copy so callers cannot alter the cached tokens
        return (endLoc,toks.copy())

    try:
        result = self._parseNoCache( instring, loc, doActions, callPreParse )
        # store a copy, the caller receives the original
        cache[ key ] = (result[0],result[1].copy())
        return result
    except ParseBaseException as pe:
        # drop the traceback before storing the exception in the cache
        pe.__traceback__ = None
        cache[ key ] = pe
        raise
# parsing entry point used by all expressions; uncached unless enablePackrat() is called
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}
@staticmethod
def resetCache():
    """Discard every entry of the shared C{_exprArgCache}."""
    cache = ParserElement._exprArgCache
    cache.clear()
_packratEnabled = False
@staticmethod
def enablePackrat():
    """Turn on "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happen
    often in many complex grammars) can then immediately return a cached
    value, instead of re-executing parsing/validating code.  Both valid
    results and parsing exceptions are memoized.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call the class method C{ParserElement.enablePackrat()}.  If
    your program uses C{psyco} to "compile as you go", you must call
    C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
    Python will crash.  For best results, call C{enablePackrat()} immediately
    after importing pyparsing.

    Calling this more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
1151
def parseString( self, instring, parseAll=False ):
    """Run this parse expression against the given string, starting at
    location 0, and return the resulting tokens.
    This is the main interface to the client code, once the complete
    expression has been built.

    To require that the entire input string be successfully parsed,
    set C{parseAll} to True (equivalent to ending the grammar with
    C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - defining your parse action using the full C{(s,loc,toks)} signature, and
       referencing the input string using the parse action's C{s} argument
     - explicitly expanding the tabs in your input string before calling
       C{parseString}
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        endLoc, results = self._parse( instring, 0 )
        if parseAll:
            # nothing but skippable text may remain after the match
            endLoc = self.preParse( instring, endLoc )
            endOfInput = Empty() + StringEnd()
            endOfInput._parse( instring, endLoc )
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
    return results
1196
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    This is a generator: each match is yielded as the tuple
    C{(tokens, start, end)}.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the methods to locals once, they are called on every iteration
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                # preParse was just done explicitly, so tell _parse to skip it
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here: retry one character past the pre-parsed location
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): 'nextloc' (lowercase) is the pre-parsed
                        # location and is distinct from 'nextLoc', the end of
                        # the match - confirm that jumping to nextLoc here is
                        # the intended overlap behavior
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc: step forward so the loop makes progress
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1246
def transformString( self, instring ):
    """Extension to C{L{scanString}}, replacing matched text with the
    (possibly modified) tokens returned from a parse action.  To use
    C{transformString}, define a grammar and attach a parse action to it
    that modifies the returned token list.  Invoking C{transformString()}
    on a target string will then scan for matches, and replace the matched
    text patterns according to the logic in the parse action.
    C{transformString()} returns the resulting transformed string.

    Note that this sets C{keepTabs} to True on this element."""
    pieces = []
    prevEnd = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks,start,end in self.scanString( instring ):
            # unmatched text between the previous match and this one
            pieces.append( instring[prevEnd:start] )
            if toks:
                if isinstance(toks,ParseResults):
                    pieces.extend( toks.asList() )
                elif isinstance(toks,list):
                    pieces.extend( toks )
                else:
                    pieces.append(toks)
            prevEnd = end
        pieces.append(instring[prevEnd:])
        nonEmpty = [p for p in pieces if p]
        return "".join(map(_ustr,_flatten(nonEmpty)))
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
1279
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to C{L{scanString}}, giving direct access to the
    tokens of every match of this parse expression: the start and end
    locations are dropped and the tokens are collected into a single
    C{ParseResults}.  May be called with optional C{maxMatches} argument,
    to clip searching after 'n' matches are found.
    """
    try:
        found = [ toks for toks,start,end in self.scanString( instring, maxMatches ) ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
1293
def __add__(self, other ):
    """Implementation of + operator - returns C{L{And}}.

    A string operand is first converted with C{literalStringClass}.  For
    any other non-C{ParserElement} operand a C{SyntaxWarning} is issued
    and None is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1303
def __radd__(self, other ):
    """Implementation of + operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with C{literalStringClass}.  For
    any other non-C{ParserElement} operand a C{SyntaxWarning} is issued
    and None is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1313
def __sub__(self, other):
    """Implementation of - operator, returns C{L{And}} with error stop.

    A string operand is first converted with C{literalStringClass}.  For
    any other non-C{ParserElement} operand a C{SyntaxWarning} is issued
    and None is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1323
def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with C{literalStringClass}.  For
    any other non-C{ParserElement} operand a C{SyntaxWarning} is issued
    and None is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1333
def __mul__(self,other):
    """Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} when C{other} is neither an int nor a tuple of
    ints/None, and C{ValueError} for negative counts, a maximum smaller
    than the minimum, or a multiplier of 0 or (0,0).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None, so (n,) reads as (n,None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements counts the optional repetitions only
            optElements -= minElements
        else:
            # interpolate with %: passing the types as extra exception
            # arguments leaves the message unformatted
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects" % (type(other[0]),type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: Optional(self + Optional(self + ...)), n levels deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """Implementation of * operator when the multiplier is the left
    operand; delegates to L{I{__mul__}<__mul__>}."""
    product = self.__mul__(other)
    return product
1404
def __or__(self, other ):
    """Implementation of | operator - returns C{L{MatchFirst}}.

    A string operand is first converted with C{literalStringClass}.  For
    any other non-C{ParserElement} operand a C{SyntaxWarning} is issued
    and None is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1414
def __ror__(self, other ):
    """Implementation of | operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with C{literalStringClass}.  For
    any other non-C{ParserElement} operand a C{SyntaxWarning} is issued
    and None is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1424
def __xor__(self, other ):
    """Implementation of ^ operator - returns C{L{Or}}.

    A string operand is first converted with C{literalStringClass}.  For
    any other non-C{ParserElement} operand a C{SyntaxWarning} is issued
    and None is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1434
def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with C{literalStringClass}.  For
    any other non-C{ParserElement} operand a C{SyntaxWarning} is issued
    and None is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1444
def __and__(self, other ):
    """Implementation of & operator - returns C{L{Each}}.

    A string operand is first converted with C{literalStringClass}.  For
    any other non-C{ParserElement} operand a C{SyntaxWarning} is issued
    and None is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1454
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with C{literalStringClass}.  For
    any other non-C{ParserElement} operand a C{SyntaxWarning} is issued
    and None is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1464
def __invert__( self ):
    """Implementation of ~ operator - wraps this element in a C{L{NotAny}}."""
    negated = NotAny( self )
    return negated
1468
def __call__(self, name=None):
    """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
      userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
    could be written as::
      userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

    If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
    passed as C{True}.

    If C{name} is omitted (or None), this is the same as calling C{L{copy}}.
    """
    if name is None:
        return self.copy()
    return self.setResultsName(name)
1484
def suppress( self ):
    """Return this C{ParserElement} wrapped in a C{L{Suppress}}, so that its
    output is left out of the results; useful to keep punctuation from
    cluttering up returned output.
    """
    suppressed = Suppress( self )
    return suppressed
1490
def leaveWhitespace( self ):
    """Turn off the skipping of whitespace before matching the characters
    in the C{ParserElement}'s defined pattern.  This is normally only used
    internally by the pyparsing module, but may be needed in some
    whitespace-sensitive grammars.  Returns C{self}.
    """
    self.skipWhitespace = False
    return self
1498
def setWhitespaceChars( self, chars ):
    """Replace the whitespace characters skipped by this element with
    C{chars}.  Whitespace skipping is switched on, and
    C{copyDefaultWhiteChars} is cleared so that copies keep this setting
    instead of the default.  Returns C{self}.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
1506
def parseWithTabs( self ):
    """Keep C{<TAB>} characters in the input, overriding the default
    behavior of expanding them to spaces before parsing.
    Must be called before C{parseString} when the input grammar contains
    elements that match C{<TAB>} characters.  Returns C{self}."""
    self.keepTabs = True
    return self
1513
def ignore( self, other ):
    """Register an expression to be ignored (e.g., comments) while doing
    pattern matching; may be called repeatedly, to define multiple comment
    or other ignorable patterns.

    A string is wrapped in C{Suppress}.  A C{Suppress} is added only if it
    is not already registered; any other expression is copied and wrapped
    in C{Suppress} before being added.  Returns C{self}.
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other.copy() ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
1528
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable display of debugging messages while doing pattern matching,
    using the given callables for the start, success and exception events.
    A false value for any of them selects the module default action.
    Returns C{self}."""
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1536
def setDebug( self, flag=True ):
    """Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable (installing the default debug actions),
    False to disable.  Returns C{self}."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
1545
def __str__( self ):
    """Return this element's C{name} attribute."""
    label = self.name
    return label
1548
def __repr__( self ):
    """Return the same text as the string conversion done by C{_ustr}."""
    text = _ustr(self)
    return text
1551
def streamline( self ):
    """Mark this element as streamlined and reset its cached C{strRepr}.
    Returns C{self}."""
    self.strRepr = None
    self.streamlined = True
    return self
1556
def checkRecursion( self, parseElementList ):
    """Recursion check hook; this base implementation does nothing."""
    return None
1559
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite
    recursive definitions.  This implementation runs C{checkRecursion}
    with a fresh, empty element list; C{validateTrace} is not used here."""
    emptyTrace = []
    self.checkRecursion( emptyTrace )
1563
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    Parameters:
     - file_or_filename - an object with a C{read()} method, or the name
       of a file to open in text mode
     - parseAll - (default=False) - flag passed on to C{L{parseString}}

    Returns the result of C{L{parseString}} on the file contents.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method: treat the argument as a filename.  The with
        # block closes the file even if reading it raises.
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1583
def __eq__(self,other):
    """Equality test.

    Against another C{ParserElement}: true for the same object, or when
    both have equal instance attributes.  Against a string: true when this
    element parses the whole string successfully.  Anything else is
    compared through the C{super} proxy.
    """
    if isinstance(other, ParserElement):
        return self is other or vars(self) == vars(other)
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement,self)==other
1595
def __ne__(self,other):
    """Inequality test: the negation of C{self == other}."""
    isEqual = (self == other)
    return not isEqual
1598
def __hash__(self):
    """Hash by object identity: the hash of C{id(self)}."""
    identity = id(self)
    return hash(identity)
1601
def __req__(self,other):
    """Return the result of C{self == other}."""
    outcome = (self == other)
    return outcome
1604
def __rne__(self,other):
    """Return the negation of C{self == other}."""
    isEqual = (self == other)
    return not isEqual
1607
def runTests(self, tests, parseAll=False):
    """Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=False) - flag to pass to C{L{parseString}} when running tests

    The report for each test is printed; nothing is returned.
    """
    if isinstance(tests, basestring):
        # call strip() on each line itself: the unbound str.strip rejects
        # non-str text such as Python 2 unicode, which basestring admits
        tests = [ t.strip() for t in tests.splitlines() ]
    for t in tests:
        out = [t]
        try:
            out.append(self.parseString(t, parseAll=parseAll).dump())
        except ParseBaseException as pe:
            # the base class is caught so that fatal/syntax parse errors
            # are reported like any other failure instead of aborting the
            # remaining tests
            if '\n' in t:
                # multi-line test: show the offending line, with a caret under the column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^')
            else:
                out.append(' '*pe.loc + '^')
            out.append(str(pe))
        out.append('')
        print('\n'.join(out))
1632
class Token(ParserElement):
    """Abstract C{ParserElement} subclass, the common base for atomic
    matching patterns."""
    def __init__( self ):
        # tokens never save their results as a list
        super(Token,self).__init__( savelist=False )
1638
class Empty(Token):
    """A token that matches nothing at all, and therefore always succeeds."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        self.mayIndexError = False
        self.mayReturnEmpty = True
1647
class NoMatch(Token):
    """A token that fails at every location."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Always raise C{ParseException} at C{loc}."""
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
1660
class Literal(Token):
    """Token that matches one specific string exactly."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # empty match string: warn, and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(loc+matchLen, match)} when the match string is found
        at C{loc}, otherwise raise C{ParseException}."""
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        if self.matchLen != 1 and not instring.startswith(self.match,loc):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc+self.matchLen, self.match
_L = Literal
ParserElement.literalStringClass = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with C{L{Literal}}::
         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
       Accepts two optional constructor arguments in addition to the keyword string:
       C{identChars} is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
       matching, default is C{False}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            # resolve the default at call time, so that a prior call to
            # setDefaultKeywordChars() is honored; a default bound in the
            # signature would be frozen when the class is defined
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are all done on upper-cased text
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at C{loc}, requiring that the characters just
        before and just after it (if any) are not in C{identChars}.
        Returns C{(loc+matchLen, match)} or raises C{ParseException}."""
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Copy this keyword; the copy's C{identChars} is set to the
        current C{DEFAULT_KEYWORD_CHARS}."""
        c = super(Keyword,self).copy()
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
1746
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__(self, matchString):
        # The base Literal is given the upper-cased text; parseImpl compares
        # against upper-cased input.
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal, it is what a match returns.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(end, returnString)} if the upper-cased text at C{loc}
        equals the stored upper-cased literal; raise C{ParseException}
        otherwise."""
        stop = loc + self.matchLen
        if instring[loc:stop].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return stop, self.returnString
1763
class CaselessKeyword(Keyword):
    """Caseless version of C{L{Keyword}}.

    Matches the keyword text regardless of case, and only when it is not
    adjacent to an identifier character on either side.  The returned token
    is the keyword as originally defined, not the input text.
    """
    def __init__(self, matchString, identChars=None):
        """C{identChars} defaults (when C{None}) to the current value of
        C{Keyword.DEFAULT_KEYWORD_CHARS}."""
        if identChars is None:
            # Looked up at call time so setDefaultKeywordChars() is honoured;
            # resolved here rather than passed on as None so this class does
            # not depend on how the base constructor treats None.
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(end, self.match)} for a caseless keyword match at C{loc};
        raise C{ParseException} otherwise."""
        stop = loc + self.matchLen
        if (instring[loc:stop].upper() == self.caselessmatch and
                # not followed by an identifier character
                (loc >= len(instring) - self.matchLen or
                 instring[stop].upper() not in self.identChars) and
                # not preceded by one either -- the same check that
                # Keyword.parseImpl applies in caseless mode
                (loc == 0 or instring[loc - 1].upper() not in self.identChars)):
            return stop, self.match
        raise ParseException(instring, loc, self.errmsg, self)
1773
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction. An optional
       C{excludeChars} parameter can list characters that might be found in
       the input C{bodyChars} string; useful to define a word of all printables
       except for one or two characters, for instance.
    """
    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
        """Raises C{ValueError} if C{min} is less than 1."""
        super(Word, self).__init__()
        if excludeChars:
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars:
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            # no separate body set: body characters are the initial characters
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # With default length limits and no space in either character set,
        # build an equivalent regular expression for parseImpl to use.
        if ' ' not in self.initCharsOrig + self.bodyCharsOrig and (min == 1 and max == 0 and exact == 0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                       _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                       _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b" + self.reString + r"\b"
            try:
                self.re = re.compile(self.reString)
            except Exception:
                # best effort: fall back to the character-by-character scan,
                # but let KeyboardInterrupt/SystemExit propagate
                self.re = None

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(end, matched_text)} for a word starting at C{loc};
        raise C{ParseException} if no acceptable word is found there."""
        if self.re:
            result = self.re.match(instring, loc)
            if not result:
                raise ParseException(instring, loc, self.errmsg, self)

            loc = result.end()
            return loc, result.group()

        if not (instring[loc] in self.initChars):
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # an explicit maximum means the word must also end here
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # reject when a body character sits directly on either side
            if (start > 0 and instring[start - 1] in bodychars) or (loc < instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        """Return the base-class string if that succeeds, else a cached
        C{W:(...)} summary of the character sets."""
        try:
            return super(Word, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first four characters
                if len(s) > 4:
                    return s[:4] + "..."
                else:
                    return s

            if (self.initCharsOrig != self.bodyCharsOrig):
                self.strRepr = "W:(%s,%s)" % (charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig))
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1896
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__(self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled regular expression, which
        is then used directly.  Raises C{ValueError} for any other type, and
        re-raises the C{re} error (after a C{SyntaxWarning}) when a pattern
        string does not compile.
        """
        super(Regex, self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                              SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                              SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # Keep the pattern source text; str() of a compiled pattern is
            # its repr, not the regular expression.
            self.pattern = \
                self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(end, ParseResults)} for a regex match at C{loc}; named
        groups of the match are also stored under their names.  Raises
        C{ParseException} when the expression does not match."""
        result = self.re.match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc, ret

    def __str__(self):
        """Return the base-class string if that succeeds, else a cached
        C{Re:(...)} form of the pattern."""
        try:
            return super(Regex, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1960
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        r"""Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
            - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

            Raises C{SyntaxError} (after a C{SyntaxWarning}) when C{quoteChar}
            or C{endQuoteChar} is empty once surrounding whitespace is stripped.
        """
        super(QuotedString, self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # the escape character, if any, is excluded from the "ordinary
        # character" class so that it is only consumed by the escape branch
        escClass = ''
        if escChar is not None:
            escClass = _escapeRegexRangeChars(escChar) or ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                (re.escape(self.quoteChar),
                 _escapeRegexRangeChars(self.endQuoteChar[0]),
                 escClass)
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                (re.escape(self.quoteChar),
                 _escapeRegexRangeChars(self.endQuoteChar[0]),
                 escClass)
        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside the
            # body, as long as the following character breaks the end quote
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                                   _escapeRegexRangeChars(self.endQuoteChar[i]))
                                      for i in range(len(self.endQuoteChar) - 1, 0, -1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(end, text)} for a quoted string starting at C{loc}.

        When C{unquoteResults} is set, the quotes are stripped and escape
        sequences are replaced as configured.  Raises C{ParseException} if
        no quoted string starts at C{loc}.
        """
        result = None
        # cheap first-character test before running the regular expression
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret, basestring):
                # replace escaped whitespace
                if '\\' in ret and self.convertWhitespaceEscapes:
                    ws_map = {
                        r'\t' : '\t',
                        r'\n' : '\n',
                        r'\f' : '\f',
                        r'\r' : '\r',
                    }
                    for wslit, wschar in ws_map.items():
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid escape in a normal literal
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__(self):
        """Return the base-class string if that succeeds, else a cached
        description naming the start and end quote strings."""
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2084
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__(self, notChars, min=1, max=0, exact=0):
        """Raises C{ValueError} if C{min} is less than 1."""
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(end, text)} for the run of allowed characters starting
        at C{loc}; raise C{ParseException} if the run is shorter than
        C{minLen}."""
        if instring[loc] in self.notChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        notchars = self.notChars
        maxlen = min(start + self.maxLen, len(instring))
        while loc < maxlen and \
              (instring[loc] not in notchars):
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        """Return the base-class string if that succeeds, else a cached
        C{!W:(...)} summary of the excluded characters."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2147
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
       as defined for the C{L{Word}} class."""
    # display names used to build the expression's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # characters this token matches must not be skipped as whitespace
        skippable = "".join(c for c in self.whiteChars if c not in self.matchWhite)
        self.setWhitespaceChars(skippable)
        #~ self.leaveWhitespace()
        labels = (White.whiteStrs[c] for c in self.matchWhite)
        self.name = ("".join(labels))
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
        if exact > 0:
            # an exact length overrides both bounds
            self.maxLen = self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(end, text)} for the run of C{matchWhite} characters at
        C{loc}; raise C{ParseException} if there is none or it is shorter
        than C{minLen}."""
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        limit = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
2195
class _PositionToken(Token):
    """Common base for the position-testing tokens defined after it; names
    the element after its class and marks it as able to match empty."""
    def __init__(self):
        super(_PositionToken, self).__init__()
        self.name = type(self).__name__
        self.mayIndexError = False
        self.mayReturnEmpty = True
2203
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and whitespace, stopping early if the
        target column is reached; return the new location."""
        if col(loc, instring) == self.col:
            return loc
        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while (loc < instrlen and instring[loc].isspace()
               and col(loc, instring) != self.col):
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(new_loc, skipped_text)} where C{new_loc} lies at the
        target column; raise C{ParseException} if C{loc} is already past it."""
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target = loc + self.col - current
        return target, instring[loc:target]
2226
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__(self):
        super(LineStart, self).__init__()
        # newlines are significant here, so they must not be skipped
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")
        self.setWhitespaceChars(nonNewlineWhite)
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Return C{loc + 1} when the base-class pre-parse lands on a newline,
        otherwise C{loc} unchanged."""
        # NOTE(review): instring[...] is indexed without a bounds check; if the
        # base preParse can return len(instring) this raises IndexError -- confirm.
        landed = super(LineStart, self).preParse(instring, loc)
        return loc + 1 if instring[landed] == "\n" else loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(loc, [])} at a line start; raise C{ParseException}
        otherwise."""
        atLineStart = (loc == 0 or
                       loc == self.preParse(instring, 0) or
                       instring[loc - 1] == "\n")
        if atLineStart:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)
2246
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__(self):
        super(LineEnd, self).__init__()
        # newlines are what this token matches, so they must not be skipped
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")
        self.setWhitespaceChars(nonNewlineWhite)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(loc+1, "\\n")} at a newline, or C{(loc+1, [])} exactly at
        the end of the string; raise C{ParseException} anywhere else."""
        end = len(instring)
        if loc < end:
            if instring[loc] == "\n":
                return loc + 1, "\n"
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        raise ParseException(instring, loc, self.errmsg, self)
2264
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(loc, [])} at the start of the text; raise
        C{ParseException} otherwise."""
        # a non-zero loc still counts as the start when everything before it
        # is just whitespace and ignorables
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2277
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(loc+1, [])} exactly at the end of the string, or
        C{(loc, [])} when already past it; raise C{ParseException} while
        input remains."""
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        if loc > end:
            return loc, []
        # only reachable if loc does not order against an int at all
        raise ParseException(instring, loc, self.errmsg, self)
2293
class WordStart(_PositionToken):
    r"""Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
       use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart, self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(loc, [])} at a word start; raise C{ParseException}
        otherwise.  Location 0 always matches."""
        chars = self.wordChars
        if loc != 0 and (instring[loc - 1] in chars or
                         instring[loc] not in chars):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2312
class WordEnd(_PositionToken):
    r"""Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
       use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd, self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(loc, [])} at a word end; raise C{ParseException}
        otherwise.  A location at or past the end of the string always
        matches."""
        chars = self.wordChars
        total = len(instring)
        if total > 0 and loc < total and (instring[loc] in chars or
                                          instring[loc - 1] not in chars):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2333
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__(self, exprs, savelist = False):
        """Store C{exprs} as the list C{self.exprs}.

        C{exprs} may be a single string, a generator, a sequence of
        expressions and/or strings, any other iterable, or a single
        expression.  A lone string, or a sequence consisting only of
        strings, is wrapped with C{ParserElement.literalStringClass}.
        """
        # collections.Sequence was removed in Python 3.10; prefer the
        # collections.abc location and fall back for Python 2.
        try:
            from collections.abc import Sequence as _Sequence
        except ImportError:
            from collections import Sequence as _Sequence

        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        # same string-wrapping class as the operator overloads use
        # (Or.__ixor__, MatchFirst.__ior__)
        literalClass = ParserElement.literalStringClass
        if isinstance(exprs, basestring):
            self.exprs = [literalClass(exprs)]
        elif isinstance(exprs, _Sequence):
            # if sequence of strings provided, wrap each one
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = map(literalClass, exprs)
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable: treat as a single expression
                self.exprs = [exprs]
        self.callPreparse = False

    def __getitem__(self, i):
        return self.exprs[i]

    def append(self, other):
        """Add C{other} to the contained expressions and return C{self}."""
        self.exprs.append(other)
        self.strRepr = None    # cached string form is now stale
        return self

    def leaveWhitespace(self):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the original sub-expressions are left untouched
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register C{other} as ignorable here and on every contained
        expression; a C{Suppress} already registered is not added again."""
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super(ParseExpression, self).ignore(other)
                for e in self.exprs:
                    e.ignore(self.ignoreExprs[-1])
        else:
            super(ParseExpression, self).ignore(other)
            for e in self.exprs:
                e.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        """Return the base-class string if that succeeds, else a cached
        C{ClassName:(exprs)} form."""
        try:
            return super(ParseExpression, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline all contained expressions and flatten directly nested
        expressions of this same class; return C{self}."""
        super(ParseExpression, self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if (len(self.exprs) == 2):
            other = self.exprs[0]
            if (isinstance(other, self.__class__) and
                    not (other.parseAction) and
                    other.resultsName is None and
                    not other.debug):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            other = self.exprs[-1]
            if (isinstance(other, self.__class__) and
                    not (other.parseAction) and
                    other.resultsName is None and
                    not other.debug):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName(self, name, listAllMatches=False):
        ret = super(ParseExpression, self).setResultsName(name, listAllMatches)
        return ret

    def validate(self, validateTrace=[]):
        """Validate every contained expression, then check this expression
        for recursion."""
        # the default list is never mutated: a new list is built here
        tmp = validateTrace[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion([])

    def copy(self):
        """Return a copy whose contained expressions are copies as well."""
        ret = super(ParseExpression, self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
2443
class And(ParseExpression):
    """Requires all given C{ParseExpression}s to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'+'} operator.
    """

    class _ErrorStop(Empty):
        """Marker element named C{'-'}: once parseImpl passes it, failures of
        later elements are raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__(self, exprs, savelist = True):
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows the first contained expression
        self.setWhitespaceChars(self.exprs[0].whiteChars)
        self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order and return
        C{(end, accumulated_tokens)}."""
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException(ParseException(instring, len(instring), self.errmsg, self))
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Implement C{+=}: append C{other}, wrapping a plain string first."""
        if isinstance(other, basestring):
            # use the configurable literal class, as Or.__ixor__ and
            # MatchFirst.__ior__ do, instead of hard-coding Literal
            other = ParserElement.literalStringClass(other)
        return self.append(other) #And( [ self, other ] )

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            # stop at the first expression that cannot match empty
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
2508
class Or(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the C{'^'} operator.
    """
    def __init__(self, exprs, savelist = False):
        super(Or, self).__init__(exprs, savelist)
        # with no alternatives at all the expression is flagged as may-return-empty
        self.mayReturnEmpty = (any(e.mayReturnEmpty for e in self.exprs)
                               if self.exprs else True)

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative, then really parse the candidates from the
        longest trial match to the shortest and return the first success.

        If nothing succeeds, raises the exception that got furthest into the
        input with its message replaced by C{self.errmsg}, or a
        "no defined alternatives" C{ParseException} if none was recorded.
        """
        furthest = -1
        furthestErr = None
        candidates = []
        for alternative in self.exprs:
            try:
                endLoc = alternative.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthest:
                    furthestErr = err
                    furthest = err.loc
            except IndexError:
                if len(instring) > furthest:
                    furthestErr = ParseException(instring, len(instring), alternative.errmsg, self)
                    furthest = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, alternative))

        if candidates:
            candidates.sort(key=lambda pair: -pair[0])
            for _, alternative in candidates:
                try:
                    return alternative._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest:
                        furthestErr = err
                        furthest = err.loc

        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ixor__(self, other):
        """Implement C{^=}: append C{other}, wrapping a plain string first."""
        if isinstance(other, basestring):
            other = ParserElement.literalStringClass(other)
        return self.append(other) #Or( [ self, other ] )

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            joined = " ^ ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + joined + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(trail)
2578
class MatchFirst(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the C{'|'} operator.
    """
    def __init__(self, exprs, savelist = False):
        super(MatchFirst, self).__init__(exprs, savelist)
        # with no alternatives at all the expression is flagged as may-return-empty
        self.mayReturnEmpty = (any(e.mayReturnEmpty for e in self.exprs)
                               if self.exprs else True)

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        If none does, raises the exception that got furthest into the input
        with its message replaced by C{self.errmsg}, or a "no defined
        alternatives" C{ParseException} if none was recorded.
        """
        furthest = -1
        furthestErr = None
        for alternative in self.exprs:
            try:
                return alternative._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthest:
                    furthestErr = err
                    furthest = err.loc
            except IndexError:
                if len(instring) > furthest:
                    furthestErr = ParseException(instring, len(instring), alternative.errmsg, self)
                    furthest = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ior__(self, other):
        """Implement C{|=}: append C{other}, wrapping a plain string first."""
        if isinstance(other, basestring):
            other = ParserElement.literalStringClass(other)
        return self.append(other) #MatchFirst( [ self, other ] )

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            joined = " | ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + joined + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(trail)
2634
2635 2636 -class Each(ParseExpression):
2637 """Requires all given C{ParseExpression}s to be found, but in any order. 2638 Expressions may be separated by whitespace. 2639 May be constructed using the C{'&'} operator. 2640 """
2641 - def __init__( self, exprs, savelist = True ):
2642 super(Each,self).__init__(exprs, savelist) 2643 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 2644 self.skipWhitespace = True 2645 self.initExprGroups = True
2646
2647 - def parseImpl( self, instring, loc, doActions=True ):
2648 if self.initExprGroups: 2649 self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional)) 2650 opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] 2651 opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)] 2652 self.optionals = opt1 + opt2 2653 self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] 2654 self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] 2655 self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] 2656 self.required += self.multirequired 2657 self.initExprGroups = False 2658 tmpLoc = loc 2659 tmpReqd = self.required[:] 2660 tmpOpt = self.optionals[:] 2661 matchOrder = [] 2662 2663 keepMatching = True 2664 while keepMatching: 2665 tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired 2666 failed = [] 2667 for e in tmpExprs: 2668 try: 2669 tmpLoc = e.tryParse( instring, tmpLoc ) 2670 except ParseException: 2671 failed.append(e) 2672 else: 2673 matchOrder.append(self.opt1map.get(id(e),e)) 2674 if e in tmpReqd: 2675 tmpReqd.remove(e) 2676 elif e in tmpOpt: 2677 tmpOpt.remove(e) 2678 if len(failed) == len(tmpExprs): 2679 keepMatching = False 2680 2681 if tmpReqd: 2682 missing = ", ".join(_ustr(e) for e in tmpReqd) 2683 raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) 2684 2685 # add any unmatched Optionals, in case they have default values defined 2686 matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] 2687 2688 resultlist = [] 2689 for e in matchOrder: 2690 loc,results = e._parse(instring,loc,doActions) 2691 resultlist.append(results) 2692 2693 finalResults = ParseResults() 2694 for r in resultlist: 2695 dups = {} 2696 for k in r.keys(): 2697 if k in finalResults: 2698 tmp = ParseResults(finalResults[k]) 2699 tmp += ParseResults(r[k]) 2700 dups[k] = tmp 2701 finalResults += ParseResults(r) 2702 for k,v in 
dups.items(): 2703 finalResults[k] = v 2704 return loc, finalResults
2705
def __str__(self):
    """Return the display string for this expression.

    An explicitly assigned C{name} takes precedence.  Otherwise the string
    forms of the contained expressions are joined with C{" & "} inside
    braces, and the result is cached in C{self.strRepr}.
    """
    if hasattr(self, "name"):
        return self.name

    cached = self.strRepr
    if cached is None:
        cached = "{" + " & ".join(_ustr(sub) for sub in self.exprs) + "}"
        self.strRepr = cached
    return cached
2714
def checkRecursion(self, parseElementList):
    """Run the recursion check on every contained expression.

    Each sub-expression receives a fresh list made of the elements seen so
    far plus this expression itself.
    """
    trail = parseElementList[:] + [self]
    for sub in self.exprs:
        sub.checkRecursion(trail)
2719
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression (C{self.expr}, which may be C{None})
    and forwards parsing, streamlining, ignoring and validation to it.
    """
    def __init__( self, expr, savelist=False ):
        """Wrap C{expr}; a plain string is converted to a C{Literal}."""
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # mirror the parsing attributes of the wrapped expression
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse with the wrapped expression; raise C{ParseException} if there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element and on a copy of the wrapped expression.

        Returns C{self}.
        """
        self.skipWhitespace = False
        # Guard BEFORE copying: previously copy() was called unconditionally,
        # which raised AttributeError when no expression had been set.
        if self.expr is not None:
            # operate on a copy so the expression object originally passed in
            # is not itself modified
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable here and on the wrapped expression.

        A C{Suppress} that is already in C{self.ignoreExprs} is not added
        again.  Returns C{self}.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and then the wrapped expression; returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if this element already appears in C{parseElementList}."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then run the recursion check."""
        # the default list is only ever copied, never mutated
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base-class string if available, else C{"ClassName:(expr)"} (cached)."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # fall through to the generated representation
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2791
class FollowedBy(ParseElementEnhance):
    """Positive lookahead for the given parse expression.

    C{FollowedBy} checks that the wrapped expression matches at the current
    position but does *not* advance the parsing position, and it always
    returns a null token list.
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # tryParse raises if the lookahead fails; its result location is discarded
        self.expr.tryParse(instring, loc)
        no_tokens = []
        return loc, no_tokens
2805
class NotAny(ParseElementEnhance):
    """Negative lookahead for the given parse expression.

    C{NotAny} does *not* advance the parsing position; it only verifies that
    the wrapped expression does *not* match at the current position.
    C{NotAny} also does *not* skip over leading whitespace, and it always
    returns a null token list.  May be constructed using the '~' operator.
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # set the flag directly instead of calling self.leaveWhitespace():
        # the setting must not propagate to the contained expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        unwanted_present = self.expr.canParseNext(instring, loc)
        if unwanted_present:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = "~{" + _ustr(self.expr) + "}"
            self.strRepr = text
        return text
2833
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=None) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)
    """
    def __init__(self, expr, stopOn=None):
        super(OneOrMore, self).__init__(expr)
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = Literal(sentinel)
        if sentinel is None:
            self.not_ender = None
        else:
            self.not_ender = ~sentinel

    def parseImpl(self, instring, loc, doActions=True):
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        guard = None
        if self.not_ender is not None:
            guard = self.not_ender.tryParse

        # at least one match is required; the stopOn sentinel is checked
        # first, and finding it here makes the whole repetition fail
        if guard is not None:
            guard(instring, loc)
        loc, tokens = parse_expr(instring, loc, doActions, callPreParse=False)
        try:
            have_ignorables = bool(self.ignoreExprs)
            while True:
                if guard is not None:
                    guard(instring, loc)
                preloc = skip_ignorables(instring, loc) if have_ignorables else loc
                loc, more = parse_expr(instring, preloc, doActions)
                if more or more.haskeys():
                    tokens += more
        except (ParseException, IndexError):
            # the first failed repetition (or sentinel hit) ends the loop
            pass

        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = "{" + _ustr(self.expr) + "}..."
            self.strRepr = text
        return text

    def setResultsName(self, name, listAllMatches=False):
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2893
class ZeroOrMore(OneOrMore):
    """Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=None) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)
    """
    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # a failure of the one-or-more parse means "zero matches", not an error
        try:
            result = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            result = (loc, [])
        return result

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = "[" + _ustr(self.expr) + "]..."
            self.strRepr = text
        return text
2921
class _NullToken(object):
    """Placeholder object that is always falsy and renders as an empty string."""
    def __str__(self):
        return ""

    def __bool__(self):
        return False
    # Python 2 spelling of the truth-value hook
    __nonzero__ = __bool__

# module-level sentinel instance, used as the "no default given" marker
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    Parameters:
     - expr - expression that may or may not be present
     - default (optional) - value to be returned if the optional expression
       is not found.
    """
    def __init__(self, expr, default=_optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                # no default was supplied: contribute nothing
                tokens = []
            elif self.expr.resultsName:
                # keep the default reachable under the expression's results name
                tokens = ParseResults([fallback])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = "[" + _ustr(self.expr) + "]"
            self.strRepr = text
        return text
2965
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=False) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=None) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=None) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Scan forward from C{loc} until the target expression matches.

        Returns C{(loc, ParseResults)} where the results hold the skipped
        text (plus the target's tokens when C{include} was requested).
        Raises C{ParseException} if the end of the input is reached without
        a match, or if the C{failOn} expression matches first.
        """
        startloc = loc
        instrlen = len(instring)
        expr_parse = self.expr._parse
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # A failOn match means this SkipTo is NOT a match.  The old
                # code used 'break' here, which bypassed the while/else and
                # was reported as a successful skip.
                if self_failOn_canParseNext(instring, tmploc):
                    raise ParseException(instring, loc, self.errmsg, self)

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
3038
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
       fwdExpr << a | b | c
    will actually be evaluated as::
       (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
       fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Install C{other} as the wrapped expression and adopt its parsing attributes; returns C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form of C{<<}; delegates to C{__lshift__}."""
        return self << other

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; the wrapped expression is untouched."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the wrapped expression once; the flag is set first, before descending."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression unless this element is already in the trace."""
        # the default list is only ever copied, never mutated
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name, or C{"<ClassName>: ..."} as a fixed placeholder."""
        if hasattr(self,"name"):
            return self.name
        # Rendering the wrapped expression here was stubbed out (it was noted
        # as causing memory and performance problems); the unreachable code
        # that followed this return has been removed.
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """Copy this element; an undefined C{Forward} yields a new C{Forward} that wraps this one."""
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
3115
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always the fixed text C{"..."}."""
    def __str__(self):
        placeholder = "..."
        return placeholder
3119
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""
    def __init__(self, expr, savelist=False):
        # NOTE: savelist is accepted but not forwarded to the base class
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
3125
class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    By default the matched patterns must also be contiguous in the input
    string; pass C{'adjacent=False'} to the constructor to lift that
    requirement.
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # turn off whitespace-skipping inside the contained expressions,
        # then switch it back on for the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            # bypass ParseElementEnhance.ignore, which also registers the
            # ignorable on the wrapped expression
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined = tokenlist.copy()
        del combined[:]
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
3157
class Group(TokenConverter):
    """Converter that returns the matched tokens nested in a list - handy for keeping the tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions together."""
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        grouped = [tokenlist]
        return grouped
3166
class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list and also as a dictionary.

    Every element can additionally be looked up using the first token of the
    element as its key.  Useful for tabular report scraping when the first
    column can serve as an item key.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        for offset, entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                # integer keys are stored under their stripped string form
                key = _ustr(entry[0]).strip()

            if size == 1:
                value = ""
            elif size == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                keep_whole = (len(remainder) != 1
                              or (isinstance(remainder, ParseResults) and remainder.haskeys()))
                value = remainder if keep_whole else remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value, offset)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
3199
class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression."""
    def postParse(self, instring, loc, tokenlist):
        # whatever was matched, contribute no tokens
        return []

    def suppress(self):
        # already a Suppress, so there is nothing to wrap
        return self
3208
class OnlyOnce(object):
    """Wrapper for parse actions, so that the action runs only a single time.

    After one successful call, further calls raise C{ParseException} until
    C{reset()} is invoked.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        if self.called:
            raise ParseException(s, l, "")
        outcome = self.callable(s, l, t)
        # flagged only after the wrapped call returned without raising
        self.called = True
        return outcome

    def reset(self):
        self.called = False
3223
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line to C{sys.stderr} before
    calling the parse action and a "leaving" line afterwards, showing either
    the return value or the exception (which is re-raised).
    """
    f = _trim_arity(f)
    # 'func_name' exists only on Python 2 functions; '__name__' is available
    # on both Python 2 and 3.  Fall back to the class name for callables
    # that carry no __name__ at all.
    baseName = getattr(f, "__name__", f.__class__.__name__)
    def z(*paArgs):
        thisFunc = baseName
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # an extra leading argument is treated as the bound instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
#
# global helpers
#
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.

    By default, list elements and delimiters may be separated by whitespace
    and comments; passing C{combine=True} overrides this.  With C{combine}
    set to C{True} the matching tokens come back as one token string with
    the delimiters included; otherwise they come back as a list of tokens
    with the delimiters suppressed.
    """
    dlName = _ustr(expr) + " [" + _ustr(delim) + " " + _ustr(expr) + "]..."
    if not combine:
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    else:
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    return listExpr.setName(dlName)
3262
def countedArray(expr, intExpr=None):
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer gives the number of expr expressions that follow.
    The matched tokens are returned as a list of the expr tokens - the
    leading count token is suppressed.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # once the count is known, define the array expression to match
        count = t[0]
        if count:
            arrayExpr << Group(And([expr] * count))
        else:
            arrayExpr << Group(empty)
        return []

    if intExpr is not None:
        counter = intExpr.copy()
    else:
        counter = Word(nums).setParseAction(lambda t: int(t[0]))
    counter.setName("arrayLen")
    counter.addParseAction(countFieldParseAction, callDuringTry=True)
    return (counter + arrayExpr).setName('(len) ' + _ustr(expr) + '...')
def _flatten(L):
    """Return a flat list of the items in C{L}, descending recursively into nested lists.

    Only C{list} instances are expanded; other containers are kept as items.
    """
    flat = []
    for item in L:
        if not isinstance(item, list):
            flat.append(item)
            continue
        flat.extend(_flatten(item))
    return flat
3291
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, i.e. it looks for a
    'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, it will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several tokens: flatten them into a sequence of literals
            flatTokens = _flatten(t.asList())
            rep << And(Literal(tok) for tok in flatTokens)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, i.e. it looks for a
    'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, it will *not* match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep <<= expr.copy()

    def copyTokenToRepeater(s, l, t):
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            found = _flatten(t.asList())
            if found != expected:
                raise ParseException("",0,"")

        # re-arm the repeater with the tokens that were just matched
        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def _escapeRegexRangeChars(s):
    """Escape C{s} for use inside a regex character class.

    A backslash is put in front of each of the characters C{\\ ^ - ]}
    (backslash is handled first), and literal newline / tab characters are
    replaced by their two-character escape sequences.
    """
    for special in r"\^-]":
        s = s.replace(special, _bslash + special)
    for raw, escaped in (("\n", r"\n"), ("\t", r"\t")):
        s = s.replace(raw, escaped)
    return _ustr(s)
3353
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list of string literals
     - caseless - (default=False) - treat all literals as caseless
     - useRegex - (default=True) - as an optimization, will generate a Regex
       object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
       if creating a C{Regex} raises an exception)

    Returns C{NoMatch()} when no symbols are given (an invalid C{strs}
    argument additionally triggers a C{SyntaxWarning}).
    """
    # collections.Sequence was removed in Python 3.10; prefer collections.abc
    # and fall back to the old location on Python 2.
    try:
        from collections.abc import Sequence as _Sequence
    except ImportError:
        from collections import Sequence as _Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, _Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates and reorder so that a symbol which is a prefix of a
    # later symbol is tested after it (longest-first matching).
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character class suffices
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # best effort: fall through to MatchFirst, but do not swallow
            # KeyboardInterrupt / SystemExit as the bare 'except' did
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
3417
def dictOf(key, value):
    """Helper to easily and clearly define a dictionary from the respective
    patterns for the key and the value.

    Nests the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens in the
    proper order.  The key pattern may include delimiting markers or
    punctuation as long as they are suppressed, leaving only the significant
    key text.  The value pattern may include named results, so that the
    C{Dict} results can include named token fields.
    """
    entry = Group(key + value)
    entries = ZeroOrMore(entry)
    return Dict(entries)
3427
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.

    Useful to restore the parsed fields of an HTML start tag into the raw tag
    text itself, or to revert separate tokens with intervening whitespace back
    to the original matching input text.  By default, returns a string
    containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.
    """
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    stopMarker = startMarker.copy()
    stopMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + stopMarker("_original_end")

    def sliceAsString(s, l, t):
        return s[t._original_start:t._original_end]

    def sliceInPlace(s, l, t):
        # replace the token list contents, consuming the two marker entries
        t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]

    wrapped.setParseAction(sliceAsString if asString else sliceInPlace)
    return wrapped
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions, even
    if all but one are non-empty.

    Wraps C{expr} in a C{TokenConverter} whose parse action returns the first
    matched token.
    """
    def firstToken(t):
        return t[0]
    return TokenConverter(expr).setParseAction(firstToken)
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending locations in the input string.

    This helper adds the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}
    """
    locator = Empty().setParseAction(lambda s,l,t: l)
    startMark = locator("locn_start")
    valuePart = expr("value")
    # the end marker must not skip whitespace before recording its location
    endMark = locator.copy().leaveWhitespace()("locn_end")
    return Group(startMark + valuePart + endMark)
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# building blocks for parsing regex-style '[...]' range expressions (see srange)
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# Strip the backslash / optional '0' prefix and then the 'x' or 'X' marker
# separately.  The previous single lstrip(r'\0x') left an uppercase 'X' in
# place and, for an all-zero value such as \x00, stripped the digits too.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0').lstrip('xX'),16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
       srange("[0-9]")   -> "0123456789"
       srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
       srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.
    The values enclosed in the []'s may be::
       a single character
       an escaped character with a leading backslash (such as \- or \])
       an escaped hex character with a leading '\x' (\x21, which is a '!' character)
         (\0x## is also supported for backwards compatibility)
       an escaped octal character with a leading '\0' (\041, which is a '!' character)
       a range of any of the above, separated by a dash ('a-z', etc.)
       any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

    Returns an empty string if C{s} cannot be parsed or expanded.
    """
    # a ParseResults part is a (start, end) character pair; anything else is
    # already a single character
    _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # invalid range expressions deliberately yield "", but
        # KeyboardInterrupt / SystemExit are no longer swallowed
        return ""
3507
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
    column in the input text.

    Returns a parse action that raises C{ParseException} when the match
    location is not at column C{n}.
    """
    def verifyCol(strg, locn, toks):
        actual = col(locn, strg)
        if actual != n:
            raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.
    """
    def _replace(s, l, t):
        # a new one-element list is built on every call
        return [replStr]
    return _replace
3522
def removeQuotes(s, l, t):
    """Helper parse action for removing quotation marks from parsed quoted strings.

    To use, add this parse action to quoted string using::
      quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    # drop the first and last character of the first token
    return quoted[1:-1]
3529
def upcaseTokens(s, l, t):
    """Helper parse action to convert tokens to upper case."""
    as_text = map(_ustr, t)
    return [text.upper() for text in as_text]
3533
def downcaseTokens(s, l, t):
    """Helper parse action to convert tokens to lower case."""
    as_text = map(_ustr, t)
    return [text.lower() for text in as_text]
3537
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if isinstance(tagStr, basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    # optional trailing '/'; the "empty" result is True only when it was present
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')

    if xml:
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        attribute = Group(tagAttrName + Suppress("=") + tagAttrValue)
    else:
        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printablesLessRAbrack)
        # attribute names are lower-cased here, and the value part is optional
        attribute = Group(tagAttrName.setParseAction(downcaseTokens) +
                          Optional(Suppress("=") + tagAttrValue))

    openTag = (Suppress("<") + tagStr("tag") +
               Dict(ZeroOrMore(attribute)) +
               selfClosing + Suppress(">"))
    closeTag = Combine(_L("</") + tagStr + ">")

    # e.g. a results-name suffix of "NsTag" for a tag named "ns:tag"
    suffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start" + suffix).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end" + suffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
3566
def makeHTMLTags(tagStr):
    """Build the (opening tag, closing tag) expression pair for the given tag
    name, using HTML matching rules."""
    return _makeTags( tagStr, xml=False )
3570
def makeXMLTags(tagStr):
    """Build the (opening tag, closing tag) expression pair for the given tag
    name, using XML matching rules."""
    return _makeTags( tagStr, xml=True )
3574
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       C{<TD>} or C{<DIV>}.

       Call C{withAttribute} with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in C{(align="right")}, or
        - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second or third form.
       If any positional name-value tuples are given, keyword arguments are ignored.

       Attribute names are looked up in the parsed tokens exactly as given. Start tags
       built by C{L{makeHTMLTags}} lower-case attribute names while parsing, so give
       names in lower case when filtering HTML tags.

       If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.

       The returned parse action raises C{ParseException} when a required attribute
       is missing or has a different value, and returns C{None} otherwise.
       """
    # positional name-value tuples take precedence over keyword arguments
    attrs = [(k,v) for k,v in (args if args else attrDict.items())]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            # ANY_VALUE is a sentinel, so test identity rather than equality; this keeps
            # a value with an overloaded comparison from being mistaken for the wildcard
            if attrValue is not withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# sentinel meaning "attribute must be present, any value is accepted"
withAttribute.ANY_VALUE = object()
def withClass(classname, namespace=''):
    """Shortcut for C{L{withAttribute}} that filters on the C{class} attribute,
       which cannot be passed as a plain keyword argument because C{class} is a
       reserved word in Python. A non-empty C{namespace} is prefixed to the
       attribute name as C{namespace:class}.
       """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})

# Associativity markers: infixNotation compares the rightLeftAssoc member of each
# operator tuple against opAssoc.LEFT and opAssoc.RIGHT.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the nested
          expression grammar
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple (or list) of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)
        - lpar - expression for matching left-parentheses (default=Suppress('('))
        - rpar - expression for matching right-parentheses (default=Suppress(')'))

       Raises C{ValueError} if numTerms is not 1, 2 or 3, if a ternary opExpr is
       not a pair of expressions, or if rightLeftAssoc is neither
       C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.
    """
    ret = Forward()
    # the innermost operand is either a base expression or a parenthesized sub-expression
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            # validate before formatting the name, so a bad opExpr raises the intended
            # ValueError instead of a TypeError from the % operator
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            # wrap in a 1-tuple so that % never unpacks opExpr itself
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: operands are simply adjacent
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # each level matches its own operator form or falls through to the previous level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
# second name bound to the same function object as infixNotation
operatorPrecedence = infixNotation

# Quoted-string expressions.  The regex body accepts any character other than the
# delimiting quote, a newline/carriage return or a backslash, plus a doubled quote
# character and backslash escapes (including \x followed by hex digits); the closing
# quote is matched separately and joined to the body by Combine.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# either of the two forms above
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# a quoted string immediately preceded by the letter u
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Build an expression for nested lists wrapped in opening and closing
       delimiters (parentheses unless told otherwise).

       Parameters:
        - opener - opening delimiter of a nested list (default="("); may be a string or a pyparsing expression
        - closer - closing delimiter of a nested list (default=")"); may be a string or a pyparsing expression
        - content - expression for the items inside the nested lists (default=None)
        - ignoreExpr - expression whose matches may contain delimiter characters
          that must not count as nesting (default=quotedString)

       When no C{content} expression is supplied, both delimiters must be strings,
       and everything between them is captured as a list of whitespace-separated
       values.

       C{ignoreExpr} typically names things such as quoted strings or comments;
       combine several expressions with C{L{Or}} or C{L{MatchFirst}}. Pass
       C{None} when nothing should be ignored.

       Raises C{ValueError} if the delimiters are equal, or if no content
       expression is given and a delimiter is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        def stripToken(t):
            return t[0].strip()
        singleCharDelims = len(opener) == 1 and len(closer) == 1
        if singleCharDelims and ignoreExpr is not None:
            # one-character delimiters can be excluded directly by CharsNotIn
            content = Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+whiteChars,exact=1))
                        ).setParseAction(stripToken)
        elif singleCharDelims:
            content = empty.copy()+CharsNotIn(opener+closer+whiteChars
                        ).setParseAction(stripToken)
        elif ignoreExpr is not None:
            # longer delimiters need negative lookahead before each character
            content = Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken)
        else:
            content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken)
    ret = Forward()
    if ignoreExpr is None:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
    else:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
3754
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.

       Returns a C{Group} expression named C{'indented block'}. As a side effect,
       C{blockStatementExpr} is told to ignore a backslash followed by a line end.
    """
    def checkPeerIndent(s,l,t):
        # parse action: the current column must equal the top of the indent stack
        if l >= len(s): return    # nothing to check at end of input
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                # deeper than the current block: raised as a fatal exception
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # parse action: a column deeper than the top of the stack opens a new
        # level, which is pushed onto the stack
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # parse action: the column must be left of the current level and no deeper
        # than the enclosing one; on success the current level is popped
        if l >= len(s): return    # end of input: the stack is left untouched
        curCol = col(l,s)
        # NOTE(review): indentStack[-2] is evaluated whenever the first two tests pass,
        # so this appears to assume at least two entries on the stack - confirm
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more suppressed line ends; only tab and space are skipped as whitespace
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    # zero-width markers whose parse actions do the column bookkeeping
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            #~ FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        # left-most block: statements only need to be peers, no INDENT/UNDENT bracket
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # presumably _bslash matches a backslash, making this a line-continuation rule - confirm
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

# character sets built by srange from the hex code ranges listed in each pattern
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# open/close tag expressions whose tag name is any Word of the given characters
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character (nbsp maps to a plain space)
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# matches "&name;" for the names above and captures the name in the group "entity"
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """Parse action helper that maps a matched common HTML entity to the character it stands for"""
    # t.entity is the entity name captured by the match; unknown names give None
    entityName = t.entity
    return _htmlEntityMap.get(entityName)

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# "/*" followed by any run of characters not containing "*/", then the closing "*/"
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")

# "<!--" through the first following "-->", newlines included
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
# everything up to the end of the current line, with no leading whitespace skipped
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# "//" to end of line; a backslash-newline pair continues the comment onto the next line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
# either a /* ... */ comment or a // comment
cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")

# same expression object as cppStyleComment
javaStyleComment = cppStyleComment
# "#" to end of line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# one unquoted list item: comma-free words, with interior spaces/tabs kept only
# when they are not followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# delimited list of quoted strings or unquoted items; a missing item yields ""
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")


# self-test run when the module is executed as a script: builds a small
# SELECT ... FROM ... grammar and passes a batch of sample statements to runTests
if __name__ == "__main__":

    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    # dotted names are combined into one token and upper-cased
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) ).setName("columns")
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) ).setName("tables")
    simpleSQL = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    # the samples include misspelled keywords and incomplete statements
    simpleSQL.runTests("""\
        SELECT * from XYZZY, ABC
        select * from SYS.XYZZY
        Select A from Sys.dual
        Select AA,BB,CC from Sys.dual
        Select A, B, C from Sys.dual
        Select A, B, C from Sys.dual
        Xelect A, B, C from Sys.dual
        Select A, B, C frox Sys.dual
        Select
        Select ^^^ frox Sys.dual
        Select A, B, C from Sys.dual, Table2""")