1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 __doc__ = \
26 """
27 pyparsing module - Classes and methods to define and execute parsing grammars
28
29 The pyparsing module is an alternative approach to creating and executing simple grammars,
30 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
31 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
32 provides a library of classes that you use to construct the grammar directly in Python.
33
34 Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
35
36 from pyparsing import Word, alphas
37
38 # define grammar of a greeting
39 greet = Word( alphas ) + "," + Word( alphas ) + "!"
40
41 hello = "Hello, World!"
42 print (hello, "->", greet.parseString( hello ))
43
44 The program outputs the following::
45
46 Hello, World! -> ['Hello', ',', 'World', '!']
47
48 The Python representation of the grammar is quite readable, owing to the self-explanatory
49 class names, and the use of '+', '|' and '^' operators.
50
51 The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
52 object with named attributes.
53
54 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
55 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
56 - quoted strings
57 - embedded comments
58 """
59
60 __version__ = "2.1.2"
61 __versionTime__ = "29 Apr 2016 15:10 UTC"
62 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
63
64 import string
65 from weakref import ref as wkref
66 import copy
67 import sys
68 import warnings
69 import re
70 import sre_constants
71 import collections
72 import pprint
73 import functools
74 import itertools
75 import traceback
76
77
78
# Public API of the module: the names exported by ``from pyparsing import *``.
# Class names come first, followed by helper functions and predefined
# expressions/constants.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
]
98
99 PY_3 = sys.version.startswith('3')
100 if PY_3:
101 _MAX_INT = sys.maxsize
102 basestring = str
103 unichr = chr
104 _ustr = str
105
106
107 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
108
109 else:
110 _MAX_INT = sys.maxint
111 range = xrange
114 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
115 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
116 then < returns the unicode object | encodes it with the default encoding | ... >.
117 """
118 if isinstance(obj,unicode):
119 return obj
120
121 try:
122
123
124 return str(obj)
125
126 except UnicodeEncodeError:
127
128 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
129 xmlcharref = Regex('&#\d+;')
130 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
131 return xmlcharref.transformString(ret)
132
133
134 singleArgBuiltins = []
135 import __builtin__
136 for fname in "sum len sorted reversed list tuple set any all min max".split():
137 try:
138 singleArgBuiltins.append(getattr(__builtin__,fname))
139 except AttributeError:
140 continue
141
142 _generatorType = type((y for y in range(1)))
145 """Escape &, <, >, ", ', etc. in a string of data."""
146
147
148 from_symbols = '&><"\''
149 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
150 for from_,to_ in zip(from_symbols, to_symbols):
151 data = data.replace(from_, to_)
152 return data
153
156
# Character-class constants used when building Word/CharsNotIn expressions.
nums = "0123456789"
alphas = string.ascii_uppercase + string.ascii_lowercase
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
# a single backslash, spelled by code point to avoid escaping noise
_bslash = chr(92)
# every printable ASCII character that is not whitespace
printables = "".join(ch for ch in string.printable if ch not in string.whitespace)
165 """base exception class for all parsing runtime exceptions"""
166
167
def __init__( self, pstr, loc=0, msg=None, elem=None ):
    """Record where and why a parse failed.

    When C{msg} is omitted, the single string argument C{pstr} is taken to be
    the message itself and the stored input string is left empty.

     - pstr - the string being parsed (or the message, if C{msg} is None)
     - loc - location in C{pstr} at which the failure occurred
     - msg - description of the failure
     - elem - the parser element that raised the exception, if any
    """
    self.loc = loc
    if msg is not None:
        self.msg, self.pstr = msg, pstr
    else:
        # one-argument form: the lone string is the message
        self.msg, self.pstr = pstr, ""
    self.parserElement = elem
177
179 """supported attributes by name are:
180 - lineno - returns the line number of the exception text
181 - col - returns the column number of the exception text
182 - line - returns the line containing the exception text
183 """
184 if( aname == "lineno" ):
185 return lineno( self.loc, self.pstr )
186 elif( aname in ("col", "column") ):
187 return col( self.loc, self.pstr )
188 elif( aname == "line" ):
189 return line( self.loc, self.pstr )
190 else:
191 raise AttributeError(aname)
192
194 return "%s (at char %d), (line:%d, col:%d)" % \
195 ( self.msg, self.loc, self.lineno, self.column )
209 return "lineno col line".split() + dir(type(self))
210
212 """exception thrown when parse expressions don't match class;
213 supported attributes by name are:
214 - lineno - returns the line number of the exception text
215 - col - returns the column number of the exception text
216 - line - returns the line containing the exception text
217 """
218 pass
219
221 """user-throwable exception thrown when inconsistent parse content
222 is found; stops all parsing immediately"""
223 pass
224
226 """just like C{L{ParseFatalException}}, but thrown internally when an
227 C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
228 an unbacktrackable syntax error has been found"""
232
247 """exception thrown by C{validate()} if the grammar could be improperly recursive"""
def __init__( self, parseElementList ):
    # keep the list of parse elements handed in by the caller; it is
    # interpolated into the exception's string form
    self.parseElementTrace = parseElementList
250
252 return "RecursiveGrammarException: %s" % self.parseElementTrace
253
260 return repr(self.tup)
262 self.tup = (self.tup[0],i)
263
265 """Structured parse results, to provide multiple means of access to the parsed data:
266 - as a list (C{len(results)})
267 - by list index (C{results[0], results[1]}, etc.)
268 - by attribute (C{results.<resultsName>})
269 """
def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
    """Create a new instance, or hand back C{toklist} unchanged when it is
    already an instance of this class.

    Fresh instances are flagged so that C{__init__} performs its one-time
    setup; an instance that is passed through keeps whatever flag it had.
    """
    if not isinstance(toklist, cls):
        fresh = object.__new__(cls)
        fresh.__doinit = True
        return fresh
    return toklist
276
277
278
def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
    """Initialise token storage and, if C{name} is given, register the tokens
    under that results name.

     - toklist - a list (copied), a generator (materialised), or any other
       object (wrapped in a one-element list); C{None} means no tokens
     - name - results name to register the tokens under; ignored when falsy
     - asList - when true the named value is stored as a nested C{ParseResults},
       otherwise as the first token (or C{toklist} itself if it cannot be indexed)
     - modal - when false, C{name} is recorded in the accumulating-names table
     - isinstance - NOTE(review): presumably bound as a default argument to make
       the lookup local; confirm before relying on it
    """
    # __doinit is set by __new__ only on freshly created objects, so the
    # storage set-up below runs once per instance
    if self.__doinit:
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            # shallow copy so the caller's list is not shared
            self.__toklist = toklist[:]
        elif isinstance(toklist, _generatorType):
            self.__toklist = list(toklist)
        else:
            self.__toklist = [toklist]
        self.__tokdict = dict()

    if name is not None and name:
        if not modal:
            self.__accumNames[name] = 0
        if isinstance(name,int):
            name = _ustr(name)
        self.__name = name
        # nothing is registered under the name for None, '' or []
        if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
            if isinstance(toklist,basestring):
                toklist = [ toklist ]
            if asList:
                if isinstance(toklist,ParseResults):
                    self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                else:
                    self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                self[name].__name = name
            else:
                try:
                    self[name] = toklist[0]
                except (KeyError,TypeError,IndexError):
                    # toklist is not indexable (or is empty): store it whole
                    self[name] = toklist
317
319 if isinstance( i, (int,slice) ):
320 return self.__toklist[i]
321 else:
322 if i not in self.__accumNames:
323 return self.__tokdict[i][-1][0]
324 else:
325 return ParseResults([ v[0] for v in self.__tokdict[i] ])
326
328 if isinstance(v,_ParseResultsWithOffset):
329 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
330 sub = v[0]
331 elif isinstance(k,(int,slice)):
332 self.__toklist[k] = v
333 sub = v
334 else:
335 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
336 sub = v
337 if isinstance(sub,ParseResults):
338 sub.__parent = wkref(self)
339
341 if isinstance(i,(int,slice)):
342 mylen = len( self.__toklist )
343 del self.__toklist[i]
344
345
346 if isinstance(i, int):
347 if i < 0:
348 i += mylen
349 i = slice(i, i+1)
350
351 removed = list(range(*i.indices(mylen)))
352 removed.reverse()
353
354
355
356
357
358
359 for name,occurrences in self.__tokdict.items():
360 for j in removed:
361 for k, (value, position) in enumerate(occurrences):
362 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
363 else:
364 del self.__tokdict[i]
365
367 return k in self.__tokdict
368
# Number of tokens in the token list (the named-results dict is not counted).
def __len__( self ): return len( self.__toklist )
# Truth value follows the token list: results with no tokens are falsy.
def __bool__(self): return ( not not self.__toklist )
# Python 2 name for the truth-value hook.
__nonzero__ = __bool__
# Iterates over the tokens in order.
def __iter__( self ): return iter( self.__toklist )
# Iterates over a reversed copy of the token list.
def __reversed__( self ): return iter( self.__toklist[::-1] )
375 """Returns all named result keys."""
376 if hasattr(self.__tokdict, "iterkeys"):
377 return self.__tokdict.iterkeys()
378 else:
379 return iter(self.__tokdict)
380
382 """Returns all named result values."""
383 return (self[k] for k in self.iterkeys())
384
386 return ((k, self[k]) for k in self.iterkeys())
387
388 if PY_3:
389 keys = iterkeys
390 values = itervalues
391 items = iteritems
392 else:
394 """Returns all named result keys."""
395 return list(self.iterkeys())
396
398 """Returns all named result values."""
399 return list(self.itervalues())
400
402 """Returns all named result keys and values as a list of tuples."""
403 return list(self.iteritems())
404
406 """Since keys() returns an iterator, this method is helpful in bypassing
407 code that looks for the existence of any defined results names."""
408 return bool(self.__tokdict)
409
def pop( self, *args, **kwargs):
    """Removes and returns item at specified index (default=last).
    Supports both list and dict semantics for pop(). If passed no
    argument or an integer argument, it will use list semantics
    and pop tokens from the list of parsed tokens. If passed a
    non-integer argument (most likely a string), it will use dict
    semantics and pop the corresponding value from any defined
    results names. A second default return value argument is
    supported, just as in dict.pop(), either positionally or as
    the keyword argument C{default}."""
    if not args:
        args = [-1]
    for kw, kwval in kwargs.items():
        if kw != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % kw)
        args = (args[0], kwval)

    key = args[0]
    # the fallback value is used only for a missing non-integer key when a
    # fallback was actually supplied
    if not isinstance(key, int) and len(args) != 1 and key not in self:
        return args[1]

    popped = self[key]
    del self[key]
    return popped
436
def get(self, key, defaultValue=None):
    """Returns named result matching the given key, or if there is no
    such name, then returns the given C{defaultValue} or C{None} if no
    C{defaultValue} is specified."""
    return self[key] if key in self else defaultValue
445
def insert( self, index, insStr ):
    """Inserts new element at location index in the list of parsed tokens."""
    self.__toklist.insert(index, insStr)
    # named results remember the offset of their token; bump every recorded
    # offset that lies beyond the insertion point
    for offsets in self.__tokdict.values():
        for slot, (tok, pos) in enumerate(offsets):
            shifted = pos + 1 if pos > index else pos
            offsets[slot] = _ParseResultsWithOffset(tok, shifted)
457
459 """Add single element to end of ParseResults list of elements."""
460 self.__toklist.append(item)
461
463 """Add sequence of elements to end of ParseResults list of elements."""
464 if isinstance(itemseq, ParseResults):
465 self += itemseq
466 else:
467 self.__toklist.extend(itemseq)
468
470 """Clear all elements and results names."""
471 del self.__toklist[:]
472 self.__tokdict.clear()
473
475 try:
476 return self[name]
477 except KeyError:
478 return ""
479
480 if name in self.__tokdict:
481 if name not in self.__accumNames:
482 return self.__tokdict[name][-1][0]
483 else:
484 return ParseResults([ v[0] for v in self.__tokdict[name] ])
485 else:
486 return ""
487
489 ret = self.copy()
490 ret += other
491 return ret
492
494 if other.__tokdict:
495 offset = len(self.__toklist)
496 addoffset = lambda a: offset if a<0 else a+offset
497 otheritems = other.__tokdict.items()
498 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
499 for (k,vlist) in otheritems for v in vlist]
500 for k,v in otherdictitems:
501 self[k] = v
502 if isinstance(v[0],ParseResults):
503 v[0].__parent = wkref(self)
504
505 self.__toklist += other.__toklist
506 self.__accumNames.update( other.__accumNames )
507 return self
508
510 if isinstance(other,int) and other == 0:
511
512 return self.copy()
513 else:
514
515 return other + self
516
518 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
519
521 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
522
524 out = []
525 for item in self.__toklist:
526 if out and sep:
527 out.append(sep)
528 if isinstance( item, ParseResults ):
529 out += item._asStringList()
530 else:
531 out.append( _ustr(item) )
532 return out
533
535 """Returns the parse results as a nested list of matching tokens, all converted to strings."""
536 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
537
539 """Returns the named parse results as a nested dictionary."""
540 if PY_3:
541 item_fn = self.items
542 else:
543 item_fn = self.iteritems
544
545 def toItem(obj):
546 if isinstance(obj, ParseResults):
547 if obj.haskeys():
548 return obj.asDict()
549 else:
550 return [toItem(v) for v in obj]
551 else:
552 return obj
553
554 return dict((k,toItem(v)) for k,v in item_fn())
555
557 """Returns a new copy of a C{ParseResults} object."""
558 ret = ParseResults( self.__toklist )
559 ret.__tokdict = self.__tokdict.copy()
560 ret.__parent = self.__parent
561 ret.__accumNames.update( self.__accumNames )
562 ret.__name = self.__name
563 return ret
564
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
    """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

     - doctag - tag for the enclosing element; defaults to this object's own
       results name, and failing that to C{ITEM}
     - namedItemsOnly - omit tokens that have no results name
     - indent - leading whitespace for the enclosing element
     - formatted - when false, no newlines or indentation are emitted
    """
    # map token offset -> results name
    names_by_offset = dict((entry[1], key)
                           for (key, entries) in self.__tokdict.items()
                           for entry in entries)

    if formatted:
        newline = "\n"
        child_indent = indent + " "
    else:
        # collapse all layout whitespace
        newline = indent = child_indent = ""

    own_tag = doctag if doctag is not None else self.__name
    if not own_tag:
        if namedItemsOnly:
            return ""
        own_tag = "ITEM"

    pieces = [ newline, indent, "<", own_tag, ">" ]

    for pos, tok in enumerate(self.__toklist):
        tok_tag = names_by_offset.get(pos)
        if isinstance(tok, ParseResults):
            # nested results render themselves, one indent level deeper
            pieces.append(tok.asXML(tok_tag,
                                    namedItemsOnly and doctag is None,
                                    child_indent,
                                    formatted))
            continue

        # plain token: wrap it in its results name, or ITEM if it has none
        if not tok_tag:
            if namedItemsOnly:
                continue
            tok_tag = "ITEM"
        body_text = _xml_escape(_ustr(tok))
        pieces += [ newline, child_indent, "<", tok_tag, ">",
                    body_text,
                    "</", tok_tag, ">" ]

    pieces += [ newline, indent, "</", own_tag, ">" ]
    return "".join(pieces)
623
625 for k,vlist in self.__tokdict.items():
626 for v,loc in vlist:
627 if sub is v:
628 return k
629 return None
630
632 """Returns the results name for this token expression."""
633 if self.__name:
634 return self.__name
635 elif self.__parent:
636 par = self.__parent()
637 if par:
638 return par.__lookup(self)
639 else:
640 return None
641 elif (len(self) == 1 and
642 len(self.__tokdict) == 1 and
643 self.__tokdict.values()[0][0][1] in (0,-1)):
644 return self.__tokdict.keys()[0]
645 else:
646 return None
647
def dump(self,indent='',depth=0):
    """Diagnostic method for listing out the contents of a C{ParseResults}.
    Accepts an optional C{indent} argument so that this string can be embedded
    in a nested display of other data. C{depth} is the current nesting level
    and controls how far entries are pushed to the right."""
    pieces = [ indent + _ustr(self.asList()) ]
    if self.haskeys():
        # named results, one per line, sorted by name
        for key, val in sorted(self.items()):
            pieces.append('\n')
            pieces.append( "%s%s- %s: " % (indent, ' '*depth, key) )
            if isinstance(val, ParseResults) and val:
                pieces.append( val.dump(indent, depth+1) )
            else:
                pieces.append( _ustr(val) )
    elif any(isinstance(tok, ParseResults) for tok in self):
        # no names at this level, but there are nested results: list by index
        for pos, tok in enumerate(self):
            if isinstance(tok, ParseResults):
                rendered = tok.dump(indent, depth+1)
            else:
                rendered = _ustr(tok)
            pieces.append("\n%s%s[%d]:\n%s%s%s" % (indent, ' '*(depth), pos, indent, ' '*(depth+1), rendered))

    return "".join(pieces)
677
def pprint(self, *args, **kwargs):
    """Pretty-printer for parsed results as a list, using the C{pprint} module.
    Accepts additional positional or keyword args as defined for the
    C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
    # the results are converted with asList() first; all extra arguments are
    # forwarded untouched to pprint.pprint
    pprint.pprint(self.asList(), *args, **kwargs)
683
684
686 return ( self.__toklist,
687 ( self.__tokdict.copy(),
688 self.__parent is not None and self.__parent() or None,
689 self.__accumNames,
690 self.__name ) )
691
693 self.__toklist = state[0]
694 (self.__tokdict,
695 par,
696 inAccumNames,
697 self.__name) = state[1]
698 self.__accumNames = {}
699 self.__accumNames.update(inAccumNames)
700 if par is not None:
701 self.__parent = wkref(par)
702 else:
703 self.__parent = None
704
706 return self.__toklist, self.__name, self.__asList, self.__modal
707
709 return (dir(type(self)) + list(self.keys()))
710
711 collections.MutableMapping.register(ParseResults)
712
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    A newline character belongs to the line it terminates, so its column is
    one past the last visible character of that line; this keeps C{col}
    consistent with C{lineno}, which counts only the newlines before C{loc}.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    # Distance from the last newline before loc.  rfind returns -1 when there
    # is none, which makes the first character of the string column 1, and a
    # character directly after a newline is likewise column 1.
    return loc - strg.rfind("\n", 0, loc)
725
727 """Returns current line number within a string, counting newlines as line separators.
728 The first line is number 1.
729
730 Note: the default parsing behavior is to expand tabs in the input string
731 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
732 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
733 consistent view of the parsed string, the parse location, and line and column
734 positions within the parsed string.
735 """
736 return strg.count("\n",0,loc) + 1
737
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
    The terminating newline is not included.
    """
    # rfind gives -1 when loc is on the first line, so +1 yields index 0
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # last line: no terminating newline
        return strg[start:]
    return strg[start:end]
747
749 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
750
752 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
753
755 print ("Exception raised:" + _ustr(exc))
756
758 """'Do-nothing' debug action, to suppress debugging output during parsing."""
759 pass
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783 'decorator to trim function calls to match the arity of the target'
785 if func in singleArgBuiltins:
786 return lambda s,l,t: func(t)
787 limit = [0]
788 foundArity = [False]
789
790 if PY_3:
791 def extract_stack():
792 frame_summary = traceback.extract_stack()[-3]
793 return [(frame_summary.filename, frame_summary.lineno)]
794 def extract_tb(tb):
795 frames = traceback.extract_tb(tb)
796 frame_summary = frames[-1]
797 return [(frame_summary.filename, frame_summary.lineno)]
798 else:
799 extract_stack = traceback.extract_stack
800 extract_tb = traceback.extract_tb
801
802
803
804
805
806
807
808 this_line = extract_stack()[-1]
809 pa_call_line_synth = (this_line[0], this_line[1]+6)
810
811 def wrapper(*args):
812 while 1:
813 try:
814 ret = func(*args[limit[0]:])
815 foundArity[0] = True
816 return ret
817 except TypeError:
818
819 if foundArity[0]:
820 raise
821 else:
822 try:
823 tb = sys.exc_info()[-1]
824 if not extract_tb(tb)[-1][:2] == pa_call_line_synth:
825 raise
826 finally:
827 del tb
828
829 if limit[0] <= maxargs:
830 limit[0] += 1
831 continue
832 raise
833 return wrapper
834
836 """Abstract base level parser element class."""
837 DEFAULT_WHITE_CHARS = " \n\t\r"
838 verbose_stacktrace = False
839
840 @staticmethod
845
846 @staticmethod
848 """
849 Set class to be used for inclusion of string literals into a parser.
850 """
851 ParserElement.literalStringClass = cls
852
854 self.parseAction = list()
855 self.failAction = None
856
857 self.strRepr = None
858 self.resultsName = None
859 self.saveAsList = savelist
860 self.skipWhitespace = True
861 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
862 self.copyDefaultWhiteChars = True
863 self.mayReturnEmpty = False
864 self.keepTabs = False
865 self.ignoreExprs = list()
866 self.debug = False
867 self.streamlined = False
868 self.mayIndexError = True
869 self.errmsg = ""
870 self.modalResults = True
871 self.debugActions = ( None, None, None )
872 self.re = None
873 self.callPreparse = True
874 self.callDuringTry = False
875
877 """Make a copy of this C{ParserElement}. Useful for defining different parse actions
878 for the same parsing pattern, using copies of the original parse element."""
879 cpy = copy.copy( self )
880 cpy.parseAction = self.parseAction[:]
881 cpy.ignoreExprs = self.ignoreExprs[:]
882 if self.copyDefaultWhiteChars:
883 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
884 return cpy
885
887 """Define name for this expression, for use in debugging."""
888 self.name = name
889 self.errmsg = "Expected " + self.name
890 if hasattr(self,"exception"):
891 self.exception.msg = self.errmsg
892 return self
893
895 """Define name for referencing matching tokens as a nested attribute
896 of the returned parse results.
897 NOTE: this returns a *copy* of the original C{ParserElement} object;
898 this is so that the client can define a basic element, such as an
899 integer, and reference it in multiple places with different names.
900
901 You can also set results names using the abbreviated syntax,
902 C{expr("name")} in place of C{expr.setResultsName("name")} -
903 see L{I{__call__}<__call__>}.
904 """
905 newself = self.copy()
906 if name.endswith("*"):
907 name = name[:-1]
908 listAllMatches=True
909 newself.resultsName = name
910 newself.modalResults = not listAllMatches
911 return newself
912
914 """Method to invoke the Python pdb debugger when this element is
915 about to be parsed. Set C{breakFlag} to True to enable, False to
916 disable.
917 """
918 if breakFlag:
919 _parseMethod = self._parse
920 def breaker(instring, loc, doActions=True, callPreParse=True):
921 import pdb
922 pdb.set_trace()
923 return _parseMethod( instring, loc, doActions, callPreParse )
924 breaker._originalParseMethod = _parseMethod
925 self._parse = breaker
926 else:
927 if hasattr(self._parse,"_originalParseMethod"):
928 self._parse = self._parse._originalParseMethod
929 return self
930
932 """Define action to perform when successfully matching parse element definition.
933 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
934 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
935 - s = the original string being parsed (see note below)
936 - loc = the location of the matching substring
937 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
938 If the functions in fns modify the tokens, they can return them as the return
939 value from fn, and the modified list of tokens will replace the original.
940 Otherwise, fn does not need to return any value.
941
942 Note: the default parsing behavior is to expand tabs in the input string
943 before starting the parsing process. See L{I{parseString}<parseString>} for more information
944 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
945 consistent view of the parsed string, the parse location, and line and column
946 positions within the parsed string.
947 """
948 self.parseAction = list(map(_trim_arity, list(fns)))
949 self.callDuringTry = kwargs.get("callDuringTry", False)
950 return self
951
953 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
954 self.parseAction += list(map(_trim_arity, list(fns)))
955 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
956 return self
957
959 """Add a boolean predicate function to expression's list of parse actions. See
960 L{I{setParseAction}<setParseAction>}. Optional keyword argument C{message} can
961 be used to define a custom message to be used in the raised exception."""
962 msg = kwargs.get("message") or "failed user-defined condition"
963 for fn in fns:
964 def pa(s,l,t):
965 if not bool(_trim_arity(fn)(s,l,t)):
966 raise ParseException(s,l,msg)
967 return t
968 self.parseAction.append(pa)
969 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
970 return self
971
973 """Define action to perform if parsing fails at this expression.
974 Fail action fn is a callable function that takes the arguments
975 C{fn(s,loc,expr,err)} where:
976 - s = string being parsed
977 - loc = location where expression match was attempted and failed
978 - expr = the parse expression that failed
979 - err = the exception thrown
980 The function returns no value. It may throw C{L{ParseFatalException}}
981 if it is desired to stop parsing immediately."""
982 self.failAction = fn
983 return self
984
986 exprsFound = True
987 while exprsFound:
988 exprsFound = False
989 for e in self.ignoreExprs:
990 try:
991 while 1:
992 loc,dummy = e._parse( instring, loc )
993 exprsFound = True
994 except ParseException:
995 pass
996 return loc
997
999 if self.ignoreExprs:
1000 loc = self._skipIgnorables( instring, loc )
1001
1002 if self.skipWhitespace:
1003 wt = self.whiteChars
1004 instrlen = len(instring)
1005 while loc < instrlen and instring[loc] in wt:
1006 loc += 1
1007
1008 return loc
1009
1010 - def parseImpl( self, instring, loc, doActions=True ):
1012
1013 - def postParse( self, instring, loc, tokenlist ):
1015
1016
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Core (uncached) parse step for this element.

    Runs C{preParse} (unless suppressed), then C{parseImpl} and C{postParse},
    wraps the tokens in a C{ParseResults}, and finally applies the parse
    actions.  An C{IndexError} escaping from C{parseImpl} is reported as a
    C{ParseException} located at the end of the input.

     - instring - the string being parsed
     - loc - location at which to attempt the match
     - doActions - run parse actions (actions also run when C{callDuringTry} is set)
     - callPreParse - allow the C{preParse} step to run

    Returns a C{(loc, tokens)} tuple: the location after the match and the
    resulting C{ParseResults}.  Re-raises any C{ParseBaseException} after
    notifying the debug-exception and fail actions, when those are configured.
    """
    debugging = ( self.debug )

    if debugging or self.failAction:
        # slow path: debug/fail hooks need to see the attempt and any failure
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # parseImpl is handed preloc, so the end-of-input test must use preloc:
        # preParse may have advanced to the end of the string even though loc
        # itself is still inside it.
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # an action returning None leaves the tokens as they are
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                # an action returning None leaves the tokens as they are
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1088
1094
1096 try:
1097 self.tryParse(instring, loc)
1098 except (ParseException, IndexError):
1099 return False
1100 else:
1101 return True
1102
1103
1104
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing wrapper around C{_parseNoCache}.

    Results are keyed on the element, the input string, the location and the
    two flags.  Both successful results and C{ParseBaseException}s are stored;
    a stored exception is raised again on a later hit.  Token results are
    copied on the way into and out of the cache, so a cached entry is never
    the same object as one handed to a caller.
    """
    key = (self, instring, loc, callPreParse, doActions)
    if key not in ParserElement._exprArgCache:
        try:
            result = self._parseNoCache( instring, loc, doActions, callPreParse )
            ParserElement._exprArgCache[ key ] = (result[0], result[1].copy())
            return result
        except ParseBaseException as failure:
            # drop the traceback before storing the exception in the cache
            failure.__traceback__ = None
            ParserElement._exprArgCache[ key ] = failure
            raise

    hit = ParserElement._exprArgCache[ key ]
    if isinstance(hit, Exception):
        raise hit
    return (hit[0], hit[1].copy())
1121
1122 _parse = _parseNoCache
1123
1124
1125 _exprArgCache = {}
1126 @staticmethod
1129
1130 _packratEnabled = False
1131 @staticmethod
1133 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1134 Repeated parse attempts at the same string location (which happens
1135 often in many complex grammars) can immediately return a cached value,
1136 instead of re-executing parsing/validating code. Memoizing is done of
1137 both valid results and parsing exceptions.
1138
1139 This speedup may break existing programs that use parse actions that
1140 have side-effects. For this reason, packrat parsing is disabled when
1141 you first import pyparsing. To activate the packrat feature, your
1142 program must call the class method C{ParserElement.enablePackrat()}. If
1143 your program uses C{psyco} to "compile as you go", you must call
1144 C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
1145 Python will crash. For best results, call C{enablePackrat()} immediately
1146 after importing pyparsing.
1147 """
1148 if not ParserElement._packratEnabled:
1149 ParserElement._packratEnabled = True
1150 ParserElement._parse = ParserElement._parseCache
1151
1153 """Execute the parse expression with the given string.
1154 This is the main interface to the client code, once the complete
1155 expression has been built.
1156
1157 If you want the grammar to require that the entire input string be
1158 successfully parsed, then set C{parseAll} to True (equivalent to ending
1159 the grammar with C{L{StringEnd()}}).
1160
1161 Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1162 in order to report proper column numbers in parse actions.
1163 If the input string contains tabs and
1164 the grammar uses parse actions that use the C{loc} argument to index into the
1165 string being parsed, you can ensure you have a consistent view of the input
1166 string by:
1167 - calling C{parseWithTabs} on your grammar before calling C{parseString}
1168 (see L{I{parseWithTabs}<parseWithTabs>})
1169 - define your parse action using the full C{(s,loc,toks)} signature, and
1170 reference the input string using the parse action's C{s} argument
1171 - explicitly expand the tabs in your input string before calling
1172 C{parseString}
1173 """
1174 ParserElement.resetCache()
1175 if not self.streamlined:
1176 self.streamline()
1177
1178 for e in self.ignoreExprs:
1179 e.streamline()
1180 if not self.keepTabs:
1181 instring = instring.expandtabs()
1182 try:
1183 loc, tokens = self._parse( instring, 0 )
1184 if parseAll:
1185 loc = self.preParse( instring, loc )
1186 se = Empty() + StringEnd()
1187 se._parse( instring, loc )
1188 except ParseBaseException as exc:
1189 if ParserElement.verbose_stacktrace:
1190 raise
1191 else:
1192
1193 raise exc
1194 else:
1195 return tokens
1196
1198 """Scan the input string for expression matches. Each match will return the
1199 matching tokens, start location, and end location. May be called with optional
1200 C{maxMatches} argument, to clip scanning after 'n' matches are found. If
1201 C{overlap} is specified, then overlapping matches will be reported.
1202
1203 Note that the start and end locations are reported relative to the string
1204 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1205 strings with embedded tabs."""
1206 if not self.streamlined:
1207 self.streamline()
1208 for e in self.ignoreExprs:
1209 e.streamline()
1210
1211 if not self.keepTabs:
1212 instring = _ustr(instring).expandtabs()
1213 instrlen = len(instring)
1214 loc = 0
1215 preparseFn = self.preParse
1216 parseFn = self._parse
1217 ParserElement.resetCache()
1218 matches = 0
1219 try:
1220 while loc <= instrlen and matches < maxMatches:
1221 try:
1222 preloc = preparseFn( instring, loc )
1223 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1224 except ParseException:
1225 loc = preloc+1
1226 else:
1227 if nextLoc > loc:
1228 matches += 1
1229 yield tokens, preloc, nextLoc
1230 if overlap:
1231 nextloc = preparseFn( instring, loc )
1232 if nextloc > loc:
1233 loc = nextLoc
1234 else:
1235 loc += 1
1236 else:
1237 loc = nextLoc
1238 else:
1239 loc = preloc+1
1240 except ParseBaseException as exc:
1241 if ParserElement.verbose_stacktrace:
1242 raise
1243 else:
1244
1245 raise exc
1246
1279
1281 """Another extension to C{L{scanString}}, simplifying the access to the tokens found
1282 to match the given parse expression. May be called with optional
1283 C{maxMatches} argument, to clip searching after 'n' matches are found.
1284 """
1285 try:
1286 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1287 except ParseBaseException as exc:
1288 if ParserElement.verbose_stacktrace:
1289 raise
1290 else:
1291
1292 raise exc
1293
1295 """Implementation of + operator - returns C{L{And}}"""
1296 if isinstance( other, basestring ):
1297 other = ParserElement.literalStringClass( other )
1298 if not isinstance( other, ParserElement ):
1299 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1300 SyntaxWarning, stacklevel=2)
1301 return None
1302 return And( [ self, other ] )
1303
1305 """Implementation of + operator when left operand is not a C{L{ParserElement}}"""
1306 if isinstance( other, basestring ):
1307 other = ParserElement.literalStringClass( other )
1308 if not isinstance( other, ParserElement ):
1309 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1310 SyntaxWarning, stacklevel=2)
1311 return None
1312 return other + self
1313
1315 """Implementation of - operator, returns C{L{And}} with error stop"""
1316 if isinstance( other, basestring ):
1317 other = ParserElement.literalStringClass( other )
1318 if not isinstance( other, ParserElement ):
1319 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1320 SyntaxWarning, stacklevel=2)
1321 return None
1322 return And( [ self, And._ErrorStop(), other ] )
1323
1325 """Implementation of - operator when left operand is not a C{L{ParserElement}}"""
1326 if isinstance( other, basestring ):
1327 other = ParserElement.literalStringClass( other )
1328 if not isinstance( other, ParserElement ):
1329 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1330 SyntaxWarning, stacklevel=2)
1331 return None
1332 return other - self
1333
1335 """Implementation of * operator, allows use of C{expr * 3} in place of
1336 C{expr + expr + expr}. Expressions may also be multiplied by a 2-integer
1337 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
1338 may also include C{None} as in:
1339 - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1340 to C{expr*n + L{ZeroOrMore}(expr)}
1341 (read as "at least n instances of C{expr}")
1342 - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1343 (read as "0 to n instances of C{expr}")
1344 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
1345 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
1346
1347 Note that C{expr*(None,n)} does not raise an exception if
1348 more than n exprs exist in the input stream; that is,
1349 C{expr*(None,n)} does not enforce a maximum number of expr
1350 occurrences. If this behavior is desired, then write
1351 C{expr*(None,n) + ~expr}
1352
1353 """
1354 if isinstance(other,int):
1355 minElements, optElements = other,0
1356 elif isinstance(other,tuple):
1357 other = (other + (None, None))[:2]
1358 if other[0] is None:
1359 other = (0, other[1])
1360 if isinstance(other[0],int) and other[1] is None:
1361 if other[0] == 0:
1362 return ZeroOrMore(self)
1363 if other[0] == 1:
1364 return OneOrMore(self)
1365 else:
1366 return self*other[0] + ZeroOrMore(self)
1367 elif isinstance(other[0],int) and isinstance(other[1],int):
1368 minElements, optElements = other
1369 optElements -= minElements
1370 else:
1371 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1372 else:
1373 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1374
1375 if minElements < 0:
1376 raise ValueError("cannot multiply ParserElement by negative value")
1377 if optElements < 0:
1378 raise ValueError("second tuple value must be greater or equal to first tuple value")
1379 if minElements == optElements == 0:
1380 raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1381
1382 if (optElements):
1383 def makeOptionalList(n):
1384 if n>1:
1385 return Optional(self + makeOptionalList(n-1))
1386 else:
1387 return Optional(self)
1388 if minElements:
1389 if minElements == 1:
1390 ret = self + makeOptionalList(optElements)
1391 else:
1392 ret = And([self]*minElements) + makeOptionalList(optElements)
1393 else:
1394 ret = makeOptionalList(optElements)
1395 else:
1396 if minElements == 1:
1397 ret = self
1398 else:
1399 ret = And([self]*minElements)
1400 return ret
1401
1404
1406 """Implementation of | operator - returns C{L{MatchFirst}}"""
1407 if isinstance( other, basestring ):
1408 other = ParserElement.literalStringClass( other )
1409 if not isinstance( other, ParserElement ):
1410 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1411 SyntaxWarning, stacklevel=2)
1412 return None
1413 return MatchFirst( [ self, other ] )
1414
1416 """Implementation of | operator when left operand is not a C{L{ParserElement}}"""
1417 if isinstance( other, basestring ):
1418 other = ParserElement.literalStringClass( other )
1419 if not isinstance( other, ParserElement ):
1420 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1421 SyntaxWarning, stacklevel=2)
1422 return None
1423 return other | self
1424
1426 """Implementation of ^ operator - returns C{L{Or}}"""
1427 if isinstance( other, basestring ):
1428 other = ParserElement.literalStringClass( other )
1429 if not isinstance( other, ParserElement ):
1430 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1431 SyntaxWarning, stacklevel=2)
1432 return None
1433 return Or( [ self, other ] )
1434
1436 """Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
1437 if isinstance( other, basestring ):
1438 other = ParserElement.literalStringClass( other )
1439 if not isinstance( other, ParserElement ):
1440 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1441 SyntaxWarning, stacklevel=2)
1442 return None
1443 return other ^ self
1444
1446 """Implementation of & operator - returns C{L{Each}}"""
1447 if isinstance( other, basestring ):
1448 other = ParserElement.literalStringClass( other )
1449 if not isinstance( other, ParserElement ):
1450 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1451 SyntaxWarning, stacklevel=2)
1452 return None
1453 return Each( [ self, other ] )
1454
1456 """Implementation of & operator when left operand is not a C{L{ParserElement}}"""
1457 if isinstance( other, basestring ):
1458 other = ParserElement.literalStringClass( other )
1459 if not isinstance( other, ParserElement ):
1460 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1461 SyntaxWarning, stacklevel=2)
1462 return None
1463 return other & self
1464
1466 """Implementation of ~ operator - returns C{L{NotAny}}"""
1467 return NotAny( self )
1468
1470 """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
1471 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1472 could be written as::
1473 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1474
1475 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
1476 passed as C{True}.
1477
1478 If C{name} is omitted, same as calling C{L{copy}}.
1479 """
1480 if name is not None:
1481 return self.setResultsName(name)
1482 else:
1483 return self.copy()
1484
1486 """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
1487 cluttering up returned output.
1488 """
1489 return Suppress( self )
1490
1492 """Disables the skipping of whitespace before matching the characters in the
1493 C{ParserElement}'s defined pattern. This is normally only used internally by
1494 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1495 """
1496 self.skipWhitespace = False
1497 return self
1498
1500 """Overrides the default whitespace chars
1501 """
1502 self.skipWhitespace = True
1503 self.whiteChars = chars
1504 self.copyDefaultWhiteChars = False
1505 return self
1506
1508 """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
1509 Must be called before C{parseString} when the input grammar contains elements that
1510 match C{<TAB>} characters."""
1511 self.keepTabs = True
1512 return self
1513
1515 """Define expression to be ignored (e.g., comments) while doing pattern
1516 matching; may be called repeatedly, to define multiple comment or other
1517 ignorable patterns.
1518 """
1519 if isinstance(other, basestring):
1520 other = Suppress(other)
1521
1522 if isinstance( other, Suppress ):
1523 if other not in self.ignoreExprs:
1524 self.ignoreExprs.append(other)
1525 else:
1526 self.ignoreExprs.append( Suppress( other.copy() ) )
1527 return self
1528
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Turn on debugging for this expression using the given callbacks.

    Any callback passed as a false value (such as C{None}) is replaced by the
    corresponding module-level default debug action.  Sets C{self.debug} to
    True and returns C{self}.
    """
    if not startAction:
        startAction = _defaultStartDebugAction
    if not successAction:
        successAction = _defaultSuccessDebugAction
    if not exceptionAction:
        exceptionAction = _defaultExceptionDebugAction

    self.debugActions = (startAction, successAction, exceptionAction)
    self.debug = True
    return self
1536
1538 """Enable display of debugging messages while doing pattern matching.
1539 Set C{flag} to True to enable, False to disable."""
1540 if flag:
1541 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
1542 else:
1543 self.debug = False
1544 return self
1545
1548
1551
1553 self.streamlined = True
1554 self.strRepr = None
1555 return self
1556
1559
def validate( self, validateTrace=None ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is accepted for signature compatibility but is not read
    here; the check is delegated to C{self.checkRecursion}, started with a
    fresh empty list.  The default is C{None} rather than a shared mutable
    list object.
    """
    self.checkRecursion( [] )
1563
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       Parameters:
        - file_or_filename - an object with a C{read()} method, or a path to open in text mode
        - parseAll - (default=False) - flag passed through to C{parseString}

       Returns whatever C{parseString} returns for the file's contents.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method, so treat the argument as a path; the with-block
        # guarantees the handle is closed even if read() fails
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # NOTE(review): presumably re-raised by name to shorten the
            # reported stack trace; confirm against the sibling parse methods
            raise exc
1583
1585 if isinstance(other, ParserElement):
1586 return self is other or vars(self) == vars(other)
1587 elif isinstance(other, basestring):
1588 try:
1589 self.parseString(_ustr(other), parseAll=True)
1590 return True
1591 except ParseBaseException:
1592 return False
1593 else:
1594 return super(ParserElement,self)==other
1595
1597 return not (self == other)
1598
1600 return hash(id(self))
1601
1603 return self == other
1604
1606 return not (self == other)
1607
def runTests(self, tests, parseAll=False):
    """Execute the parse expression on a series of test strings, showing each
       test, the parsed results or where the parse failed. Quick and easy way to
       run a parse expression against a list of sample strings.

       Parameters:
        - tests - a list of separate test strings, or a multiline string of test strings
        - parseAll - (default=False) - flag to pass to C{L{parseString}} when running tests

       Output is printed; nothing is returned.
    """
    if isinstance(tests, basestring):
        # strip through each line's own method so that any string type works
        # (the unbound str.strip rejects non-str text such as Python 2 unicode)
        tests = [t.strip() for t in tests.splitlines()]
    for t in tests:
        out = [t]
        try:
            out.append(self.parseString(t, parseAll=parseAll).dump())
        except ParseException as pe:
            if '\n' in t:
                # multi-line test: show the offending line with a caret under the column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^')
            else:
                out.append(' '*pe.loc + '^')
            out.append(str(pe))
        out.append('')
        print('\n'.join(out))
1632
1633
1634 -class Token(ParserElement):
1635 """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
1638
1639
1640 -class Empty(Token):
1641 """An empty token, will always match."""
1643 super(Empty,self).__init__()
1644 self.name = "Empty"
1645 self.mayReturnEmpty = True
1646 self.mayIndexError = False
1647
1650 """A token that will never match."""
1652 super(NoMatch,self).__init__()
1653 self.name = "NoMatch"
1654 self.mayReturnEmpty = True
1655 self.mayIndexError = False
1656 self.errmsg = "Unmatchable token"
1657
1658 - def parseImpl( self, instring, loc, doActions=True ):
1660
1663 """Token to exactly match a specified string."""
1665 super(Literal,self).__init__()
1666 self.match = matchString
1667 self.matchLen = len(matchString)
1668 try:
1669 self.firstMatchChar = matchString[0]
1670 except IndexError:
1671 warnings.warn("null string passed to Literal; use Empty() instead",
1672 SyntaxWarning, stacklevel=2)
1673 self.__class__ = Empty
1674 self.name = '"%s"' % _ustr(self.match)
1675 self.errmsg = "Expected " + self.name
1676 self.mayReturnEmpty = False
1677 self.mayIndexError = False
1678
1679
1680
1681
1682
def parseImpl( self, instring, loc, doActions=True ):
    """Match C{self.match} exactly at C{loc}.

    Returns C{(loc + matchLen, self.match)} on success, otherwise raises
    C{ParseException}.  The first character is compared before the full
    C{startswith} test, and single-character literals skip C{startswith}.
    """
    firstCharMatches = instring[loc] == self.firstMatchChar
    if firstCharMatches:
        if self.matchLen == 1 or instring.startswith(self.match, loc):
            return loc + self.matchLen, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1688 _L = Literal
1689 ParserElement.literalStringClass = Literal
1692 """Token to exactly match a specified string as a keyword, that is, it must be
1693 immediately followed by a non-keyword character. Compare with C{L{Literal}}::
1694 Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
1695 Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
1696 Accepts two optional constructor arguments in addition to the keyword string:
1697 C{identChars} is a string of characters that would be valid identifier characters,
1698 defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
1699 matching, default is C{False}.
1700 """
1701 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
1702
1704 super(Keyword,self).__init__()
1705 self.match = matchString
1706 self.matchLen = len(matchString)
1707 try:
1708 self.firstMatchChar = matchString[0]
1709 except IndexError:
1710 warnings.warn("null string passed to Keyword; use Empty() instead",
1711 SyntaxWarning, stacklevel=2)
1712 self.name = '"%s"' % self.match
1713 self.errmsg = "Expected " + self.name
1714 self.mayReturnEmpty = False
1715 self.mayIndexError = False
1716 self.caseless = caseless
1717 if caseless:
1718 self.caselessmatch = matchString.upper()
1719 identChars = identChars.upper()
1720 self.identChars = set(identChars)
1721
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at C{loc}, requiring that it is not part of a longer identifier.

    The text at C{loc} must equal the keyword (compared in upper case when
    C{self.caseless} is set), the character after it (if any) must not be in
    C{self.identChars}, and neither must the character before it (if any).
    Returns C{(end location, self.match)}; raises C{ParseException} otherwise.
    """
    matchEnd = loc + self.matchLen
    if self.caseless:
        found = ( instring[loc:matchEnd].upper() == self.caselessmatch
                  and (loc >= len(instring) - self.matchLen
                       or instring[matchEnd].upper() not in self.identChars)
                  and (loc == 0
                       or instring[loc-1].upper() not in self.identChars) )
    else:
        found = ( instring[loc] == self.firstMatchChar
                  and (self.matchLen == 1 or instring.startswith(self.match, loc))
                  and (loc >= len(instring) - self.matchLen
                       or instring[matchEnd] not in self.identChars)
                  and (loc == 0
                       or instring[loc-1] not in self.identChars) )
    if found:
        return matchEnd, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1735
1740
1741 @staticmethod
1746
1748 """Token to match a specified string, ignoring case of letters.
1749 Note: the matched results will always be in the case of the given
1750 match string, NOT the case of the input text.
1751 """
1753 super(CaselessLiteral,self).__init__( matchString.upper() )
1754
1755 self.returnString = matchString
1756 self.name = "'%s'" % self.returnString
1757 self.errmsg = "Expected " + self.name
1758
def parseImpl( self, instring, loc, doActions=True ):
    """Compare the upper-cased input slice at C{loc} against C{self.match}.

    On success returns C{(end location, self.returnString)}, i.e. the text in
    the case it was defined with, not the case found in the input; otherwise
    raises C{ParseException}.
    """
    matchEnd = loc + self.matchLen
    if instring[loc:matchEnd].upper() != self.match:
        raise ParseException(instring, loc, self.errmsg, self)
    return matchEnd, self.returnString
1763
1767
def parseImpl( self, instring, loc, doActions=True ):
    """Case-insensitive keyword match at C{loc}.

    The upper-cased slice must equal C{self.caselessmatch}, and the character
    following it (if there is one) must not be in C{self.identChars} after
    upper-casing.  Returns C{(end location, self.match)} or raises
    C{ParseException}.
    """
    matchEnd = loc + self.matchLen
    if instring[loc:matchEnd].upper() == self.caselessmatch:
        atEndOfInput = loc >= len(instring) - self.matchLen
        if atEndOfInput or instring[matchEnd].upper() not in self.identChars:
            return matchEnd, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1773
1775 """Token for matching words composed of allowed character sets.
1776 Defined with string containing all allowed initial characters,
1777 an optional string containing allowed body characters (if omitted,
1778 defaults to the initial character set), and an optional minimum,
1779 maximum, and/or exact length. The default value for C{min} is 1 (a
1780 minimum value < 1 is not valid); the default values for C{max} and C{exact}
1781 are 0, meaning no maximum or exact length restriction. An optional
1782 C{excludeChars} parameter can list characters that might be found in
1783 the input C{bodyChars} string; useful to define a word of all printables
1784 except for one or two characters, for instance.
1785 """
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
    """Set up the allowed character sets and length limits for the word.

    Parameters:
     - initChars - characters allowed as the first character
     - bodyChars - characters allowed after the first (default: same as C{initChars})
     - min - minimum length; values below 1 raise C{ValueError}
     - max - maximum length, 0 meaning no maximum
     - exact - if > 0, overrides both C{min} and C{max}
     - asKeyword - require that the word is not adjacent to further body characters
     - excludeChars - characters removed from C{initChars} and C{bodyChars}

    When no space is among the characters and the lengths are the defaults,
    an equivalent regular expression is compiled into C{self.re} and used as
    a fast path by C{parseImpl}.
    """
    super(Word,self).__init__()
    if excludeChars:
        initChars = ''.join(c for c in initChars if c not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    if bodyChars :
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)
    else:
        self.bodyCharsOrig = initChars
        self.bodyChars = set(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            self.reString = "%s[%s]*" % \
                                  (re.escape(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % \
                                  (_escapeRegexRangeChars(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b"+self.reString+r"\b"
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # best effort: if the generated pattern cannot be compiled, fall
            # back to the character-by-character matcher in parseImpl; only
            # ordinary errors are caught, so KeyboardInterrupt/SystemExit
            # still propagate
            self.re = None
1839
def parseImpl( self, instring, loc, doActions=True ):
    """Match a word at C{loc} and return C{(end location, matched text)}.

    If C{self.re} is set, the compiled regular expression does all the work.
    Otherwise the first character must be in C{self.initChars}, and following
    characters are consumed while they are in C{self.bodyChars}, up to
    C{self.maxLen} characters in total.  C{ParseException} is raised when the
    word is too short, when an explicit maximum was given and more body
    characters follow, or when C{asKeyword} is set and the word touches
    another body character on either side.
    """
    if self.re:
        found = self.re.match(instring, loc)
        if found:
            return found.end(), found.group()
        raise ParseException(instring, loc, self.errmsg, self)

    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    wordStart = loc
    allowed = self.bodyChars
    inputLen = len(instring)
    # never scan beyond maxLen characters or past the end of the input
    stopAt = min( wordStart + self.maxLen, inputLen )
    loc += 1
    while loc < stopAt and instring[loc] in allowed:
        loc += 1

    moreBodyFollows = loc < inputLen and instring[loc] in allowed
    tooShort = loc - wordStart < self.minLen
    exceedsMax = self.maxSpecified and moreBodyFollows
    touchesNeighbor = self.asKeyword and (
        (wordStart > 0 and instring[wordStart-1] in allowed) or moreBodyFollows )

    if tooShort or exceedsMax or touchesNeighbor:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[wordStart:loc]
1874
1876 try:
1877 return super(Word,self).__str__()
1878 except:
1879 pass
1880
1881
1882 if self.strRepr is None:
1883
1884 def charsAsStr(s):
1885 if len(s)>4:
1886 return s[:4]+"..."
1887 else:
1888 return s
1889
1890 if ( self.initCharsOrig != self.bodyCharsOrig ):
1891 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
1892 else:
1893 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
1894
1895 return self.strRepr
1896
1897
1898 -class Regex(Token):
1899 """Token for matching strings that match a given regular expression.
1900 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
1901 """
1902 compiledREtype = type(re.compile("[A-Z]"))
def __init__( self, pattern, flags=0):
    """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

    C{pattern} may also be an already compiled regular expression object, in
    which case it is used directly and C{self.pattern}/C{self.reString} hold
    its source text.  Any other type raises C{ValueError}.  An invalid pattern
    string emits a C{SyntaxWarning} and re-raises the C{re} compile error.
    """
    super(Regex,self).__init__()

    if isinstance(pattern, basestring):
        if not pattern:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            # re.error is the same exception class as sre_constants.error
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

    elif isinstance(pattern, Regex.compiledREtype):
        self.re = pattern
        # keep the regex source text; str(pattern) would give the object's
        # repr, which is not a usable pattern string
        self.pattern = \
        self.reString = pattern.pattern
        self.flags = flags

    else:
        raise ValueError("Regex may only be constructed with a string or a compiled RE object")

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
1936
def parseImpl( self, instring, loc, doActions=True ):
    """Apply the compiled regular expression at C{loc}.

    Returns C{(end location, ParseResults)} where the results hold the whole
    matched text, plus one named entry per named group of the expression.
    Raises C{ParseException} if the expression does not match at C{loc}.
    """
    found = self.re.match(instring, loc)
    if not found:
        raise ParseException(instring, loc, self.errmsg, self)

    tokens = ParseResults(found.group())
    # expose every named group as a results name
    for groupName, groupText in found.groupdict().items():
        tokens[groupName] = groupText
    return found.end(), tokens
1949
1951 try:
1952 return super(Regex,self).__str__()
1953 except:
1954 pass
1955
1956 if self.strRepr is None:
1957 self.strRepr = "Re:(%s)" % repr(self.pattern)
1958
1959 return self.strRepr
1960
1963 """Token for matching strings that are delimited by quoting characters.
1964 """
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
    r"""Build the regular expression that recognizes the quoted string.

    Parameters:
     - quoteChar - string of one or more characters defining the quote delimiting string
     - escChar - character to escape quotes, typically backslash (default=None)
     - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
     - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
     - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
     - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
     - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Quote strings are stripped of surrounding whitespace; an empty result
    emits a C{SyntaxWarning} and raises C{SyntaxError}.
    """
    super(QuotedString,self).__init__()

    # surrounding whitespace is not part of the delimiter
    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is not None:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()
    else:
        endQuoteChar = quoteChar

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults
    self.convertWhitespaceEscapes = convertWhitespaceEscapes

    # single-line strings additionally exclude CR and LF from the body
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        excludedNewlines = ''
    else:
        self.flags = 0
        excludedNewlines = r'\n\r'

    # opening quote, then a body of characters that are neither the first
    # end-quote character, (optionally) a newline, nor the escape character
    pattern = r'%s(?:[^%s%s%s]' % \
        ( re.escape(self.quoteChar),
          _escapeRegexRangeChars(self.endQuoteChar[0]),
          excludedNewlines,
          ((_escapeRegexRangeChars(escChar) or '') if escChar is not None else '') )

    endLen = len(self.endQuoteChar)
    if endLen > 1:
        # a proper prefix of a multi-character end quote followed by a
        # character that breaks it still belongs to the body
        partialEnds = [ "%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                     _escapeRegexRangeChars(self.endQuoteChar[i]))
                        for i in range(endLen-1,0,-1) ]
        pattern += '|(?:' + ')|(?:'.join(partialEnds) + ')'
    if escQuote:
        pattern += (r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        pattern += (r'|(?:%s.)' % re.escape(escChar))
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
    pattern += (r')*%s' % re.escape(self.endQuoteChar))
    self.pattern = pattern

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
2038
def parseImpl( self, instring, loc, doActions=True ):
    """Match a quoted string at C{loc} and return C{(end location, text)}.

    The first character is checked against C{self.firstQuoteChar} before the
    compiled pattern is tried.  When C{self.unquoteResults} is set, the
    delimiters are removed, escaped whitespace literals are converted (if
    C{self.convertWhitespaceEscapes}), escape characters are dropped and
    C{self.escQuote} sequences are replaced by the end quote string.
    Raises C{ParseException} when no quoted string starts at C{loc}.
    """
    if instring[loc] == self.firstQuoteChar:
        result = self.re.match(instring,loc)
    else:
        result = None
    if not result:
        raise ParseException(instring, loc, self.errmsg, self)

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:

        # strip off quotes; ret is the whole-match text, so it is always a
        # string and needs no further type check
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        # replace escaped whitespace literals by the real characters
        if '\\' in ret and self.convertWhitespaceEscapes:
            ws_map = {
                r'\t' : '\t',
                r'\n' : '\n',
                r'\f' : '\f',
                r'\r' : '\r',
            }
            for wslit,wschar in ws_map.items():
                ret = ret.replace(wslit, wschar)

        # replace escaped characters by the character itself; the
        # replacement template is a raw string because \g is not a valid
        # Python string escape
        if self.escChar:
            ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

        # replace escaped quotes
        if self.escQuote:
            ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
2073
2075 try:
2076 return super(QuotedString,self).__str__()
2077 except:
2078 pass
2079
2080 if self.strRepr is None:
2081 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
2082
2083 return self.strRepr
2084
2087 """Token for matching words composed of characters *not* in a given set.
2088 Defined with string containing all disallowed characters, and an optional
2089 minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
2090 minimum value < 1 is not valid); the default values for C{max} and C{exact}
2091 are 0, meaning no maximum or exact length restriction.
2092 """
def __init__( self, notChars, min=1, max=0, exact=0 ):
    """Record the disallowed characters and the length limits.

    C{min} below 1 raises C{ValueError}; C{max} of 0 means no maximum; a
    positive C{exact} overrides both limits.  Leading whitespace is not
    skipped for this element.
    """
    super(CharsNotIn,self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    if exact > 0:
        # an exact length fixes both bounds
        self.minLen = exact
        self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = ( self.minLen == 0 )
    self.mayIndexError = False
2116
def parseImpl( self, instring, loc, doActions=True ):
    """Consume characters from C{loc} until one of C{self.notChars} is found.

    At most C{self.maxLen} characters are taken.  Returns
    C{(end location, matched text)}; raises C{ParseException} if the first
    character is disallowed or fewer than C{self.minLen} characters matched.
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        raise ParseException(instring, loc, self.errmsg, self)

    runStart = loc
    # stop at maxLen characters or at the end of the input, whichever is first
    stopAt = min( runStart+self.maxLen, len(instring) )
    loc += 1
    while loc < stopAt and instring[loc] not in excluded:
        loc += 1

    if loc - runStart < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[runStart:loc]
2133
2135 try:
2136 return super(CharsNotIn, self).__str__()
2137 except:
2138 pass
2139
2140 if self.strRepr is None:
2141 if len(self.notChars) > 4:
2142 self.strRepr = "!W:(%s...)" % self.notChars[:4]
2143 else:
2144 self.strRepr = "!W:(%s)" % self.notChars
2145
2146 return self.strRepr
2147
2149 """Special matching class for matching whitespace. Normally, whitespace is ignored
2150 by pyparsing grammars. This class is included when some whitespace structures
2151 are significant. Define with a string containing the whitespace characters to be
2152 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
2153 as defined for the C{L{Word}} class."""
2154 whiteStrs = {
2155 " " : "<SPC>",
2156 "\t": "<TAB>",
2157 "\n": "<LF>",
2158 "\r": "<CR>",
2159 "\f": "<FF>",
2160 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """Configure a token that matches a run of the whitespace characters in C{ws}.

    Parameters:
     - ws - string of whitespace characters to be matched
     - min - minimum run length
     - max - maximum run length; 0 (or less) means no upper limit
     - exact - when > 0, overrides both C{min} and C{max}
    """
    super(White,self).__init__()
    self.matchWhite = ws
    # characters being matched must no longer be skipped as insignificant
    skippable = [c for c in self.whiteChars if c not in ws]
    self.setWhitespaceChars( "".join(skippable) )

    labels = [White.whiteStrs[c] for c in ws]
    self.name = "".join(labels)
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    if exact > 0:
        # an exact length pins both bounds
        self.minLen = self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
2180
def parseImpl( self, instring, loc, doActions=True ):
    """Consume a run of characters from C{self.matchWhite} starting at C{loc}.

    Returns C{(newloc, matched_text)}.  Raises C{ParseException} if the
    character at C{loc} is not one of the matched whitespace characters,
    or if the run is shorter than C{self.minLen}.
    """
    wanted = self.matchWhite
    if instring[loc] not in wanted:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    # stop at the maximum length or the end of the input, whichever is first
    limit = min( begin + self.maxLen, len(instring) )
    loc += 1
    while loc < limit and instring[loc] in wanted:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
2195
2199 super(_PositionToken,self).__init__()
2200 self.name=self.__class__.__name__
2201 self.mayReturnEmpty = True
2202 self.mayIndexError = False
2203
2205 """Token to advance to a specific column of input text; useful for tabular report scraping."""
2209
2211 if col(loc,instring) != self.col:
2212 instrlen = len(instring)
2213 if self.ignoreExprs:
2214 loc = self._skipIgnorables( instring, loc )
2215 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
2216 loc += 1
2217 return loc
2218
def parseImpl( self, instring, loc, doActions=True ):
    """Advance from C{loc} to the configured column C{self.col}.

    Returns C{(newloc, skipped_text)}.  Raises C{ParseException} when the
    current column is already past the target column.
    """
    current = col( loc, instring )
    gap = self.col - current
    if gap < 0:
        raise ParseException( instring, loc, "Text not in expected column", self )
    target = loc + gap
    return target, instring[ loc: target ]
2226
2228 """Matches if current position is at the beginning of a line within the parse string"""
2233
2235 preloc = super(LineStart,self).preParse(instring,loc)
2236 if instring[preloc] == "\n":
2237 loc += 1
2238 return loc
2239
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed with an empty token list if C{loc} is at the start of a line.

    A position counts as a line start when it is 0, when it equals the
    result of C{self.preParse(instring, 0)}, or when the preceding
    character is a newline.  Otherwise C{ParseException} is raised.
    """
    if loc == 0:
        return loc, []
    if loc == self.preParse( instring, 0 ):
        return loc, []
    if instring[loc-1] == "\n":
        return loc, []
    raise ParseException(instring, loc, self.errmsg, self)
2246
2248 """Matches if current position is at the end of a line within the parse string"""
2253
def parseImpl( self, instring, loc, doActions=True ):
    """Match a newline at C{loc}, or the end of the input string.

    Returns C{(loc+1, "\\n")} for a newline and C{(loc+1, [])} when C{loc}
    is exactly the end of the string; any other position raises
    C{ParseException}.
    """
    end = len(instring)
    if loc == end:
        return loc+1, []
    if loc < end and instring[loc] == "\n":
        return loc+1, "\n"
    raise ParseException(instring, loc, self.errmsg, self)
2264
2266 """Matches if current position is at the beginning of the parse string"""
2270
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed with an empty token list if C{loc} is at the start of the string.

    Position 0 always matches; a non-zero position matches only when it
    equals the result of C{self.preParse(instring, 0)}.  Otherwise
    C{ParseException} is raised.
    """
    if loc != 0 and loc != self.preParse( instring, 0 ):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2277
2279 """Matches if current position is at the end of the parse string"""
2283
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed with an empty token list if C{loc} is at or past the end of the string.

    Returns C{(loc+1, [])} when C{loc} is exactly the end of the input and
    C{(loc, [])} when it is already beyond it.  Raises C{ParseException}
    when unparsed input remains.
    """
    end = len(instring)
    if loc < end:
        raise ParseException(instring, loc, self.errmsg, self)
    if loc == end:
        return loc+1, []
    # loc > end is the only remaining case; the former trailing 'else'
    # branch could never execute and has been dropped
    return loc, []
2293
2295 """Matches if the current position is at the beginning of a Word, and
2296 is not preceded by any character in a given set of C{wordChars}
2297 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2298 use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
2299 the string being parsed, or at the beginning of a line.
2300 """
2302 super(WordStart,self).__init__()
2303 self.wordChars = set(wordChars)
2304 self.errmsg = "Not at the start of a word"
2305
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed with an empty token list if a word begins at C{loc}.

    Position 0 always matches.  Elsewhere the previous character must not
    be in C{self.wordChars} and the current character must be; otherwise
    C{ParseException} is raised.
    """
    if loc == 0:
        return loc, []
    word_chars = self.wordChars
    if instring[loc-1] in word_chars or instring[loc] not in word_chars:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2312
2314 """Matches if the current position is at the end of a Word, and
2315 is not followed by any character in a given set of C{wordChars}
2316 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2317 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
2318 the string being parsed, or at the end of a line.
2319 """
2321 super(WordEnd,self).__init__()
2322 self.wordChars = set(wordChars)
2323 self.skipWhitespace = False
2324 self.errmsg = "Not at the end of a word"
2325
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed with an empty token list if a word ends at C{loc}.

    A position at or beyond the end of the input always matches.
    Elsewhere the current character must not be in C{self.wordChars} and
    the previous character must be; otherwise C{ParseException} is raised.
    """
    instrlen = len(instring)
    if instrlen>0 and loc<instrlen:
        # At loc == 0 there is no previous character; without this guard
        # instring[loc-1] would wrap around to the LAST character of the
        # input and could wrongly report a word end at the very start.
        if (loc == 0 or
            instring[loc] in self.wordChars or
            instring[loc-1] not in self.wordChars):
            raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2333
2336 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, exprs, savelist = False ):
    """Store the contained expressions in C{self.exprs} as a list.

    C{exprs} may be a generator, a single string (wrapped in a
    C{Literal}), a sequence (a sequence made up only of strings is
    converted to C{Literal}s), any other iterable, or a single
    non-iterable object (stored as a one-element list).
    """
    # collections.Sequence was removed in Python 3.10; the ABC lives in
    # collections.abc on Python 3 and directly in collections on Python 2.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    super(ParseExpression,self).__init__(savelist)
    if isinstance( exprs, _generatorType ):
        exprs = list(exprs)

    if isinstance( exprs, basestring ):
        self.exprs = [ Literal( exprs ) ]
    elif isinstance( exprs, Sequence ):
        # if a sequence of strings was provided, wrap each one in a Literal
        if all(isinstance(expr, basestring) for expr in exprs):
            exprs = map(Literal, exprs)
        self.exprs = list(exprs)
    else:
        try:
            self.exprs = list( exprs )
        except TypeError:
            # not iterable: treat it as a single expression
            self.exprs = [ exprs ]
    self.callPreparse = False
2355
2357 return self.exprs[i]
2358
2360 self.exprs.append( other )
2361 self.strRepr = None
2362 return self
2363
2365 """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
2366 all contained expressions."""
2367 self.skipWhitespace = False
2368 self.exprs = [ e.copy() for e in self.exprs ]
2369 for e in self.exprs:
2370 e.leaveWhitespace()
2371 return self
2372
2374 if isinstance( other, Suppress ):
2375 if other not in self.ignoreExprs:
2376 super( ParseExpression, self).ignore( other )
2377 for e in self.exprs:
2378 e.ignore( self.ignoreExprs[-1] )
2379 else:
2380 super( ParseExpression, self).ignore( other )
2381 for e in self.exprs:
2382 e.ignore( self.ignoreExprs[-1] )
2383 return self
2384
2386 try:
2387 return super(ParseExpression,self).__str__()
2388 except:
2389 pass
2390
2391 if self.strRepr is None:
2392 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
2393 return self.strRepr
2394
2396 super(ParseExpression,self).streamline()
2397
2398 for e in self.exprs:
2399 e.streamline()
2400
2401
2402
2403
2404 if ( len(self.exprs) == 2 ):
2405 other = self.exprs[0]
2406 if ( isinstance( other, self.__class__ ) and
2407 not(other.parseAction) and
2408 other.resultsName is None and
2409 not other.debug ):
2410 self.exprs = other.exprs[:] + [ self.exprs[1] ]
2411 self.strRepr = None
2412 self.mayReturnEmpty |= other.mayReturnEmpty
2413 self.mayIndexError |= other.mayIndexError
2414
2415 other = self.exprs[-1]
2416 if ( isinstance( other, self.__class__ ) and
2417 not(other.parseAction) and
2418 other.resultsName is None and
2419 not other.debug ):
2420 self.exprs = self.exprs[:-1] + other.exprs[:]
2421 self.strRepr = None
2422 self.mayReturnEmpty |= other.mayReturnEmpty
2423 self.mayIndexError |= other.mayIndexError
2424
2425 self.errmsg = "Expected " + _ustr(self)
2426
2427 return self
2428
2432
def validate( self, validateTrace=[] ):
    """Validate every contained expression, then check this one for recursion.

    C{validateTrace} is the list of expressions visited so far; it is
    copied, never modified.
    """
    trace = validateTrace[:]+[self]
    for sub in self.exprs:
        sub.validate(trace)
    self.checkRecursion( [] )
2438
2443
2444 -class And(ParseExpression):
2445 """Requires all given C{ParseExpression}s to be found in the given order.
2446 Expressions may be separated by whitespace.
2447 May be constructed using the C{'+'} operator.
2448 """
2449
2455
def __init__( self, exprs, savelist = True ):
    """Build a sequence expression from C{exprs}.

    The whitespace settings are taken from the first contained
    expression.  An empty expression list is accepted: the whitespace
    defaults set by the base class are then left untouched and
    C{mayReturnEmpty} is True.
    """
    super(And,self).__init__(exprs, savelist)
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
    # guard against And([]) - indexing exprs[0] would raise IndexError
    if self.exprs:
        leader = self.exprs[0]
        # setWhitespaceChars must run before skipWhitespace is copied
        self.setWhitespaceChars( leader.whiteChars )
        self.skipWhitespace = leader.skipWhitespace
    self.callPreparse = True
2462
def parseImpl( self, instring, loc, doActions=True ):
    """Parse every contained expression in order and concatenate the results.

    The first expression is parsed with C{callPreParse=False}.  Once an
    C{And._ErrorStop} marker has been passed, any parse failure or
    C{IndexError} in a later expression is re-raised as
    C{ParseSyntaxException}.
    """
    first, rest = self.exprs[0], self.exprs[1:]
    # pass False as the last arg to _parse for the first element, since
    # any preParse was already done if we are being called from an And
    loc, resultlist = first._parse( instring, loc, doActions, callPreParse=False )
    pastErrorStop = False
    for e in rest:
        if isinstance(e, And._ErrorStop):
            pastErrorStop = True
            continue
        if not pastErrorStop:
            loc, exprtokens = e._parse( instring, loc, doActions )
        else:
            try:
                loc, exprtokens = e._parse( instring, loc, doActions )
            except ParseSyntaxException:
                raise
            except ParseBaseException as pe:
                pe.__traceback__ = None
                raise ParseSyntaxException(pe)
            except IndexError:
                raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
        if exprtokens or exprtokens.haskeys():
            resultlist += exprtokens
    return loc, resultlist
2487
2489 if isinstance( other, basestring ):
2490 other = Literal( other )
2491 return self.append( other )
2492
2494 subRecCheckList = parseElementList[:] + [ self ]
2495 for e in self.exprs:
2496 e.checkRecursion( subRecCheckList )
2497 if not e.mayReturnEmpty:
2498 break
2499
2501 if hasattr(self,"name"):
2502 return self.name
2503
2504 if self.strRepr is None:
2505 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
2506
2507 return self.strRepr
2508
2509
2510 -class Or(ParseExpression):
2511 """Requires that at least one C{ParseExpression} is found.
2512 If two expressions match, the expression that matches the longest string will be used.
2513 May be constructed using the C{'^'} operator.
2514 """
def __init__( self, exprs, savelist = False ):
    """Build a longest-match alternation from C{exprs}.

    C{mayReturnEmpty} is True when there are no alternatives, or when any
    alternative may return empty.
    """
    super(Or,self).__init__(exprs, savelist)
    if not self.exprs:
        self.mayReturnEmpty = True
    else:
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
2521
def parseImpl( self, instring, loc, doActions=True ):
    """Try every alternative and return the result of the longest match.

    Each alternative is first tested with C{tryParse}; the successful ones
    are then parsed for real, longest first (ties keep definition order).
    If none succeeds, the failure that got furthest into the input is
    raised with this expression's error message.
    """
    furthestLoc = -1
    furthestErr = None
    candidates = []
    for e in self.exprs:
        try:
            endloc = e.tryParse( instring, loc )
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthestLoc:
                furthestErr = err
                furthestLoc = err.loc
        except IndexError:
            inputLen = len(instring)
            if inputLen > furthestLoc:
                furthestErr = ParseException(instring,inputLen,e.errmsg,self)
                furthestLoc = inputLen
        else:
            # save match among all matches, to retry longest to shortest
            candidates.append((endloc, e))

    # stable sort on the end location only, largest first
    candidates.sort(key=lambda m: m[0], reverse=True)
    for _,e in candidates:
        try:
            return e._parse( instring, loc, doActions )
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthestLoc:
                furthestErr = err
                furthestLoc = err.loc

    if furthestErr is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthestErr.msg = self.errmsg
    raise furthestErr
2558
2559
2561 if isinstance( other, basestring ):
2562 other = ParserElement.literalStringClass( other )
2563 return self.append( other )
2564
2566 if hasattr(self,"name"):
2567 return self.name
2568
2569 if self.strRepr is None:
2570 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
2571
2572 return self.strRepr
2573
2575 subRecCheckList = parseElementList[:] + [ self ]
2576 for e in self.exprs:
2577 e.checkRecursion( subRecCheckList )
2578
2581 """Requires that at least one C{ParseExpression} is found.
2582 If two expressions match, the first one listed is the one that will match.
2583 May be constructed using the C{'|'} operator.
2584 """
def __init__( self, exprs, savelist = False ):
    """Build a first-match alternation from C{exprs}.

    C{mayReturnEmpty} is True when there are no alternatives, or when any
    alternative may return empty.
    """
    super(MatchFirst,self).__init__(exprs, savelist)
    if not self.exprs:
        self.mayReturnEmpty = True
    else:
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
2591
def parseImpl( self, instring, loc, doActions=True ):
    """Return the result of the first alternative that parses successfully.

    If no alternative matches, the failure that got furthest into the
    input is raised with this expression's error message; with no
    alternatives at all a fresh C{ParseException} is raised.
    """
    furthestLoc = -1
    furthestErr = None
    for e in self.exprs:
        try:
            return e._parse( instring, loc, doActions )
        except ParseException as err:
            if err.loc > furthestLoc:
                furthestErr = err
                furthestLoc = err.loc
        except IndexError:
            inputLen = len(instring)
            if inputLen > furthestLoc:
                furthestErr = ParseException(instring,inputLen,e.errmsg,self)
                furthestLoc = inputLen

    # only reached when no expression matched
    if furthestErr is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthestErr.msg = self.errmsg
    raise furthestErr
2615
2617 if isinstance( other, basestring ):
2618 other = ParserElement.literalStringClass( other )
2619 return self.append( other )
2620
2622 if hasattr(self,"name"):
2623 return self.name
2624
2625 if self.strRepr is None:
2626 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
2627
2628 return self.strRepr
2629
2631 subRecCheckList = parseElementList[:] + [ self ]
2632 for e in self.exprs:
2633 e.checkRecursion( subRecCheckList )
2634
2635
2636 -class Each(ParseExpression):
2637 """Requires all given C{ParseExpression}s to be found, but in any order.
2638 Expressions may be separated by whitespace.
2639 May be constructed using the C{'&'} operator.
2640 """
def __init__( self, exprs, savelist = True ):
    """Build an any-order conjunction from C{exprs}.

    The grouping of the expressions is deferred to the first call of
    C{parseImpl}, signalled by C{self.initExprGroups}.
    """
    super(Each,self).__init__(exprs, savelist)
    self.initExprGroups = True
    self.skipWhitespace = True
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
2646
def parseImpl( self, instring, loc, doActions=True ):
    """Match all contained expressions, in whatever order they occur.

    On first use the expressions are sorted into optional, required and
    repeating groups.  A trial pass with C{tryParse} establishes the order
    in which the expressions match; the real parse is then run in that
    order and the results merged.  Raises C{ParseException} when a
    required expression never matched.
    """
    if self.initExprGroups:
        members = self.exprs
        # map each wrapped expression back to its Optional wrapper
        self.opt1map = dict((id(e.expr),e) for e in members if isinstance(e,Optional))
        wrappedOptionals = [ e.expr for e in members if isinstance(e,Optional) ]
        emptyCapable = [ e for e in members if e.mayReturnEmpty and not isinstance(e,Optional)]
        self.optionals = wrappedOptionals + emptyCapable
        self.multioptionals = [ e.expr for e in members if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in members if isinstance(e,OneOrMore) ]
        self.required = [ e for e in members if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired
        self.initExprGroups = False

    probeLoc = loc
    pendingReqd = self.required[:]
    pendingOpt = self.optionals[:]
    matchOrder = []

    # keep sweeping over the candidates until a whole sweep matches nothing
    while True:
        attempts = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
        failures = 0
        for e in attempts:
            try:
                probeLoc = e.tryParse( instring, probeLoc )
            except ParseException:
                failures += 1
            else:
                matchOrder.append(self.opt1map.get(id(e),e))
                if e in pendingReqd:
                    pendingReqd.remove(e)
                elif e in pendingOpt:
                    pendingOpt.remove(e)
        if failures == len(attempts):
            break

    if pendingReqd:
        missing = ", ".join(_ustr(e) for e in pendingReqd)
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # add any unmatched Optionals, in case they have default values defined
    matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOpt]

    # real parse, in the order established by the trial pass
    resultlist = []
    for e in matchOrder:
        loc,results = e._parse(instring,loc,doActions)
        resultlist.append(results)

    finalResults = ParseResults()
    for r in resultlist:
        # names already present are accumulated rather than overwritten
        dups = {}
        for k in r.keys():
            if k in finalResults:
                combined = ParseResults(finalResults[k])
                combined += ParseResults(r[k])
                dups[k] = combined
        finalResults += ParseResults(r)
        for k,v in dups.items():
            finalResults[k] = v
    return loc, finalResults
2705
2707 if hasattr(self,"name"):
2708 return self.name
2709
2710 if self.strRepr is None:
2711 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
2712
2713 return self.strRepr
2714
2716 subRecCheckList = parseElementList[:] + [ self ]
2717 for e in self.exprs:
2718 e.checkRecursion( subRecCheckList )
2719
2722 """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
def __init__( self, expr, savelist=False ):
    """Wrap a single expression, inheriting its parsing attributes.

    A string C{expr} is converted to a C{Literal}.  C{expr} may be None,
    in which case no attributes are copied.
    """
    super(ParseElementEnhance,self).__init__(savelist)
    if isinstance( expr, basestring ):
        expr = Literal(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return

    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    # setWhitespaceChars must run before skipWhitespace is copied
    self.setWhitespaceChars( expr.whiteChars )
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
2737
def parseImpl( self, instring, loc, doActions=True ):
    """Delegate parsing to the wrapped expression.

    Raises C{ParseException} when no expression has been set.
    """
    if self.expr is None:
        raise ParseException("",loc,self.errmsg,self)
    return self.expr._parse( instring, loc, doActions, callPreParse=False )
2743
2745 self.skipWhitespace = False
2746 self.expr = self.expr.copy()
2747 if self.expr is not None:
2748 self.expr.leaveWhitespace()
2749 return self
2750
2752 if isinstance( other, Suppress ):
2753 if other not in self.ignoreExprs:
2754 super( ParseElementEnhance, self).ignore( other )
2755 if self.expr is not None:
2756 self.expr.ignore( self.ignoreExprs[-1] )
2757 else:
2758 super( ParseElementEnhance, self).ignore( other )
2759 if self.expr is not None:
2760 self.expr.ignore( self.ignoreExprs[-1] )
2761 return self
2762
2768
2770 if self in parseElementList:
2771 raise RecursiveGrammarException( parseElementList+[self] )
2772 subRecCheckList = parseElementList[:] + [ self ]
2773 if self.expr is not None:
2774 self.expr.checkRecursion( subRecCheckList )
2775
def validate( self, validateTrace=[] ):
    """Validate the wrapped expression, then check this one for recursion.

    C{validateTrace} is the list of expressions visited so far; it is
    copied, never modified.
    """
    trace = validateTrace[:]+[self]
    inner = self.expr
    if inner is not None:
        inner.validate(trace)
    self.checkRecursion( [] )
2781
2783 try:
2784 return super(ParseElementEnhance,self).__str__()
2785 except:
2786 pass
2787
2788 if self.strRepr is None and self.expr is not None:
2789 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
2790 return self.strRepr
2791
2794 """Lookahead matching of the given parse expression. C{FollowedBy}
2795 does *not* advance the parsing position within the input string, it only
2796 verifies that the specified parse expression matches at the current
2797 position. C{FollowedBy} always returns a null token list."""
2801
def parseImpl( self, instring, loc, doActions=True ):
    """Check that the wrapped expression matches at C{loc} without consuming input.

    Any exception raised by the trial parse propagates to the caller.
    """
    # trial parse only; its end location is deliberately discarded
    self.expr.tryParse( instring, loc )
    no_tokens = []
    return loc, no_tokens
2805
2806
2807 -class NotAny(ParseElementEnhance):
2808 """Lookahead to disallow matching with the given parse expression. C{NotAny}
2809 does *not* advance the parsing position within the input string, it only
2810 verifies that the specified parse expression does *not* match at the current
2811 position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
2812 always returns a null token list. May be constructed using the '~' operator."""
2814 super(NotAny,self).__init__(expr)
2815
2816 self.skipWhitespace = False
2817 self.mayReturnEmpty = True
2818 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
2819
2820 - def parseImpl( self, instring, loc, doActions=True ):
2824
2826 if hasattr(self,"name"):
2827 return self.name
2828
2829 if self.strRepr is None:
2830 self.strRepr = "~{" + _ustr(self.expr) + "}"
2831
2832 return self.strRepr
2833
2836 """Repetition of one or more of the given expression.
2837
2838 Parameters:
2839 - expr - expression that must match one or more times
2840 - stopOn - (default=None) - expression for a terminating sentinel
2841 (only required if the sentinel would ordinarily match the repetition
2842 expression)
2843 """
def __init__( self, expr, stopOn=None):
    """Wrap C{expr} for repetition, with an optional terminating sentinel.

    A string C{stopOn} is converted to a C{Literal}.  The negated sentinel
    is stored in C{self.not_ender}, or None when no sentinel was given.
    """
    super(OneOrMore, self).__init__(expr)
    if isinstance(stopOn, basestring):
        stopOn = Literal(stopOn)
    if stopOn is None:
        self.not_ender = None
    else:
        self.not_ender = ~stopOn
2850
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression once, then as many more times as possible.

    A failure of the first match (or of the sentinel check before it)
    propagates to the caller.  Later repetitions stop quietly at the first
    C{ParseException} or C{IndexError}.
    """
    parse_expr = self.expr._parse
    skip_ignorables = self._skipIgnorables
    reject_ender = None
    if self.not_ender is not None:
        reject_ender = self.not_ender.tryParse

    # must be at least one (but first see if we are the stopOn sentinel;
    # if so, fail)
    if reject_ender is not None:
        reject_ender(instring, loc)
    loc, tokens = parse_expr( instring, loc, doActions, callPreParse=False )
    try:
        has_ignorables = bool(self.ignoreExprs)
        while True:
            if reject_ender is not None:
                reject_ender(instring, loc)
            preloc = skip_ignorables( instring, loc ) if has_ignorables else loc
            loc, more = parse_expr( instring, preloc, doActions )
            if more or more.haskeys():
                tokens += more
    except (ParseException,IndexError):
        # the repetition simply ends at the first failure
        pass

    return loc, tokens
2879
2881 if hasattr(self,"name"):
2882 return self.name
2883
2884 if self.strRepr is None:
2885 self.strRepr = "{" + _ustr(self.expr) + "}..."
2886
2887 return self.strRepr
2888
2893
2895 """Optional repetition of zero or more of the given expression.
2896
2897 Parameters:
2898 - expr - expression that must match zero or more times
2899 - stopOn - (default=None) - expression for a terminating sentinel
2900 (only required if the sentinel would ordinarily match the repetition
2901 expression)
2902 """
2903 - def __init__( self, expr, stopOn=None):
2906
2907 - def parseImpl( self, instring, loc, doActions=True ):
2912
2914 if hasattr(self,"name"):
2915 return self.name
2916
2917 if self.strRepr is None:
2918 self.strRepr = "[" + _ustr(self.expr) + "]..."
2919
2920 return self.strRepr
2921
2928
2929 _optionalNotMatched = _NullToken()
2931 """Optional matching of the given expression.
2932
2933 Parameters:
2934 - expr - expression that must match zero or more times
2935 - default (optional) - value to be returned if the optional expression
2936 is not found.
2937 """
2939 super(Optional,self).__init__( expr, savelist=False )
2940 self.defaultValue = default
2941 self.mayReturnEmpty = True
2942
def parseImpl( self, instring, loc, doActions=True ):
    """Try the wrapped expression; on failure return the default or nothing.

    When the expression fails with C{ParseException} or C{IndexError},
    C{loc} is left unchanged and the tokens are the configured default
    value (also stored under the expression's results name, if it has
    one), or an empty list when no default was given.
    """
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    except (ParseException,IndexError):
        default = self.defaultValue
        if default is _optionalNotMatched:
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([ default ])
            tokens[self.expr.resultsName] = default
        else:
            tokens = [ default ]
    return loc, tokens
2956
2958 if hasattr(self,"name"):
2959 return self.name
2960
2961 if self.strRepr is None:
2962 self.strRepr = "[" + _ustr(self.expr) + "]"
2963
2964 return self.strRepr
2965
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=False) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=None) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=None) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string failOn is promoted to a Literal
        if isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Scan forward from C{loc} for the first position where the target matches.

        Returns C{(newloc, results)} where the results hold the skipped
        text and, with C{include=True}, the tokens of the target itself.
        Raises C{ParseException} when the input is exhausted without a
        match, or when the C{failOn} expression matches first.
        """
        startloc = loc
        instrlen = len(instring)
        expr_parse = self.expr._parse
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # A failOn match means SkipTo is not a match.  This used
                # to 'break', which bypassed the while/else failure below
                # and fell through to the success path.
                if self_failOn_canParseNext(instring, tmploc):
                    raise ParseException(instring, loc, self.errmsg, self)

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
3038
3039 -class Forward(ParseElementEnhance):
3040 """Forward declaration of an expression to be defined later -
3041 used for recursive grammars, such as algebraic infix notation.
3042 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
3043
3044 Note: take care when assigning to C{Forward} not to overlook precedence of operators.
3045 Specifically, '|' has a lower precedence than '<<', so that::
3046 fwdExpr << a | b | c
3047 will actually be evaluated as::
3048 (fwdExpr << a) | b | c
3049 thereby leaving b and c out as parseable alternatives. It is recommended that you
3050 explicitly group the values inserted into the C{Forward}::
3051 fwdExpr << (a | b | c)
3052 Converting to use the '<<=' operator instead will avoid this problem.
3053 """
3056
3058 if isinstance( other, basestring ):
3059 other = ParserElement.literalStringClass(other)
3060 self.expr = other
3061 self.strRepr = None
3062 self.mayIndexError = self.expr.mayIndexError
3063 self.mayReturnEmpty = self.expr.mayReturnEmpty
3064 self.setWhitespaceChars( self.expr.whiteChars )
3065 self.skipWhitespace = self.expr.skipWhitespace
3066 self.saveAsList = self.expr.saveAsList
3067 self.ignoreExprs.extend(self.expr.ignoreExprs)
3068 return self
3069
3071 return self << other
3072
3074 self.skipWhitespace = False
3075 return self
3076
3078 if not self.streamlined:
3079 self.streamlined = True
3080 if self.expr is not None:
3081 self.expr.streamline()
3082 return self
3083
def validate( self, validateTrace=[] ):
    """Validate the assigned expression once per trace, then check for recursion.

    When this C{Forward} already appears in C{validateTrace} the contained
    expression is not validated again.  C{validateTrace} is copied, never
    modified.
    """
    already_seen = self in validateTrace
    if not already_seen:
        trace = validateTrace[:]+[self]
        inner = self.expr
        if inner is not None:
            inner.validate(trace)
    self.checkRecursion([])
3090
3092 if hasattr(self,"name"):
3093 return self.name
3094 return self.__class__.__name__ + ": ..."
3095
3096
3097 self._revertClass = self.__class__
3098 self.__class__ = _ForwardNoRecurse
3099 try:
3100 if self.expr is not None:
3101 retString = _ustr(self.expr)
3102 else:
3103 retString = "None"
3104 finally:
3105 self.__class__ = self._revertClass
3106 return self.__class__.__name__ + ": " + retString
3107
3109 if self.expr is not None:
3110 return super(Forward,self).copy()
3111 else:
3112 ret = Forward()
3113 ret <<= self
3114 return ret
3115
3119
3121 """Abstract subclass of C{ParseExpression}, for converting parsed results."""
3122 - def __init__( self, expr, savelist=False ):
3125
3127 """Converter to concatenate all matching tokens to a single string.
3128 By default, the matching patterns must also be contiguous in the input string;
3129 this can be disabled by specifying C{'adjacent=False'} in the constructor.
3130 """
def __init__( self, expr, joinString="", adjacent=True ):
    """Wrap C{expr} so that its matched tokens are joined into one string.

    Parameters:
     - expr - expression whose tokens are to be combined
     - joinString - separator placed between the joined tokens
     - adjacent - if True, C{leaveWhitespace} is applied so that the
          contained expressions do not skip whitespace
    """
    super(Combine,self).__init__( expr )
    self.joinString = joinString
    self.adjacent = adjacent
    # suppress whitespace-stripping in contained parse expressions, but
    # re-enable it on the Combine itself
    if adjacent:
        self.leaveWhitespace()
    self.skipWhitespace = True
    self.callPreparse = True
3140
3147
def postParse( self, instring, loc, tokenlist ):
    """Replace the matched tokens with a single joined string.

    The result is a copy of C{tokenlist} whose list contents are replaced
    by the joined string.  It is wrapped in a list when this expression
    has a results name and the result carries named keys.
    """
    joined = "".join(tokenlist._asStringList(self.joinString))
    combined = tokenlist.copy()
    del combined[:]
    combined += ParseResults([ joined ], modal=self.modalResults)

    if self.resultsName and combined.haskeys():
        return [ combined ]
    return combined
3157
3158 -class Group(TokenConverter):
3159 """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
3161 super(Group,self).__init__( expr )
3162 self.saveAsList = True
3163
def postParse( self, instring, loc, tokenlist ):
    """Nest the matched tokens one level deeper, as a single list element."""
    grouped = [ tokenlist ]
    return grouped
3166
3167 -class Dict(TokenConverter):
3168 """Converter to return a repetitive expression as a list, but also as a dictionary.
3169 Each element can also be referenced using the first token in the expression as its key.
3170 Useful for tabular report scraping when the first column can be used as a item key.
3171 """
3173 super(Dict,self).__init__( expr )
3174 self.saveAsList = True
3175
def postParse( self, instring, loc, tokenlist ):
    """Add a named entry to C{tokenlist} for each of its non-empty sub-lists.

    The first element of each sub-list is the key (integers are converted
    to stripped strings); the remaining elements form the value.  The
    result is wrapped in a list when this expression has a results name.
    """
    for i,tok in enumerate(tokenlist):
        size = len(tok)
        if size == 0:
            continue
        ikey = tok[0]
        if isinstance(ikey,int):
            ikey = _ustr(tok[0]).strip()

        if size == 1:
            # key only: the value is an empty string
            value = ""
        elif size == 2 and not isinstance(tok[1],ParseResults):
            value = tok[1]
        else:
            dictvalue = tok.copy()
            del dictvalue[0]
            keepWhole = len(dictvalue) != 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys())
            value = dictvalue if keepWhole else dictvalue[0]
        tokenlist[ikey] = _ParseResultsWithOffset(value,i)

    if self.resultsName:
        return [ tokenlist ]
    return tokenlist
3199
3202 """Converter for ignoring the results of a parsed expression."""
3203 - def postParse( self, instring, loc, tokenlist ):
3205
3208
3211 """Wrapper for parse actions, to ensure they are only called once."""
3213 self.callable = _trim_arity(methodCall)
3214 self.called = False
3216 if not self.called:
3217 results = self.callable(s,l,t)
3218 self.called = True
3219 return results
3220 raise ParseException(s,l,"")
3223
def traceParseAction(f):
    """Decorator for debugging parse actions.

       Wraps C{f} so that every call writes a ">>entering" line (function name, current
       input line, location and tokens) and a "<<leaving" line (return value, or the
       exception raised) to C{sys.stderr}.  Exceptions raised by C{f} are re-raised.
    """
    # Read the name before _trim_arity wraps f, and use __name__ rather than the
    # Python 2-only func_name attribute; fall back to the class name for callables.
    funcName = getattr(f, "__name__", f.__class__.__name__)
    f = _trim_arity(f)
    def z(*paArgs):
        thisFunc = funcName
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # bound method style call: prefix with the owning class name
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    z.__name__ = funcName
    return z
3245
3246
3247
3248
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing C{combine=True} in the constructor.
       If C{combine} is set to C{True}, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = exprText+" ["+_ustr(delim)+" "+exprText+"]..."
    if combine:
        # keep the delimiters and fuse everything into one token
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        # drop the delimiters, report each element separately
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    return listExpr.setName(dlName)
3262
def countedArray( expr, intExpr=None ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

       If C{intExpr} is given, a copy of it is used to parse the count instead of the
       default C{Word(nums)} converted to C{int}.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # once the count is known, redefine the array part to match exactly n items
        n = t[0]
        arrayExpr << (Group(And([expr]*n)) if n else Group(empty))
        return []
    if intExpr is None:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    else:
        # work on a copy so the caller's expression is not modified
        intExpr = intExpr.copy()
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
3282
3284 ret = []
3285 for i in L:
3286 if isinstance(i,list):
3287 ret.extend(_flatten(i))
3288 else:
3289 ret.append(i)
3290 return ret
3291
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # each time expr matches, redefine rep to match those same tokens literally
        if t:
            if len(t) == 1:
                rep << t[0]
            else:
                # flatten t tokens
                tflat = _flatten(t.asList())
                rep << And(Literal(tt) for tt in tflat)
        else:
            rep << Empty()
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
3318
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    e2 = expr.copy()
    rep <<= e2
    def copyTokenToRepeater(s,l,t):
        # remember what expr matched, then make rep reject anything different
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            theseTokens = _flatten(t.asList())
            if theseTokens != matchTokens:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
3345
3347
def _escapeRegexRangeChars(s):
    """Escape C{s} for use inside a regex C{[]} character class: backslash-escape
       C{\\}, C{^}, C{-} and C{]}, and spell newline and tab as C{\\n} and C{\\t}."""
    # the backslash must be handled first so later escapes are not doubled
    for c in r"\^-]":
        s = s.replace(c,_bslash+c)
    s = s.replace("\n",r"\n")
    s = s.replace("\t",r"\t")
    return _ustr(s)
3353
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       Returns C{NoMatch()} when no symbols are given; an argument of an unsupported
       type additionally emits a C{SyntaxWarning}.
    """
    # collections.Sequence was removed in Python 3.10; fall back for Python 2
    try:
        from collections.abc import Sequence as _Sequence
    except ImportError:
        from collections import Sequence as _Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, _Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # drop duplicates, and move any symbol that is masked by an earlier, shorter
    # prefix in front of that prefix so the longer alternative is tried first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for this position; advance
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character class is enough
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # best effort: fall through to the MatchFirst form below
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
3417
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the C{Dict} results can include named token
       fields.
    """
    return Dict( ZeroOrMore( Group ( key + value ) ) )
3427
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. By default, returns a string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{L{ParseResults}} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    # zero-width markers that record the parse location before and after expr
    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endlocMarker = locMarker.copy()
    # the end marker must not skip whitespace, or trailing blanks would be included
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")

    if asString:
        def extractText(s,l,t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # replace the tokens in place, keeping any other named results
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]

    matchExpr.setParseAction(extractText)
    return matchExpr
3451
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions, even
       if all but one are non-empty."""
    return TokenConverter(expr).setParseAction(lambda t:t[0])
3456
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending locations in the input string.
       This helper adds the following results names:
        - locn_start = location where matched expression begins
        - locn_end = location where matched expression ends
        - value = the actual parsed results

       Be careful if the input text contains C{<TAB>} characters, you may want to call
       C{L{ParserElement.parseWithTabs}}
    """
    locator = Empty().setParseAction(lambda s,l,t: l)
    # the end locator must not skip whitespace, so locn_end is the true end of the match
    return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
3469
3470
3471
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for the regex-style "[...]" range strings accepted by srange
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# Take the digits after the x/X marker.  lstrip(r'\0x') strips a *set* of characters, so it
# also ate leading zero digits ("\x00" -> "") and left an upper-case "X" in place.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().partition('x')[2],16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       Returns an empty string if C{s} cannot be parsed or expanded.
    """
    # a ParseResults part is a (first, last) range group; anything else is a single character
    _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # deliberate best effort: malformed input yields "", but do not trap
        # KeyboardInterrupt/SystemExit as a bare except would
        return ""
3507
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.

       Returns a parse action that raises C{ParseException} unless the match
       location is at column C{n}.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) != n:
            raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
3516
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with C{L{transformString<ParserElement.transformString>}()}.
    """
    return lambda s,l,t: [replStr]
3522
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    # drop the first and last character of the first token
    return t[0][1:-1]
3529
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    return [ tt.upper() for tt in map(_ustr,t) ]
3533
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    return [ tt.lower() for tt in map(_ustr,t) ]
3537
3566
3570
3574
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       C{<TD>} or C{<DIV>}.

       Call C{withAttribute} with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in C{(align="right")}, or
        - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.
    """
    # positional name-value tuples take precedence over keyword arguments
    if args:
        attrs = args[:]
    else:
        attrs = attrDict.items()
    attrs = [(k,v) for k,v in attrs]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# sentinel meaning "attribute must be present, any value accepted"
withAttribute.ANY_VALUE = object()
3609
def withClass(classname, namespace=''):
    """Simplified version of C{L{withAttribute}} when matching on a div class - made
       difficult because C{class} is a reserved word in Python.
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    # pass through ** because the attribute name is not a valid keyword
    return withAttribute(**{classattr : classname})
3616
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the nested
          expression grammar
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)
        - lpar - expression for matching left-parentheses (default=Suppress('('))
        - rpar - expression for matching right-parentheses (default=Suppress(')'))

       Raises C{ValueError} for a ternary operator whose opExpr is not a pair, for a
       numTerms other than 1, 2 or 3, or for an unknown associativity.
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # the parse action member is optional; pad so unpacking always works
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            # validate before formatting the name, so a bad opExpr gives the
            # intended ValueError and a two-item list works as well as a tuple
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: terms are simply adjacent
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # each level matches its own operator form, or falls back to the tighter-binding level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
# older name for infixNotation, kept as an alias
operatorPrecedence = infixNotation
3695
# Quoted-string expressions.  Inside the quotes a doubled quote character or a
# backslash escape is accepted; unescaped newlines are not.
dblQuotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
    ).setName("string enclosed in double quotes")
sglQuotedString = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
    ).setName("string enclosed in single quotes")
quotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' |
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
    ).setName("quotedString using single or double quotes")
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.

       Use the C{ignoreExpr} argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
       The default is L{quotedString}, but if no expressions are to be ignored,
       then pass C{None} for this argument.

       Raises C{ValueError} if opener and closer are equal, or if no content
       expression is given and opener/closer are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # build a default content expression: runs of characters that are neither
        # whitespace nor part of a delimiter (nor the start of an ignored expression)
        if isinstance(opener,basestring) and isinstance(closer,basestring):
            if len(opener) == 1 and len(closer)==1:
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
                    content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
                                ).setParseAction(lambda t:t[0].strip()))
            else:
                # multi-character delimiters need explicit negative lookahead
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    ~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
                    content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
        else:
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    else:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content )  + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
3754
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.
    """
    def checkPeerIndent(s,l,t):
        # a statement must start at the column on top of the stack
        if l >= len(s):
            return
        curCol = col(l,s)
        if curCol == indentStack[-1]:
            return
        if curCol > indentStack[-1]:
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # entering a block: the column must be deeper, and is pushed on the stack
        curCol = col(l,s)
        if curCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        # leaving a block: the column must drop back to an enclosing level
        if l >= len(s):
            return
        curCol = col(l,s)
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )
    # a backslash at end of line continues the statement
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
3806
# 8-bit letter and punctuation character sets
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# generic HTML tag expressions and the common named entities
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # t.entity is the name captured by commonHTMLEntity; unknown names give None
    return _htmlEntityMap.get(t.entity)
3816
3817
# comment expressions for several languages
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
cppStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/' | dblSlashComment
    ).setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

# one item of a comma-separated list: printable runs, with embedded (not trailing) blanks
_commasepitem = Combine(
    OneOrMore(
        Word(printables, excludeChars=',') +
        Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
        )
    ).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional( quotedString.copy() | _commasepitem, default="")
    ).setName("commaSeparatedList")
3831
3832
if __name__ == "__main__":

    # self-test: a minimal "SELECT columns FROM tables" grammar
    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) ).setName("columns")
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) ).setName("tables")
    simpleSQL = (
        selectToken
        + ( '*' | columnNameList ).setResultsName( "columns" )
        + fromToken
        + tableNameList.setResultsName( "tables" )
        )

    # a mix of valid statements and deliberately malformed ones
    simpleSQL.runTests("""\
        SELECT * from XYZZY, ABC
        select * from SYS.XYZZY
        Select A from Sys.dual
        Select AA,BB,CC from Sys.dual
        Select A, B, C from Sys.dual
        Select A, B, C from Sys.dual
        Xelect A, B, C from Sys.dual
        Select A, B, C frox Sys.dual
        Select
        Select ^^^ frox Sys.dual
        Select A, B, C from Sys.dual, Table2""")
3860