| # ----------------------------------------------------------------------------- | 
 | # ply: lex.py | 
 | # | 
 | # Copyright (C) 2001-2011, | 
 | # David M. Beazley (Dabeaz LLC) | 
 | # All rights reserved. | 
 | # | 
 | # Redistribution and use in source and binary forms, with or without | 
 | # modification, are permitted provided that the following conditions are | 
 | # met: | 
 | #  | 
 | # * Redistributions of source code must retain the above copyright notice, | 
 | #   this list of conditions and the following disclaimer.   | 
 | # * Redistributions in binary form must reproduce the above copyright notice,  | 
 | #   this list of conditions and the following disclaimer in the documentation | 
 | #   and/or other materials provided with the distribution.   | 
# * Neither the name of David Beazley nor Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
 | # | 
 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 
 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 
 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 
 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
 | # ----------------------------------------------------------------------------- | 
 |  | 
 | __version__    = "3.4" | 
 | __tabversion__ = "3.2"       # Version of table file used | 
 |  | 
 | import re, sys, types, copy, os | 
 |  | 
# This tuple contains known string types
try:
    # Python 2
    StringTypes = (types.StringType, types.UnicodeType)
except AttributeError:
    # Python 3
    StringTypes = (str, bytes)
 |  | 
 | # Extract the code attribute of a function. Different implementations | 
 | # are for Python 2/3 compatibility. | 
 |  | 
 | if sys.version_info[0] < 3: | 
 |     def func_code(f): | 
 |         return f.func_code | 
 | else: | 
 |     def func_code(f): | 
 |         return f.__code__ | 
 |  | 
 | # This regular expression is used to match valid token names | 
 | _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') | 
 |  | 
# Exception raised when an invalid token is encountered and no default
# error handler is defined.
 |  | 
 | class LexError(Exception): | 
 |     def __init__(self,message,s): | 
 |          self.args = (message,) | 
 |          self.text = s | 
 |  | 
 | # Token class.  This class is used to represent the tokens produced. | 
 | class LexToken(object): | 
 |     def __str__(self): | 
 |         return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) | 
 |     def __repr__(self): | 
 |         return str(self) | 
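
# The lexing engine fills in the type, value, lineno, and lexpos attributes
# from outside the class.  A minimal illustration (the attribute values shown
# here are hypothetical):
#
#     tok = LexToken()
#     tok.type   = 'NUMBER'     # Token type (one of the names in tokens)
#     tok.value  = '42'         # Matched text (or whatever a rule assigns)
#     tok.lineno = 1            # Line number where the token starts
#     tok.lexpos = 10           # Index of the token relative to the input start
#     print(tok)                # -> LexToken(NUMBER,'42',1,10)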
 |  | 
 | # This object is a stand-in for a logging object created by the  | 
 | # logging module.   | 
 |  | 
 | class PlyLogger(object): | 
 |     def __init__(self,f): | 
 |         self.f = f | 
 |     def critical(self,msg,*args,**kwargs): | 
 |         self.f.write((msg % args) + "\n") | 
 |  | 
 |     def warning(self,msg,*args,**kwargs): | 
 |         self.f.write("WARNING: "+ (msg % args) + "\n") | 
 |  | 
 |     def error(self,msg,*args,**kwargs): | 
 |         self.f.write("ERROR: " + (msg % args) + "\n") | 
 |  | 
 |     info = critical | 
 |     debug = critical | 
 |  | 
 | # Null logger is used when no output is generated. Does nothing. | 
 | class NullLogger(object): | 
 |     def __getattribute__(self,name): | 
 |         return self | 
 |     def __call__(self,*args,**kwargs): | 
 |         return self | 
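
# Because __getattribute__ unconditionally returns self, every attribute
# lookup and every call on a NullLogger is silently absorbed (illustrative):
#
#     log = NullLogger()
#     log.error("this message goes nowhere")    # returns the logger itself
#     log.warning("so does this").info("etc")   # chained calls also work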
 |  | 
 | # ----------------------------------------------------------------------------- | 
 | #                        === Lexing Engine === | 
 | # | 
 | # The following Lexer class implements the lexer runtime.   There are only | 
 | # a few public methods and attributes: | 
 | # | 
 | #    input()          -  Store a new string in the lexer | 
 | #    token()          -  Get the next token | 
 | #    clone()          -  Clone the lexer | 
 | # | 
 | #    lineno           -  Current line number | 
 | #    lexpos           -  Current position in the input string | 
 | # ----------------------------------------------------------------------------- | 
 |  | 
 | class Lexer: | 
 |     def __init__(self): | 
 |         self.lexre = None             # Master regular expression. This is a list of | 
 |                                       # tuples (re,findex) where re is a compiled | 
 |                                       # regular expression and findex is a list | 
 |                                       # mapping regex group numbers to rules | 
 |         self.lexretext = None         # Current regular expression strings | 
 |         self.lexstatere = {}          # Dictionary mapping lexer states to master regexs | 
 |         self.lexstateretext = {}      # Dictionary mapping lexer states to regex strings | 
 |         self.lexstaterenames = {}     # Dictionary mapping lexer states to symbol names | 
 |         self.lexstate = "INITIAL"     # Current lexer state | 
 |         self.lexstatestack = []       # Stack of lexer states | 
 |         self.lexstateinfo = None      # State information | 
 |         self.lexstateignore = {}      # Dictionary of ignored characters for each state | 
 |         self.lexstateerrorf = {}      # Dictionary of error functions for each state | 
 |         self.lexreflags = 0           # Optional re compile flags | 
 |         self.lexdata = None           # Actual input data (as a string) | 
 |         self.lexpos = 0               # Current position in input text | 
 |         self.lexlen = 0               # Length of the input text | 
 |         self.lexerrorf = None         # Error rule (if any) | 
        self.lextokens = None         # Dictionary of valid token names
 |         self.lexignore = ""           # Ignored characters | 
 |         self.lexliterals = ""         # Literal characters that can be passed through | 
 |         self.lexmodule = None         # Module | 
 |         self.lineno = 1               # Current line number | 
 |         self.lexoptimize = 0          # Optimized mode | 
 |  | 
 |     def clone(self,object=None): | 
 |         c = copy.copy(self) | 
 |  | 
 |         # If the object parameter has been supplied, it means we are attaching the | 
 |         # lexer to a new object.  In this case, we have to rebind all methods in | 
 |         # the lexstatere and lexstateerrorf tables. | 
 |  | 
 |         if object: | 
 |             newtab = { } | 
 |             for key, ritem in self.lexstatere.items(): | 
 |                 newre = [] | 
 |                 for cre, findex in ritem: | 
 |                      newfindex = [] | 
 |                      for f in findex: | 
 |                          if not f or not f[0]: | 
 |                              newfindex.append(f) | 
 |                              continue | 
 |                          newfindex.append((getattr(object,f[0].__name__),f[1])) | 
                     newre.append((cre,newfindex))
                newtab[key] = newre
 |             c.lexstatere = newtab | 
 |             c.lexstateerrorf = { } | 
 |             for key, ef in self.lexstateerrorf.items(): | 
 |                 c.lexstateerrorf[key] = getattr(object,ef.__name__) | 
 |             c.lexmodule = object | 
 |         return c | 
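
    # A typical use of clone() is to scan a second input without rebuilding
    # the master regular expressions (illustrative; 'lexer' is an
    # already-built lexer):
    #
    #     sublexer = lexer.clone()
    #     sublexer.input("other text")   # Does not disturb lexer's position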
 |  | 
 |     # ------------------------------------------------------------ | 
 |     # writetab() - Write lexer information to a table file | 
 |     # ------------------------------------------------------------ | 
 |     def writetab(self,tabfile,outputdir=""): | 
 |         if isinstance(tabfile,types.ModuleType): | 
 |             return | 
 |         basetabfilename = tabfile.split(".")[-1] | 
 |         filename = os.path.join(outputdir,basetabfilename)+".py" | 
 |         tf = open(filename,"w") | 
 |         tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__)) | 
 |         tf.write("_tabversion   = %s\n" % repr(__version__)) | 
 |         tf.write("_lextokens    = %s\n" % repr(self.lextokens)) | 
 |         tf.write("_lexreflags   = %s\n" % repr(self.lexreflags)) | 
 |         tf.write("_lexliterals  = %s\n" % repr(self.lexliterals)) | 
 |         tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) | 
 |  | 
 |         tabre = { } | 
 |         # Collect all functions in the initial state | 
 |         initial = self.lexstatere["INITIAL"] | 
 |         initialfuncs = [] | 
 |         for part in initial: | 
 |             for f in part[1]: | 
 |                 if f and f[0]: | 
 |                     initialfuncs.append(f) | 
 |  | 
 |         for key, lre in self.lexstatere.items(): | 
 |              titem = [] | 
 |              for i in range(len(lre)): | 
 |                   titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i]))) | 
 |              tabre[key] = titem | 
 |  | 
 |         tf.write("_lexstatere   = %s\n" % repr(tabre)) | 
 |         tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) | 
 |  | 
 |         taberr = { } | 
 |         for key, ef in self.lexstateerrorf.items(): | 
 |              if ef: | 
 |                   taberr[key] = ef.__name__ | 
 |              else: | 
 |                   taberr[key] = None | 
 |         tf.write("_lexstateerrorf = %s\n" % repr(taberr)) | 
 |         tf.close() | 
 |  | 
 |     # ------------------------------------------------------------ | 
 |     # readtab() - Read lexer information from a tab file | 
 |     # ------------------------------------------------------------ | 
 |     def readtab(self,tabfile,fdict): | 
 |         if isinstance(tabfile,types.ModuleType): | 
 |             lextab = tabfile | 
 |         else: | 
 |             if sys.version_info[0] < 3: | 
 |                 exec("import %s as lextab" % tabfile) | 
 |             else: | 
 |                 env = { } | 
 |                 exec("import %s as lextab" % tabfile, env,env) | 
 |                 lextab = env['lextab'] | 
 |  | 
        if getattr(lextab,"_tabversion","0.0") != __tabversion__:
 |             raise ImportError("Inconsistent PLY version") | 
 |  | 
 |         self.lextokens      = lextab._lextokens | 
 |         self.lexreflags     = lextab._lexreflags | 
 |         self.lexliterals    = lextab._lexliterals | 
 |         self.lexstateinfo   = lextab._lexstateinfo | 
 |         self.lexstateignore = lextab._lexstateignore | 
 |         self.lexstatere     = { } | 
 |         self.lexstateretext = { } | 
 |         for key,lre in lextab._lexstatere.items(): | 
 |              titem = [] | 
 |              txtitem = [] | 
 |              for i in range(len(lre)): | 
 |                   titem.append((re.compile(lre[i][0],lextab._lexreflags | re.VERBOSE),_names_to_funcs(lre[i][1],fdict))) | 
 |                   txtitem.append(lre[i][0]) | 
 |              self.lexstatere[key] = titem | 
 |              self.lexstateretext[key] = txtitem | 
 |         self.lexstateerrorf = { } | 
 |         for key,ef in lextab._lexstateerrorf.items(): | 
 |              self.lexstateerrorf[key] = fdict[ef] | 
 |         self.begin('INITIAL') | 
 |  | 
 |     # ------------------------------------------------------------ | 
 |     # input() - Push a new string into the lexer | 
 |     # ------------------------------------------------------------ | 
 |     def input(self,s): | 
 |         # Pull off the first character to see if s looks like a string | 
 |         c = s[:1] | 
 |         if not isinstance(c,StringTypes): | 
 |             raise ValueError("Expected a string") | 
 |         self.lexdata = s | 
 |         self.lexpos = 0 | 
 |         self.lexlen = len(s) | 
 |  | 
 |     # ------------------------------------------------------------ | 
 |     # begin() - Changes the lexing state | 
 |     # ------------------------------------------------------------ | 
 |     def begin(self,state): | 
 |         if not state in self.lexstatere: | 
 |             raise ValueError("Undefined state") | 
 |         self.lexre = self.lexstatere[state] | 
 |         self.lexretext = self.lexstateretext[state] | 
 |         self.lexignore = self.lexstateignore.get(state,"") | 
 |         self.lexerrorf = self.lexstateerrorf.get(state,None) | 
 |         self.lexstate = state | 
 |  | 
 |     # ------------------------------------------------------------ | 
 |     # push_state() - Changes the lexing state and saves old on stack | 
 |     # ------------------------------------------------------------ | 
 |     def push_state(self,state): | 
 |         self.lexstatestack.append(self.lexstate) | 
 |         self.begin(state) | 
 |  | 
 |     # ------------------------------------------------------------ | 
 |     # pop_state() - Restores the previous state | 
 |     # ------------------------------------------------------------ | 
 |     def pop_state(self): | 
 |         self.begin(self.lexstatestack.pop()) | 
 |  | 
 |     # ------------------------------------------------------------ | 
 |     # current_state() - Returns the current lexing state | 
 |     # ------------------------------------------------------------ | 
 |     def current_state(self): | 
 |         return self.lexstate | 
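
    # For example, token rules can use push_state()/pop_state() to scan a
    # nested construct (illustrative sketch; 'braced' is a hypothetical
    # exclusive state and LBRACE/RBRACE are hypothetical token names):
    #
    #     def t_LBRACE(t):
    #         r'\{'
    #         t.lexer.push_state('braced')   # Save current state, enter 'braced'
    #         return t
    #
    #     def t_braced_RBRACE(t):
    #         r'\}'
    #         t.lexer.pop_state()            # Restore the saved state
    #         return t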
 |  | 
 |     # ------------------------------------------------------------ | 
 |     # skip() - Skip ahead n characters | 
 |     # ------------------------------------------------------------ | 
 |     def skip(self,n): | 
 |         self.lexpos += n | 
 |  | 
 |     # ------------------------------------------------------------ | 
    # token() - Return the next token from the Lexer
 |     # | 
 |     # Note: This function has been carefully implemented to be as fast | 
 |     # as possible.  Don't make changes unless you really know what | 
 |     # you are doing | 
 |     # ------------------------------------------------------------ | 
 |     def token(self): | 
 |         # Make local copies of frequently referenced attributes | 
 |         lexpos    = self.lexpos | 
 |         lexlen    = self.lexlen | 
 |         lexignore = self.lexignore | 
 |         lexdata   = self.lexdata | 
 |  | 
 |         while lexpos < lexlen: | 
            # Short-circuit whitespace, tabs, and other ignored characters
 |             if lexdata[lexpos] in lexignore: | 
 |                 lexpos += 1 | 
 |                 continue | 
 |  | 
 |             # Look for a regular expression match | 
 |             for lexre,lexindexfunc in self.lexre: | 
 |                 m = lexre.match(lexdata,lexpos) | 
 |                 if not m: continue | 
 |  | 
 |                 # Create a token for return | 
 |                 tok = LexToken() | 
 |                 tok.value = m.group() | 
 |                 tok.lineno = self.lineno | 
 |                 tok.lexpos = lexpos | 
 |  | 
 |                 i = m.lastindex | 
 |                 func,tok.type = lexindexfunc[i] | 
 |  | 
 |                 if not func: | 
 |                    # If no token type was set, it's an ignored token | 
 |                    if tok.type: | 
 |                       self.lexpos = m.end() | 
 |                       return tok | 
 |                    else: | 
 |                       lexpos = m.end() | 
 |                       break | 
 |  | 
 |                 lexpos = m.end() | 
 |  | 
 |                 # If token is processed by a function, call it | 
 |  | 
 |                 tok.lexer = self      # Set additional attributes useful in token rules | 
 |                 self.lexmatch = m | 
 |                 self.lexpos = lexpos | 
 |  | 
 |                 newtok = func(tok) | 
 |  | 
                # A rule function need not return a token. If it returns
                # nothing, the token is discarded and we move to the next one
 |                 if not newtok: | 
 |                     lexpos    = self.lexpos         # This is here in case user has updated lexpos. | 
 |                     lexignore = self.lexignore      # This is here in case there was a state change | 
 |                     break | 
 |  | 
 |                 # Verify type of the token.  If not in the token map, raise an error | 
 |                 if not self.lexoptimize: | 
 |                     if not newtok.type in self.lextokens: | 
 |                         raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( | 
 |                             func_code(func).co_filename, func_code(func).co_firstlineno, | 
 |                             func.__name__, newtok.type),lexdata[lexpos:]) | 
 |  | 
 |                 return newtok | 
            else:
                # No regular expression matched (for-else on the loop above).
                # See if the character is in literals
 |                 if lexdata[lexpos] in self.lexliterals: | 
 |                     tok = LexToken() | 
 |                     tok.value = lexdata[lexpos] | 
 |                     tok.lineno = self.lineno | 
 |                     tok.type = tok.value | 
 |                     tok.lexpos = lexpos | 
 |                     self.lexpos = lexpos + 1 | 
 |                     return tok | 
 |  | 
 |                 # No match. Call t_error() if defined. | 
 |                 if self.lexerrorf: | 
 |                     tok = LexToken() | 
 |                     tok.value = self.lexdata[lexpos:] | 
 |                     tok.lineno = self.lineno | 
 |                     tok.type = "error" | 
 |                     tok.lexer = self | 
 |                     tok.lexpos = lexpos | 
 |                     self.lexpos = lexpos | 
 |                     newtok = self.lexerrorf(tok) | 
 |                     if lexpos == self.lexpos: | 
 |                         # Error method didn't change text position at all. This is an error. | 
 |                         raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) | 
 |                     lexpos = self.lexpos | 
 |                     if not newtok: continue | 
 |                     return newtok | 
 |  | 
 |                 self.lexpos = lexpos | 
 |                 raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:]) | 
 |  | 
 |         self.lexpos = lexpos + 1 | 
 |         if self.lexdata is None: | 
 |              raise RuntimeError("No input string given with input()") | 
 |         return None | 
 |  | 
 |     # Iterator interface | 
 |     def __iter__(self): | 
 |         return self | 
 |  | 
 |     def next(self): | 
 |         t = self.token() | 
 |         if t is None: | 
 |             raise StopIteration | 
 |         return t | 
 |  | 
 |     __next__ = next | 
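
# Because of the iterator interface, a lexer can be consumed with a for loop
# instead of repeated token() calls (illustrative):
#
#     lexer.input("some text")
#     for tok in lexer:
#         print(tok)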
 |  | 
 | # ----------------------------------------------------------------------------- | 
 | #                           ==== Lex Builder === | 
 | # | 
 | # The functions and classes below are used to collect lexing information | 
 | # and build a Lexer object from it. | 
 | # ----------------------------------------------------------------------------- | 
 |  | 
 | # ----------------------------------------------------------------------------- | 
 | # get_caller_module_dict() | 
 | # | 
# This function returns a dictionary containing all of the symbols defined within
# a caller further down the call stack.  This is used to get the environment
# associated with the lex() call if none was provided.
 | # ----------------------------------------------------------------------------- | 
 |  | 
 | def get_caller_module_dict(levels): | 
 |     try: | 
 |         raise RuntimeError | 
 |     except RuntimeError: | 
 |         e,b,t = sys.exc_info() | 
 |         f = t.tb_frame | 
 |         while levels > 0: | 
 |             f = f.f_back                    | 
 |             levels -= 1 | 
 |         ldict = f.f_globals.copy() | 
 |         if f.f_globals != f.f_locals: | 
 |             ldict.update(f.f_locals) | 
 |  | 
 |         return ldict | 
 |  | 
 | # ----------------------------------------------------------------------------- | 
 | # _funcs_to_names() | 
 | # | 
 | # Given a list of regular expression functions, this converts it to a list | 
 | # suitable for output to a table file | 
 | # ----------------------------------------------------------------------------- | 
 |  | 
 | def _funcs_to_names(funclist,namelist): | 
 |     result = [] | 
 |     for f,name in zip(funclist,namelist): | 
 |          if f and f[0]: | 
 |              result.append((name, f[1])) | 
 |          else: | 
 |              result.append(f) | 
 |     return result | 
 |  | 
 | # ----------------------------------------------------------------------------- | 
 | # _names_to_funcs() | 
 | # | 
 | # Given a list of regular expression function names, this converts it back to | 
 | # functions. | 
 | # ----------------------------------------------------------------------------- | 
 |  | 
 | def _names_to_funcs(namelist,fdict): | 
 |      result = [] | 
 |      for n in namelist: | 
 |           if n and n[0]: | 
 |               result.append((fdict[n[0]],n[1])) | 
 |           else: | 
 |               result.append(n) | 
 |      return result | 
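
# Together the two helpers form the round trip used by writetab()/readtab()
# (illustrative; t_NUMBER stands for a hypothetical rule function):
#
#     names = _funcs_to_names([(t_NUMBER,'NUMBER'), (None,'PLUS')],
#                             ['t_NUMBER', None])
#     # names is now [('t_NUMBER','NUMBER'), (None,'PLUS')]
#     funcs = _names_to_funcs(names, {'t_NUMBER': t_NUMBER})
#     # funcs is now [(t_NUMBER,'NUMBER'), (None,'PLUS')]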
 |  | 
 | # ----------------------------------------------------------------------------- | 
 | # _form_master_re() | 
 | # | 
 | # This function takes a list of all of the regex components and attempts to | 
# form the master regular expression.  Given limitations in the Python re
# module (historically a limit of 100 named groups per pattern), it may be
# necessary to break the master regex into separate expressions.
 | # ----------------------------------------------------------------------------- | 
 |  | 
 | def _form_master_re(relist,reflags,ldict,toknames): | 
 |     if not relist: return [] | 
 |     regex = "|".join(relist) | 
 |     try: | 
 |         lexre = re.compile(regex,re.VERBOSE | reflags) | 
 |  | 
 |         # Build the index to function map for the matching engine | 
 |         lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) | 
 |         lexindexnames = lexindexfunc[:] | 
 |  | 
 |         for f,i in lexre.groupindex.items(): | 
 |             handle = ldict.get(f,None) | 
 |             if type(handle) in (types.FunctionType, types.MethodType): | 
 |                 lexindexfunc[i] = (handle,toknames[f]) | 
 |                 lexindexnames[i] = f | 
 |             elif handle is not None: | 
 |                 lexindexnames[i] = f | 
 |                 if f.find("ignore_") > 0: | 
 |                     lexindexfunc[i] = (None,None) | 
 |                 else: | 
 |                     lexindexfunc[i] = (None, toknames[f]) | 
 |          | 
 |         return [(lexre,lexindexfunc)],[regex],[lexindexnames] | 
 |     except Exception: | 
 |         m = int(len(relist)/2) | 
 |         if m == 0: m = 1 | 
 |         llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames) | 
 |         rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames) | 
 |         return llist+rlist, lre+rre, lnames+rnames | 
 |  | 
 | # ----------------------------------------------------------------------------- | 
# _statetoken(s,names)
#
# Given a declaration name s beginning with "t_" and a dictionary whose keys are
 | # state names, this function returns a tuple (states,tokenname) where states | 
 | # is a tuple of state names and tokenname is the name of the token.  For example, | 
 | # calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') | 
 | # ----------------------------------------------------------------------------- | 
 |  | 
 | def _statetoken(s,names): | 
 |     parts = s.split("_") | 
 |     for i in range(1,len(parts)): | 
 |          if not parts[i] in names and parts[i] != 'ANY': break | 
 |     if i > 1: | 
 |        states = tuple(parts[1:i]) | 
 |     else: | 
 |        states = ('INITIAL',) | 
 |  | 
 |     if 'ANY' in states: | 
 |        states = tuple(names) | 
 |  | 
 |     tokenname = "_".join(parts[i:]) | 
 |     return (states,tokenname) | 
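
# For example (assuming 'foo' and 'bar' appear as keys in names):
#
#     _statetoken("t_NUMBER", {'INITIAL':'inclusive'})
#         -> (('INITIAL',), 'NUMBER')
#     _statetoken("t_foo_bar_SPAM", {'foo':'exclusive', 'bar':'exclusive'})
#         -> (('foo','bar'), 'SPAM')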
 |  | 
 |  | 
 | # ----------------------------------------------------------------------------- | 
 | # LexerReflect() | 
 | # | 
 | # This class represents information needed to build a lexer as extracted from a | 
 | # user's input file. | 
 | # ----------------------------------------------------------------------------- | 
 | class LexerReflect(object): | 
 |     def __init__(self,ldict,log=None,reflags=0): | 
 |         self.ldict      = ldict | 
 |         self.error_func = None | 
 |         self.tokens     = [] | 
 |         self.reflags    = reflags | 
 |         self.stateinfo  = { 'INITIAL' : 'inclusive'} | 
 |         self.files      = {} | 
 |         self.error      = 0 | 
 |  | 
 |         if log is None: | 
 |             self.log = PlyLogger(sys.stderr) | 
 |         else: | 
 |             self.log = log | 
 |  | 
 |     # Get all of the basic information | 
 |     def get_all(self): | 
 |         self.get_tokens() | 
 |         self.get_literals() | 
 |         self.get_states() | 
 |         self.get_rules() | 
 |          | 
 |     # Validate all of the information | 
 |     def validate_all(self): | 
 |         self.validate_tokens() | 
 |         self.validate_literals() | 
 |         self.validate_rules() | 
 |         return self.error | 
 |  | 
    # Get the tokens list
    def get_tokens(self):
        tokens = self.ldict.get("tokens",None)
        if tokens is None:
 |             self.log.error("No token list is defined") | 
 |             self.error = 1 | 
 |             return | 
 |  | 
 |         if not isinstance(tokens,(list, tuple)): | 
 |             self.log.error("tokens must be a list or tuple") | 
 |             self.error = 1 | 
 |             return | 
 |          | 
 |         if not tokens: | 
 |             self.log.error("tokens is empty") | 
 |             self.error = 1 | 
 |             return | 
 |  | 
 |         self.tokens = tokens | 
 |  | 
 |     # Validate the tokens | 
 |     def validate_tokens(self): | 
 |         terminals = {} | 
 |         for n in self.tokens: | 
 |             if not _is_identifier.match(n): | 
 |                 self.log.error("Bad token name '%s'",n) | 
 |                 self.error = 1 | 
 |             if n in terminals: | 
 |                 self.log.warning("Token '%s' multiply defined", n) | 
 |             terminals[n] = 1 | 
 |  | 
 |     # Get the literals specifier | 
 |     def get_literals(self): | 
 |         self.literals = self.ldict.get("literals","") | 
 |  | 
 |     # Validate literals | 
 |     def validate_literals(self): | 
 |         try: | 
 |             for c in self.literals: | 
 |                 if not isinstance(c,StringTypes) or len(c) > 1: | 
 |                     self.log.error("Invalid literal %s. Must be a single character", repr(c)) | 
 |                     self.error = 1 | 
 |                     continue | 
 |  | 
 |         except TypeError: | 
 |             self.log.error("Invalid literals specification. literals must be a sequence of characters") | 
 |             self.error = 1 | 
 |  | 
 |     def get_states(self): | 
 |         self.states = self.ldict.get("states",None) | 
 |         # Build statemap | 
 |         if self.states: | 
 |              if not isinstance(self.states,(tuple,list)): | 
 |                   self.log.error("states must be defined as a tuple or list") | 
 |                   self.error = 1 | 
 |              else: | 
 |                   for s in self.states: | 
 |                         if not isinstance(s,tuple) or len(s) != 2: | 
 |                                self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(s)) | 
 |                                self.error = 1 | 
 |                                continue | 
 |                         name, statetype = s | 
 |                         if not isinstance(name,StringTypes): | 
 |                                self.log.error("State name %s must be a string", repr(name)) | 
 |                                self.error = 1 | 
 |                                continue | 
 |                         if not (statetype == 'inclusive' or statetype == 'exclusive'): | 
 |                                self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name) | 
 |                                self.error = 1 | 
 |                                continue | 
 |                         if name in self.stateinfo: | 
 |                                self.log.error("State '%s' already defined",name) | 
 |                                self.error = 1 | 
 |                                continue | 
 |                         self.stateinfo[name] = statetype | 
 |  | 
 |     # Get all of the symbols with a t_ prefix and sort them into various | 
 |     # categories (functions, strings, error functions, and ignore characters) | 
 |  | 
 |     def get_rules(self): | 
 |         tsymbols = [f for f in self.ldict if f[:2] == 't_' ] | 
 |  | 
 |         # Now build up a list of functions and a list of strings | 
 |  | 
 |         self.toknames = { }        # Mapping of symbols to token names | 
 |         self.funcsym =  { }        # Symbols defined as functions | 
 |         self.strsym =   { }        # Symbols defined as strings | 
 |         self.ignore   = { }        # Ignore strings by state | 
 |         self.errorf   = { }        # Error functions by state | 
 |  | 
 |         for s in self.stateinfo: | 
 |              self.funcsym[s] = [] | 
 |              self.strsym[s] = [] | 
 |  | 
 |         if len(tsymbols) == 0: | 
 |             self.log.error("No rules of the form t_rulename are defined") | 
 |             self.error = 1 | 
 |             return | 
 |  | 
 |         for f in tsymbols: | 
 |             t = self.ldict[f] | 
 |             states, tokname = _statetoken(f,self.stateinfo) | 
 |             self.toknames[f] = tokname | 
 |  | 
 |             if hasattr(t,"__call__"): | 
 |                 if tokname == 'error': | 
 |                     for s in states: | 
 |                         self.errorf[s] = t | 
 |                 elif tokname == 'ignore': | 
 |                     line = func_code(t).co_firstlineno | 
 |                     file = func_code(t).co_filename | 
 |                     self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__) | 
 |                     self.error = 1 | 
 |                 else: | 
 |                     for s in states:  | 
 |                         self.funcsym[s].append((f,t)) | 
 |             elif isinstance(t, StringTypes): | 
 |                 if tokname == 'ignore': | 
 |                     for s in states: | 
 |                         self.ignore[s] = t | 
 |                     if "\\" in t: | 
 |                         self.log.warning("%s contains a literal backslash '\\'",f) | 
 |  | 
 |                 elif tokname == 'error': | 
 |                     self.log.error("Rule '%s' must be defined as a function", f) | 
 |                     self.error = 1 | 
 |                 else: | 
 |                     for s in states:  | 
 |                         self.strsym[s].append((f,t)) | 
 |             else: | 
 |                 self.log.error("%s not defined as a function or string", f) | 
 |                 self.error = 1 | 
 |  | 
 |         # Sort the functions by line number | 
 |         for f in self.funcsym.values(): | 
 |             if sys.version_info[0] < 3: | 
 |                 f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno)) | 
 |             else: | 
                # Python 3
 |                 f.sort(key=lambda x: func_code(x[1]).co_firstlineno) | 
 |  | 
        # Sort the strings by regular expression length (longest first)
 |         for s in self.strsym.values(): | 
 |             if sys.version_info[0] < 3: | 
 |                 s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) | 
 |             else: | 
                # Python 3
 |                 s.sort(key=lambda x: len(x[1]),reverse=True) | 
 |  | 
 |     # Validate all of the t_rules collected  | 
 |     def validate_rules(self): | 
 |         for state in self.stateinfo: | 
            # Validate all rules defined by functions

 |             for fname, f in self.funcsym[state]: | 
 |                 line = func_code(f).co_firstlineno | 
 |                 file = func_code(f).co_filename | 
 |                 self.files[file] = 1 | 
 |  | 
 |                 tokname = self.toknames[fname] | 
 |                 if isinstance(f, types.MethodType): | 
 |                     reqargs = 2 | 
 |                 else: | 
 |                     reqargs = 1 | 
 |                 nargs = func_code(f).co_argcount | 
 |                 if nargs > reqargs: | 
 |                     self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) | 
 |                     self.error = 1 | 
 |                     continue | 
 |  | 
 |                 if nargs < reqargs: | 
 |                     self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) | 
 |                     self.error = 1 | 
 |                     continue | 
 |  | 
 |                 if not f.__doc__: | 
 |                     self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__) | 
 |                     self.error = 1 | 
 |                     continue | 
 |  | 
 |                 try: | 
 |                     c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags) | 
 |                     if c.match(""): | 
 |                         self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__) | 
 |                         self.error = 1 | 
 |                 except re.error: | 
 |                     _etype, e, _etrace = sys.exc_info() | 
 |                     self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e) | 
 |                     if '#' in f.__doc__: | 
 |                         self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__) | 
 |                     self.error = 1 | 
 |  | 
 |             # Validate all rules defined by strings | 
 |             for name,r in self.strsym[state]: | 
 |                 tokname = self.toknames[name] | 
 |                 if tokname == 'error': | 
 |                     self.log.error("Rule '%s' must be defined as a function", name) | 
 |                     self.error = 1 | 
 |                     continue | 
 |  | 
 |                 if not tokname in self.tokens and tokname.find("ignore_") < 0: | 
 |                     self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname) | 
 |                     self.error = 1 | 
 |                     continue | 
 |  | 
 |                 try: | 
 |                     c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags) | 
 |                     if (c.match("")): | 
 |                          self.log.error("Regular expression for rule '%s' matches empty string",name) | 
 |                          self.error = 1 | 
 |                 except re.error: | 
 |                     _etype, e, _etrace = sys.exc_info() | 
 |                     self.log.error("Invalid regular expression for rule '%s'. %s",name,e) | 
 |                     if '#' in r: | 
 |                          self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name) | 
 |                     self.error = 1 | 
 |  | 
 |             if not self.funcsym[state] and not self.strsym[state]: | 
 |                 self.log.error("No rules defined for state '%s'",state) | 
 |                 self.error = 1 | 
 |  | 
 |             # Validate the error function | 
 |             efunc = self.errorf.get(state,None) | 
 |             if efunc: | 
 |                 f = efunc | 
 |                 line = func_code(f).co_firstlineno | 
 |                 file = func_code(f).co_filename | 
 |                 self.files[file] = 1 | 
 |  | 
 |                 if isinstance(f, types.MethodType): | 
 |                     reqargs = 2 | 
 |                 else: | 
 |                     reqargs = 1 | 
 |                 nargs = func_code(f).co_argcount | 
 |                 if nargs > reqargs: | 
 |                     self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) | 
 |                     self.error = 1 | 
 |  | 
 |                 if nargs < reqargs: | 
 |                     self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) | 
 |                     self.error = 1 | 
 |  | 
 |         for f in self.files: | 
 |             self.validate_file(f) | 
 |  | 
 |  | 
 |     # ----------------------------------------------------------------------------- | 
 |     # validate_file() | 
 |     # | 
    # This checks to see if there are duplicated t_rulename() functions or strings
    # in the lexer input file.  This is done using a simple regular expression
    # match on each line in the given file.
 |     # ----------------------------------------------------------------------------- | 
 |  | 
 |     def validate_file(self,filename): | 
 |         import os.path | 
 |         base,ext = os.path.splitext(filename) | 
 |         if ext != '.py': return         # No idea what the file is. Return OK | 
 |  | 
 |         try: | 
 |             f = open(filename) | 
 |             lines = f.readlines() | 
 |             f.close() | 
 |         except IOError: | 
 |             return                      # Couldn't find the file.  Don't worry about it | 
 |  | 
 |         fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') | 
 |         sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') | 
 |  | 
 |         counthash = { } | 
 |         linen = 1 | 
 |         for l in lines: | 
 |             m = fre.match(l) | 
 |             if not m: | 
 |                 m = sre.match(l) | 
 |             if m: | 
 |                 name = m.group(1) | 
 |                 prev = counthash.get(name) | 
 |                 if not prev: | 
 |                     counthash[name] = linen | 
 |                 else: | 
 |                     self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev) | 
 |                     self.error = 1 | 
 |             linen += 1 | 
 |              | 
 | # ----------------------------------------------------------------------------- | 
 | # lex(module) | 
 | # | 
 | # Build all of the regular expression rules from definitions in the supplied module | 
 | # ----------------------------------------------------------------------------- | 
 | def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None): | 
 |     global lexer | 
 |     ldict = None | 
 |     stateinfo  = { 'INITIAL' : 'inclusive'} | 
 |     lexobj = Lexer() | 
 |     lexobj.lexoptimize = optimize | 
 |     global token,input | 
 |  | 
 |     if errorlog is None: | 
 |         errorlog = PlyLogger(sys.stderr) | 
 |  | 
 |     if debug: | 
 |         if debuglog is None: | 
 |             debuglog = PlyLogger(sys.stderr) | 
 |  | 
 |     # Get the module dictionary used for the lexer | 
 |     if object: module = object | 
 |  | 
 |     if module: | 
 |         _items = [(k,getattr(module,k)) for k in dir(module)] | 
 |         ldict = dict(_items) | 
 |     else: | 
 |         ldict = get_caller_module_dict(2) | 
 |  | 
    # Collect lexer information from the dictionary
 |     linfo = LexerReflect(ldict,log=errorlog,reflags=reflags) | 
 |     linfo.get_all() | 
 |     if not optimize: | 
 |         if linfo.validate_all(): | 
 |             raise SyntaxError("Can't build lexer") | 
 |  | 
 |     if optimize and lextab: | 
 |         try: | 
 |             lexobj.readtab(lextab,ldict) | 
 |             token = lexobj.token | 
 |             input = lexobj.input | 
 |             lexer = lexobj | 
 |             return lexobj | 
 |  | 
 |         except ImportError: | 
 |             pass | 
 |  | 
 |     # Dump some basic debugging information | 
 |     if debug: | 
 |         debuglog.info("lex: tokens   = %r", linfo.tokens) | 
 |         debuglog.info("lex: literals = %r", linfo.literals) | 
 |         debuglog.info("lex: states   = %r", linfo.stateinfo) | 
 |  | 
 |     # Build a dictionary of valid token names | 
 |     lexobj.lextokens = { } | 
 |     for n in linfo.tokens: | 
 |         lexobj.lextokens[n] = 1 | 
 |  | 
 |     # Get literals specification | 
 |     if isinstance(linfo.literals,(list,tuple)): | 
 |         lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) | 
 |     else: | 
 |         lexobj.lexliterals = linfo.literals | 
 |  | 
 |     # Get the stateinfo dictionary | 
 |     stateinfo = linfo.stateinfo | 
 |  | 
 |     regexs = { } | 
 |     # Build the master regular expressions | 
 |     for state in stateinfo: | 
 |         regex_list = [] | 
 |  | 
 |         # Add rules defined by functions first | 
 |         for fname, f in linfo.funcsym[state]: | 
 |             line = func_code(f).co_firstlineno | 
 |             file = func_code(f).co_filename | 
 |             regex_list.append("(?P<%s>%s)" % (fname,f.__doc__)) | 
 |             if debug: | 
 |                 debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state) | 
 |  | 
 |         # Now add all of the simple rules | 
 |         for name,r in linfo.strsym[state]: | 
 |             regex_list.append("(?P<%s>%s)" % (name,r)) | 
 |             if debug: | 
 |                 debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state) | 
 |  | 
 |         regexs[state] = regex_list | 
 |  | 
 |     # Build the master regular expressions | 
 |  | 
 |     if debug: | 
 |         debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====") | 
 |  | 
 |     for state in regexs: | 
 |         lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames) | 
 |         lexobj.lexstatere[state] = lexre | 
 |         lexobj.lexstateretext[state] = re_text | 
 |         lexobj.lexstaterenames[state] = re_names | 
 |         if debug: | 
 |             for i in range(len(re_text)): | 
 |                 debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i]) | 
 |  | 
 |     # For inclusive states, we need to add the regular expressions from the INITIAL state | 
 |     for state,stype in stateinfo.items(): | 
 |         if state != "INITIAL" and stype == 'inclusive': | 
 |              lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) | 
 |              lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) | 
 |              lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) | 
 |  | 
 |     lexobj.lexstateinfo = stateinfo | 
 |     lexobj.lexre = lexobj.lexstatere["INITIAL"] | 
 |     lexobj.lexretext = lexobj.lexstateretext["INITIAL"] | 
 |     lexobj.lexreflags = reflags | 
 |  | 
 |     # Set up ignore variables | 
 |     lexobj.lexstateignore = linfo.ignore | 
 |     lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","") | 
 |  | 
 |     # Set up error functions | 
 |     lexobj.lexstateerrorf = linfo.errorf | 
 |     lexobj.lexerrorf = linfo.errorf.get("INITIAL",None) | 
 |     if not lexobj.lexerrorf: | 
 |         errorlog.warning("No t_error rule is defined") | 
 |  | 
 |     # Check state information for ignore and error rules | 
 |     for s,stype in stateinfo.items(): | 
 |         if stype == 'exclusive': | 
 |               if not s in linfo.errorf: | 
 |                    errorlog.warning("No error rule is defined for exclusive state '%s'", s) | 
 |               if not s in linfo.ignore and lexobj.lexignore: | 
 |                    errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) | 
 |         elif stype == 'inclusive': | 
 |               if not s in linfo.errorf: | 
 |                    linfo.errorf[s] = linfo.errorf.get("INITIAL",None) | 
 |               if not s in linfo.ignore: | 
 |                    linfo.ignore[s] = linfo.ignore.get("INITIAL","") | 
 |  | 
 |     # Create global versions of the token() and input() functions | 
 |     token = lexobj.token | 
 |     input = lexobj.input | 
 |     lexer = lexobj | 
 |  | 
 |     # If in optimize mode, we write the lextab | 
 |     if lextab and optimize: | 
 |         lexobj.writetab(lextab,outputdir) | 
 |  | 
 |     return lexobj | 
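
# A minimal module-level sketch of how lex() is typically used (illustrative;
# the token names, rules, and input below are hypothetical, not part of this
# module):
#
#     tokens = ('NUMBER', 'PLUS')
#
#     t_PLUS   = r'\+'
#     t_ignore = ' \t'
#
#     def t_NUMBER(t):
#         r'\d+'
#         t.value = int(t.value)
#         return t
#
#     def t_error(t):
#         print("Illegal character %r" % t.value[0])
#         t.lexer.skip(1)
#
#     lexer = lex()       # Collects the t_ rules defined above
#     lexer.input("1 + 2")
#     print([tok.value for tok in lexer])    # -> [1, '+', 2]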
 |  | 
 | # ----------------------------------------------------------------------------- | 
 | # runmain() | 
 | # | 
 | # This runs the lexer as a main program | 
 | # ----------------------------------------------------------------------------- | 
 |  | 
 | def runmain(lexer=None,data=None): | 
 |     if not data: | 
 |         try: | 
 |             filename = sys.argv[1] | 
 |             f = open(filename) | 
 |             data = f.read() | 
 |             f.close() | 
 |         except IndexError: | 
 |             sys.stdout.write("Reading from standard input (type EOF to end):\n") | 
 |             data = sys.stdin.read() | 
 |  | 
 |     if lexer: | 
 |         _input = lexer.input | 
 |     else: | 
 |         _input = input | 
 |     _input(data) | 
 |     if lexer: | 
 |         _token = lexer.token | 
 |     else: | 
 |         _token = token | 
 |  | 
 |     while 1: | 
 |         tok = _token() | 
 |         if not tok: break | 
 |         sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos)) | 
 |  | 
 | # ----------------------------------------------------------------------------- | 
 | # @TOKEN(regex) | 
 | # | 
# This decorator function can be used to set the regular expression on a token
# function when its docstring needs to be supplied in an alternative way
 | # ----------------------------------------------------------------------------- | 
 |  | 
 | def TOKEN(r): | 
 |     def set_doc(f): | 
 |         if hasattr(r,"__call__"): | 
 |             f.__doc__ = r.__doc__ | 
 |         else: | 
 |             f.__doc__ = r | 
 |         return f | 
 |     return set_doc | 
 |  | 
 | # Alternative spelling of the TOKEN decorator | 
 | Token = TOKEN | 
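
# For example (illustrative; 'identifier' is a hypothetical pattern string):
#
#     identifier = r'[a-zA-Z_][a-zA-Z_0-9]*'
#
#     @TOKEN(identifier)
#     def t_ID(t):
#         return t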
 |  |