third_party/cython/src/Cython/Plex/Traditional.py - mojo-tools - Git at Google

 #=======================================================================
 #
 #   Python Lexical Analyser
 #
 #   Traditional Regular Expression Syntax
 #
 #=======================================================================

 from Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char
 from Errors import PlexError

 class RegexpSyntaxError(PlexError):
   pass

 def re(s):
   """
   Convert traditional string representation of regular expression |s|
   into Plex representation.
   """
   return REParser(s).parse_re()

 class REParser(object):

   def __init__(self, s):
     self.s = s
     self.i = -1
     self.end = 0
     self.next()

   def parse_re(self):
     re = self.parse_alt()
     if not self.end:
       self.error("Unexpected %s" % repr(self.c))
     return re

   def parse_alt(self):
     """Parse a set of alternative regexps."""
     re = self.parse_seq()
     if self.c == '|':
       re_list = [re]
       while self.c == '|':
         self.next()
         re_list.append(self.parse_seq())
       re = Alt(*re_list)
     return re

   def parse_seq(self):
     """Parse a sequence of regexps."""
     re_list = []
     while not self.end and not self.c in "|)":
       re_list.append(self.parse_mod())
     return Seq(*re_list)

   def parse_mod(self):
     """Parse a primitive regexp followed by *, +, ? modifiers."""
     re = self.parse_prim()
     while not self.end and self.c in "*+?":
       if self.c == '*':
         re = Rep(re)
       elif self.c == '+':
         re = Rep1(re)
       else: # self.c == '?'
         re = Opt(re)
       self.next()
     return re

   def parse_prim(self):
     """Parse a primitive regexp."""
     c = self.get()
     if c == '.':
       re = AnyBut("\n")
     elif c == '^':
       re = Bol
     elif c == '$':
       re = Eol
     elif c == '(':
       re = self.parse_alt()
       self.expect(')')
     elif c == '[':
       re = self.parse_charset()
       self.expect(']')
     else:
       if c == '\\':
         c = self.get()
       re = Char(c)
     return re

   def parse_charset(self):
     """Parse a charset. Does not include the surrounding []."""
     char_list = []
     invert = 0
     if self.c == '^':
       invert = 1
       self.next()
     if self.c == ']':
       char_list.append(']')
       self.next()
     while not self.end and self.c != ']':
       c1 = self.get()
       if self.c == '-' and self.lookahead(1) != ']':
         self.next()
         c2 = self.get()
         for a in xrange(ord(c1), ord(c2) + 1):
           char_list.append(chr(a))
       else:
         char_list.append(c1)
     chars = ''.join(char_list)
     if invert:
       return AnyBut(chars)
     else:
       return Any(chars)

   def next(self):
     """Advance to the next char."""
     s = self.s
     i = self.i = self.i + 1
     if i < len(s):
       self.c = s[i]
     else:
       self.c = ''
       self.end = 1

   def get(self):
     if self.end:
       self.error("Premature end of string")
     c = self.c
     self.next()
     return c

   def lookahead(self, n):
     """Look ahead n chars."""
     j = self.i + n
     if j < len(self.s):
       return self.s[j]
     else:
       return ''

   def expect(self, c):
     """
     Expect to find character |c| at current position.
     Raises an exception otherwise.
     """
     if self.c == c:
       self.next()
     else:
       self.error("Missing %s" % repr(c))

   def error(self, mess):
     """Raise exception to signal syntax error in regexp."""
     raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
       repr(self.s), self.i, mess))
	#=======================================================================
	#
	# Python Lexical Analyser
	#
	# Traditional Regular Expression Syntax
	#
	#=======================================================================

	from Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char
	from Errors import PlexError

	class RegexpSyntaxError(PlexError):
	pass

	def re(s):
	"""
	Convert traditional string representation of regular expression \|s\|
	into Plex representation.
	"""
	return REParser(s).parse_re()

	class REParser(object):

	def __init__(self, s):
	self.s = s
	self.i = -1
	self.end = 0
	self.next()

	def parse_re(self):
	re = self.parse_alt()
	if not self.end:
	self.error("Unexpected %s" % repr(self.c))
	return re

	def parse_alt(self):
	"""Parse a set of alternative regexps."""
	re = self.parse_seq()
	if self.c == '\|':
	re_list = [re]
	while self.c == '\|':
	self.next()
	re_list.append(self.parse_seq())
	re = Alt(*re_list)
	return re

	def parse_seq(self):
	"""Parse a sequence of regexps."""
	re_list = []
	while not self.end and not self.c in "\|)":
	re_list.append(self.parse_mod())
	return Seq(*re_list)

	def parse_mod(self):
	"""Parse a primitive regexp followed by *, +, ? modifiers."""
	re = self.parse_prim()
	while not self.end and self.c in "*+?":
	if self.c == '*':
	re = Rep(re)
	elif self.c == '+':
	re = Rep1(re)
	else: # self.c == '?'
	re = Opt(re)
	self.next()
	return re

	def parse_prim(self):
	"""Parse a primitive regexp."""
	c = self.get()
	if c == '.':
	re = AnyBut("\n")
	elif c == '^':
	re = Bol
	elif c == '$':
	re = Eol
	elif c == '(':
	re = self.parse_alt()
	self.expect(')')
	elif c == '[':
	re = self.parse_charset()
	self.expect(']')
	else:
	if c == '\\':
	c = self.get()
	re = Char(c)
	return re

	def parse_charset(self):
	"""Parse a charset. Does not include the surrounding []."""
	char_list = []
	invert = 0
	if self.c == '^':
	invert = 1
	self.next()
	if self.c == ']':
	char_list.append(']')
	self.next()
	while not self.end and self.c != ']':
	c1 = self.get()
	if self.c == '-' and self.lookahead(1) != ']':
	self.next()
	c2 = self.get()
	for a in xrange(ord(c1), ord(c2) + 1):
	char_list.append(chr(a))
	else:
	char_list.append(c1)
	chars = ''.join(char_list)
	if invert:
	return AnyBut(chars)
	else:
	return Any(chars)

	def next(self):
	"""Advance to the next char."""
	s = self.s
	i = self.i = self.i + 1
	if i < len(s):
	self.c = s[i]
	else:
	self.c = ''
	self.end = 1

	def get(self):
	if self.end:
	self.error("Premature end of string")
	c = self.c
	self.next()
	return c

	def lookahead(self, n):
	"""Look ahead n chars."""
	j = self.i + n
	if j < len(self.s):
	return self.s[j]
	else:
	return ''

	def expect(self, c):
	"""
	Expect to find character \|c\| at current position.
	Raises an exception otherwise.
	"""
	if self.c == c:
	self.next()
	else:
	self.error("Missing %s" % repr(c))

	def error(self, mess):
	"""Raise exception to signal syntax error in regexp."""
	raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
	repr(self.s), self.i, mess))