# -*- coding: utf-8 -*-
"""
    jinja2.lexer
    ~~~~~~~~~~~~

    This module implements a Jinja / Python combination lexer. The
    `Lexer` class provided by this module is used to do some preprocessing
    for Jinja.

    On the one hand it filters out invalid operators like the bitshift
    operators we don't allow in templates. On the other hand it separates
    template code and python code in expressions.

    :copyright: (c) 2010 by the Jinja Team.
    :license: BSD, see LICENSE for more details.
"""
import re

from operator import itemgetter
from collections import deque
from jinja2.exceptions import TemplateSyntaxError
from jinja2.utils import LRUCache
from jinja2._compat import next, iteritems, implements_iterator, text_type, \
     intern

# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = LRUCache(50)

# static regular expressions
whitespace_re = re.compile(r'\s+', re.U)
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)
integer_re = re.compile(r'\d+')

# we use the unicode identifier rule if this python version is able
# to handle unicode identifiers, otherwise the standard ASCII one.
try:
    compile('föö', '<unknown>', 'eval')
except SyntaxError:
    name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
else:
    from jinja2 import _stringdefs
    name_re = re.compile(r'[%s][%s]*' % (_stringdefs.xid_start,
                                         _stringdefs.xid_continue))

float_re = re.compile(r'(?<!\.)\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')

# intern the tokens and keep references to them
TOKEN_ADD = intern('add')
TOKEN_ASSIGN = intern('assign')
TOKEN_COLON = intern('colon')
TOKEN_COMMA = intern('comma')
TOKEN_DIV = intern('div')
TOKEN_DOT = intern('dot')
TOKEN_EQ = intern('eq')
TOKEN_FLOORDIV = intern('floordiv')
TOKEN_GT = intern('gt')
TOKEN_GTEQ = intern('gteq')
TOKEN_LBRACE = intern('lbrace')
TOKEN_LBRACKET = intern('lbracket')
TOKEN_LPAREN = intern('lparen')
TOKEN_LT = intern('lt')
TOKEN_LTEQ = intern('lteq')
TOKEN_MOD = intern('mod')
TOKEN_MUL = intern('mul')
TOKEN_NE = intern('ne')
TOKEN_PIPE = intern('pipe')
TOKEN_POW = intern('pow')
TOKEN_RBRACE = intern('rbrace')
TOKEN_RBRACKET = intern('rbracket')
TOKEN_RPAREN = intern('rparen')
TOKEN_SEMICOLON = intern('semicolon')
TOKEN_SUB = intern('sub')
TOKEN_TILDE = intern('tilde')
TOKEN_WHITESPACE = intern('whitespace')
TOKEN_FLOAT = intern('float')
TOKEN_INTEGER = intern('integer')
TOKEN_NAME = intern('name')
TOKEN_STRING = intern('string')
TOKEN_OPERATOR = intern('operator')
TOKEN_BLOCK_BEGIN = intern('block_begin')
TOKEN_BLOCK_END = intern('block_end')
TOKEN_VARIABLE_BEGIN = intern('variable_begin')
TOKEN_VARIABLE_END = intern('variable_end')
TOKEN_RAW_BEGIN = intern('raw_begin')
TOKEN_RAW_END = intern('raw_end')
TOKEN_COMMENT_BEGIN = intern('comment_begin')
TOKEN_COMMENT_END = intern('comment_end')
TOKEN_COMMENT = intern('comment')
TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin')
TOKEN_LINESTATEMENT_END = intern('linestatement_end')
TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin')
TOKEN_LINECOMMENT_END = intern('linecomment_end')
TOKEN_LINECOMMENT = intern('linecomment')
TOKEN_DATA = intern('data')
TOKEN_INITIAL = intern('initial')
TOKEN_EOF = intern('eof')

# bind operators to token types
operators = {
    '+': TOKEN_ADD,
    '-': TOKEN_SUB,
    '/': TOKEN_DIV,
    '//': TOKEN_FLOORDIV,
    '*': TOKEN_MUL,
    '%': TOKEN_MOD,
    '**': TOKEN_POW,
    '~': TOKEN_TILDE,
    '[': TOKEN_LBRACKET,
    ']': TOKEN_RBRACKET,
    '(': TOKEN_LPAREN,
    ')': TOKEN_RPAREN,
    '{': TOKEN_LBRACE,
    '}': TOKEN_RBRACE,
    '==': TOKEN_EQ,
    '!=': TOKEN_NE,
    '>': TOKEN_GT,
    '>=': TOKEN_GTEQ,
    '<': TOKEN_LT,
    '<=': TOKEN_LTEQ,
    '=': TOKEN_ASSIGN,
    '.': TOKEN_DOT,
    ':': TOKEN_COLON,
    '|': TOKEN_PIPE,
    ',': TOKEN_COMMA,
    ';': TOKEN_SEMICOLON
}

reverse_operators = dict([(v, k) for k, v in iteritems(operators)])
assert len(operators) == len(reverse_operators), 'operators dropped'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))
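
# Note: because the alternation in ``operator_re`` is tried left to right,
# the operators are sorted by decreasing length above; e.g. ``'**'`` is
# listed before ``'*'`` so that ``operator_re.match('**').group()`` yields
# ``'**'`` instead of stopping at a single ``'*'``.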

ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT,
                            TOKEN_COMMENT_END, TOKEN_WHITESPACE,
                            TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END,
                            TOKEN_LINECOMMENT])
ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA,
                             TOKEN_COMMENT, TOKEN_LINECOMMENT])


def _describe_token_type(token_type):
    if token_type in reverse_operators:
        return reverse_operators[token_type]
    return {
        TOKEN_COMMENT_BEGIN: 'begin of comment',
        TOKEN_COMMENT_END: 'end of comment',
        TOKEN_COMMENT: 'comment',
        TOKEN_LINECOMMENT: 'comment',
        TOKEN_BLOCK_BEGIN: 'begin of statement block',
        TOKEN_BLOCK_END: 'end of statement block',
        TOKEN_VARIABLE_BEGIN: 'begin of print statement',
        TOKEN_VARIABLE_END: 'end of print statement',
        TOKEN_LINESTATEMENT_BEGIN: 'begin of line statement',
        TOKEN_LINESTATEMENT_END: 'end of line statement',
        TOKEN_DATA: 'template data / text',
        TOKEN_EOF: 'end of template'
    }.get(token_type, token_type)


def describe_token(token):
    """Returns a description of the token."""
    if token.type == 'name':
        return token.value
    return _describe_token_type(token.type)


def describe_token_expr(expr):
    """Like `describe_token` but for token expressions."""
    if ':' in expr:
        type, value = expr.split(':', 1)
        if type == 'name':
            return value
    else:
        type = expr
    return _describe_token_type(type)


def count_newlines(value):
    """Count the number of newline characters in the string.  This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))


def compile_rules(environment):
    """Compiles all the rules from the environment into a list of rules."""
    e = re.escape
    rules = [
        (len(environment.comment_start_string), 'comment',
         e(environment.comment_start_string)),
        (len(environment.block_start_string), 'block',
         e(environment.block_start_string)),
        (len(environment.variable_start_string), 'variable',
         e(environment.variable_start_string))
    ]

    if environment.line_statement_prefix is not None:
        rules.append((len(environment.line_statement_prefix), 'linestatement',
                      r'^[ \t\v]*' + e(environment.line_statement_prefix)))
    if environment.line_comment_prefix is not None:
        rules.append((len(environment.line_comment_prefix), 'linecomment',
                      r'(?:^|(?<=\S))[^\S\r\n]*' +
                      e(environment.line_comment_prefix)))

    return [x[1:] for x in sorted(rules, reverse=True)]
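
# Illustrative note, assuming an unmodified environment with the default
# delimiters: compile_rules() returns the root tag rules as a list of
# (name, escaped_pattern) pairs sorted longest delimiter first, roughly
# [('variable', <escaped '{{'>), ('comment', <escaped '{#'>),
#  ('block', <escaped '{%'>)].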


class Failure(object):
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message = message
        self.error_class = cls

    def __call__(self, lineno, filename):
        raise self.error_class(self.message, lineno, filename)


class Token(tuple):
    """Token class."""
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type == 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of non-interned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False
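
    # Illustrative (not executed): Token(1, 'name', 'foo').test('name') and
    # Token(1, 'name', 'foo').test('name:foo') are both True, while
    # Token(1, 'name', 'foo').test_any('integer', 'string') is False.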

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )


@implements_iterator
class TokenStreamIterator(object):
    """The iterator for token streams.  Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream):
        self.stream = stream

    def __iter__(self):
        return self

    def __next__(self):
        token = self.stream.current
        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration()
        next(self.stream)
        return token


@implements_iterator
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._iter = iter(generator)
        self._pushed = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, '')
        next(self)

    def __iter__(self):
        return TokenStreamIterator(self)

    def __bool__(self):
        return bool(self._pushed) or self.current.type is not TOKEN_EOF
    __nonzero__ = __bool__  # py2

    eos = property(lambda x: not x, doc="Are we at the end of the stream?")

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in range(n):
            next(self)

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None
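
    # Illustrative (not executed): with the current token being a comma,
    # stream.skip_if('comma') consumes it and returns True, whereas
    # stream.look() returns the upcoming token without consuming anything.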

    def __next__(self):
        """Go one token ahead and return the old one"""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, '')
        self._iter = None
        self.closed = True

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)
            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, describe_token(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        try:
            return self.current
        finally:
            next(self)


def get_lexer(environment):
    """Return a lexer which is probably cached."""
    key = (environment.block_start_string,
           environment.block_end_string,
           environment.variable_start_string,
           environment.variable_end_string,
           environment.comment_start_string,
           environment.comment_end_string,
           environment.line_statement_prefix,
           environment.line_comment_prefix,
           environment.trim_blocks,
           environment.lstrip_blocks,
           environment.newline_sequence,
           environment.keep_trailing_newline)
    lexer = _lexer_cache.get(key)
    if lexer is None:
        lexer = Lexer(environment)
        _lexer_cache[key] = lexer
    return lexer
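
# Minimal usage sketch (illustrative only, nothing here is executed):
#
#     from jinja2 import Environment
#     env = Environment()
#     lexer = get_lexer(env)
#     for token in lexer.tokenize(u'{{ user.name }}'):
#         print(token.lineno, token.type, token.value)
#
# Two environments with identical lexer-relevant settings share the same
# cached Lexer instance via ``_lexer_cache``.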


class Lexer(object):
    """Class that implements a lexer for a given environment.  Automatically
    created by the environment class; usually you don't have to do that
    yourself.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment):
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, TOKEN_WHITESPACE, None),
            (float_re, TOKEN_FLOAT, None),
            (integer_re, TOKEN_INTEGER, None),
            (name_re, TOKEN_NAME, None),
            (string_re, TOKEN_STRING, None),
            (operator_re, TOKEN_OPERATOR, None)
        ]

        # assemble the root lexing rule.  because "|" tries the alternatives
        # from left to right, we have to sort the delimiters by length so
        # that the lexer keeps working with parsing rules like <% for blocks
        # and <%= for variables (if someone wants ASP-like syntax).
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = compile_rules(environment)

        # block suffix if trimming is enabled
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # strip leading spaces if lstrip_blocks is enabled
        prefix_re = {}
        if environment.lstrip_blocks:
            # use '{%+' to manually disable lstrip_blocks behavior
            no_lstrip_re = e('+')
            # detect overlap between block and variable or comment strings
            block_diff = c(r'^%s(.*)' % e(environment.block_start_string))
            # make sure we don't mistake a block for a variable or a comment
            m = block_diff.match(environment.comment_start_string)
            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
            m = block_diff.match(environment.variable_start_string)
            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''

            # detect overlap between comment and variable strings
            comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string))
            m = comment_diff.match(environment.variable_start_string)
            no_variable_re = m and r'(?!%s)' % e(m.group(1)) or ''

            lstrip_re = r'^[ \t]*'
            block_prefix_re = r'%s%s(?!%s)|%s\+?' % (
                    lstrip_re,
                    e(environment.block_start_string),
                    no_lstrip_re,
                    e(environment.block_start_string),
                    )
            comment_prefix_re = r'%s%s%s|%s\+?' % (
                    lstrip_re,
                    e(environment.comment_start_string),
                    no_variable_re,
                    e(environment.comment_start_string),
                    )
            prefix_re['block'] = block_prefix_re
            prefix_re['comment'] = comment_prefix_re
        else:
            block_prefix_re = '%s' % e(environment.block_start_string)

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        # global lexing rules
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % (
                        e(environment.block_start_string),
                        block_prefix_re,
                        e(environment.block_end_string),
                        e(environment.block_end_string)
                    )] + [
                        r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n, r))
                        for n, r in root_tag_rules
                    ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
                # data
                (c('.+'), TOKEN_DATA, None)
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                (c('(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), TOKEN_BLOCK_END, '#pop'),
            ] + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                (c('\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), TOKEN_VARIABLE_END, '#pop')
            ] + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    block_prefix_re,
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop')
            ] + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT,
                 TOKEN_LINECOMMENT_END), '#pop')
            ]
        }

    def _normalize_newlines(self, value):
        """Called for strings and template data to normalize it to unicode."""
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None, state=None):
        """Calls tokeniter + wrap and wraps the result in a token stream."""
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(self, stream, name=None, filename=None):
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value in stream:
            if token in ignored_tokens:
                continue
            elif token == 'linestatement_begin':
                token = 'block_begin'
            elif token == 'linestatement_end':
                token = 'block_end'
            # we are not interested in those tokens in the parser
            elif token in ('raw_begin', 'raw_end'):
                continue
            elif token == 'data':
                value = self._normalize_newlines(value)
            elif token == 'keyword':
                token = value
            elif token == 'name':
                value = str(value)
            elif token == 'string':
                # try to unescape string
                try:
                    value = self._normalize_newlines(value[1:-1]) \
                        .encode('ascii', 'backslashreplace') \
                        .decode('unicode-escape')
                except Exception as e:
                    msg = str(e).split(':')[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename)
                # if we can express it as a bytestring (ascii only)
                # we do that to support semi-broken APIs such as
                # datetime.datetime.strftime.  On Python 3 this call
                # is a no-op because the value already is a string.
                try:
                    value = str(value)
                except UnicodeError:
                    pass
            elif token == 'integer':
                value = int(value)
            elif token == 'float':
                value = float(value)
            elif token == 'operator':
                token = operators[value]
            yield Token(lineno, token, value)

    def tokeniter(self, source, name, filename=None, state=None):
        """This method tokenizes the text and returns the tokens in a
        generator.  Use this method if you just want to tokenize a template.
        """
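        # Illustrative (not executed), assuming the default delimiters: for
        # the source u'{{ foo }}' this generator yields roughly
        #   (1, 'variable_begin', u'{{'), (1, 'whitespace', u' '),
        #   (1, 'name', u'foo'), (1, 'whitespace', u' '),
        #   (1, 'variable_end', u'}}')
        # and the whitespace tuples are later filtered out by wrap().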
        source = text_type(source)
        lines = source.splitlines()
        if self.keep_trailing_newline and source:
            for newline in ('\r\n', '\r', '\n'):
                if source.endswith(newline):
                    lines.append('')
                    break
        source = '\n'.join(lines)
        pos = 0
        lineno = 1
        stack = ['root']
        if state is not None and state != 'root':
            assert state in ('variable', 'block'), 'invalid state'
            stack.append(state + '_begin')
        else:
            state = 'root'
        statetokens = self.rules[stack[-1]]
        source_length = len(source)

        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced.  continue parsing with the lower rule which
                # is the operator rule.  do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in iteritems(m.groupdict()):
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data or token not in ignore_if_empty:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as token types are just yielded as is
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected \'%s\'' %
                                                          data, lineno, name,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected \'%s\', '
                                                          'expected \'%s\'' %
                                                          (data, expected_op),
                                                          lineno, name,
                                                          filename)
                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch the new position into a new variable so that we can
                # check if there is an internal parsing error which would
                # result in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in iteritems(m.groupdict()):
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and there was no stack
                # change.  this means a loop without break condition; avoid
                # that and raise an error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # publish the new position and start again
                pos = pos2
                break
            # if the loop terminated without a break we haven't found a single
            # match; either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          name, filename)
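

# ----------------------------------------------------------------------------
# Minimal smoke-test sketch (not part of the original module).  It only runs
# when this file is executed directly, never on import, and it assumes a
# default Environment with the standard delimiters; adjust the template
# string as needed.
if __name__ == '__main__':
    from jinja2.environment import Environment

    _env = Environment()
    _source = u'{% for item in seq %}\n  {{ item }}\n{% endfor %}'
    # Dump the parsed token stream: (lineno, type, value) for each token.
    for _token in get_lexer(_env).tokenize(_source):
        print('%d %s %r' % (_token.lineno, _token.type, _token.value))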