|  | #!/usr/bin/env python | 
|  | # Copyright (C) 2013 Google Inc. All rights reserved. | 
|  | # | 
|  | # Redistribution and use in source and binary forms, with or without | 
|  | # modification, are permitted provided that the following conditions are | 
|  | # met: | 
|  | # | 
|  | #     * Redistributions of source code must retain the above copyright | 
|  | # notice, this list of conditions and the following disclaimer. | 
|  | #     * Redistributions in binary form must reproduce the above | 
|  | # copyright notice, this list of conditions and the following disclaimer | 
|  | # in the documentation and/or other materials provided with the | 
|  | # distribution. | 
|  | #     * Neither the name of Google Inc. nor the names of its | 
|  | # contributors may be used to endorse or promote products derived from | 
|  | # this software without specific prior written permission. | 
|  | # | 
|  | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
|  | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
|  | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
|  | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 
|  | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 
|  | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 
|  | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
|  | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
|  | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
|  | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
|  | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
|  |  | 
|  | import io | 
|  | import itertools | 
|  | import re | 
|  | import sys | 
|  |  | 
|  |  | 
class BadInput(Exception):
    """Signals that the input contains something this tool cannot process.

    Raised by the line processors and the optimizer with a human-readable
    message describing the offending construct.
    """
|  |  | 
|  |  | 
class SwitchCase(object):
    """A single CASE block: the identifier it matches and its body."""

    def __init__(self, identifier, block):
        # block: the sequence of body lines, written out verbatim once the
        # identifier has been matched.
        self.block = block
        # identifier: the string literal that selects this block.
        self.identifier = identifier
|  |  | 
|  |  | 
class Optimizer(object):
    """Generates optimized identifier matching code.

    Given a collection of cases (objects exposing `identifier` and `block`),
    writes nested if/switch statements to `output_file`. The generated code
    first dispatches on `length_variable` and then on individual characters
    of `array_variable`, finally emitting the matching case's block lines.

    Alphabetic characters are compared through `(c | 0x20)`, which folds
    ASCII upper-case letters onto their lower-case counterparts.
    """

    def __init__(self, output_file, array_variable, length_variable):
        """Stores the output stream and the names used in the generated code.

        output_file: object with a write() method receiving the generated text.
        array_variable: name of the character array indexed in the output.
        length_variable: name of the variable holding the input length.
        """
        self.output_file = output_file
        self.array_variable = array_variable
        self.length_variable = length_variable

    def inspect(self, cases):
        """Writes the complete matcher for `cases`, grouped by identifier length."""
        lengths = sorted(set(len(c.identifier) for c in cases))

        def response(length):
            same_length = [c for c in cases if len(c.identifier) == length]
            # A real list (not a lazy range/iterator) so it can be tested for
            # emptiness and traversed repeatedly on Python 2 and 3 alike.
            self.inspect_array(same_length, list(range(length)))

        self.write_selection(self.length_variable, lengths, str, response)

    def score(self, alternatives):
        """Returns minus the sum of the squared sizes of groups of equal values."""
        return -sum(len(list(group)) ** 2
                    for _, group in itertools.groupby(sorted(alternatives)))

    def choose_selection_pos(self, cases, pending):
        """Picks the position in `pending` to branch on next.

        Only positions where every case has an alphabetic character are
        eligible, because the generated selection always applies `| 0x20`.
        Among those, the position with the lowest score() wins; ties go to
        the earliest position in `pending`.

        Raises BadInput when no position is eligible.
        """
        candidates = [pos for pos in pending
                      if all(c.identifier[pos].isalpha() for c in cases)]
        if not candidates:
            raise BadInput('Case-insensitive switching on non-alphabetic characters not yet implemented')
        # min() returns the first minimal element, exactly like sorted(...)[0].
        return min(candidates,
                   key=lambda pos: self.score([c.identifier[pos] for c in cases]))

    def inspect_array(self, cases, pending):
        """Writes the code distinguishing `cases` on the `pending` positions.

        cases: non-empty list of cases whose identifiers share one length.
        pending: positions of the identifier that have not been tested yet.
        """
        assert len(cases) >= 1
        # Materialise once: the positions are tested for emptiness, traversed
        # several times and shared between sibling branches, none of which is
        # safe with a one-shot iterator.
        pending = list(pending)

        if not pending:
            # Every character has been checked, so exactly one case remains.
            assert len(cases) == 1
            for block_line in cases[0].block:
                self.output_file.write(block_line)
            return

        common = [pos for pos in pending
                  if len(set(c.identifier[pos] for c in cases)) == 1]
        if common:
            # All cases agree on these positions: test them in a single `if`.
            identifier = cases[0].identifier
            conditions = []
            for pos in common:
                if identifier[pos].isalpha():
                    conditions.append(u"(%s[%d] | 0x20) == '%s'" %
                                      (self.array_variable, pos, identifier[pos]))
                else:
                    conditions.append(u"%s[%d] == '%s'" %
                                      (self.array_variable, pos, identifier[pos]))
            self.output_file.write(u'if (LIKELY(' + u' && '.join(conditions) + u')) {\n')
            next_pending = sorted(set(pending) - set(common))
            self.inspect_array(cases, next_pending)
            self.output_file.write(u'}\n')
        else:
            pos = self.choose_selection_pos(cases, pending)
            # Must be a list: it is handed to one recursive call per alternative.
            next_pending = [p for p in pending if p != pos]

            alternatives = sorted(set(c.identifier[pos] for c in cases))

            def literal(alternative):
                if isinstance(alternative, int):
                    return str(alternative)
                return u"'%s'" % alternative

            def response(alternative):
                matching = [c for c in cases if c.identifier[pos] == alternative]
                self.inspect_array(matching, next_pending)

            expression = u'(%s[%d] | 0x20)' % (self.array_variable, pos)
            self.write_selection(expression, alternatives, literal, response)

    def write_selection(self, expression, alternatives, literal, response):
        """Writes a selection on `expression` over `alternatives`.

        One alternative becomes an `if`, two become `if`/`else if`, and any
        other count becomes a `switch`.

        expression: text of the expression being tested.
        alternatives: the values to distinguish, in output order.
        literal: callable turning an alternative into its source text.
        response: callable invoked per alternative to write that branch's body.
        """
        write = self.output_file.write
        if len(alternatives) == 1:
            write(u'if (LIKELY(%s == %s)) {\n' % (expression, literal(alternatives[0])))
            response(alternatives[0])
            write(u'}\n')
        elif len(alternatives) == 2:
            write(u'if (%s == %s) {\n' % (expression, literal(alternatives[0])))
            response(alternatives[0])
            write(u'} else if (LIKELY(%s == %s)) {\n' % (expression, literal(alternatives[1])))
            response(alternatives[1])
            write(u'}\n')
        else:
            write(u'switch (%s) {\n' % expression)
            for alternative in alternatives:
                write(u'case %s: {\n' % literal(alternative))
                response(alternative)
                write(u'} break;\n')
            write(u'}\n')
|  |  | 
|  |  | 
class LineProcessor(object):
    """Common base of the line handlers; subclasses override process_line."""

    def process_line(self, line):
        """Handles one input line; this base version ignores it and returns None."""
        return None
|  |  | 
|  |  | 
class MainLineProcessor(LineProcessor):
    """Handles lines that are outside of any SWITCH block.

    Lines are copied to the output unchanged until a SWITCH header is seen,
    at which point control is handed to a SwitchLineProcessor.
    """
    SWITCH_PATTERN = re.compile(r'\s*SWITCH\s*\((\w*),\s*(\w*)\) \{$')

    def __init__(self, output_file):
        self.output_file = output_file

    def process_line(self, line):
        """Consumes `line` and returns the processor for the following line."""
        header = MainLineProcessor.SWITCH_PATTERN.match(line)
        if header is None:
            # Ordinary line: pass it straight through.
            self.output_file.write(line)
            return self
        # The two captured groups are the array name and the length name.
        array_variable, length_variable = header.groups()
        return SwitchLineProcessor(self, self.output_file, array_variable, length_variable)
|  |  | 
|  |  | 
class SwitchLineProcessor(LineProcessor):
    """Handles the lines directly inside a SWITCH block.

    Collects the CASE blocks and, on the closing brace, asks an Optimizer to
    write the matching code for all of them.
    """
    CASE_PATTERN = re.compile(r'\s*CASE\s*\(\"([a-z0-9_\-\(]*)\"\) \{$')
    CLOSE_BRACE_PATTERN = re.compile(r'\s*\}$')
    EMPTY_PATTERN = re.compile(r'\s*$')

    def __init__(self, parent, output_file, array_variable, length_variable):
        self.cases = []
        self.parent = parent
        self.output_file = output_file
        self.array_variable = array_variable
        self.length_variable = length_variable

    def process_line(self, line):
        """Consumes `line` and returns the processor for the following line.

        Raises BadInput for anything other than a CASE header, a closing
        brace or a blank line.
        """
        case_header = SwitchLineProcessor.CASE_PATTERN.match(line)
        if case_header:
            return CaseLineProcessor(self, self.output_file, case_header.group(1))

        if SwitchLineProcessor.CLOSE_BRACE_PATTERN.match(line):
            # End of the SWITCH: emit the code for every collected case.
            optimizer = Optimizer(self.output_file, self.array_variable, self.length_variable)
            optimizer.inspect(self.cases)
            return self.parent

        if SwitchLineProcessor.EMPTY_PATTERN.match(line):
            return self

        raise BadInput('Invalid line within SWITCH: %s' % line)

    def add_case(self, latest_case):
        """Registers a finished CASE; raises BadInput if its identifier repeats."""
        for known in self.cases:
            if known.identifier == latest_case.identifier:
                raise BadInput('Repeated case: %s' % latest_case.identifier)
        self.cases.append(latest_case)
|  |  | 
|  |  | 
class CaseLineProcessor(LineProcessor):
    """Handles the lines inside a CASE block.

    Body lines are accumulated until a line holding only a closing brace
    (optionally preceded by whitespace) is found; the finished case is then
    handed to the parent processor.
    """
    CLOSE_BRACE_PATTERN = re.compile(r'\s*\}$')
    BREAK_PATTERN = re.compile(r'break;')

    def __init__(self, parent, output_file, identifier):
        self.block = []
        self.identifier = identifier
        self.output_file = output_file
        self.parent = parent

    def process_line(self, line):
        """Consumes `line` and returns the processor for the following line.

        Raises BadInput if the line contains `break;`.
        """
        if CaseLineProcessor.CLOSE_BRACE_PATTERN.match(line):
            finished = SwitchCase(self.identifier, self.block)
            self.parent.add_case(finished)
            return self.parent

        if CaseLineProcessor.BREAK_PATTERN.search(line):
            raise BadInput('break within CASE not supported: %s' % line)

        self.block.append(line)
        return self
|  |  | 
|  |  | 
def process_file(input_name, output_name):
    """Transforms input file into legal C++ source code.

    Reads `input_name` as UTF-8 line by line, feeding each line to the
    current line processor, and writes the result to `output_name` as UTF-8.

    Raises BadInput if a processor rejects a line, or if the input ends
    while a SWITCH or CASE block is still open. In the latter situation the
    collected cases would otherwise be dropped without any diagnostic.
    """
    with io.open(input_name, 'r', -1, 'utf-8') as input_file:
        with io.open(output_name, 'w', -1, 'utf-8') as output_file:
            main_processor = MainLineProcessor(output_file)
            processor = main_processor
            for line in input_file:
                # Each processor returns whichever processor handles the next line.
                processor = processor.process_line(line)
            if processor is not main_processor:
                raise BadInput('Unexpected end of input inside a SWITCH block')
|  |  | 
|  |  | 
if __name__ == '__main__':
    # Expects the input path and the output path; report misuse clearly
    # instead of failing with an IndexError traceback. Additional arguments
    # are ignored.
    if len(sys.argv) < 3:
        sys.exit('Usage: %s INPUT_FILE OUTPUT_FILE' % sys.argv[0])
    process_file(sys.argv[1], sys.argv[2])