| #======================================================================= |
| # |
| # Python Lexical Analyser |
| # |
| # Classes for building NFAs and DFAs |
| # |
| #======================================================================= |
| |
| import sys |
| |
| from Transitions import TransitionMap |
| |
| LOWEST_PRIORITY = -sys.maxint |
| |
| class Machine(object): |
| """A collection of Nodes representing an NFA or DFA.""" |
| states = None # [Node] |
| next_state_number = 1 |
| initial_states = None # {(name, bol): Node} |
| |
| def __init__(self): |
| self.states = [] |
| self.initial_states = {} |
| |
| def __del__(self): |
| #print "Destroying", self ### |
| for state in self.states: |
| state.destroy() |
| |
| def new_state(self): |
| """Add a new state to the machine and return it.""" |
| s = Node() |
| n = self.next_state_number |
| self.next_state_number = n + 1 |
| s.number = n |
| self.states.append(s) |
| return s |
| |
| def new_initial_state(self, name): |
| state = self.new_state() |
| self.make_initial_state(name, state) |
| return state |
| |
| def make_initial_state(self, name, state): |
| self.initial_states[name] = state |
| |
| def get_initial_state(self, name): |
| return self.initial_states[name] |
| |
| def dump(self, file): |
| file.write("Plex.Machine:\n") |
| if self.initial_states is not None: |
| file.write(" Initial states:\n") |
| for (name, state) in self.initial_states.iteritems(): |
| file.write(" '%s': %d\n" % (name, state.number)) |
| for s in self.states: |
| s.dump(file) |
| |
| class Node(object): |
| """A state of an NFA or DFA.""" |
| transitions = None # TransitionMap |
| action = None # Action |
| action_priority = None # integer |
| number = 0 # for debug output |
| epsilon_closure = None # used by nfa_to_dfa() |
| |
| def __init__(self): |
| # Preinitialise the list of empty transitions, because |
| # the nfa-to-dfa algorithm needs it |
| #self.transitions = {'':[]} |
| self.transitions = TransitionMap() |
| self.action_priority = LOWEST_PRIORITY |
| |
| def destroy(self): |
| #print "Destroying", self ### |
| self.transitions = None |
| self.action = None |
| self.epsilon_closure = None |
| |
| def add_transition(self, event, new_state): |
| self.transitions.add(event, new_state) |
| |
| def link_to(self, state): |
| """Add an epsilon-move from this state to another state.""" |
| self.add_transition('', state) |
| |
| def set_action(self, action, priority): |
| """Make this an accepting state with the given action. If |
| there is already an action, choose the action with highest |
| priority.""" |
| if priority > self.action_priority: |
| self.action = action |
| self.action_priority = priority |
| |
| def get_action(self): |
| return self.action |
| |
| def get_action_priority(self): |
| return self.action_priority |
| |
| def is_accepting(self): |
| return self.action is not None |
| |
| def __str__(self): |
| return "State %d" % self.number |
| |
| def dump(self, file): |
| # Header |
| file.write(" State %d:\n" % self.number) |
| # Transitions |
| # self.dump_transitions(file) |
| self.transitions.dump(file) |
| # Action |
| action = self.action |
| priority = self.action_priority |
| if action is not None: |
| file.write(" %s [priority %d]\n" % (action, priority)) |
| |
| def __lt__(self, other): |
| return self.number < other.number |
| |
| class FastMachine(object): |
| """ |
| FastMachine is a deterministic machine represented in a way that |
| allows fast scanning. |
| """ |
| initial_states = None # {state_name:state} |
| states = None # [state] |
| # where state = {event:state, 'else':state, 'action':Action} |
| next_number = 1 # for debugging |
| |
| new_state_template = { |
| '':None, 'bol':None, 'eol':None, 'eof':None, 'else':None |
| } |
| |
| def __init__(self, old_machine = None): |
| self.initial_states = initial_states = {} |
| self.states = [] |
| if old_machine: |
| self.old_to_new = old_to_new = {} |
| for old_state in old_machine.states: |
| new_state = self.new_state() |
| old_to_new[old_state] = new_state |
| for name, old_state in old_machine.initial_states.iteritems(): |
| initial_states[name] = old_to_new[old_state] |
| for old_state in old_machine.states: |
| new_state = old_to_new[old_state] |
| for event, old_state_set in old_state.transitions.iteritems(): |
| if old_state_set: |
| new_state[event] = old_to_new[old_state_set.keys()[0]] |
| else: |
| new_state[event] = None |
| new_state['action'] = old_state.action |
| |
| def __del__(self): |
| for state in self.states: |
| state.clear() |
| |
| def new_state(self, action = None): |
| number = self.next_number |
| self.next_number = number + 1 |
| result = self.new_state_template.copy() |
| result['number'] = number |
| result['action'] = action |
| self.states.append(result) |
| return result |
| |
| def make_initial_state(self, name, state): |
| self.initial_states[name] = state |
| |
| def add_transitions(self, state, event, new_state, maxint=sys.maxint): |
| if type(event) is tuple: |
| code0, code1 = event |
| if code0 == -maxint: |
| state['else'] = new_state |
| elif code1 != maxint: |
| while code0 < code1: |
| state[chr(code0)] = new_state |
| code0 = code0 + 1 |
| else: |
| state[event] = new_state |
| |
| def get_initial_state(self, name): |
| return self.initial_states[name] |
| |
| def dump(self, file): |
| file.write("Plex.FastMachine:\n") |
| file.write(" Initial states:\n") |
| for name, state in self.initial_states.iteritems(): |
| file.write(" %s: %s\n" % (repr(name), state['number'])) |
| for state in self.states: |
| self.dump_state(state, file) |
| |
| def dump_state(self, state, file): |
| # Header |
| file.write(" State %d:\n" % state['number']) |
| # Transitions |
| self.dump_transitions(state, file) |
| # Action |
| action = state['action'] |
| if action is not None: |
| file.write(" %s\n" % action) |
| |
| def dump_transitions(self, state, file): |
| chars_leading_to_state = {} |
| special_to_state = {} |
| for (c, s) in state.iteritems(): |
| if len(c) == 1: |
| chars = chars_leading_to_state.get(id(s), None) |
| if chars is None: |
| chars = [] |
| chars_leading_to_state[id(s)] = chars |
| chars.append(c) |
| elif len(c) <= 4: |
| special_to_state[c] = s |
| ranges_to_state = {} |
| for state in self.states: |
| char_list = chars_leading_to_state.get(id(state), None) |
| if char_list: |
| ranges = self.chars_to_ranges(char_list) |
| ranges_to_state[ranges] = state |
| ranges_list = ranges_to_state.keys() |
| ranges_list.sort() |
| for ranges in ranges_list: |
| key = self.ranges_to_string(ranges) |
| state = ranges_to_state[ranges] |
| file.write(" %s --> State %d\n" % (key, state['number'])) |
| for key in ('bol', 'eol', 'eof', 'else'): |
| state = special_to_state.get(key, None) |
| if state: |
| file.write(" %s --> State %d\n" % (key, state['number'])) |
| |
| def chars_to_ranges(self, char_list): |
| char_list.sort() |
| i = 0 |
| n = len(char_list) |
| result = [] |
| while i < n: |
| c1 = ord(char_list[i]) |
| c2 = c1 |
| i = i + 1 |
| while i < n and ord(char_list[i]) == c2 + 1: |
| i = i + 1 |
| c2 = c2 + 1 |
| result.append((chr(c1), chr(c2))) |
| return tuple(result) |
| |
| def ranges_to_string(self, range_list): |
| return ','.join(map(self.range_to_string, range_list)) |
| |
| def range_to_string(self, range_tuple): |
| (c1, c2) = range_tuple |
| if c1 == c2: |
| return repr(c1) |
| else: |
| return "%s..%s" % (repr(c1), repr(c2)) |