#!/usr/bin/env python3
"""A small stack-based (RPN) language interpreter: lexer, evaluator, REPL.

Ported to Python 3 from the original Python 2 source.  Program behavior
(stack semantics, token rendering, error handling) is unchanged.
"""

import re
from sys import exit


################################################################################
# class Binds - contains binding data
################################################################################

class Binds:
    """A name -> Token binding table with an optional parent scope.

    `next` points at the enclosing scope (None for the global scope), so
    lookups can walk outward through nested closure environments.
    """

    # class-level defaults; __init__ gives every instance its own table
    table = None
    next = None

    def __init__(self):
        self.table = dict()

    def getbind(self, name):
        """Return the Token bound to `name` in THIS scope only, else None."""
        return self.table.get(name)

    def getbind_deep(self, name):
        """Return the Token bound to `name`, searching outward through parents."""
        scope = self
        while scope is not None:
            bind = scope.getbind(name)
            if bind is not None:
                return bind
            scope = scope.getnext()
        return None

    def setbind(self, name, value):
        """Bind `name` to `value` in this scope (silently overwrites)."""
        self.table[name] = value

    def getnext(self):
        """Return the parent scope, or None at the outermost scope."""
        return self.next


################################################################################
# class Token - contains type and data values for an expression
################################################################################

class Token:
    """One lexed value: a (type, data) pair plus name-lookup metadata."""

    # token types
    (T_BOOL, T_INT, T_STRING, T_NAME, T_ERR,
     T_PRIM, T_APP, T_LAMBDA, T_CLOSURE, T_ARRAY) = range(10)

    # primitive opcodes; indices line up with P_NAMESTRINGS below
    (P_ADD, P_SUB, P_MUL, P_DIV, P_REM, P_NEG, P_BINDS, P_BIND, P_LOAD,
     P_POP, P_EXC, P_DUP, P_QUIT, P_AND, P_OR, P_NOT, P_EQUAL, P_LESSTHAN,
     P_IF, P_APPLY, P_ARRAYNEW, P_PREPEND, P_FIRST, P_REST, P_LENGTH,
     P_CONCAT) = range(26)

    P_NAMESTRINGS = ["add", "sub", "mul", "div", "rem", "neg", "binds",
                     "bind", "load", "pop", "exc", "dup", "quit", "and",
                     "or", "not", "equal", "lessThan", "if", "apply", "[]",
                     "prepend", "first", "rest", "length", "concat"]

    # instance defaults
    type = None       # one of the T_* constants
    data = None       # payload; meaning depends on type
    trap_name = None  # name captured when a non-local binding is pushed
    pair = False      # True when this token was produced by a name lookup

    def __init__(self, type, data):
        self.type = type
        self.data = data

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        if self.pair:
            if self.trap_name is None:
                # local binding: no trapped name, so the value is shown twice
                return "<" + self.strmain() + "," + self.strmain() + ">"
            else:
                return "<" + self.trap_name + "," + self.strmain() + ">"
        else:
            return self.strmain()

    def strmain(self):
        """Render just the payload (without the <name,value> pair decoration)."""
        if self.type == self.T_BOOL:
            if self.data:
                return ":true:"
            return ":false:"
        elif self.type == self.T_ERR:
            return ":error:"
        elif self.type == self.T_STRING:
            return "\"" + self.data + "\""
        elif self.type == self.T_LAMBDA:
            return ":closure:"
        elif self.type == self.T_PRIM:
            return self.P_NAMESTRINGS[self.data]
        elif self.type == self.T_ARRAY:
            if len(self.data) > 0:
                return "[ " + " ".join(str(s) for s in self.data) + " ]"
            return "[]"
        else:
            return str(self.data)

    def copy(self):
        """Return a copy; lambda/array payloads are copied token-by-token.

        Note: trap_name/pair metadata deliberately resets to the defaults.
        """
        if self.type == self.T_LAMBDA or self.type == self.T_ARRAY:
            # make a copy of the token list, not a reference
            return Token(self.type, [token.copy() for token in self.data])
        # immutable payload: a shallow copy is enough
        return Token(self.type, self.data)


################################################################################
# MISC EXTRA FUNCTIONS
################################################################################

def div_proper(dividend, divisor):
    """Floored division with a guaranteed non-negative remainder.

    Returns (quotient, remainder) with
    quotient * divisor + remainder == dividend and
    0 <= remainder < abs(divisor), regardless of operand signs.
    Uses // so the result stays an int on Python 3 (plain / would
    silently return a float there).
    """
    quotient = dividend // divisor
    remainder = dividend % divisor
    if remainder < 0:
        remainder = remainder + abs(divisor)
        if divisor < 0:
            quotient = quotient + 1
        else:
            quotient = quotient - 1
    return (quotient, remainder)  # TUPLES: NATURE'S CLING WRAP


def push_error(stack):
    """Push an :error: token onto the stack."""
    stack.append(Token(Token.T_ERR, False))


################################################################################
# LEXER FUNCTIONS
################################################################################

def findclosure(string):
    """Scan for a `{ ... }` closure at the start of `string`.

    Returns the length of the closure (including both braces), or 0 when
    `string` does not begin with a closure or the braces are unmatched.
    Braces only count when whitespace-delimited, matching the lexer's
    token separation rules.
    """
    depth = 1
    i = 1
    if not re.match(r"{\s", string[0:2]):
        return 0  # not the beginning of a closure
    string = string[1:]
    while string:
        if re.match(r"\s{\s", string[0:3]):
            depth = depth + 1
            string = string[2:]
            i = i + 2
        elif re.match(r"\s}\s", string[0:3]):
            depth = depth - 1
            i = i + 2
            if depth == 0:
                # closure closed, return position
                return i
            string = string[2:]
        elif re.match(r"\s}$", string):
            # closing brace at end-of-input corner case
            depth = depth - 1
            i = i + 2
            if depth == 0:
                return i
            string = string[2:]
        else:
            i = i + 1
            string = string[1:]
    return 0  # no closure found or unmatched parens


# name -> primitive opcode, derived from Token.P_NAMESTRINGS so the two
# tables can never drift out of sync
_PRIM_BY_NAME = dict((name, code)
                     for code, name in enumerate(Token.P_NAMESTRINGS))


def keywordtoken(word):
    """Turn a matched keyword into a Token (None if `word` is unknown)."""
    if word == ":true:":
        return Token(Token.T_BOOL, True)
    if word == ":false:":
        return Token(Token.T_BOOL, False)
    if word == ":error:":
        return Token(Token.T_ERR, False)
    code = _PRIM_BY_NAME.get(word)
    if code is not None:
        return Token(Token.T_PRIM, code)
    return None


def lex(string):
    """Turn a source string into a list of Tokens.

    Longest-match scanning: whitespace, keywords, integers, quoted
    strings, `{ ... }` closures, `f(a,b){ ... }` function sugar, then
    bare names.  Anything unrecognized pushes an :error: token and skips
    ahead to the next whitespace.
    """
    tokens = []
    # scan the string until we run out of tokens; each match is removed
    # from the front of the string before the next pass
    while len(string) > 0:
        # match whitespace
        m = re.search(r"^\s+", string)
        if m:
            string = string[m.end():]
            continue
        # match keywords
        m = re.search(r"^(add|sub|mul|div|rem|neg|binds|bind|load|pop|exc|dup|quit|:true:|:false:|:error:|and|or|not|equal|lessThan|if|apply|\[\]|prepend|first|rest|length|concat)(\s|$)", string)
        if m:
            tokens.append(keywordtoken(m.group(1)))
            string = string[m.end():]
            continue
        # match numbers
        m = re.search(r"^(-?\d+)(\s|$)", string)
        if m:
            tokens.append(Token(Token.T_INT, int(m.group(1))))
            string = string[m.end():]
            continue
        # match strings
        m = re.search(r"^\"([^\"]*?)\"(\s|$)", string)
        if m:
            tokens.append(Token(Token.T_STRING, m.group(1)))
            string = string[m.end():]
            continue
        # match lambda functions (recursively lex the closure body)
        end = findclosure(string)
        if end:
            tokens.append(Token(Token.T_LAMBDA, lex(string[1:end - 1])))
            string = string[end:]
            continue
        # match f(a,b){ ... } sugar: desugar each argument into a
        # NAME / exc / bind / pop prologue prepended to the closure body
        m = re.search(r"^f\(([a-zA-Z][a-zA-Z0-9]*)(,[a-zA-Z][a-zA-Z0-9]*)*\)\{ (.+) \}", string)
        if m:
            string = string[2:]  # skip "f("
            m = re.match(r"^[^)]*", string)
            fnargs = m.group().split(",")
            string = string[m.end() + 1:]  # skip past ")"
            end = findclosure(string)
            fntoken = Token(Token.T_LAMBDA, lex(string[1:end - 1]))
            for arg in fnargs:
                fntoken.data.insert(0, Token(Token.T_PRIM, Token.P_POP))
                fntoken.data.insert(0, Token(Token.T_PRIM, Token.P_BIND))
                fntoken.data.insert(0, Token(Token.T_PRIM, Token.P_EXC))
                fntoken.data.insert(0, Token(Token.T_NAME, arg))
            tokens.append(fntoken)
            string = string[end:]
            continue
        # match names
        m = re.search(r"(^[a-zA-Z][a-zA-Z0-9]*)(\s|$)", string)
        if m:
            tokens.append(Token(Token.T_NAME, m.group(1)))
            string = string[m.end():]
            continue
        # nothing matched, syntax error; push an error and discard
        # everything up to the next whitespace
        push_error(tokens)
        m = re.search(r"(\s|$)", string)
        string = string[m.end():]
    return tokens


################################################################################
# GRAMMAR FUNCTIONS
################################################################################

def p_binmath(stack, op):
    """Primitive binary numeric ops (add, sub, mul, div, rem, lessThan).

    So named for legacy reasons.  Operates in place on stack[-2], then
    pops stack[-1].  Division/remainder by zero pushes :error: and leaves
    both operands on the stack.
    """
    if len(stack) < 2 or stack[-1].type != Token.T_INT or stack[-2].type != Token.T_INT:
        push_error(stack)
        return
    if op == Token.P_ADD:
        stack[-2].data += stack[-1].data
    elif op == Token.P_SUB:
        stack[-2].data -= stack[-1].data
    elif op == Token.P_MUL:
        stack[-2].data *= stack[-1].data
    elif op == Token.P_REM:
        if stack[-1].data == 0:
            push_error(stack)
            return
        stack[-2].data = div_proper(stack[-2].data, stack[-1].data)[1]
    elif op == Token.P_DIV:
        if stack[-1].data == 0:
            push_error(stack)
            return
        stack[-2].data = div_proper(stack[-2].data, stack[-1].data)[0]
    elif op == Token.P_LESSTHAN:
        stack[-2].type = Token.T_BOOL
        stack[-2].data = (stack[-2].data < stack[-1].data)
    stack.pop()


def p_neg(stack):
    """Primitive negate: arithmetic negation of the top integer."""
    if len(stack) < 1 or stack[-1].type != Token.T_INT:
        push_error(stack)
        return
    stack[-1].data = -stack[-1].data


def p_pop(stack):
    """Primitive pop: discard the top element."""
    if len(stack) < 1:
        push_error(stack)
        return
    stack.pop()


def p_dup(stack):
    """Primitive duplicate: push a copy of the top element."""
    if len(stack) < 1:
        push_error(stack)
        return
    stack.append(stack[-1].copy())


def p_exc(stack):
    """Primitive exchange: swap the top two elements."""
    if len(stack) < 2:
        push_error(stack)
        return
    stack.append(stack.pop(-2))


def p_bind(stack, binds):
    """Primitive bind: bind stack[-2] (a name) to stack[-1] (a value).

    A pair token with a trapped name also counts as a bindable name.
    Rebinding an existing name in this scope is an error.  On success the
    value remains on the stack.
    """
    if len(stack) < 2 or (stack[-2].trap_name is None and stack[-2].type != Token.T_NAME):
        push_error(stack)
        return
    # figure out what name we're binding to
    if stack[-2].trap_name is not None:
        name = stack[-2].trap_name
    else:
        name = stack[-2].data
    if binds.getbind(name) is not None:
        # make sure it's not already in use
        push_error(stack)
        return
    # copy the token into the bindings so later stack mutation can't
    # reach into the bound value
    binds.setbind(name, Token(stack[-1].type, stack[-1].data))
    p_exc(stack)  # swap arguments
    stack.pop()   # pop off name, leaving value on stack


def p_load(stack, binds):
    """Primitive load: read and evaluate a file named by the top string.

    A failed open pushes :false: on top of the path; success pops the
    path, evaluates the file's contents, then pushes :true:.
    """
    if len(stack) < 1 or stack[-1].type != Token.T_STRING:
        push_error(stack)
        return
    try:
        # with-block guarantees the handle is closed even if read() fails
        with open(stack[-1].data, "r") as infile:
            contents = infile.read()
    except IOError:
        stack.append(Token(Token.T_BOOL, False))  # load evaluated as false
        return
    stack.pop()  # pop off path argument
    gram(stack, binds, lex(contents))  # send file contents to lexer/grammar
    stack.append(Token(Token.T_BOOL, True))  # load evaluated as true


def p_binbool(stack, op):
    """Primitive binary boolean functions (and, or)."""
    if len(stack) < 2 or stack[-1].type != Token.T_BOOL or stack[-2].type != Token.T_BOOL:
        push_error(stack)
        return
    if op == Token.P_AND:
        stack[-2].data = stack[-2].data and stack[-1].data
    elif op == Token.P_OR:
        stack[-2].data = stack[-2].data or stack[-1].data
    stack.pop()


def p_not(stack):
    """Primitive not: logical negation of the top boolean."""
    if len(stack) < 1 or stack[-1].type != Token.T_BOOL:
        push_error(stack)
        return
    stack[-1].data = not stack[-1].data


def p_if(stack):
    """Primitive if: `a b cond if` keeps a when cond is true, else b."""
    if len(stack) < 3 or stack[-1].type != Token.T_BOOL:
        push_error(stack)
        return
    if stack[-1].data:
        stack.pop(-2)  # condition true: keep first arg
    else:
        stack.pop(-3)  # condition false: keep second arg
    stack.pop()  # discard the condition itself


def p_apply(stack, binds):
    """Primitive apply: evaluate the lambda on top in a fresh child scope."""
    if len(stack) < 1 or stack[-1].type != Token.T_LAMBDA:
        push_error(stack)
        return
    lexed = stack.pop().data  # grab lambda body
    cbinds = Binds()
    cbinds.next = binds  # chain the new scope to the current one
    gram(stack, cbinds, lexed)


def p_equal(stack):
    """Primitive equality: replace the top two tokens with a boolean."""
    ret = Token(Token.T_BOOL, False)
    if len(stack) < 2:
        push_error(stack)
        return
    if stack[-1].type == stack[-2].type and stack[-1].data == stack[-2].data:
        ret.data = True
    stack.pop()
    stack.pop()
    stack.append(ret)


def p_binds(binds):
    """Primitive binds: print the current scope's binding table."""
    print("===== BIND TABLE: =====")
    for name in binds.table:
        print("BIND %s => %s" % (name, binds.getbind(name)))
    print("=======================")


def p_arraynew(stack):
    """Primitive []: push a new empty array."""
    stack.append(Token(Token.T_ARRAY, list()))


def p_prepend(stack):
    """Primitive prepend: push a copy of array stack[-2] with stack[-1] in front."""
    if len(stack) < 2 or stack[-2].type != Token.T_ARRAY:
        push_error(stack)
        return
    ret = stack[-2].copy()
    ret.data.insert(0, stack[-1])
    stack.pop()
    stack.pop()
    stack.append(ret)


def p_first(stack):
    """Primitive first: replace the top non-empty array with a copy of its head."""
    if len(stack) < 1 or stack[-1].type != Token.T_ARRAY or len(stack[-1].data) < 1:
        push_error(stack)
        return
    stack.append(stack[-1].data[0].copy())
    stack.pop(-2)


def p_rest(stack):
    """Primitive rest: replace the top non-empty array with its tail."""
    if len(stack) < 1 or stack[-1].type != Token.T_ARRAY or len(stack[-1].data) < 1:
        push_error(stack)
        return
    ret = Token(Token.T_ARRAY, list(stack[-1].data[1:]))
    stack.pop()
    stack.append(ret)


def p_length(stack):
    """Primitive length: replace the top string with its character count."""
    if len(stack) < 1 or stack[-1].type != Token.T_STRING:
        push_error(stack)
        return
    ret = Token(Token.T_INT, len(stack[-1].data))
    stack.pop()
    stack.append(ret)


def p_concat(stack):
    """Primitive concat: replace the top two strings with their concatenation."""
    if len(stack) < 2 or stack[-1].type != Token.T_STRING or stack[-2].type != Token.T_STRING:
        push_error(stack)
        return
    ret = Token(Token.T_STRING, stack[-2].data + stack[-1].data)
    stack.pop()
    stack.pop()
    stack.append(ret)


def gram(stack, binds, lexed):
    """The main grammar function: evaluate `lexed` tokens.

    stack - the stack to operate on
    binds - the current binding environment
    lexed - lexed tokens to evaluate
    """
    for token in lexed:
        if token.type == Token.T_PRIM:
            # evaluate primitive operations
            if token.data in [Token.P_ADD, Token.P_SUB, Token.P_MUL,
                              Token.P_DIV, Token.P_REM, Token.P_LESSTHAN]:
                p_binmath(stack, token.data)       # binary math ops
            elif token.data == Token.P_NEG:
                p_neg(stack)                       # negate integer
            elif token.data in [Token.P_AND, Token.P_OR]:
                p_binbool(stack, token.data)       # binary bool ops
            elif token.data == Token.P_NOT:
                p_not(stack)                       # negate bool
            elif token.data == Token.P_BINDS:
                p_binds(binds)                     # show bindings
            elif token.data == Token.P_BIND:
                p_bind(stack, binds)               # create binding
            elif token.data == Token.P_LOAD:
                p_load(stack, binds)               # load file
            elif token.data == Token.P_POP:
                p_pop(stack)                       # remove top element
            elif token.data == Token.P_EXC:
                p_exc(stack)                       # exchange top two
            elif token.data == Token.P_DUP:
                p_dup(stack)                       # duplicate top element
            elif token.data == Token.P_IF:
                p_if(stack)                        # binary conditional
            elif token.data == Token.P_APPLY:
                p_apply(stack, binds)              # apply closure
            elif token.data == Token.P_QUIT:
                exit(0)                            # quit
            elif token.data == Token.P_EQUAL:
                p_equal(stack)                     # equality
            elif token.data == Token.P_ARRAYNEW:
                p_arraynew(stack)
            elif token.data == Token.P_PREPEND:
                p_prepend(stack)
            elif token.data == Token.P_FIRST:
                p_first(stack)
            elif token.data == Token.P_REST:
                p_rest(stack)
            elif token.data == Token.P_LENGTH:
                p_length(stack)
            elif token.data == Token.P_CONCAT:
                p_concat(stack)
        elif token.type == Token.T_NAME:
            # evaluate name bindings
            bindval = binds.getbind_deep(token.data)
            if bindval is None:
                stack.append(token)  # unbound names go on the stack as-is
            else:
                bindcopy = bindval.copy()
                bindcopy.pair = True  # flag this as a pair
                if binds.getbind(token.data) is None:
                    # non-local binding: trap the name for later rebinding
                    bindcopy.trap_name = token.data
                else:
                    bindcopy.trap_name = None
                stack.append(bindcopy)
        else:
            # push all other values onto stack
            stack.append(token)


################################################################################
# MAIN LOOP
################################################################################

def repl():
    """Read-eval-print loop: evaluate each input line, then show the stack."""
    stack = list()
    binds = Binds()  # global binding scope has no parent link
    binds.table["TestBind00"] = Token(Token.T_INT, 42)
    while True:
        line = input("repl> ")         # get some input
        gram(stack, binds, lex(line))  # apply grammar
        for token in reversed(stack):  # show new stack, top first
            print(token)