(* Allows more print depth. *) Compiler.Control.Print.printDepth:= 100; datatype token = ID of string | NUM of int | Plus | Minus | Mult | Div | Neg | LParen | RParen | EOF; signature PCFLEXER = sig val lex : string -> token list val lexstr : string -> token list end structure PCFlexer: PCFLEXER = struct open TextIO; (* Return true if c is a letter or digit *) fun alphanum c = (Char.isAlpha c) orelse (Char.isDigit c) (* Extracts consecutive alphanumeric characters from the input to build up an identifier. Returns a tuple containing the next identifier in the input and the input left over after removing the identifier. Precondition: The initial character of the identifier has already been found and is passed in in the second parameter. Parameter 1: Input to extract the identifier from Parameter 2: The characters found so far in the identifier. *) fun getid nil id = (id, nil) | getid (s as c::rest) id = if (alphanum c) then getid rest (id ^ (str(c))) else (id, s) (* Extracts consecutive digits from the input to build up an integer. Returns a tuple containing the next integer in the input and the input left over after removing the integer. Precondition: The initial digit of the integer has already been found and is passed in in the second parameter. Parameter 1: Input to extract the integer from Parameter 2: The digits found so far in the integer. *) fun getnum nil num = (num, nil) | getnum (s as c::rest) num = if (Char.isDigit c) then getnum rest (num*10 + ((ord c)-ord #"0")) else (num, s) (* Return the list of tokens found in the input. Parameter: A character list to tokenize *) fun gettokens nil = [EOF] | gettokens (#"+"::rest) = Plus::gettokens rest | gettokens (#"-"::rest) = Minus::gettokens rest | gettokens (#"*"::rest) = Mult::gettokens rest | gettokens (#"/"::rest) = Div::gettokens rest | gettokens (#"~"::rest) = Neg::gettokens rest | gettokens (#"("::rest) = LParen::gettokens rest | gettokens (#")"::rest) = RParen::gettokens rest | gettokens (c::rest) = if Char.isSpace c then (* Recurse to skip white space *) gettokens rest else if Char.isAlpha c then (* Return keyword or identifier *) let val (id, remainder) = (getid rest (str c)) in (ID id)::gettokens remainder end else if (Char.isDigit c) then (* Return number *) let val (num, remainder) = getnum rest ((ord c) - (ord #"0")) in (NUM (num))::gettokens remainder end else (print ("Skipping illegal character "^(str c) ^"."); gettokens rest) (* Returns the list of tokens found in a string. *) fun lexstr s = gettokens (explode s) (* Returns the list of tokens found in a file. Parameter: filename *) fun lex file = let val strm = openIn file val filecontents = explode (input strm) in (closeIn strm; gettokens filecontents) end end;