(* Allows more print depth.                                         *)
Compiler.Control.Print.printDepth:= 100;

datatype token = ID of string | NUM of int | Plus | Minus | Mult | Div | Neg
  | LParen | RParen | EOF;

signature PCFLEXER =
sig
    val lex : string -> token list
    val lexstr : string -> token list
end


structure PCFlexer: PCFLEXER =
struct
    open TextIO;

    (* Return true if c is a letter or digit *)
    fun alphanum c = (Char.isAlpha c) orelse (Char.isDigit c)

    (* Extracts consecutive alphanumeric characters from the input to
       build up an identifier.  Returns a tuple containing the next
       identifier in the input and the input left over after removing
       the identifier.
       Precondition:  The initial character of the identifier has
         already been found and is passed in in the second parameter.
       Parameter 1:  Input to extract the identifier from
       Parameter 2:  The characters found so far in the identifier.
    *)
    fun getid nil id = (id, nil)
      | getid (s as c::rest) id = 
                if (alphanum c) then getid rest (id ^ (str(c)))
                else (id, s)

    (* Extracts consecutive digits from the input to
       build up an integer.  Returns a tuple containing the next
       integer in the input and the input left over after removing
       the integer.
       Precondition:  The initial digit of the integer has
         already been found and is passed in in the second parameter.
       Parameter 1:  Input to extract the integer from
       Parameter 2:  The digits found so far in the integer.
    *)
    fun getnum nil num = (num, nil)
      | getnum (s as c::rest) num =
                if (Char.isDigit c) then 
                    getnum rest (num*10 + ((ord c)-ord #"0"))
                else (num, s)
                    
    (* Return the list of tokens found in the input.
       Parameter:  A character list to tokenize
    *)
    fun gettokens nil = [EOF]
      | gettokens (#"+"::rest) = Plus::gettokens rest
      | gettokens (#"-"::rest) = Minus::gettokens rest
      | gettokens (#"*"::rest) = Mult::gettokens rest
      | gettokens (#"/"::rest) = Div::gettokens rest
      | gettokens (#"~"::rest) = Neg::gettokens rest
      | gettokens (#"("::rest) = LParen::gettokens rest
      | gettokens (#")"::rest) = RParen::gettokens rest
      | gettokens (c::rest) =
          if Char.isSpace c then
	    (* Recurse to skip white space *)
	    gettokens rest

	  else if Char.isAlpha c then
	    (* Return keyword or identifier *)
	    let val (id, remainder) = (getid rest (str c))
	    in
	      (ID id)::gettokens remainder
            end

          else if (Char.isDigit c) then
	    (* Return number *)
            let
	      val (num, remainder) = getnum rest ((ord c) - (ord #"0"))
	    in
	      (NUM (num))::gettokens remainder
	    end

          else
            (print ("Skipping illegal character "^(str c) ^"."); 
             gettokens rest)

    (* Returns the list of tokens found in a string. *)
    fun lexstr s = gettokens (explode s)
                
    (* Returns the list of tokens found in a file.
       Parameter:  filename *)
    fun lex file = 
        let
	    val strm = openIn file
            val filecontents = explode (input strm)
        in 
            (closeIn strm; gettokens filecontents)
        end
end;