Lecture 7 — 2020-02-11

Parsing

This lecture is written in literate Haskell; you can download the raw source.

With apologies for the lack of description, here’s the recursive descent parser discussed in class.

import Data.Char

type VarName = String

data AExp =
    Var VarName
  | Num Int
  | Plus AExp AExp
  | Times AExp AExp
  | Neg AExp
  | Div AExp AExp
  deriving (Eq, Show)

data Token =
    TId String
  | TNum Int
  | TPlus
  | TMinus
  | TTimes
  | TDiv
  | TLParen
  | TRParen
  deriving (Eq, Show)

lexer :: String -> [Token]
lexer [] = []
lexer (w:s) | isSpace w = lexer (dropWhile isSpace s)
lexer ('+':s) = TPlus:lexer s
lexer ('-':s) = TMinus:lexer s
lexer ('*':s) = TTimes:lexer s
lexer ('/':s) = TDiv:lexer s
lexer ('(':s) = TLParen:lexer s
lexer (')':s) = TRParen:lexer s
lexer s | isAlpha (head s) =
  let (id,s') = span isAlphaNum s in
  TId id:lexer s'
lexer s | isDigit (head s) =
  let (n,s') = span isDigit s in
  TNum (read n :: Int):lexer s'
lexer (n:_) = error $ "Lexer error: unexpected character " ++ [n]

parseATerm :: [Token] -> Either String (AExp,[Token])
parseATerm ts = 
  case parseAFactor ts of
    Right (f,ts) -> parseATerm' f ts
    Left e -> Left e
      
parseATerm' :: AExp -> [Token] -> Either String (AExp,[Token])
parseATerm' lhs [] = Right (lhs, [])
parseATerm' lhs (TPlus:ts) = parseATerm'' (Plus lhs) ts
parseATerm' lhs (TMinus:ts) = parseATerm'' (Plus lhs . Neg) ts
parseATerm' lhs ts = Right (lhs,ts)

parseATerm'' :: (AExp -> AExp) -> [Token] -> Either String (AExp,[Token])
parseATerm'' mk [] = Left $ "expected term after + or -"
parseATerm'' mk ts = 
  case parseAFactor ts of
    Right (e,ts) -> parseATerm' (mk e) ts
    Left e -> Left e

parseAFactor :: [Token] -> Either String (AExp,[Token])
parseAFactor ts =
  case parseANeg ts of
    Right (lhs,ts) -> parseAFactor' lhs ts
    Left e -> Left e
    
parseAFactor' :: AExp -> [Token] -> Either String (AExp,[Token])
parseAFactor' lhs (TTimes:ts) = parseAFactor'' (Times lhs) ts
parseAFactor' lhs (TDiv:ts) = parseAFactor'' (Div lhs) ts
parseAFactor' lhs ts = Right (lhs, ts)

parseAFactor'' :: (AExp -> AExp) -> [Token] -> Either String (AExp,[Token])
parseAFactor'' mk [] = Left $ "expected term after * or /"
parseAFactor'' mk ts = 
  case parseANeg ts of
    Right (e,ts) -> parseAFactor' (mk e) ts
    Left e -> Left e

parseANeg :: [Token] -> Either String (AExp, [Token])
parseANeg (TMinus:ts) = 
  case parseAAtom ts of
    Right (e,ts') -> Right (Neg e, ts')
    Left e -> Left e
parseANeg ts = parseAAtom ts

parseAAtom :: [Token] -> Either String (AExp, [Token])
parseAAtom (TNum n:ts) = Right (Num n, ts)
parseAAtom (TId id:ts) = Right (Var id, ts)
parseAAtom (TLParen:ts) =
  case parseATerm ts of
    Right (e,TRParen:ts') -> Right (e,ts')
    Right (_,ts) -> Left $ "expected right paren, found: " ++ show ts
    Left e -> Left e
parseAAtom ts = Left $ "expected number, identifier, or parens, found: " ++ show ts

parseAExp :: String -> Either String AExp
parseAExp s =
  case parseATerm (lexer s) of
    Left e -> Left e
    Right (e, []) -> Right e
    Right (_, ts) -> Left $ "expected EOF, found " ++ show ts