Lecture 7 — 2020-02-11
Parsing
This lecture is written in literate Haskell; you can download the raw source.
With apologies for the lack of description, here’s the recursive descent parser discussed in class.
import Data.Char
type VarName = String
data AExp =
Var VarName
| Num Int
| Plus AExp AExp
| Times AExp AExp
| Neg AExp
| Div AExp AExp
deriving (Eq, Show)
data Token =
TId String
| TNum Int
| TPlus
| TMinus
| TTimes
| TDiv
| TLParen
| TRParen
deriving (Eq, Show)
lexer :: String -> [Token]
lexer [] = []
lexer (w:s) | isSpace w = lexer (dropWhile isSpace s)
lexer ('+':s) = TPlus:lexer s
lexer ('-':s) = TMinus:lexer s
lexer ('*':s) = TTimes:lexer s
lexer ('/':s) = TDiv:lexer s
lexer ('(':s) = TLParen:lexer s
lexer (')':s) = TRParen:lexer s
lexer s | isAlpha (head s) =
let (id,s') = span isAlphaNum s in
TId id:lexer s'
lexer s | isDigit (head s) =
let (n,s') = span isDigit s in
TNum (read n :: Int):lexer s'
lexer (n:_) = error $ "Lexer error: unexpected character " ++ [n]
parseATerm :: [Token] -> Either String (AExp,[Token])
parseATerm ts =
case parseAFactor ts of
Right (f,ts) -> parseATerm' f ts
Left e -> Left e
parseATerm' :: AExp -> [Token] -> Either String (AExp,[Token])
parseATerm' lhs [] = Right (lhs, [])
parseATerm' lhs (TPlus:ts) = parseATerm'' (Plus lhs) ts
parseATerm' lhs (TMinus:ts) = parseATerm'' (Plus lhs . Neg) ts
parseATerm' lhs ts = Right (lhs,ts)
parseATerm'' :: (AExp -> AExp) -> [Token] -> Either String (AExp,[Token])
parseATerm'' mk [] = Left $ "expected term after + or -"
parseATerm'' mk ts =
case parseAFactor ts of
Right (e,ts) -> parseATerm' (mk e) ts
Left e -> Left e
parseAFactor :: [Token] -> Either String (AExp,[Token])
parseAFactor ts =
case parseANeg ts of
Right (lhs,ts) -> parseAFactor' lhs ts
Left e -> Left e
parseAFactor' :: AExp -> [Token] -> Either String (AExp,[Token])
parseAFactor' lhs (TTimes:ts) = parseAFactor'' (Times lhs) ts
parseAFactor' lhs (TDiv:ts) = parseAFactor'' (Div lhs) ts
parseAFactor' lhs ts = Right (lhs, ts)
parseAFactor'' :: (AExp -> AExp) -> [Token] -> Either String (AExp,[Token])
parseAFactor'' mk [] = Left $ "expected term after * or /"
parseAFactor'' mk ts =
case parseANeg ts of
Right (e,ts) -> parseAFactor' (mk e) ts
Left e -> Left e
parseANeg :: [Token] -> Either String (AExp, [Token])
parseANeg (TMinus:ts) =
case parseAAtom ts of
Right (e,ts') -> Right (Neg e, ts')
Left e -> Left e
parseANeg ts = parseAAtom ts
parseAAtom :: [Token] -> Either String (AExp, [Token])
parseAAtom (TNum n:ts) = Right (Num n, ts)
parseAAtom (TId id:ts) = Right (Var id, ts)
parseAAtom (TLParen:ts) =
case parseATerm ts of
Right (e,TRParen:ts') -> Right (e,ts')
Right (_,ts) -> Left $ "expected right paren, found: " ++ show ts
Left e -> Left e
parseAAtom ts = Left $ "expected number, identifier, or parens, found: " ++ show ts
parseAExp :: String -> Either String AExp
parseAExp s =
case parseATerm (lexer s) of
Left e -> Left e
Right (e, []) -> Right e
Right (_, ts) -> Left $ "expected EOF, found " ++ show ts