ImpParserLexing and Parsing in Coq
Definition isWhite (c : ascii) : bool :=
let n := nat_of_ascii c in
orb (orb (n =? 32) (* space *)
(n =? 9)) (* tab *)
(orb (n =? 10) (* linefeed *)
(n =? 13)). (* Carriage return. *)
Notation "x '<=?' y" := (x <=? y)
(at level 70, no associativity) : nat_scope.
Definition isLowerAlpha (c : ascii) : bool :=
let n := nat_of_ascii c in
andb (97 <=? n) (n <=? 122).
Definition isAlpha (c : ascii) : bool :=
let n := nat_of_ascii c in
orb (andb (65 <=? n) (n <=? 90))
(andb (97 <=? n) (n <=? 122)).
Definition isDigit (c : ascii) : bool :=
let n := nat_of_ascii c in
andb (48 <=? n) (n <=? 57).
Inductive chartype := white | alpha | digit | other.
Definition classifyChar (c : ascii) : chartype :=
if isWhite c then
white
else if isAlpha c then
alpha
else if isDigit c then
digit
else
other.
Fixpoint list_of_string (s : string) : list ascii :=
match s with
| EmptyString ⇒ []
| String c s ⇒ c :: (list_of_string s)
end.
Fixpoint string_of_list (xs : list ascii) : string :=
fold_right String EmptyString xs.
Definition token := string.
Fixpoint tokenize_helper (cls : chartype) (acc xs : list ascii)
: list (list ascii) :=
let tk := match acc with [] ⇒ [] | _::_ ⇒ [rev acc] end in
match xs with
| [] ⇒ tk
| (x::xs') ⇒
match cls, classifyChar x, x with
| _, _, "(" ⇒
tk ++ ["("]::(tokenize_helper other [] xs')
| _, _, ")" ⇒
tk ++ [")"]::(tokenize_helper other [] xs')
| _, white, _ ⇒
tk ++ (tokenize_helper white [] xs')
| alpha,alpha,x ⇒
tokenize_helper alpha (x::acc) xs'
| digit,digit,x ⇒
tokenize_helper digit (x::acc) xs'
| other,other,x ⇒
tokenize_helper other (x::acc) xs'
| _,tp,x ⇒
tk ++ (tokenize_helper tp [x] xs')
end
end %char.
Definition tokenize (s : string) : list string :=
map string_of_list (tokenize_helper white [] (list_of_string s)).
Example tokenize_ex1 :
tokenize "abc12=3 223*(3+(a+c))" %string
= ["abc"; "12"; "="; "3"; "223";
"*"; "("; "3"; "+"; "(";
"a"; "+"; "c"; ")"; ")"]%string.
Proof. reflexivity. Qed.
Inductive optionE (X:Type) : Type :=
| SomeE (x : X)
| NoneE (s : string).
Arguments SomeE {X}.
Arguments NoneE {X}.
| SomeE (x : X)
| NoneE (s : string).
Arguments SomeE {X}.
Arguments NoneE {X}.
Some syntactic sugar to make writing nested match-expressions on
optionE more convenient.
Notation "' p <- e1 ;; e2"
:= (match e1 with
| SomeE p ⇒ e2
| NoneE err ⇒ NoneE err
end)
(right associativity, p pattern, at level 60, e1 at next level).
Notation "'TRY' ' p <- e1 ;; e2 'OR' e3"
:= (match e1 with
| SomeE p ⇒ e2
| NoneE _ ⇒ e3
end)
(right associativity, p pattern,
at level 60, e1 at next level, e2 at next level).
:= (match e1 with
| SomeE p ⇒ e2
| NoneE err ⇒ NoneE err
end)
(right associativity, p pattern, at level 60, e1 at next level).
Notation "'TRY' ' p <- e1 ;; e2 'OR' e3"
:= (match e1 with
| SomeE p ⇒ e2
| NoneE _ ⇒ e3
end)
(right associativity, p pattern,
at level 60, e1 at next level, e2 at next level).
Open Scope string_scope.
Definition parser (T : Type) :=
list token → optionE (T * list token).
Fixpoint many_helper {T} (p : parser T) acc steps xs :=
match steps, p xs with
| 0, _ ⇒
NoneE "Too many recursive calls"
| _, NoneE _ ⇒
SomeE ((rev acc), xs)
| S steps', SomeE (t, xs') ⇒
many_helper p (t :: acc) steps' xs'
end.
A (step-indexed) parser that expects zero or more ps:
Fixpoint many {T} (p : parser T) (steps : nat) : parser (list T) :=
many_helper p [] steps.
many_helper p [] steps.
A parser that expects a given token, followed by p:
Definition firstExpect {T} (t : token) (p : parser T)
: parser T :=
fun xs ⇒ match xs with
| x::xs' ⇒
if string_dec x t
then p xs'
else NoneE ("expected '" ++ t ++ "'.")
| [] ⇒
NoneE ("expected '" ++ t ++ "'.")
end.
: parser T :=
fun xs ⇒ match xs with
| x::xs' ⇒
if string_dec x t
then p xs'
else NoneE ("expected '" ++ t ++ "'.")
| [] ⇒
NoneE ("expected '" ++ t ++ "'.")
end.
A parser that expects a particular token:
Definition expect (t : token) : parser unit :=
firstExpect t (fun xs ⇒ SomeE (tt, xs)).
firstExpect t (fun xs ⇒ SomeE (tt, xs)).
Definition parseIdentifier (xs : list token)
: optionE (string * list token) :=
match xs with
| [] ⇒ NoneE "Expected identifier"
| x::xs' ⇒
if forallb isLowerAlpha (list_of_string x) then
SomeE (x, xs')
else
NoneE ("Illegal identifier:'" ++ x ++ "'")
end.
: optionE (string * list token) :=
match xs with
| [] ⇒ NoneE "Expected identifier"
| x::xs' ⇒
if forallb isLowerAlpha (list_of_string x) then
SomeE (x, xs')
else
NoneE ("Illegal identifier:'" ++ x ++ "'")
end.
Numbers:
Definition parseNumber (xs : list token)
: optionE (nat * list token) :=
match xs with
| [] ⇒ NoneE "Expected number"
| x::xs' ⇒
if forallb isDigit (list_of_string x) then
SomeE (fold_left
(fun n d ⇒
10 * n + (nat_of_ascii d -
nat_of_ascii "0"%char))
(list_of_string x)
0,
xs')
else
NoneE "Expected number"
end.
: optionE (nat * list token) :=
match xs with
| [] ⇒ NoneE "Expected number"
| x::xs' ⇒
if forallb isDigit (list_of_string x) then
SomeE (fold_left
(fun n d ⇒
10 * n + (nat_of_ascii d -
nat_of_ascii "0"%char))
(list_of_string x)
0,
xs')
else
NoneE "Expected number"
end.
Parse arithmetic expressions
Fixpoint parsePrimaryExp (steps:nat)
(xs : list token)
: optionE (aexp * list token) :=
match steps with
| 0 ⇒ NoneE "Too many recursive calls"
| S steps' ⇒
TRY ' (i, rest) <- parseIdentifier xs ;;
SomeE (AId i, rest)
OR
TRY ' (n, rest) <- parseNumber xs ;;
SomeE (ANum n, rest)
OR
' (e, rest) <- firstExpect "(" (parseSumExp steps') xs ;;
' (u, rest') <- expect ")" rest ;;
SomeE (e,rest')
end
with parseProductExp (steps:nat)
(xs : list token) :=
match steps with
| 0 ⇒ NoneE "Too many recursive calls"
| S steps' ⇒
' (e, rest) <- parsePrimaryExp steps' xs ;;
' (es, rest') <- many (firstExpect "*" (parsePrimaryExp steps'))
steps' rest ;;
SomeE (fold_left AMult es e, rest')
end
with parseSumExp (steps:nat) (xs : list token) :=
match steps with
| 0 ⇒ NoneE "Too many recursive calls"
| S steps' ⇒
' (e, rest) <- parseProductExp steps' xs ;;
' (es, rest') <-
many (fun xs ⇒
TRY ' (e,rest') <-
firstExpect "+"
(parseProductExp steps') xs ;;
SomeE ( (true, e), rest')
OR
' (e, rest') <-
firstExpect "-"
(parseProductExp steps') xs ;;
SomeE ( (false, e), rest'))
steps' rest ;;
SomeE (fold_left (fun e0 term ⇒
match term with
| (true, e) ⇒ APlus e0 e
| (false, e) ⇒ AMinus e0 e
end)
es e,
rest')
end.
Definition parseAExp := parseSumExp.
(xs : list token)
: optionE (aexp * list token) :=
match steps with
| 0 ⇒ NoneE "Too many recursive calls"
| S steps' ⇒
TRY ' (i, rest) <- parseIdentifier xs ;;
SomeE (AId i, rest)
OR
TRY ' (n, rest) <- parseNumber xs ;;
SomeE (ANum n, rest)
OR
' (e, rest) <- firstExpect "(" (parseSumExp steps') xs ;;
' (u, rest') <- expect ")" rest ;;
SomeE (e,rest')
end
with parseProductExp (steps:nat)
(xs : list token) :=
match steps with
| 0 ⇒ NoneE "Too many recursive calls"
| S steps' ⇒
' (e, rest) <- parsePrimaryExp steps' xs ;;
' (es, rest') <- many (firstExpect "*" (parsePrimaryExp steps'))
steps' rest ;;
SomeE (fold_left AMult es e, rest')
end
with parseSumExp (steps:nat) (xs : list token) :=
match steps with
| 0 ⇒ NoneE "Too many recursive calls"
| S steps' ⇒
' (e, rest) <- parseProductExp steps' xs ;;
' (es, rest') <-
many (fun xs ⇒
TRY ' (e,rest') <-
firstExpect "+"
(parseProductExp steps') xs ;;
SomeE ( (true, e), rest')
OR
' (e, rest') <-
firstExpect "-"
(parseProductExp steps') xs ;;
SomeE ( (false, e), rest'))
steps' rest ;;
SomeE (fold_left (fun e0 term ⇒
match term with
| (true, e) ⇒ APlus e0 e
| (false, e) ⇒ AMinus e0 e
end)
es e,
rest')
end.
Definition parseAExp := parseSumExp.
Parsing boolean expressions:
Fixpoint parseAtomicExp (steps:nat)
(xs : list token) :=
match steps with
| 0 ⇒ NoneE "Too many recursive calls"
| S steps' ⇒
TRY ' (u,rest) <- expect "true" xs ;;
SomeE (BTrue,rest)
OR
TRY ' (u,rest) <- expect "false" xs ;;
SomeE (BFalse,rest)
OR
TRY ' (e,rest) <- firstExpect "¬"
(parseAtomicExp steps')
xs ;;
SomeE (BNot e, rest)
OR
TRY ' (e,rest) <- firstExpect "("
(parseConjunctionExp steps')
xs ;;
' (u,rest') <- expect ")" rest ;;
SomeE (e, rest')
OR
' (e, rest) <- parseProductExp steps' xs ;;
TRY ' (e', rest') <- firstExpect "="
(parseAExp steps') rest ;;
SomeE (BEq e e', rest')
OR
TRY ' (e', rest') <- firstExpect "≤"
(parseAExp steps') rest ;;
SomeE (BLe e e', rest')
OR
NoneE "Expected '=' or '≤' after arithmetic expression"
end
with parseConjunctionExp (steps:nat)
(xs : list token) :=
match steps with
| 0 ⇒ NoneE "Too many recursive calls"
| S steps' ⇒
' (e, rest) <- parseAtomicExp steps' xs ;;
' (es, rest') <- many (firstExpect "&&"
(parseAtomicExp steps'))
steps' rest ;;
SomeE (fold_left BAnd es e, rest')
end.
Definition parseBExp := parseConjunctionExp.
Check parseConjunctionExp.
Definition testParsing {X : Type}
(p : nat →
list token →
optionE (X * list token))
(s : string) :=
let t := tokenize s in
p 100 t.
(*
Eval compute in
testParsing parseProductExp "x.y.(x.x).x".
Eval compute in
testParsing parseConjunctionExp "~(x=x&&x*x<=(x*x)*x)&&x=x".
*)
(xs : list token) :=
match steps with
| 0 ⇒ NoneE "Too many recursive calls"
| S steps' ⇒
TRY ' (u,rest) <- expect "true" xs ;;
SomeE (BTrue,rest)
OR
TRY ' (u,rest) <- expect "false" xs ;;
SomeE (BFalse,rest)
OR
TRY ' (e,rest) <- firstExpect "¬"
(parseAtomicExp steps')
xs ;;
SomeE (BNot e, rest)
OR
TRY ' (e,rest) <- firstExpect "("
(parseConjunctionExp steps')
xs ;;
' (u,rest') <- expect ")" rest ;;
SomeE (e, rest')
OR
' (e, rest) <- parseProductExp steps' xs ;;
TRY ' (e', rest') <- firstExpect "="
(parseAExp steps') rest ;;
SomeE (BEq e e', rest')
OR
TRY ' (e', rest') <- firstExpect "≤"
(parseAExp steps') rest ;;
SomeE (BLe e e', rest')
OR
NoneE "Expected '=' or '≤' after arithmetic expression"
end
with parseConjunctionExp (steps:nat)
(xs : list token) :=
match steps with
| 0 ⇒ NoneE "Too many recursive calls"
| S steps' ⇒
' (e, rest) <- parseAtomicExp steps' xs ;;
' (es, rest') <- many (firstExpect "&&"
(parseAtomicExp steps'))
steps' rest ;;
SomeE (fold_left BAnd es e, rest')
end.
Definition parseBExp := parseConjunctionExp.
Check parseConjunctionExp.
Definition testParsing {X : Type}
(p : nat →
list token →
optionE (X * list token))
(s : string) :=
let t := tokenize s in
p 100 t.
(*
Eval compute in
testParsing parseProductExp "x.y.(x.x).x".
Eval compute in
testParsing parseConjunctionExp "~(x=x&&x*x<=(x*x)*x)&&x=x".
*)
Parsing commands:
Fixpoint parseSimpleCommand (steps:nat)
(xs : list token) :=
match steps with
| 0 ⇒ NoneE "Too many recursive calls"
| S steps' ⇒
TRY ' (u, rest) <- expect "SKIP" xs ;;
SomeE (SKIP%imp, rest)
OR
TRY ' (e,rest) <-
firstExpect "TEST"
(parseBExp steps') xs ;;
' (c,rest') <-
firstExpect "THEN"
(parseSequencedCommand steps') rest ;;
' (c',rest'') <-
firstExpect "ELSE"
(parseSequencedCommand steps') rest' ;;
' (tt,rest''') <-
expect "END" rest'' ;;
SomeE(TEST e THEN c ELSE c' FI%imp, rest''')
OR
TRY ' (e,rest) <-
firstExpect "WHILE"
(parseBExp steps') xs ;;
' (c,rest') <-
firstExpect "DO"
(parseSequencedCommand steps') rest ;;
' (u,rest'') <-
expect "END" rest' ;;
SomeE(WHILE e DO c END%imp, rest'')
OR
TRY ' (i, rest) <- parseIdentifier xs ;;
' (e, rest') <- firstExpect "::=" (parseAExp steps') rest ;;
SomeE ((i ::= e)%imp, rest')
OR
NoneE "Expecting a command"
end
with parseSequencedCommand (steps:nat)
(xs : list token) :=
match steps with
| 0 ⇒ NoneE "Too many recursive calls"
| S steps' ⇒
' (c, rest) <- parseSimpleCommand steps' xs ;;
TRY ' (c', rest') <-
firstExpect ";;"
(parseSequencedCommand steps') rest ;;
SomeE ((c ;; c')%imp, rest')
OR
SomeE (c, rest)
end.
Definition bignumber := 1000.
Definition parse (str : string) : optionE com :=
let tokens := tokenize str in
match parseSequencedCommand bignumber tokens with
| SomeE (c, []) ⇒ SomeE c
| SomeE (_, t::_) ⇒ NoneE ("Trailing tokens remaining: " ++ t)
| NoneE err ⇒ NoneE err
end.
(xs : list token) :=
match steps with
| 0 ⇒ NoneE "Too many recursive calls"
| S steps' ⇒
TRY ' (u, rest) <- expect "SKIP" xs ;;
SomeE (SKIP%imp, rest)
OR
TRY ' (e,rest) <-
firstExpect "TEST"
(parseBExp steps') xs ;;
' (c,rest') <-
firstExpect "THEN"
(parseSequencedCommand steps') rest ;;
' (c',rest'') <-
firstExpect "ELSE"
(parseSequencedCommand steps') rest' ;;
' (tt,rest''') <-
expect "END" rest'' ;;
SomeE(TEST e THEN c ELSE c' FI%imp, rest''')
OR
TRY ' (e,rest) <-
firstExpect "WHILE"
(parseBExp steps') xs ;;
' (c,rest') <-
firstExpect "DO"
(parseSequencedCommand steps') rest ;;
' (u,rest'') <-
expect "END" rest' ;;
SomeE(WHILE e DO c END%imp, rest'')
OR
TRY ' (i, rest) <- parseIdentifier xs ;;
' (e, rest') <- firstExpect "::=" (parseAExp steps') rest ;;
SomeE ((i ::= e)%imp, rest')
OR
NoneE "Expecting a command"
end
with parseSequencedCommand (steps:nat)
(xs : list token) :=
match steps with
| 0 ⇒ NoneE "Too many recursive calls"
| S steps' ⇒
' (c, rest) <- parseSimpleCommand steps' xs ;;
TRY ' (c', rest') <-
firstExpect ";;"
(parseSequencedCommand steps') rest ;;
SomeE ((c ;; c')%imp, rest')
OR
SomeE (c, rest)
end.
Definition bignumber := 1000.
Definition parse (str : string) : optionE com :=
let tokens := tokenize str in
match parseSequencedCommand bignumber tokens with
| SomeE (c, []) ⇒ SomeE c
| SomeE (_, t::_) ⇒ NoneE ("Trailing tokens remaining: " ++ t)
| NoneE err ⇒ NoneE err
end.
Example eg1 : parse "
TEST x = y + 1 + 2 - y * 6 + 3 THEN
x ::= x * 1;;
y ::= 0
ELSE
SKIP
END "
=
SomeE (
TEST "x" = "y" + 1 + 2 - "y" * 6 + 3 THEN
"x" ::= "x" * 1;;
"y" ::= 0
ELSE
SKIP
FI)%imp.
Proof. cbv. reflexivity. Qed.
Example eg2 : parse "
SKIP;;
z::=x*y*(x*x);;
WHILE x=x DO
TEST (z ≤ z*z) && ~(x = 2) THEN
x ::= z;;
y ::= z
ELSE
SKIP
END;;
SKIP
END;;
x::=z "
=
SomeE (
SKIP;;
"z" ::= "x" * "y" * ("x" * "x");;
WHILE "x" = "x" DO
TEST ("z" ≤ "z" * "z") && ~("x" = 2) THEN
"x" ::= "z";;
"y" ::= "z"
ELSE
SKIP
FI;;
SKIP
END;;
"x" ::= "z")%imp.
Proof. cbv. reflexivity. Qed.