Simple, efficient, sound and complete combinator parsing for all context-free grammars, using an oracle
The P3 combinator parsing library
(Talk at OCaml 2014)

Dr Tom Ridge

2014-09-05

Main features

This talk

Example

(* Grammar: E -> E E E | "1" | eps *)
let rec parse_E = (fun i -> (mkntparser "E" (
  (parse_E ***> parse_E ***> parse_E)
  |||| (a "1")
  |||| (a "")))
  i)

What does complete mean?

Example: parse trees

let rec parse_E = (fun i -> mkntparser "E" (
  ((parse_E ***> parse_E ***> parse_E) >>>> (fun (x,(y,z)) -> `Node(x,y,z)))
  |||| ((a "1") >>>> (fun _ -> `LF("1")))
  |||| ((a "") >>>> (fun _ -> `LF(""))))
  i)
Input size|Number of results (good parse trees)
0         |1                                   
1         |1                                   
2         |3  
3         |19
4         |150                                
...
19        |441152315040444150                  

Conceptual distinction: parsing is not applying actions

Example: counting

(* Grammar: E -> E E E | "1" | eps *)
let rec parse_E = (fun i -> (mkntparser "E" (
  ((parse_E ***> parse_E ***> parse_E) >>>> (fun (x,(y,z)) -> x+y+z))
  |||| ((a "1") >>>> (fun _ -> 1))
  |||| ((a "") >>>> (fun _ -> 0))))
  i)
Input size|Number of results
0         |1                                   
1         |1                                   
2         |1                                   
4         |1                                 
...       
19        |1

Example: memoized counting (YCDTWAOP)

let rec parse_E = 
  let tbl_E = MyHashtbl.create 100 in
  (fun i -> memo_p3 tbl_E (mkntparser "E" (
  ((parse_E ***> parse_E ***> parse_E) >>>> (fun (x,(y,z)) -> x+y+z))
  |||| ((a "1") >>>> (fun _ -> 1))
  |||| ((a "") >>>> (fun _ -> 0))))
  i)

Example: disambiguation

E -> E "+" E | E "-" E | ...
<Exp> ::= <Exp> + <Term> | <Exp> - <Term> | <Term>

<Term> ::= <Term> * <Factor> | <Term> / <Factor> | <Factor>

<Factor> ::= x | y | ... | ( <Exp> ) | - <Factor>
%left PLUS MINUS
%left MULTIPLY DIVIDE
%left NEG /* negation -- unary minus */
%right CARET /* exponentiation */

Example: disambiguation (YCDTWAOP)

E -> E "+" E           {{ fun (x,(_,y)) -> (match x,y with 
                          | (Some(Plus _),Some _) -> None (* not (x+y)+z ! *)
                          | (Some x,Some y) -> Some(Plus(x,y)) 
                          | _ -> None) }}

  | E "*" E            {{ fun (x,(_,y)) -> (match x,y with
                          | (Some (Times _),Some _) -> None (* not (x*y)*z ! *)
                          | (Some (Plus _),Some _) -> None (* not (x+y)*z ! *)
                          | (Some x,Some(Plus _)) -> None (* not x*(y+z) ! <-- *)
                          | (Some x,Some y) -> Some(Times(x,y)) 
                          | _ -> None) }}
  | ?num?              {{ fun s -> Some(Num(int_of_string (content s))) }}

Example: modular combination of parsers (YCDTWAOP probably)

(* w is (possibly zero length) whitespace *)
let ( ***> ) x y = (x ***> w ***> y) >>>> (fun (x,(_,y)) -> (x,y)) 

let rec parse_A h = (fun i -> mkntparser "arithexp" (
  (((parse_A h) ***> (a "+") ***> (parse_A h))
        >>>> (fun (e1,(_,e2)) -> `Plus(e1,e2)))
  |||| (((parse_A h) ***> (a "-") ***> (parse_A h))
        >>>> (fun (e1,(_,e2)) -> `Minus(e1,e2)))
  |||| (parse_num 
        >>>> (fun s -> `Num(int_of_string (content s))))
  |||| (((a "(") ***> (parse_A h) ***> (a ")"))  (* brackets *)
        >>>> (fun (_,(e,_)) -> e))
  |||| h)  (* helper parser *)
  i)
let rec parse_L h = (fun i -> mkntparser "lambdaexp" (
  (((a "\\") ***> parse_var ***> (parse_L h))  (* lam *)
        >>>> (fun (_,(x,body)) -> `Lam(x,body)))
  |||| (((parse_L h) ***> (parse_L h))  (* app *)
        >>>> (fun (e1,e2) -> `App(e1,e2)))
  |||| (parse_var >>>> (fun s -> `Var s))  (* var *)
  |||| (((a "(") ***> (parse_L h) ***> (a ")"))  (* brackets *)
        >>>> (fun (_,(e,_)) -> e))
  |||| h)  (* helper parser *)
  i)
let parse_U = (
  let rec l i = parse_L h i 
  and a i = parse_A h i 
  and b i = parse_B h i
  and h i = (l |||| a |||| b) i
  in
  l)
let parse_and_eval txt = (remove_err (
  p3_run_parser 
    ((parse_memo_U ()) >>>> eval empty_env)
    txt))


let y = "(\\ f ((\\ x (f (x x))) (\\ x (f (x x)))))"
(* sigma; let rec sigma x = if x < 2 then 1 else x+(sigma (x-1)) *)
let sigma = "(\\ g (\\ x (if (x < 2) then 1 else (x+(g (x-1))))))"
(* following is a lambda calc version of the sigma function, applied to argument 5 *)
let txt = "("^y^" "^sigma^") 5"
let [r] = parse_and_eval txt

Performance

E_EEE: E -> E E E | "1" | ""

Grammar|Input size|Happy/s|P3/s
E_EEE  |020       |0.14   |0.10
E_EEE  |040       |6.87   |0.13
E_EEE  |100       |2535.88|0.50

Conclusion