operator precedence

今回は、四則演算のパーサを優先順位法を使って書き換えたバージョンを書きました。
テストは前回と同じだけど、Hashtblを使っています。これがさらっとかけるようになれば、LLVMカレイドスコープのパーサも簡単と言えるようになるでしょう。

(*
$ ocamlc -pp camlp4o calc.ml
*)
open Format

type token =
| Number of float
| Op of char

type e =
| Num of float
| Bin of e * string * e

let rec lex = parser
| [<' (' '|'\t'|'\r'|'\n'); stream=lex >] -> stream
| [<' ('0'..'9' as c); stream >] -> 
  lex_num (String.make 1 c) stream
| [<' ('+'|'-'|'*'|'/'|'('|')' as c); stream=lex >] -> 
  [<' Op c; stream >]
| [< >] -> [< >]

and lex_num buf = parser
| [<' ('0'..'9' | '.' as c); stream >] -> 
  lex_num (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Number(float_of_string buf); stream >]

let ps:(char, int) Hashtbl.t = Hashtbl.create 10

let () = Hashtbl.add ps '+' 10
let () = Hashtbl.add ps '-' 10
let () = Hashtbl.add ps '*' 20
let () = Hashtbl.add ps '/' 20

let rec parse_prim = parser
| [<' Number n >] -> Num n
| [<' Op '('; e=parse; ' Op ')' >] -> e

and parse_bin p stream =
  let rec loop lhs =
    match Stream.peek stream with
    | Some(Op c) when Hashtbl.mem ps c ->
      let np = Hashtbl.find ps c in
      if np > p then (
        Stream.junk stream;
        loop (Bin(lhs, String.make 1 c, parse_bin np stream))
      ) else
        lhs
    | _ -> lhs
  in
  loop (parse_prim stream)
and parse stream = parse_bin 0 stream

let rec list_of_stream = parser
| [<' a; stream >] -> a::(list_of_stream stream)
| [< >] -> ([]:token list)

let _ = printf "lex num1.2 %b\n"
  (
    [ Number 1.2 ]
    =
    list_of_stream(lex (Stream.of_string "1.2"))
  )

let _ = printf "lex ops %b\n"
  (
    [ Op '+'; Op '*' ]
    =
    list_of_stream(lex (Stream.of_string "+*"))
  )

let _ = printf "lex ops %b\n"
  (
    [ Number 1.0; Op '+'; Number 2.0; Op '*'; Number 3.0 ]
    =
    list_of_stream(lex (Stream.of_string "1 + 2 * 3"))
  )

let _ = printf "parse 1 %b\n"
  (
    Num 1.0
    =
    parse (lex (Stream.of_string "1"))
  )

let _ = printf "parse 1 + 2 %b\n"
  (
    Bin(Num 1.0, "+", Num 2.0)
    =
    parse (lex (Stream.of_string "1+2"))
  )

let _ = printf "parse 1 + 2 + 3 %b\n"
  (
    Bin(Bin(Num 1.0, "+", Num 2.0), "+", Num 3.0)
    =
    parse (lex (Stream.of_string "1 + 2 + 3"))
  )

let _ = printf "parse 1 + 2 * 3 %b\n"
  (
    Bin(Num 1.0, "+", Bin(Num 2.0, "*", Num 3.0))
    =
    parse (lex (Stream.of_string "1 + 2 * 3"))
  )

let _ = printf "parse (1 + 2) * 3 %b\n"
  (
    Bin(Bin(Num 1.0, "+", Num 2.0), "*", Num 3.0)
    =
    parse (lex (Stream.of_string "(1 + 2) * 3"))
  )

let _ = printf "parse 1 + 2 / 3 %b\n"
  (
    Bin(Num 1.0, "+", Bin(Num 2.0, "/", Num 3.0))
    =
    parse (lex (Stream.of_string "1 + 2 / 3"))
  )

let _ = printf "parse 1 - 2 / 3 %b\n"
  (
    Bin(Num 1.0, "-", Bin(Num 2.0, "/", Num 3.0))
    =
    parse (lex (Stream.of_string "1 - 2 / 3"))
  )

結果は以下の通り

lex num1.2 true
lex ops true
lex ops true
parse 1 true
parse 1 + 2 true
parse 1 + 2 + 3 true
parse 1 + 2 * 3 true
parse (1 + 2) * 3 true
parse 1 + 2 / 3 true
parse 1 - 2 / 3 true

この実装は、文字列の結合が遅いので、出来れば
Bufferを使うとより高速になります。