test first

四則演算のパーサを作る際に、テストファーストにするとよいはずです。という事で、テストだけ先に書きます。

(*
$ ocamlc -pp camlp4o calc.ml
*)
open Format

type token =
| Number of float
| Op of char

type e =
| Num of float
| Bin of e * string * e

let lex = parser
| [< >] -> [<' Number 1.2 >]

let parse = parser
| [< >] -> Num 1.0

let rec list_of_stream = parser
| [<' a; stream >] -> a::(list_of_stream stream)
| [< >] -> ([]:token list)

let _ = printf "lex num1.2 %b\n"
  (
    [ Number 1.2 ]
    =
    list_of_stream(lex (Stream.of_string "1.2"))
  )

let _ = printf "lex ops %b\n"
  (
    [ Op '+'; Op '*' ]
    =
    list_of_stream(lex (Stream.of_string "+*"))
  )

let _ = printf "lex ops %b\n"
  (
    [ Number 1.0; Op '+'; Number 2.0; Op '*'; Number 3.0 ]
    =
    list_of_stream(lex (Stream.of_string "1 + 2 * 3"))
  )

let _ = printf "parse 1 %b\n"
  (
    Num 1.0
    =
    parse (lex (Stream.of_string "1"))
  )

let _ = printf "parse 1 + 2 %b\n"
  (
    Bin(Num 1.0, "+", Num 2.0)
    =
    parse (lex (Stream.of_string "1+2"))
  )

let _ = printf "parse 1 + 2 + 3 %b\n"
  (
    Bin(Bin(Num 1.0, "+", Num 2.0), "+", Num 3.0)
    =
    parse (lex (Stream.of_string "1 + 2 + 3"))
  )

let _ = printf "parse 1 + 2 * 3 %b\n"
  (
    Bin(Num 1.0, "+", Bin(Num 2.0, "*", Num 3.0))
    =
    parse (lex (Stream.of_string "1 + 2 * 3"))
  )

let _ = printf "parse (1 + 2) * 3 %b\n"
  (
    Bin(Bin(Num 1.0, "+", Num 2.0), "*", Num 3.0)
    =
    parse (lex (Stream.of_string "(1 + 2) * 3"))
  )

let _ = printf "parse 1 + 2 / 3 %b\n"
  (
    Bin(Num 1.0, "+", Bin(Num 2.0, "/", Num 3.0))
    =
    parse (lex (Stream.of_string "1 + 2 / 3"))
  )

let _ = printf "parse 1 - 2 / 3 %b\n"
  (
    Bin(Num 1.0, "-", Bin(Num 2.0, "/", Num 3.0))
    =
    parse (lex (Stream.of_string "1 - 2 / 3"))
  )

上記のプログラムをcalc.mlとして保存します。

$ ocamlc -pp camlp4o calc.ml -o calc
$ ./calc

実行すると以下のような結果が得られます。

lex num1 true
lex ops false
lex ops false
parse 1 true
parse 1 + 2 false
parse 1 + 2 + 3 false
parse 1 + 2 * 3 false
parse (1 + 2) * 3 false
parse 1 + 2 / 3 false
parse 1 - 2 / 3 false

後は上からすべてtrueになるように作ればOKです。
実装は以下のように書けます。

(*
$ ocamlc -pp camlp4o calc.ml
*)
open Format

type token =
| Number of float
| Op of char

type e =
| Num of float
| Bin of e * string * e

let rec lex = parser
| [<' (' '|'\t'|'\r'|'\n'); stream=lex >] -> stream
| [<' ('0'..'9' as c); stream >] -> 
  lex_num (String.make 1 c) stream
| [<' ('+'|'-'|'*'|'/'|'('|')' as c); stream=lex >] -> 
  [<' Op c; stream >]
| [< >] -> [< >]

and lex_num buf = parser
| [<' ('0'..'9' | '.' as c); stream >] -> 
  lex_num (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Number(float_of_string buf); stream >]

let rec parse_prim = parser
| [<' Number n >] -> Num n
| [<' Op '('; e=parse; ' Op ')' >] -> e

and parse_mul stream =
  let rec loop lhs =
    match Stream.peek stream with
    | Some(Op ('*' as c))
    | Some(Op ('/' as c)) ->
      Stream.junk stream;
      loop (Bin(lhs, String.make 1 c, parse_prim stream))
    | _ -> lhs
  in
  loop (parse_prim stream)
and parse stream =
  let rec loop lhs =
    match Stream.peek stream with
    | Some(Op ('+' as c))
    | Some(Op ('-' as c)) ->
      Stream.junk stream;
      loop (Bin(lhs, String.make 1 c, parse_mul stream))
    | _ -> lhs
  in
  loop (parse_mul stream)

let rec list_of_stream = parser
| [<' a; stream >] -> a::(list_of_stream stream)
| [< >] -> ([]:token list)

let _ = printf "lex num1.2 %b\n"
  (
    [ Number 1.2 ]
    =
    list_of_stream(lex (Stream.of_string "1.2"))
  )

let _ = printf "lex ops %b\n"
  (
    [ Op '+'; Op '*' ]
    =
    list_of_stream(lex (Stream.of_string "+*"))
  )

let _ = printf "lex ops %b\n"
  (
    [ Number 1.0; Op '+'; Number 2.0; Op '*'; Number 3.0 ]
    =
    list_of_stream(lex (Stream.of_string "1 + 2 * 3"))
  )

let _ = printf "parse 1 %b\n"
  (
    Num 1.0
    =
    parse (lex (Stream.of_string "1"))
  )

let _ = printf "parse 1 + 2 %b\n"
  (
    Bin(Num 1.0, "+", Num 2.0)
    =
    parse (lex (Stream.of_string "1+2"))
  )

let _ = printf "parse 1 + 2 + 3 %b\n"
  (
    Bin(Bin(Num 1.0, "+", Num 2.0), "+", Num 3.0)
    =
    parse (lex (Stream.of_string "1 + 2 + 3"))
  )

let _ = printf "parse 1 + 2 * 3 %b\n"
  (
    Bin(Num 1.0, "+", Bin(Num 2.0, "*", Num 3.0))
    =
    parse (lex (Stream.of_string "1 + 2 * 3"))
  )

let _ = printf "parse (1 + 2) * 3 %b\n"
  (
    Bin(Bin(Num 1.0, "+", Num 2.0), "*", Num 3.0)
    =
    parse (lex (Stream.of_string "(1 + 2) * 3"))
  )

let _ = printf "parse 1 + 2 / 3 %b\n"
  (
    Bin(Num 1.0, "+", Bin(Num 2.0, "/", Num 3.0))
    =
    parse (lex (Stream.of_string "1 + 2 / 3"))
  )

let _ = printf "parse 1 - 2 / 3 %b\n"
  (
    Bin(Num 1.0, "-", Bin(Num 2.0, "/", Num 3.0))
    =
    parse (lex (Stream.of_string "1 - 2 / 3"))
  )

結果は以下の通り

lex num1.2 true
lex ops true
lex ops true
parse 1 true
parse 1 + 2 true
parse 1 + 2 + 3 true
parse 1 + 2 * 3 true
parse (1 + 2) * 3 true
parse 1 + 2 / 3 true
parse 1 - 2 / 3 true

この実装は、文字列の結合が遅いので、出来れば
Bufferを使うとより高速になります。