Yet Another LLVM IR Compiler to X86_64

https://bitbucket.org/h_sakurai/yallc/src

llcをmlで書いてみました。
LLVMには依存してなくて、パーサとコンパイラがあるだけです。
出来る事は、四則演算しかないんですけど、これを拡張していけば、
バッグエンドの仕組みが分かるはず。

現状は、スタックマシンのJIT結果を吐き出したような形になってますけど、
レジスタアロケーションすればもっと速くなるでしょう。
例えば、mincamlのemitが分かればきっと簡単なはずです。

ScalaのArrayとArrayBuffer

SwiftのArrayがおかしいって話があったので、たぶん、Scalaなら美しいだろうと思って調べてみました。

package s

object main extends App {
  val a = Array(1,2,3)
  var b = a
  a(1) = 33
  b(0) = 55
  b = b :+ 2
  var b2 = b :+ 2
  println(a.mkString(" . "))
  println(b.mkString(" . "))
  println(b2.mkString(" . "))

  val c = new scala.collection.mutable.ArrayBuffer[Int](1)
  c += 1
  var d = c
  d += 2
  println(c)
  println(d)
}

結果はこんな感じ。

55 . 33 . 3
55 . 33 . 3 . 2
55 . 33 . 3 . 2 . 2
ArrayBuffer(1, 2)
ArrayBuffer(1, 2)

ScalaのArrayはimmutableな変数でも値を代入出来ます。
で、追加する時は :+を使って追加しますが、元の配列そのものは変わりません。
なので、中身が変わったおかしいってことはない。
ArrayBufferは完全に中身を共有しているので、
サイズが1としてあって、内部でリアロケーションが起こっても、共有され続ける。
どちらも、訳の分からない事にはならないので、うまく出来てます。

import  core.stdc.stdio;
void main() {
  immutable int[] a = [1];
  int[] b = cast(int[])a;
  b[0] = 222;
  b ~= 333;
  printf("%d %d %d %d\n", a[0], b[0], a.length, b.length);
}

結果

222 222 1 2

DMD64 D Compiler v2.060だと、bの長さが変わると、中身もかわるかもしれないが、配列への追加は必ずコピーを行うとは限りません。と仕様にある。
immutableなリストは一部書き換えは出来ないので、変わらないけど、bを書き換えるとaも変わる。bの長さが変わるとリアロケーションされて、書き換えてもaは変わらない。
仕様もそうなっている。

OCamlの配列も書き換え可能だ。

MLやScalaでは配列は書き換え可能だ。immutableなのは変数そのもので、中身のことではない。
Dのようなネイティブよりの言語はimmutableなリストがないので、immutableな配列は中身も書き換えられないとしているのではあるけど、castして無理矢理取り出せば書き換えられる。という事なのでした。

内部の実装を考えると、immutableってSSA最適化をする場合の書き換えられないレジスタ相当で、書き換え可能名変数はload,storeをする変数と考えるのが実装がシンプルでかつ、最適化するにもやりやすい。

配列の中身を書き換えられないようにする必要性ってあまりないのかなと。配列を使うという事は高速性を求めているということであるし、immutableなデータとしては、ListかMapか、Setを使えば大体事足りるはずだ。

最適化まで考えて仕様を考えれば結構普通なんじゃないのかなとそんな風に思う。

ArrayBufferは良さそうと思うかもしれないけども、ポインタのポインタと2段階のアクセスをするのは高速さを求める場合は若干だが遅くなる。だけど、実際のアロケートされている配列のサイズは大きく取っておけるので、リアロケーションはたまにしか起こらないから速い。追加操作があると便利だという事もあるのだけど、普通つけない機能だろ。っていう感じだ。

OCamlの型検査の働きもしくは多相性とガベージコレクションが共通して持つ物

結局、次に作成するコンパイラはCamlP4は使わずにocamlyaccとocamllexでMinCamlに近い形で作る事にしました。
そして、テストファーストで四則演算、それ以外の関係演算子や比較演算子、論理演算子等を加え、LLVMに出力し、参照を導入して、mutableな変数のように扱えるようにして、単純な型推論まで入れました。次は多相性を入れたい。
以下のサイトを参考に、LLVMのコードを出力するものを作成してみました。

http://okmij.org/ftp/ML/generalization.html

Scalaで書いた物が大分前に動いていたので、OCamlに移植すれば動いたわけです。

さてこのノウハウをベースに多相型の型推論を導入しようと型エラーは無い状態までプログラムを作成したのですが、うまく動きません。
大体は分かるのです。でも、しっかりと動作を理解してないので、どう修正したら良いのか、よくわかりません。ドキュメントは英語ならあるので、翻訳すれば良いはずです。

そこでしっかり把握するためにOCaml型推論の解説を動作を理解出来るように翻訳してみています。

https://github.com/hsk/generalization/wiki

多相型の型推論を自由に扱えれば、型パラメータ書かなくてもいいテンプレートみたいなものが作れるのではと期待している訳です。作れないなら作れないと断言出来るはずなので、精進あるのみです。

後置、前置、cast

とりあえず、三項演算子以外のCの式のほとんどをカバーする所まで作ってみました。
プリプロセッサとして、Lisp的なマクロを行う場合はこの方式は良さそうだけど、今はユーザー定義の演算子は必要ないのでこの方式は一度、ここで止める事にしました。

(*
$ ocamlc -pp camlp4o calc.ml
*)
open Format

type token =
| Number of int
| Op of string
| Symbol of string

type e =
| Num of int
| Id of string
| Unary of string * e
| Post of e * string
| Bin of e * string * e
| Void

exception Error of string

let rec lex = parser
| [<' (' '|'\t'|'\r'|'\n'); stream=lex >] -> stream
| [<' ('0'..'9' as c); stream >] -> 
  lex_num (String.make 1 c) stream
| [<' ('a'..'z'|'A'..'Z'|'_' as c); stream >] -> 
  lex_str (String.make 1 c) stream
| [<' ('('|')'|'['|']'|'{'|'}'|','|';' as c); stream=lex >] -> 
  [<' Op (String.make 1 c); stream >]
| [<' ('*' as c); stream >] ->
  lex_op2 (String.make 1 c) stream
| [<' ('&' as c); stream >] ->
  lex_op3 (String.make 1 c) stream
| [<' ('/'|'%'|'+'|'-'|'^'|'<'|'>'|'='|'!'|'|'|'.'|'~' as c); stream >] ->
  lex_op (String.make 1 c) stream
| [< >] -> [< >]
and lex_op buf = parser
| [<' ('/'|'%'|'+'|'-'|'^'|'<'|'>'|'='|'!'|'|'|'.'|'~'|'*'|'&' as c); stream >] ->
  lex_op (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Op(buf); stream >]
and lex_op2 buf = parser
| [<' ('/'|'%'|'+'|'-'|'^'|'<'|'>'|'='|'!'|'|'|'.'|'~' as c); stream >] ->
  lex_op (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Op(buf); stream >]
and lex_op3 buf = parser
| [<' ('/'|'%'|'+'|'-'|'^'|'<'|'>'|'='|'!'|'|'|'.'|'~'|'&' as c); stream >] ->
  lex_op (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Op(buf); stream >]

and lex_num buf = parser
| [<' ('0'..'9' as c); stream >] -> 
  lex_num (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Number(int_of_string buf); stream >]

and lex_str buf = parser
| [<' ('0'..'9'|'a'..'z'|'A'..'Z'|'_' as c); stream >] -> 
  lex_str (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  (match buf with
  | "sizeof" | "alignof" | "cast" ->[<' Op(buf); stream >]
  | _ -> [<' Symbol(buf); stream >])

let ps:(string,(int*string*char)) Hashtbl.t = Hashtbl.create 10
let us:(string,(int*string*char)) Hashtbl.t = Hashtbl.create 10
let pends:(string,string) Hashtbl.t = Hashtbl.create 3

let _ =
  Hashtbl.add ps "*" (11,"mul",'l');
  Hashtbl.add ps "/" (11,"div",'l');
  Hashtbl.add ps "%" (11,"mod",'l');
  Hashtbl.add ps "+" (10,"add",'l');
  Hashtbl.add ps "-" (10,"sub",'l');
  Hashtbl.add ps "<<" (9,"shl",'l');
  Hashtbl.add ps ">>" (9,"shr",'l');
  Hashtbl.add ps "<" (8,"lt",'l');
  Hashtbl.add ps ">" (8,"gt",'l');
  Hashtbl.add ps "<=" (8,"le",'l');
  Hashtbl.add ps ">=" (8,"ge",'l');
  Hashtbl.add ps "==" (7,"eq",'l');
  Hashtbl.add ps "!=" (7,"ne",'l');
  Hashtbl.add ps "&" (6,"and",'l');
  Hashtbl.add ps "^" (5,"xor",'l');
  Hashtbl.add ps "|" (4,"or",'l');
  Hashtbl.add ps "&&" (3,"land",'l');
  Hashtbl.add ps "||" (2,"lor",'l');
  Hashtbl.add ps "=" (1,"=",'r');
  Hashtbl.add ps "*=" (1,"*=",'r');
  Hashtbl.add ps "/=" (1,"/=",'r');
  Hashtbl.add ps "%=" (1,"%=",'r');
  Hashtbl.add ps "+=" (1,"+=",'r');
  Hashtbl.add ps "-=" (1,"-=",'r');
  Hashtbl.add ps "<<=" (1,"<<=",'r');
  Hashtbl.add ps ">>=" (1,">>=",'r');
  Hashtbl.add ps "&=" (1,"&=",'r');
  Hashtbl.add ps "^=" (1,"^=",'r');
  Hashtbl.add ps "|=" (1,"|=",'r');
  Hashtbl.add ps "," (0,",",'r');

  Hashtbl.add ps "++" (13,"++",'p');
  Hashtbl.add ps "--" (13,"--",'p');
  Hashtbl.add ps "." (13,".",'l');
  Hashtbl.add ps "->" (13,"->",'l');
  Hashtbl.add ps "(" (13,")",'m');
  Hashtbl.add ps "[" (13,"]",'m');
  Hashtbl.add ps "{" (13,"}",'m');

  Hashtbl.add us "+" (12,"add",'u');
  Hashtbl.add us "-" (12,"sub",'u');
  Hashtbl.add us "++" (12,"++",'u');
  Hashtbl.add us "--" (12,"--",'u');
  Hashtbl.add us "&" (12,"and",'u');
  Hashtbl.add us "*" (12,"mul",'u');
  Hashtbl.add us "~" (12,"xor",'u');
  Hashtbl.add us "!" (12,"eq",'u');
  Hashtbl.add us "sizeof" (12,"sizeof",'u');
  Hashtbl.add us "alignof" (12,"alignof",'u');
  Hashtbl.add us "cast" (12,"(",'s');
  Hashtbl.add pends "(" ")";
  Hashtbl.add pends "[" "]";
  Hashtbl.add pends "{" "}";
  ()

let rec parse stream =
  let rec parse_prim = parser
    | [<' Number n >] -> Num n
    | [<' Symbol n >] -> Id n
    | [<' Op "("; e=parse; ' Op ")" >] -> e
  in
  let rec parse_bin p =
    let rec loop lhs =
      match Stream.peek stream with
      | Some(Op c) when Hashtbl.mem ps c ->
        (match Hashtbl.find ps c with
        | (np,op,'l') ->
          if p < np then (
            Stream.junk stream;
            loop (Bin(lhs, op, parse_bin np))
          ) else lhs
        | (np,op,'r') ->
          if p <= np then (
            Stream.junk stream;
            loop (Bin(lhs, op, parse_bin np))
          ) else lhs
        | (np,op,'p') ->
          if p <= np then (
            Stream.junk stream;
            loop (Post(lhs, op))
          ) else lhs
        | (np,op,'m') ->
          if p <= np then (
            Stream.junk stream;
            let e =
              match Stream.peek stream with
              | Some(Op eop) when eop = op -> Void
              | _ -> parse_bin 0 in
            (match Stream.peek stream with
            | Some(Op eop) 
              when eop = op ->
              Stream.junk stream;
              loop (Bin(lhs, c^op, e))
            | _ -> raise (Error "error")
            )
          ) else lhs
        | _ -> lhs
        )
      | _ -> lhs
    in 
    match Stream.peek stream with
    | Some(Op c) when Hashtbl.mem us c ->
        (match Hashtbl.find us c with
        | (np,op,'u') ->
          Stream.junk stream;
          loop (Unary(op, parse_bin np))
        | (np,op,'s') ->
          Stream.junk stream;
          (match Stream.peek stream with
          | Some(Op c2) when c2 = op ->
            Stream.junk stream;
            let e =
              match Stream.peek stream with
              | Some(Op eop) when eop = Hashtbl.find pends op -> Void
              | _ -> parse_bin 0 in
            (match Stream.peek stream with
            | Some(Op eop) 
              when eop = Hashtbl.find pends op ->
              Stream.junk stream;
              loop (Bin(e, c, parse_bin np))
            | _ -> raise (Error "error")
            )

          | _ -> raise (Error "error")
          )
        | _ -> loop (parse_prim stream)
        )
    | _ -> loop (parse_prim stream)
  in parse_bin 0


let rec list_of_stream = parser
| [<' a; stream>] -> a::list_of_stream stream
| [< >] -> []

let testcount = ref 0
let ngcount = ref 0
let check = ref true

let check_point str =
  if (!ngcount > 0 && !check) then (
    fprintf std_formatter "let's clear stage %s....@." str;
    check := false
  ) else ()

let test_ f (s,e) =
  try
    testcount := !testcount + 1;
    if (f(lex (Stream.of_string s)) = e) then () (*print_string ("ok "^s^"\n")*)
    else (
      ngcount := !ngcount + 1;
      if !check then
        fprintf std_formatter ("error %s@.") s
      else ()
    )
  with
  | _ ->
    ngcount := !ngcount + 1;
    if !check then
      fprintf std_formatter ("parse error %s@.") s
    else ()

let testlex(s, e) = test_ list_of_stream (s, e)
let test(s, e) = test_ parse (s, e)

let _ =
  testlex("10", [ Number 10 ]);
  testlex("+ **", [ Op "+"; Op "*"; Op "*" ]);
  testlex("+++", [ Op "+++" ]);
  testlex("*+*", [ Op "*+*" ]);
  testlex("*&", [ Op "*";Op "&" ]);
  testlex("&*", [ Op "&";Op "*" ]);
  testlex("&&", [ Op "&&" ]);
  testlex("&+*", [ Op "&+*" ]);
  testlex("1 + 2 * 3",
    [ Number 1; Op "+"; Number 2; Op "*"; Number 3 ]);
  testlex("s 1", [Symbol "s"; Number(1)]); (* 15. ESizeof *)
  testlex("sa 1", [Symbol "sa"; Number(1)]); (* 15. ESizeof *)
  check_point "lex";

  test("1",Num 1);
  test("1+2", Bin(Num 1, "add", Num 2));
  test("1 + 2 + 3", Bin(Bin(Num 1, "add", Num 2), "add", Num 3));
  test("1 + 2 * 3", Bin(Num 1, "add", Bin(Num 2, "mul", Num 3)));
  test("(1 + 2) * 3", Bin(Bin(Num 1, "add", Num 2), "mul", Num 3));
  test("1 + 2 / 3", Bin(Num 1, "add", Bin(Num 2, "div", Num 3)));
  test("1 - 2 / 3", Bin(Num 1, "sub", Bin(Num 2, "div", Num 3)));
  check_point "four arithmetic operations";

(* mul *)
  (* simple test *)
  test("1*2", Bin(Num(1),"mul",Num(2))); (* 20. EBin("mul") *)
  test("1/2", Bin(Num(1),"div",Num(2))); (* 21. EBin("div") *)
  test("1%2", Bin(Num(1),"mod",Num(2))); (* 21. EBin("div") *)
  (* associative test *)
  test("1*2*3", Bin(Bin(Num(1),"mul",Num(2)),"mul", Num(3)));
  test("1/2/3", Bin(Bin(Num(1),"div",Num(2)),"div", Num(3)));
  test("1%2%3", Bin(Bin(Num(1),"mod",Num(2)),"mod", Num(3)));

  check_point "mul";

(* add *)
  (* simple test *)
  test("1+2", Bin(Num(1),"add",Num(2))); (* 18. EBin("add") *)
  test("1-2", Bin(Num(1),"sub",Num(2))); (* 19. EBin("sub") *)
  (* associative test *)
  test("1+2+3", Bin(Bin(Num(1),"add",Num(2)),"add", Num(3)));
  test("1-2-3", Bin(Bin(Num(1),"sub",Num(2)),"sub", Num(3)));
  (* mul & add *)
  test("1+2*3", Bin(Num(1),"add",Bin(Num(2),"mul", Num(3)))); 
  test("1-2*3", Bin(Num(1),"sub",Bin(Num(2),"mul", Num(3))));
  test("1-2/3", Bin(Num(1),"sub",Bin(Num(2),"div", Num(3))));

  check_point "add";

(* shift *)
  (* simple test *)
  test("1<<2", Bin(Num(1),"shl",Num(2)));  (* 22. EBin("shl") *)
  test("1>>2", Bin(Num(1),"shr",Num(2)));  (* 23. EBin("shr") *)
  (* associative test *)
  test("1<<2<<3", Bin(Bin(Num(1),"shl",Num(2)),"shl", Num(3)));
  test("1>>2>>3", Bin(Bin(Num(1),"shr",Num(2)),"shr", Num(3)));
  (* add & shift *)
  test("1<<2+3", Bin(Num(1),"shl",Bin(Num(2),"add", Num(3)))); 
  test("1<<2-3", Bin(Num(1),"shl",Bin(Num(2),"sub", Num(3)))); 
  test("1>>2-3", Bin(Num(1),"shr",Bin(Num(2),"sub", Num(3)))); 

  check_point "shift";

(* relation *)
  (* simple test *)
  test("1<2", Bin(Num(1),"lt",Num(2))); (* 24. EBin("lt") *)
  test("1>2", Bin(Num(1),"gt",Num(2))); (* 25. EBin("gt") *)
  test("1<=2", Bin(Num(1),"le",Num(2)));  (* 26. EBin("le") *)
  test("1>=2", Bin(Num(1),"ge",Num(2))); (* 27. EBin("le") *)
  (* associative test *)
  test("1<2<3", Bin(Bin(Num(1),"lt",Num(2)),"lt", Num(3)));
  test("1>2>3", Bin(Bin(Num(1),"gt",Num(2)),"gt", Num(3)));
  test("1<=2<=3", Bin(Bin(Num(1),"le",Num(2)),"le", Num(3)));
  test("1>=2>=3", Bin(Bin(Num(1),"ge",Num(2)),"ge", Num(3)));
  (* shift & relation *)
  test("1<2<<3", Bin(Num(1),"lt",Bin(Num(2),"shl", Num(3)))); 
  test("1<2>>3", Bin(Num(1),"lt",Bin(Num(2),"shr", Num(3)))); 
  test("1>2>>3", Bin(Num(1),"gt",Bin(Num(2),"shr", Num(3)))); 
  test("1<=2>>3", Bin(Num(1),"le",Bin(Num(2),"shr", Num(3)))); 
  test("1>=2>>3", Bin(Num(1),"ge",Bin(Num(2),"shr", Num(3)))); 

  check_point "relation";

(* eq *)
  (* simple test *)
  test("1==2", Bin(Num(1),"eq",Num(2)));  (* 28. EBin("eq") *)
  test("1!=2", Bin(Num(1),"ne",Num(2))); (* 29. EBin("ne") *)
  (* associative test *)
  test("1==2==3", Bin(Bin(Num(1),"eq",Num(2)),"eq", Num(3)));
  test("1!=2!=3", Bin(Bin(Num(1),"ne",Num(2)),"ne", Num(3)));
  (* relation & eq *)
  test("1==2<3", Bin(Num(1),"eq",Bin(Num(2),"lt", Num(3)))); 
  test("1==2>3", Bin(Num(1),"eq",Bin(Num(2),"gt", Num(3)))); 
  test("1==2<=3", Bin(Num(1),"eq",Bin(Num(2),"le", Num(3)))); 
  test("1==2>=3", Bin(Num(1),"eq",Bin(Num(2),"ge", Num(3)))); 
  test("1!=2>=3", Bin(Num(1),"ne",Bin(Num(2),"ge", Num(3)))); 

  check_point "eq";

(* iand *)
  (* simple test *)
  test("1&2", Bin(Num(1),"and",Num(2))); (* 30. EBin("and") *)
  (* associative test *)
  test("1&2&3", Bin(Bin(Num(1),"and",Num(2)),"and", Num(3)));
  (* eq & iand *)
  test("1&2==3", Bin(Num(1),"and",Bin(Num(2),"eq", Num(3)))); 
  test("1&2!=3", Bin(Num(1),"and",Bin(Num(2),"ne", Num(3)))); 

(* xor *)
  (* simple test *)
  test("1^2", Bin(Num(1),"xor",Num(2))); (* 31. EBin("xor") *)
  (* associative test *)
  test("1^2^3", Bin(Bin(Num(1),"xor",Num(2)),"xor", Num(3)));
  (* xor & ior *)
  test("1|2^3", Bin(Num(1),"or",Bin(Num(2),"xor", Num(3)))); 

(* ior *)
  (* simple test *)
  test("1|2", Bin(Num(1),"or",Num(2))); (* 32. EBin("or") *)
  (* associative test *)
  test("1|2|3", Bin(Bin(Num(1),"or",Num(2)),"or", Num(3)));

(* land *)
  (* simple test *)
  test("1&&2", Bin(Num(1),"land",Num(2))); (* 33. EBin(EBin("ne",0) "and" EBin("ne",0)) *)
  (* associative test *)
  test("1&&2&&3", Bin(Bin(Num(1),"land",Num(2)),"land", Num(3)));
  (* ior & land *)
  test("1&&2|3", Bin(Num(1),"land",Bin(Num(2),"or", Num(3)))); 

(* lor *)
  (* simple test *)
  test("1||2", Bin(Num(1),"lor",Num(2))); (* 34. EBin(EBin("ne",0) "or" EBin("ne",0)) *)
  (* associative test *)
  test("1||2||3", Bin(Bin(Num(1),"lor",Num(2)),"lor", Num(3)));
  (* land & lor *)
  test("1||2&&3", Bin(Num(1),"lor",Bin(Num(2),"land", Num(3)))); 

  check_point "and or xor";

(* assign *)
  (* simple test *)
  test("1=2", Bin(Num(1),"=",Num(2))); (* 36. EAssign() *)
  test("1*=2", Bin(Num(1),"*=",Num(2))); (* 37. EAssign(EBin("mul")) *)
  test("1/=2", Bin(Num(1),"/=",Num(2)));(* 38. EAssign(EBin("div")) *)
  test("1%=2", Bin(Num(1),"%=",Num(2)));(* 39. EAssign(EBin("mod")) *)
  test("1+=2", Bin(Num(1),"+=",Num(2)));(* 40. EAssign(EBin("add")) *)
  test("1-=2", Bin(Num(1),"-=",Num(2)));(* 41. EAssign(EBin("sub")) *)
  test("1&=2", Bin(Num(1),"&=",Num(2)));(* 42. EAssign(EBin("iand")) *)
  test("1<<=2", Bin(Num(1),"<<=",Num(2)));(* 43. EAssign(EBin("shl")) *)
  test("1>>=2", Bin(Num(1),">>=",Num(2)));(* 44. EAssign(EBin("shr")) *)
  test("1^=2", Bin(Num(1),"^=",Num(2)));(* 45. EAssign(EBin("xor")) *)
  test("1|=2", Bin(Num(1),"|=",Num(2)));(* 46. EAssign(EBin("ior")) *)
  (* associative test *)
  test("0=1=2", Bin(Num(0),"=",Bin(Num(1),"=",Num(2)))); (* 36. EAssign() *)
  test("0=1*=2", Bin(Num(0),"=",Bin(Num(1),"*=",Num(2)))); (* 37. EAssign(EBin("mul")) *)
  test("0=1/=2", Bin(Num(0),"=",Bin(Num(1),"/=",Num(2))));(* 38. EAssign(EBin("div")) *)
  test("0=1%=2", Bin(Num(0),"=",Bin(Num(1),"%=",Num(2))));(* 39. EAssign(EBin("mod")) *)
  test("0=1+=2", Bin(Num(0),"=",Bin(Num(1),"+=",Num(2))));(* 40. EAssign(EBin("add")) *)
  test("0=1-=2", Bin(Num(0),"=",Bin(Num(1),"-=",Num(2))));(* 41. EAssign(EBin("sub")) *)
  test("0=1&=2", Bin(Num(0),"=",Bin(Num(1),"&=",Num(2))));(* 42. EAssign(EBin("iand")) *)
  test("0=1<<=2", Bin(Num(0),"=",Bin(Num(1),"<<=",Num(2))));(* 43. EAssign(EBin("shl")) *)
  test("0=1>>=2", Bin(Num(0),"=",Bin(Num(1),">>=",Num(2))));(* 44. EAssign(EBin("shr")) *)
  test("0=1^=2", Bin(Num(0),"=",Bin(Num(1),"^=",Num(2))));(* 45. EAssign(EBin("xor")) *)
  test("0=1|=2", Bin(Num(0),"=",Bin(Num(1),"|=",Num(2))));(* 46. EAssign(EBin("ior")) *)

(* post *)
  (* simple test *)
  test("a[1]",Bin(Id("a"),"[]",Num(1))); (* 1. EArray *)
  test("a()",Bin(Id("a"),"()",Void)); (* 2. ECall *)
  test("a(1)",Bin(Id("a"),"()",Num(1))); 
  test("a(1,2)",Bin(Id("a"),"()",Bin(Num(1),",",Num(2))));
  test("a.b",Bin(Id("a"),".",Id("b"))); (* 3. EField *)
  test("a->b",Bin(Id("a"),"->",Id("b"))); (* 4. EField *)
  test("a++",Post(Id("a"),"++")); (* 5. EPost("++") *)
  test("a--",Post(Id("a"),"--")); (* 6. EPost("--") *)

  (* associative test *)
  test("a[1][2]", Bin(Bin(Id("a"),"[]",Num(1)),"[]",Num(2)));
  test("a()()", Bin(Bin(Id("a"),"()",Void),"()",Void));
  test("a.b.c", Bin(Bin(Id("a"),".",Id("b")),".",Id("c")));
  test("a->b->c", Bin(Bin(Id("a"),"->",Id("b")),"->",Id("c")));
  test("a++ ++", Post(Post(Id("a"),"++"),"++"));
  test("a-- --", Post(Post(Id("a"),"--"),"--"));

  (* priority test *)
  check_point "post";

(* unary *)
  (* simple test *)
  test("++1", Unary("++",Num(1))); (* 7. EAssign(EBin("add", EInt(TNum(64),1))) *)
  test("--1", Unary("--",Num(1))); (* 8. EPre("sub", EInt(TNum(64),1)) *)
  test("&1", Unary("and",Num(1)));   (* 9. ERef *)
  test("*1", Unary("mul",Num(1)));   (* 10. EPtr *)
  test("+1", Unary("add",Num(1)));   (* 11. -- *)
  test("-1", Unary("sub",Num(1)));   (* 12. EBin("sub",EInt(TNum(64),0)) *)
  test("~1", Unary("xor",Num(1)));   (* 13. EBin(-1,"xor") *)
  test("!1", Unary("eq",Num(1)));   (* 14. EBin("eq",EInt(TNum(64),0)) *)
  test("sizeof 1", Unary("sizeof",Num(1))); (* 15. ESizeof *)
  test("alignof 1", Unary("alignof",Num(1))); (* 16. EPre("alignof") *)


  (* associative test *)
  test("++ ++1", Unary("++",Unary("++",Num(1))));
  test("-- --1", Unary("--",Unary("--",Num(1))));
  test("& &1", Unary("and",Unary("and",Num(1))));
  test("* *1", Unary("mul",Unary("mul",Num(1))));
  test("+ +1", Unary("add",Unary("add",Num(1))));
  test("- -1", Unary("sub",Unary("sub",Num(1))));
  test("~ ~1", Unary("xor",Unary("xor",Num(1))));
  test("! !1", Unary("eq",Unary("eq",Num(1))));
  test("sizeof sizeof 1", Unary("sizeof",Unary("sizeof",Num(1))));
  test("alignof alignof 1", Unary("alignof",Unary("alignof",Num(1))));

  (* priority test *)
  (* post & unary *)
  test("++a[1]", Unary("++",Bin(Id("a"),"[]",Num(1))));
  test("++a(1)", Unary("++",Bin(Id("a"),"()",Num(1))));
  test("++a.b", Unary("++",Bin(Id("a"),".",Id("b"))));
  test("++a->b", Unary("++",Bin(Id("a"),"->",Id("b"))));
  test("++a++", Unary("++",Post(Id("a"),"++")));
  test("++a--", Unary("++",Post(Id("a"),"--")));
  test("--a--", Unary("--",Post(Id("a"),"--")));
  test("&a--", Unary("and",Post(Id("a"),"--")));
  test("*a--", Unary("mul",Post(Id("a"),"--")));
  test("+a--", Unary("add",Post(Id("a"),"--")));
  test("-a--", Unary("sub",Post(Id("a"),"--")));
  test("~a--", Unary("xor",Post(Id("a"),"--")));
  test("!a--", Unary("eq",Post(Id("a"),"--")));
  test("sizeof a--", Unary("sizeof",Post(Id("a"),"--")));
  test("alignof a--", Unary("alignof",Post(Id("a"),"--")));

  check_point "unary";

(* cast *)
  (* simple test *)
  test("cast(byte)2", Bin(Id("byte"),"cast",Num(2))); (* 17. ECast *)
  (* associative test *)
  test("cast(int)cast(byte)2", Bin(Id("int"),"cast",Bin(Id("byte"),"cast", Num(2))));
  (* unary & cast *)
  test("cast(int)++1", Bin(Id("int"),"cast",Unary("++",Num(1))));
  test("cast(int)--1", Bin(Id("int"),"cast",Unary("--",Num(1))));
  test("cast(int)&1", Bin(Id("int"),"cast",Unary("and",Num(1))));
  test("cast(int)*1", Bin(Id("int"),"cast",Unary("mul",Num(1))));
  test("cast(int)+1", Bin(Id("int"),"cast",Unary("add",Num(1))));
  test("cast(int)-1", Bin(Id("int"),"cast",Unary("sub",Num(1))));
  test("cast(int)~1", Bin(Id("int"),"cast",Unary("xor",Num(1))));
  test("cast(int)!1", Bin(Id("int"),"cast",Unary("eq",Num(1))));
  test("cast(int)sizeof 1", Bin(Id("int"),"cast",Unary("sizeof",Num(1))));
  test("cast(int)alignof 1", Bin(Id("int"),"cast",Unary("alignof",Num(1))));

  (*test_error("++cast(int)1"); 
  test_error("--cast(int)1"); *)
  test("++cast(int)1", Unary("++",Bin(Id("int"),"cast",Num(1))));
  test("--cast(int)1", Unary("--",Bin(Id("int"),"cast",Num(1))));
  test("&cast(int)1", Unary("and",Bin(Id("int"),"cast",Num(1))));
  test("*cast(int)1", Unary("mul",Bin(Id("int"),"cast",Num(1))));
  test("+cast(int)1", Unary("add",Bin(Id("int"),"cast",Num(1))));
  test("-cast(int)1", Unary("sub",Bin(Id("int"),"cast",Num(1))));
  test("~cast(int)1", Unary("xor",Bin(Id("int"),"cast",Num(1))));
  test("!cast(int)1", Unary("eq",Bin(Id("int"),"cast",Num(1))));
  test("sizeof cast(int)1", Unary("sizeof",Bin(Id("int"),"cast",Num(1))));
  test("alignof cast(int)1", Unary("alignof",Bin(Id("int"),"cast",Num(1))));
(*  test_error("sizeof cast(int)1"); 
  test_error("alignof cast(int)1"); *)

  check_point "cast";

(* mul *)
  (* simple test *)
  test("1*2", Bin(Num(1),"mul",Num(2))); (* 20. EBin("mul") *)
  test("1/2", Bin(Num(1),"div",Num(2))); (* 21. EBin("div") *)
  (* associative test *)
  test("1*2*3", Bin(Bin(Num(1),"mul",Num(2)),"mul", Num(3)));
  test("1/2/3", Bin(Bin(Num(1),"div",Num(2)),"div", Num(3)));
  (* cast & mul *)
  test("1*cast(byte)2", Bin(Num(1),"mul",Bin(Id("byte"),"cast", Num(2))));
  test("1/cast(byte)2", Bin(Num(1),"div",Bin(Id("byte"),"cast", Num(2))));
  (* post & mul *)
  test("1*2[3]", Bin(Num(1),"mul",Bin(Num(2),"[]", Num(3))));
  test("1/2[3]", Bin(Num(1),"div",Bin(Num(2),"[]", Num(3))));
  test("1%2[3]", Bin(Num(1),"mod",Bin(Num(2),"[]", Num(3))));
  test("1%2()", Bin(Num(1),"mod",Bin(Num(2),"()", Void)));
  test("1%2(3)", Bin(Num(1),"mod",Bin(Num(2),"()", Num(3))));
  test("1%2.a", Bin(Num(1),"mod",Bin(Num(2),".", Id("a"))));
  test("1%2->a", Bin(Num(1),"mod",Bin(Num(2),"->", Id("a"))));
  test("1%2++", Bin(Num(1),"mod",Post(Num(2),"++")));
  test("1%2--", Bin(Num(1),"mod",Post(Num(2),"--")));

  fprintf std_formatter "test ok:%d ng:%d all:%d@.@?" (!testcount- !ngcount) !ngcount !testcount

様々な演算子

演算子順位法を使って、C言語の2項演算子をパース出来るようにしました。
&と*はポインタ演算に使うので特殊に扱いますが、基本的に記号の連続は1つの演算子として字句解析するようにしました。

(*
$ ocamlc -pp camlp4o calc.ml
*)
open Format

type token =
| Number of int
| Op of string

type e =
| Num of int
| Bin of e * string * e

let rec lex = parser
| [<' (' '|'\t'|'\r'|'\n'); stream=lex >] -> stream
| [<' ('0'..'9' as c); stream >] -> 
  lex_num (String.make 1 c) stream
| [<' ('('|')'|'['|']'|'{'|'}' as c); stream=lex >] -> 
  [<' Op (String.make 1 c); stream >]
| [<' ('*' as c); stream >] ->
  lex_op2 (String.make 1 c) stream
| [<' ('&' as c); stream >] ->
  lex_op3 (String.make 1 c) stream
| [<' ('/'|'%'|'+'|'-'|'^'|'<'|'>'|'='|'!'|'|' as c); stream >] ->
  lex_op (String.make 1 c) stream
| [< >] -> [< >]
and lex_op buf = parser
| [<' ('/'|'%'|'+'|'-'|'^'|'<'|'>'|'='|'!'|'|'|'*'|'&' as c); stream >] ->
  lex_op (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Op(buf); stream >]
and lex_op2 buf = parser
| [<' ('/'|'%'|'+'|'-'|'^'|'<'|'>'|'='|'!'|'|' as c); stream >] ->
  lex_op (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Op(buf); stream >]
and lex_op3 buf = parser
| [<' ('/'|'%'|'+'|'-'|'^'|'<'|'>'|'='|'!'|'|'|'&' as c); stream >] ->
  lex_op (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Op(buf); stream >]

and lex_num buf = parser
| [<' ('0'..'9' as c); stream >] -> 
  lex_num (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Number(int_of_string buf); stream >]

let ps:(string,(int*string*char)) Hashtbl.t = Hashtbl.create 10
let _ =
  Hashtbl.add ps "*" (11,"mul",'l');
  Hashtbl.add ps "/" (11,"div",'l');
  Hashtbl.add ps "%" (11,"mod",'l');
  Hashtbl.add ps "+" (10,"add",'l');
  Hashtbl.add ps "-" (10,"sub",'l');
  Hashtbl.add ps "<<" (9,"shl",'l');
  Hashtbl.add ps ">>" (9,"shr",'l');
  Hashtbl.add ps "<" (8,"lt",'l');
  Hashtbl.add ps ">" (8,"gt",'l');
  Hashtbl.add ps "<=" (8,"le",'l');
  Hashtbl.add ps ">=" (8,"ge",'l');
  Hashtbl.add ps "==" (7,"eq",'l');
  Hashtbl.add ps "!=" (7,"ne",'l');
  Hashtbl.add ps "&" (6,"and",'l');
  Hashtbl.add ps "^" (5,"xor",'l');
  Hashtbl.add ps "|" (4,"or",'l');
  Hashtbl.add ps "&&" (3,"land",'l');
  Hashtbl.add ps "||" (2,"lor",'l');
  Hashtbl.add ps "=" (1,"=",'r');
  Hashtbl.add ps "*=" (1,"*=",'r');
  Hashtbl.add ps "/=" (1,"/=",'r');
  Hashtbl.add ps "%=" (1,"%=",'r');
  Hashtbl.add ps "+=" (1,"+=",'r');
  Hashtbl.add ps "-=" (1,"-=",'r');
  Hashtbl.add ps "<<=" (1,"<<=",'r');
  Hashtbl.add ps ">>=" (1,">>=",'r');
  Hashtbl.add ps "&=" (1,"&=",'r');
  Hashtbl.add ps "^=" (1,"^=",'r');
  Hashtbl.add ps "|=" (1,"|=",'r');
  ()

let rec parse stream =
  let rec parse_prim = parser
    | [<' Number n >] -> Num n
    | [<' Op "("; e=parse; ' Op ")" >] -> e
  in
  let rec parse_bin p =
    let rec loop lhs =
      match Stream.peek stream with
      | Some(Op c) when Hashtbl.mem ps c ->
        (match Hashtbl.find ps c with
        | (np,op,'l') ->
          if p < np then (
            Stream.junk stream;
            loop (Bin(lhs, op, parse_bin np))
          ) else lhs
        | (np,op,'r') ->
          if p <= np then (
            Stream.junk stream;
            loop (Bin(lhs, op, parse_bin np))
          ) else lhs
        | _ -> lhs
        )
      | _ -> lhs
    in loop (parse_prim stream)
  in parse_bin 0


let rec list_of_stream = parser
| [<' a; stream>] -> a::list_of_stream stream
| [< >] -> []

let testcount = ref 0
let ngcount = ref 0
let check = ref true

let check_point str =
  if (!ngcount > 0 && !check) then (
    fprintf std_formatter "let's clear stage %s....@." str;
    check := false
  ) else ()

let test_ f (s,e) =
  try
    testcount := !testcount + 1;
    if (f(lex (Stream.of_string s)) = e) then () (*print_string ("ok "^s^"\n")*)
    else (
      ngcount := !ngcount + 1;
      if !check then
        fprintf std_formatter ("error %s@.") s
      else ()
    )
  with
  | _ ->
    ngcount := !ngcount + 1;
    if !check then
      fprintf std_formatter ("parse error %s@.") s
    else ()

let testlex(s, e) = test_ list_of_stream (s, e)
let test(s, e) = test_ parse (s, e)

let _ =
  testlex("10", [ Number 10 ]);
  testlex("+ **", [ Op "+"; Op "*"; Op "*" ]);
  testlex("+++", [ Op "+++" ]);
  testlex("*+*", [ Op "*+*" ]);
  testlex("*&", [ Op "*";Op "&" ]);
  testlex("&*", [ Op "&";Op "*" ]);
  testlex("&&", [ Op "&&" ]);
  testlex("&+*", [ Op "&+*" ]);
  testlex("1 + 2 * 3",
    [ Number 1; Op "+"; Number 2; Op "*"; Number 3 ]);
  check_point "lex";

  test("1",Num 1);
  test("1+2", Bin(Num 1, "add", Num 2));
  test("1 + 2 + 3", Bin(Bin(Num 1, "add", Num 2), "add", Num 3));
  test("1 + 2 * 3", Bin(Num 1, "add", Bin(Num 2, "mul", Num 3)));
  test("(1 + 2) * 3", Bin(Bin(Num 1, "add", Num 2), "mul", Num 3));
  test("1 + 2 / 3", Bin(Num 1, "add", Bin(Num 2, "div", Num 3)));
  test("1 - 2 / 3", Bin(Num 1, "sub", Bin(Num 2, "div", Num 3)));
  check_point "four arithmetic operations";

(* mul *)
  (* simple test *)
  test("1*2", Bin(Num(1),"mul",Num(2))); (* 20. EBin("mul") *)
  test("1/2", Bin(Num(1),"div",Num(2))); (* 21. EBin("div") *)
  test("1%2", Bin(Num(1),"mod",Num(2))); (* 21. EBin("div") *)
  (* associative test *)
  test("1*2*3", Bin(Bin(Num(1),"mul",Num(2)),"mul", Num(3)));
  test("1/2/3", Bin(Bin(Num(1),"div",Num(2)),"div", Num(3)));
  test("1%2%3", Bin(Bin(Num(1),"mod",Num(2)),"mod", Num(3)));

  check_point "mul";

(* add *)
  (* simple test *)
  test("1+2", Bin(Num(1),"add",Num(2))); (* 18. EBin("add") *)
  test("1-2", Bin(Num(1),"sub",Num(2))); (* 19. EBin("sub") *)
  (* associative test *)
  test("1+2+3", Bin(Bin(Num(1),"add",Num(2)),"add", Num(3)));
  test("1-2-3", Bin(Bin(Num(1),"sub",Num(2)),"sub", Num(3)));
  (* mul & add *)
  test("1+2*3", Bin(Num(1),"add",Bin(Num(2),"mul", Num(3)))); 
  test("1-2*3", Bin(Num(1),"sub",Bin(Num(2),"mul", Num(3))));
  test("1-2/3", Bin(Num(1),"sub",Bin(Num(2),"div", Num(3))));

  check_point "add";

(* shift *)
  (* simple test *)
  test("1<<2", Bin(Num(1),"shl",Num(2)));  (* 22. EBin("shl") *)
  test("1>>2", Bin(Num(1),"shr",Num(2)));  (* 23. EBin("shr") *)
  (* associative test *)
  test("1<<2<<3", Bin(Bin(Num(1),"shl",Num(2)),"shl", Num(3)));
  test("1>>2>>3", Bin(Bin(Num(1),"shr",Num(2)),"shr", Num(3)));
  (* add & shift *)
  test("1<<2+3", Bin(Num(1),"shl",Bin(Num(2),"add", Num(3)))); 
  test("1<<2-3", Bin(Num(1),"shl",Bin(Num(2),"sub", Num(3)))); 
  test("1>>2-3", Bin(Num(1),"shr",Bin(Num(2),"sub", Num(3)))); 

  check_point "shift";

(* relation *)
  (* simple test *)
  test("1<2", Bin(Num(1),"lt",Num(2))); (* 24. EBin("lt") *)
  test("1>2", Bin(Num(1),"gt",Num(2))); (* 25. EBin("gt") *)
  test("1<=2", Bin(Num(1),"le",Num(2)));  (* 26. EBin("le") *)
  test("1>=2", Bin(Num(1),"ge",Num(2))); (* 27. EBin("le") *)
  (* associative test *)
  test("1<2<3", Bin(Bin(Num(1),"lt",Num(2)),"lt", Num(3)));
  test("1>2>3", Bin(Bin(Num(1),"gt",Num(2)),"gt", Num(3)));
  test("1<=2<=3", Bin(Bin(Num(1),"le",Num(2)),"le", Num(3)));
  test("1>=2>=3", Bin(Bin(Num(1),"ge",Num(2)),"ge", Num(3)));
  (* shift & relation *)
  test("1<2<<3", Bin(Num(1),"lt",Bin(Num(2),"shl", Num(3)))); 
  test("1<2>>3", Bin(Num(1),"lt",Bin(Num(2),"shr", Num(3)))); 
  test("1>2>>3", Bin(Num(1),"gt",Bin(Num(2),"shr", Num(3)))); 
  test("1<=2>>3", Bin(Num(1),"le",Bin(Num(2),"shr", Num(3)))); 
  test("1>=2>>3", Bin(Num(1),"ge",Bin(Num(2),"shr", Num(3)))); 

  check_point "relation";

(* eq *)
  (* simple test *)
  test("1==2", Bin(Num(1),"eq",Num(2)));  (* 28. EBin("eq") *)
  test("1!=2", Bin(Num(1),"ne",Num(2))); (* 29. EBin("ne") *)
  (* associative test *)
  test("1==2==3", Bin(Bin(Num(1),"eq",Num(2)),"eq", Num(3)));
  test("1!=2!=3", Bin(Bin(Num(1),"ne",Num(2)),"ne", Num(3)));
  (* relation & eq *)
  test("1==2<3", Bin(Num(1),"eq",Bin(Num(2),"lt", Num(3)))); 
  test("1==2>3", Bin(Num(1),"eq",Bin(Num(2),"gt", Num(3)))); 
  test("1==2<=3", Bin(Num(1),"eq",Bin(Num(2),"le", Num(3)))); 
  test("1==2>=3", Bin(Num(1),"eq",Bin(Num(2),"ge", Num(3)))); 
  test("1!=2>=3", Bin(Num(1),"ne",Bin(Num(2),"ge", Num(3)))); 

  check_point "eq";

(* iand *)
  (* simple test *)
  test("1&2", Bin(Num(1),"and",Num(2))); (* 30. EBin("and") *)
  (* associative test *)
  test("1&2&3", Bin(Bin(Num(1),"and",Num(2)),"and", Num(3)));
  (* eq & iand *)
  test("1&2==3", Bin(Num(1),"and",Bin(Num(2),"eq", Num(3)))); 
  test("1&2!=3", Bin(Num(1),"and",Bin(Num(2),"ne", Num(3)))); 

(* xor *)
  (* simple test *)
  test("1^2", Bin(Num(1),"xor",Num(2))); (* 31. EBin("xor") *)
  (* associative test *)
  test("1^2^3", Bin(Bin(Num(1),"xor",Num(2)),"xor", Num(3)));
  (* xor & ior *)
  test("1|2^3", Bin(Num(1),"or",Bin(Num(2),"xor", Num(3)))); 

(* ior *)
  (* simple test *)
  test("1|2", Bin(Num(1),"or",Num(2))); (* 32. EBin("or") *)
  (* associative test *)
  test("1|2|3", Bin(Bin(Num(1),"or",Num(2)),"or", Num(3)));

(* land *)
  (* simple test *)
  test("1&&2", Bin(Num(1),"land",Num(2))); (* 33. EBin(EBin("ne",0) "and" EBin("ne",0)) *)
  (* associative test *)
  test("1&&2&&3", Bin(Bin(Num(1),"land",Num(2)),"land", Num(3)));
  (* ior & land *)
  test("1&&2|3", Bin(Num(1),"land",Bin(Num(2),"or", Num(3)))); 

(* lor *)
  (* simple test *)
  test("1||2", Bin(Num(1),"lor",Num(2))); (* 34. EBin(EBin("ne",0) "or" EBin("ne",0)) *)
  (* associative test *)
  test("1||2||3", Bin(Bin(Num(1),"lor",Num(2)),"lor", Num(3)));
  (* land & lor *)
  test("1||2&&3", Bin(Num(1),"lor",Bin(Num(2),"land", Num(3)))); 

  check_point "and or xor";

(* assign *)
  (* simple test *)
  test("1=2", Bin(Num(1),"=",Num(2))); (* 36. EAssign() *)
  test("1*=2", Bin(Num(1),"*=",Num(2))); (* 37. EAssign(EBin("mul")) *)
  test("1/=2", Bin(Num(1),"/=",Num(2)));(* 38. EAssign(EBin("div")) *)
  test("1%=2", Bin(Num(1),"%=",Num(2)));(* 39. EAssign(EBin("mod")) *)
  test("1+=2", Bin(Num(1),"+=",Num(2)));(* 40. EAssign(EBin("add")) *)
  test("1-=2", Bin(Num(1),"-=",Num(2)));(* 41. EAssign(EBin("sub")) *)
  test("1&=2", Bin(Num(1),"&=",Num(2)));(* 42. EAssign(EBin("iand")) *)
  test("1<<=2", Bin(Num(1),"<<=",Num(2)));(* 43. EAssign(EBin("shl")) *)
  test("1>>=2", Bin(Num(1),">>=",Num(2)));(* 44. EAssign(EBin("shr")) *)
  test("1^=2", Bin(Num(1),"^=",Num(2)));(* 45. EAssign(EBin("xor")) *)
  test("1|=2", Bin(Num(1),"|=",Num(2)));(* 46. EAssign(EBin("ior")) *)
  (* associative test *)
  test("0=1=2", Bin(Num(0),"=",Bin(Num(1),"=",Num(2)))); (* 36. EAssign() *)
  test("0=1*=2", Bin(Num(0),"=",Bin(Num(1),"*=",Num(2)))); (* 37. EAssign(EBin("mul")) *)
  test("0=1/=2", Bin(Num(0),"=",Bin(Num(1),"/=",Num(2))));(* 38. EAssign(EBin("div")) *)
  test("0=1%=2", Bin(Num(0),"=",Bin(Num(1),"%=",Num(2))));(* 39. EAssign(EBin("mod")) *)
  test("0=1+=2", Bin(Num(0),"=",Bin(Num(1),"+=",Num(2))));(* 40. EAssign(EBin("add")) *)
  test("0=1-=2", Bin(Num(0),"=",Bin(Num(1),"-=",Num(2))));(* 41. EAssign(EBin("sub")) *)
  test("0=1&=2", Bin(Num(0),"=",Bin(Num(1),"&=",Num(2))));(* 42. EAssign(EBin("iand")) *)
  test("0=1<<=2", Bin(Num(0),"=",Bin(Num(1),"<<=",Num(2))));(* 43. EAssign(EBin("shl")) *)
  test("0=1>>=2", Bin(Num(0),"=",Bin(Num(1),">>=",Num(2))));(* 44. EAssign(EBin("shr")) *)
  test("0=1^=2", Bin(Num(0),"=",Bin(Num(1),"^=",Num(2))));(* 45. EAssign(EBin("xor")) *)
  test("0=1|=2", Bin(Num(0),"=",Bin(Num(1),"|=",Num(2))));(* 46. EAssign(EBin("ior")) *)

  fprintf std_formatter "test ok:%d ng:%d all:%d@.@?" (!testcount- !ngcount) !ngcount !testcount

operator precedence

今回は、四則演算のパーサを優先順位法を使って書き換えたバージョンを書きました。
テストは前回と同じだけど、Hashtblを使っています。これがさらっとかけるようになれば、LLVMカレイドスコープのパーサも簡単と言えるようになるでしょう。

(*
$ ocamlc -pp camlp4o calc.ml
*)
open Format

type token =
| Number of float
| Op of char

type e =
| Num of float
| Bin of e * string * e

let rec lex = parser
| [<' (' '|'\t'|'\r'|'\n'); stream=lex >] -> stream
| [<' ('0'..'9' as c); stream >] -> 
  lex_num (String.make 1 c) stream
| [<' ('+'|'-'|'*'|'/'|'('|')' as c); stream=lex >] -> 
  [<' Op c; stream >]
| [< >] -> [< >]

and lex_num buf = parser
| [<' ('0'..'9' | '.' as c); stream >] -> 
  lex_num (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Number(float_of_string buf); stream >]

let ps:(char, int) Hashtbl.t = Hashtbl.create 10

let () = Hashtbl.add ps '+' 10
let () = Hashtbl.add ps '-' 10
let () = Hashtbl.add ps '*' 20
let () = Hashtbl.add ps '/' 20

let rec parse_prim = parser
| [<' Number n >] -> Num n
| [<' Op '('; e=parse; ' Op ')' >] -> e

and parse_bin p stream =
  let rec loop lhs =
    match Stream.peek stream with
    | Some(Op c) when Hashtbl.mem ps c ->
      let np = Hashtbl.find ps c in
      if np > p then (
        Stream.junk stream;
        loop (Bin(lhs, String.make 1 c, parse_bin np stream))
      ) else
        lhs
    | _ -> lhs
  in
  loop (parse_prim stream)
and parse stream = parse_bin 0 stream

let rec list_of_stream = parser
| [<' a; stream >] -> a::(list_of_stream stream)
| [< >] -> ([]:token list)

let _ = printf "lex num1.2 %b\n"
  (
    [ Number 1.2 ]
    =
    list_of_stream(lex (Stream.of_string "1.2"))
  )

let _ = printf "lex ops %b\n"
  (
    [ Op '+'; Op '*' ]
    =
    list_of_stream(lex (Stream.of_string "+*"))
  )

let _ = printf "lex ops %b\n"
  (
    [ Number 1.0; Op '+'; Number 2.0; Op '*'; Number 3.0 ]
    =
    list_of_stream(lex (Stream.of_string "1 + 2 * 3"))
  )

let _ = printf "parse 1 %b\n"
  (
    Num 1.0
    =
    parse (lex (Stream.of_string "1"))
  )

let _ = printf "parse 1 + 2 %b\n"
  (
    Bin(Num 1.0, "+", Num 2.0)
    =
    parse (lex (Stream.of_string "1+2"))
  )

let _ = printf "parse 1 + 2 + 3 %b\n"
  (
    Bin(Bin(Num 1.0, "+", Num 2.0), "+", Num 3.0)
    =
    parse (lex (Stream.of_string "1 + 2 + 3"))
  )

let _ = printf "parse 1 + 2 * 3 %b\n"
  (
    Bin(Num 1.0, "+", Bin(Num 2.0, "*", Num 3.0))
    =
    parse (lex (Stream.of_string "1 + 2 * 3"))
  )

let _ = printf "parse (1 + 2) * 3 %b\n"
  (
    Bin(Bin(Num 1.0, "+", Num 2.0), "*", Num 3.0)
    =
    parse (lex (Stream.of_string "(1 + 2) * 3"))
  )

let _ = printf "parse 1 + 2 / 3 %b\n"
  (
    Bin(Num 1.0, "+", Bin(Num 2.0, "/", Num 3.0))
    =
    parse (lex (Stream.of_string "1 + 2 / 3"))
  )

let _ = printf "parse 1 - 2 / 3 %b\n"
  (
    Bin(Num 1.0, "-", Bin(Num 2.0, "/", Num 3.0))
    =
    parse (lex (Stream.of_string "1 - 2 / 3"))
  )

結果は以下の通り

lex num1.2 true
lex ops true
lex ops true
parse 1 true
parse 1 + 2 true
parse 1 + 2 + 3 true
parse 1 + 2 * 3 true
parse (1 + 2) * 3 true
parse 1 + 2 / 3 true
parse 1 - 2 / 3 true

この実装は、文字列の結合が遅いので、出来れば
Bufferを使うとより高速になります。

test first

四則演算のパーサを作る際に、テストファーストにするとよいはずです。という事で、テストだけ先に書きます。

(*
$ ocamlc -pp camlp4o calc.ml
*)
open Format

type token =
| Number of float
| Op of char

type e =
| Num of float
| Bin of e * string * e

let lex = parser
| [< >] -> [<' Number 1.2 >]

let parse = parser
| [< >] -> Num 1.0

let rec list_of_stream = parser
| [<' a; stream >] -> a::(list_of_stream stream)
| [< >] -> ([]:token list)

let _ = printf "lex num1.2 %b\n"
  (
    [ Number 1.2 ]
    =
    list_of_stream(lex (Stream.of_string "1.2"))
  )

let _ = printf "lex ops %b\n"
  (
    [ Op '+'; Op '*' ]
    =
    list_of_stream(lex (Stream.of_string "+*"))
  )

let _ = printf "lex ops %b\n"
  (
    [ Number 1.0; Op '+'; Number 2.0; Op '*'; Number 3.0 ]
    =
    list_of_stream(lex (Stream.of_string "1 + 2 * 3"))
  )

let _ = printf "parse 1 %b\n"
  (
    Num 1.0
    =
    parse (lex (Stream.of_string "1"))
  )

let _ = printf "parse 1 + 2 %b\n"
  (
    Bin(Num 1.0, "+", Num 2.0)
    =
    parse (lex (Stream.of_string "1+2"))
  )

let _ = printf "parse 1 + 2 + 3 %b\n"
  (
    Bin(Bin(Num 1.0, "+", Num 2.0), "+", Num 3.0)
    =
    parse (lex (Stream.of_string "1 + 2 + 3"))
  )

let _ = printf "parse 1 + 2 * 3 %b\n"
  (
    Bin(Num 1.0, "+", Bin(Num 2.0, "*", Num 3.0))
    =
    parse (lex (Stream.of_string "1 + 2 * 3"))
  )

let _ = printf "parse (1 + 2) * 3 %b\n"
  (
    Bin(Bin(Num 1.0, "+", Num 2.0), "*", Num 3.0)
    =
    parse (lex (Stream.of_string "(1 + 2) * 3"))
  )

let _ = printf "parse 1 + 2 / 3 %b\n"
  (
    Bin(Num 1.0, "+", Bin(Num 2.0, "/", Num 3.0))
    =
    parse (lex (Stream.of_string "1 + 2 / 3"))
  )

let _ = printf "parse 1 - 2 / 3 %b\n"
  (
    Bin(Num 1.0, "-", Bin(Num 2.0, "/", Num 3.0))
    =
    parse (lex (Stream.of_string "1 - 2 / 3"))
  )

上記のプログラムをcalc.mlとして保存します。

$ ocamlc -pp camlp4o calc.ml -o calc
$ ./calc

実行すると以下のような結果が得られます。

lex num1 true
lex ops false
lex ops false
parse 1 true
parse 1 + 2 false
parse 1 + 2 + 3 false
parse 1 + 2 * 3 false
parse (1 + 2) * 3 false
parse 1 + 2 / 3 false
parse 1 - 2 / 3 false

後は上からすべてtrueになるように作ればOKです。
実装は以下のように書けます。

(*
$ ocamlc -pp camlp4o calc.ml
*)
open Format

type token =
| Number of float
| Op of char

type e =
| Num of float
| Bin of e * string * e

let rec lex = parser
| [<' (' '|'\t'|'\r'|'\n'); stream=lex >] -> stream
| [<' ('0'..'9' as c); stream >] -> 
  lex_num (String.make 1 c) stream
| [<' ('+'|'-'|'*'|'/'|'('|')' as c); stream=lex >] -> 
  [<' Op c; stream >]
| [< >] -> [< >]

and lex_num buf = parser
| [<' ('0'..'9' | '.' as c); stream >] -> 
  lex_num (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Number(float_of_string buf); stream >]

let rec parse_prim = parser
| [<' Number n >] -> Num n
| [<' Op '('; e=parse; ' Op ')' >] -> e

and parse_mul stream =
  let rec loop lhs =
    match Stream.peek stream with
    | Some(Op ('*' as c))
    | Some(Op ('/' as c)) ->
      Stream.junk stream;
      loop (Bin(lhs, String.make 1 c, parse_prim stream))
    | _ -> lhs
  in
  loop (parse_prim stream)
and parse stream =
  let rec loop lhs =
    match Stream.peek stream with
    | Some(Op ('+' as c))
    | Some(Op ('-' as c)) ->
      Stream.junk stream;
      loop (Bin(lhs, String.make 1 c, parse_mul stream))
    | _ -> lhs
  in
  loop (parse_mul stream)

let rec list_of_stream = parser
| [<' a; stream >] -> a::(list_of_stream stream)
| [< >] -> ([]:token list)

let _ = printf "lex num1.2 %b\n"
  (
    [ Number 1.2 ]
    =
    list_of_stream(lex (Stream.of_string "1.2"))
  )

let _ = printf "lex ops %b\n"
  (
    [ Op '+'; Op '*' ]
    =
    list_of_stream(lex (Stream.of_string "+*"))
  )

let _ = printf "lex ops %b\n"
  (
    [ Number 1.0; Op '+'; Number 2.0; Op '*'; Number 3.0 ]
    =
    list_of_stream(lex (Stream.of_string "1 + 2 * 3"))
  )

let _ = printf "parse 1 %b\n"
  (
    Num 1.0
    =
    parse (lex (Stream.of_string "1"))
  )

let _ = printf "parse 1 + 2 %b\n"
  (
    Bin(Num 1.0, "+", Num 2.0)
    =
    parse (lex (Stream.of_string "1+2"))
  )

let _ = printf "parse 1 + 2 + 3 %b\n"
  (
    Bin(Bin(Num 1.0, "+", Num 2.0), "+", Num 3.0)
    =
    parse (lex (Stream.of_string "1 + 2 + 3"))
  )

let _ = printf "parse 1 + 2 * 3 %b\n"
  (
    Bin(Num 1.0, "+", Bin(Num 2.0, "*", Num 3.0))
    =
    parse (lex (Stream.of_string "1 + 2 * 3"))
  )

let _ = printf "parse (1 + 2) * 3 %b\n"
  (
    Bin(Bin(Num 1.0, "+", Num 2.0), "*", Num 3.0)
    =
    parse (lex (Stream.of_string "(1 + 2) * 3"))
  )

let _ = printf "parse 1 + 2 / 3 %b\n"
  (
    Bin(Num 1.0, "+", Bin(Num 2.0, "/", Num 3.0))
    =
    parse (lex (Stream.of_string "1 + 2 / 3"))
  )

let _ = printf "parse 1 - 2 / 3 %b\n"
  (
    Bin(Num 1.0, "-", Bin(Num 2.0, "/", Num 3.0))
    =
    parse (lex (Stream.of_string "1 - 2 / 3"))
  )

結果は以下の通り

lex num1.2 true
lex ops true
lex ops true
parse 1 true
parse 1 + 2 true
parse 1 + 2 + 3 true
parse 1 + 2 * 3 true
parse (1 + 2) * 3 true
parse 1 + 2 / 3 true
parse 1 - 2 / 3 true

この実装は、文字列の結合が遅いので、出来れば
Bufferを使うとより高速になります。