様々な演算子

演算子順位法を使って、C言語の2項演算子をパース出来るようにしました。
&と*はポインタ演算に使うので特殊に扱いますが、基本的に記号の連続は1つの演算子として字句解析するようにしました。

(*
$ ocamlc -pp camlp4o calc.ml
*)
open Format

type token =
| Number of int
| Op of string

type e =
| Num of int
| Bin of e * string * e

let rec lex = parser
| [<' (' '|'\t'|'\r'|'\n'); stream=lex >] -> stream
| [<' ('0'..'9' as c); stream >] -> 
  lex_num (String.make 1 c) stream
| [<' ('('|')'|'['|']'|'{'|'}' as c); stream=lex >] -> 
  [<' Op (String.make 1 c); stream >]
| [<' ('*' as c); stream >] ->
  lex_op2 (String.make 1 c) stream
| [<' ('&' as c); stream >] ->
  lex_op3 (String.make 1 c) stream
| [<' ('/'|'%'|'+'|'-'|'^'|'<'|'>'|'='|'!'|'|' as c); stream >] ->
  lex_op (String.make 1 c) stream
| [< >] -> [< >]
and lex_op buf = parser
| [<' ('/'|'%'|'+'|'-'|'^'|'<'|'>'|'='|'!'|'|'|'*'|'&' as c); stream >] ->
  lex_op (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Op(buf); stream >]
and lex_op2 buf = parser
| [<' ('/'|'%'|'+'|'-'|'^'|'<'|'>'|'='|'!'|'|' as c); stream >] ->
  lex_op (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Op(buf); stream >]
and lex_op3 buf = parser
| [<' ('/'|'%'|'+'|'-'|'^'|'<'|'>'|'='|'!'|'|'|'&' as c); stream >] ->
  lex_op (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Op(buf); stream >]

and lex_num buf = parser
| [<' ('0'..'9' as c); stream >] -> 
  lex_num (buf ^ (String.make 1 c)) stream
| [< stream=lex >] ->
  [<' Number(int_of_string buf); stream >]

let ps:(string,(int*string*char)) Hashtbl.t = Hashtbl.create 10
let _ =
  Hashtbl.add ps "*" (11,"mul",'l');
  Hashtbl.add ps "/" (11,"div",'l');
  Hashtbl.add ps "%" (11,"mod",'l');
  Hashtbl.add ps "+" (10,"add",'l');
  Hashtbl.add ps "-" (10,"sub",'l');
  Hashtbl.add ps "<<" (9,"shl",'l');
  Hashtbl.add ps ">>" (9,"shr",'l');
  Hashtbl.add ps "<" (8,"lt",'l');
  Hashtbl.add ps ">" (8,"gt",'l');
  Hashtbl.add ps "<=" (8,"le",'l');
  Hashtbl.add ps ">=" (8,"ge",'l');
  Hashtbl.add ps "==" (7,"eq",'l');
  Hashtbl.add ps "!=" (7,"ne",'l');
  Hashtbl.add ps "&" (6,"and",'l');
  Hashtbl.add ps "^" (5,"xor",'l');
  Hashtbl.add ps "|" (4,"or",'l');
  Hashtbl.add ps "&&" (3,"land",'l');
  Hashtbl.add ps "||" (2,"lor",'l');
  Hashtbl.add ps "=" (1,"=",'r');
  Hashtbl.add ps "*=" (1,"*=",'r');
  Hashtbl.add ps "/=" (1,"/=",'r');
  Hashtbl.add ps "%=" (1,"%=",'r');
  Hashtbl.add ps "+=" (1,"+=",'r');
  Hashtbl.add ps "-=" (1,"-=",'r');
  Hashtbl.add ps "<<=" (1,"<<=",'r');
  Hashtbl.add ps ">>=" (1,">>=",'r');
  Hashtbl.add ps "&=" (1,"&=",'r');
  Hashtbl.add ps "^=" (1,"^=",'r');
  Hashtbl.add ps "|=" (1,"|=",'r');
  ()

let rec parse stream =
  let rec parse_prim = parser
    | [<' Number n >] -> Num n
    | [<' Op "("; e=parse; ' Op ")" >] -> e
  in
  let rec parse_bin p =
    let rec loop lhs =
      match Stream.peek stream with
      | Some(Op c) when Hashtbl.mem ps c ->
        (match Hashtbl.find ps c with
        | (np,op,'l') ->
          if p < np then (
            Stream.junk stream;
            loop (Bin(lhs, op, parse_bin np))
          ) else lhs
        | (np,op,'r') ->
          if p <= np then (
            Stream.junk stream;
            loop (Bin(lhs, op, parse_bin np))
          ) else lhs
        | _ -> lhs
        )
      | _ -> lhs
    in loop (parse_prim stream)
  in parse_bin 0


let rec list_of_stream = parser
| [<' a; stream>] -> a::list_of_stream stream
| [< >] -> []

let testcount = ref 0
let ngcount = ref 0
let check = ref true

let check_point str =
  if (!ngcount > 0 && !check) then (
    fprintf std_formatter "let's clear stage %s....@." str;
    check := false
  ) else ()

let test_ f (s,e) =
  try
    testcount := !testcount + 1;
    if (f(lex (Stream.of_string s)) = e) then () (*print_string ("ok "^s^"\n")*)
    else (
      ngcount := !ngcount + 1;
      if !check then
        fprintf std_formatter ("error %s@.") s
      else ()
    )
  with
  | _ ->
    ngcount := !ngcount + 1;
    if !check then
      fprintf std_formatter ("parse error %s@.") s
    else ()

let testlex(s, e) = test_ list_of_stream (s, e)
let test(s, e) = test_ parse (s, e)

let _ =
  testlex("10", [ Number 10 ]);
  testlex("+ **", [ Op "+"; Op "*"; Op "*" ]);
  testlex("+++", [ Op "+++" ]);
  testlex("*+*", [ Op "*+*" ]);
  testlex("*&", [ Op "*";Op "&" ]);
  testlex("&*", [ Op "&";Op "*" ]);
  testlex("&&", [ Op "&&" ]);
  testlex("&+*", [ Op "&+*" ]);
  testlex("1 + 2 * 3",
    [ Number 1; Op "+"; Number 2; Op "*"; Number 3 ]);
  check_point "lex";

  test("1",Num 1);
  test("1+2", Bin(Num 1, "add", Num 2));
  test("1 + 2 + 3", Bin(Bin(Num 1, "add", Num 2), "add", Num 3));
  test("1 + 2 * 3", Bin(Num 1, "add", Bin(Num 2, "mul", Num 3)));
  test("(1 + 2) * 3", Bin(Bin(Num 1, "add", Num 2), "mul", Num 3));
  test("1 + 2 / 3", Bin(Num 1, "add", Bin(Num 2, "div", Num 3)));
  test("1 - 2 / 3", Bin(Num 1, "sub", Bin(Num 2, "div", Num 3)));
  check_point "four arithmetic operations";

(* mul *)
  (* simple test *)
  test("1*2", Bin(Num(1),"mul",Num(2))); (* 20. EBin("mul") *)
  test("1/2", Bin(Num(1),"div",Num(2))); (* 21. EBin("div") *)
  test("1%2", Bin(Num(1),"mod",Num(2))); (* 21. EBin("div") *)
  (* associative test *)
  test("1*2*3", Bin(Bin(Num(1),"mul",Num(2)),"mul", Num(3)));
  test("1/2/3", Bin(Bin(Num(1),"div",Num(2)),"div", Num(3)));
  test("1%2%3", Bin(Bin(Num(1),"mod",Num(2)),"mod", Num(3)));

  check_point "mul";

(* add *)
  (* simple test *)
  test("1+2", Bin(Num(1),"add",Num(2))); (* 18. EBin("add") *)
  test("1-2", Bin(Num(1),"sub",Num(2))); (* 19. EBin("sub") *)
  (* associative test *)
  test("1+2+3", Bin(Bin(Num(1),"add",Num(2)),"add", Num(3)));
  test("1-2-3", Bin(Bin(Num(1),"sub",Num(2)),"sub", Num(3)));
  (* mul & add *)
  test("1+2*3", Bin(Num(1),"add",Bin(Num(2),"mul", Num(3)))); 
  test("1-2*3", Bin(Num(1),"sub",Bin(Num(2),"mul", Num(3))));
  test("1-2/3", Bin(Num(1),"sub",Bin(Num(2),"div", Num(3))));

  check_point "add";

(* shift *)
  (* simple test *)
  test("1<<2", Bin(Num(1),"shl",Num(2)));  (* 22. EBin("shl") *)
  test("1>>2", Bin(Num(1),"shr",Num(2)));  (* 23. EBin("shr") *)
  (* associative test *)
  test("1<<2<<3", Bin(Bin(Num(1),"shl",Num(2)),"shl", Num(3)));
  test("1>>2>>3", Bin(Bin(Num(1),"shr",Num(2)),"shr", Num(3)));
  (* add & shift *)
  test("1<<2+3", Bin(Num(1),"shl",Bin(Num(2),"add", Num(3)))); 
  test("1<<2-3", Bin(Num(1),"shl",Bin(Num(2),"sub", Num(3)))); 
  test("1>>2-3", Bin(Num(1),"shr",Bin(Num(2),"sub", Num(3)))); 

  check_point "shift";

(* relation *)
  (* simple test *)
  test("1<2", Bin(Num(1),"lt",Num(2))); (* 24. EBin("lt") *)
  test("1>2", Bin(Num(1),"gt",Num(2))); (* 25. EBin("gt") *)
  test("1<=2", Bin(Num(1),"le",Num(2)));  (* 26. EBin("le") *)
  test("1>=2", Bin(Num(1),"ge",Num(2))); (* 27. EBin("le") *)
  (* associative test *)
  test("1<2<3", Bin(Bin(Num(1),"lt",Num(2)),"lt", Num(3)));
  test("1>2>3", Bin(Bin(Num(1),"gt",Num(2)),"gt", Num(3)));
  test("1<=2<=3", Bin(Bin(Num(1),"le",Num(2)),"le", Num(3)));
  test("1>=2>=3", Bin(Bin(Num(1),"ge",Num(2)),"ge", Num(3)));
  (* shift & relation *)
  test("1<2<<3", Bin(Num(1),"lt",Bin(Num(2),"shl", Num(3)))); 
  test("1<2>>3", Bin(Num(1),"lt",Bin(Num(2),"shr", Num(3)))); 
  test("1>2>>3", Bin(Num(1),"gt",Bin(Num(2),"shr", Num(3)))); 
  test("1<=2>>3", Bin(Num(1),"le",Bin(Num(2),"shr", Num(3)))); 
  test("1>=2>>3", Bin(Num(1),"ge",Bin(Num(2),"shr", Num(3)))); 

  check_point "relation";

(* eq *)
  (* simple test *)
  test("1==2", Bin(Num(1),"eq",Num(2)));  (* 28. EBin("eq") *)
  test("1!=2", Bin(Num(1),"ne",Num(2))); (* 29. EBin("ne") *)
  (* associative test *)
  test("1==2==3", Bin(Bin(Num(1),"eq",Num(2)),"eq", Num(3)));
  test("1!=2!=3", Bin(Bin(Num(1),"ne",Num(2)),"ne", Num(3)));
  (* relation & eq *)
  test("1==2<3", Bin(Num(1),"eq",Bin(Num(2),"lt", Num(3)))); 
  test("1==2>3", Bin(Num(1),"eq",Bin(Num(2),"gt", Num(3)))); 
  test("1==2<=3", Bin(Num(1),"eq",Bin(Num(2),"le", Num(3)))); 
  test("1==2>=3", Bin(Num(1),"eq",Bin(Num(2),"ge", Num(3)))); 
  test("1!=2>=3", Bin(Num(1),"ne",Bin(Num(2),"ge", Num(3)))); 

  check_point "eq";

(* iand *)
  (* simple test *)
  test("1&2", Bin(Num(1),"and",Num(2))); (* 30. EBin("and") *)
  (* associative test *)
  test("1&2&3", Bin(Bin(Num(1),"and",Num(2)),"and", Num(3)));
  (* eq & iand *)
  test("1&2==3", Bin(Num(1),"and",Bin(Num(2),"eq", Num(3)))); 
  test("1&2!=3", Bin(Num(1),"and",Bin(Num(2),"ne", Num(3)))); 

(* xor *)
  (* simple test *)
  test("1^2", Bin(Num(1),"xor",Num(2))); (* 31. EBin("xor") *)
  (* associative test *)
  test("1^2^3", Bin(Bin(Num(1),"xor",Num(2)),"xor", Num(3)));
  (* xor & ior *)
  test("1|2^3", Bin(Num(1),"or",Bin(Num(2),"xor", Num(3)))); 

(* ior *)
  (* simple test *)
  test("1|2", Bin(Num(1),"or",Num(2))); (* 32. EBin("or") *)
  (* associative test *)
  test("1|2|3", Bin(Bin(Num(1),"or",Num(2)),"or", Num(3)));

(* land *)
  (* simple test *)
  test("1&&2", Bin(Num(1),"land",Num(2))); (* 33. EBin(EBin("ne",0) "and" EBin("ne",0)) *)
  (* associative test *)
  test("1&&2&&3", Bin(Bin(Num(1),"land",Num(2)),"land", Num(3)));
  (* ior & land *)
  test("1&&2|3", Bin(Num(1),"land",Bin(Num(2),"or", Num(3)))); 

(* lor *)
  (* simple test *)
  test("1||2", Bin(Num(1),"lor",Num(2))); (* 34. EBin(EBin("ne",0) "or" EBin("ne",0)) *)
  (* associative test *)
  test("1||2||3", Bin(Bin(Num(1),"lor",Num(2)),"lor", Num(3)));
  (* land & lor *)
  test("1||2&&3", Bin(Num(1),"lor",Bin(Num(2),"land", Num(3)))); 

  check_point "and or xor";

(* assign *)
  (* simple test *)
  test("1=2", Bin(Num(1),"=",Num(2))); (* 36. EAssign() *)
  test("1*=2", Bin(Num(1),"*=",Num(2))); (* 37. EAssign(EBin("mul")) *)
  test("1/=2", Bin(Num(1),"/=",Num(2)));(* 38. EAssign(EBin("div")) *)
  test("1%=2", Bin(Num(1),"%=",Num(2)));(* 39. EAssign(EBin("mod")) *)
  test("1+=2", Bin(Num(1),"+=",Num(2)));(* 40. EAssign(EBin("add")) *)
  test("1-=2", Bin(Num(1),"-=",Num(2)));(* 41. EAssign(EBin("sub")) *)
  test("1&=2", Bin(Num(1),"&=",Num(2)));(* 42. EAssign(EBin("iand")) *)
  test("1<<=2", Bin(Num(1),"<<=",Num(2)));(* 43. EAssign(EBin("shl")) *)
  test("1>>=2", Bin(Num(1),">>=",Num(2)));(* 44. EAssign(EBin("shr")) *)
  test("1^=2", Bin(Num(1),"^=",Num(2)));(* 45. EAssign(EBin("xor")) *)
  test("1|=2", Bin(Num(1),"|=",Num(2)));(* 46. EAssign(EBin("ior")) *)
  (* associative test *)
  test("0=1=2", Bin(Num(0),"=",Bin(Num(1),"=",Num(2)))); (* 36. EAssign() *)
  test("0=1*=2", Bin(Num(0),"=",Bin(Num(1),"*=",Num(2)))); (* 37. EAssign(EBin("mul")) *)
  test("0=1/=2", Bin(Num(0),"=",Bin(Num(1),"/=",Num(2))));(* 38. EAssign(EBin("div")) *)
  test("0=1%=2", Bin(Num(0),"=",Bin(Num(1),"%=",Num(2))));(* 39. EAssign(EBin("mod")) *)
  test("0=1+=2", Bin(Num(0),"=",Bin(Num(1),"+=",Num(2))));(* 40. EAssign(EBin("add")) *)
  test("0=1-=2", Bin(Num(0),"=",Bin(Num(1),"-=",Num(2))));(* 41. EAssign(EBin("sub")) *)
  test("0=1&=2", Bin(Num(0),"=",Bin(Num(1),"&=",Num(2))));(* 42. EAssign(EBin("iand")) *)
  test("0=1<<=2", Bin(Num(0),"=",Bin(Num(1),"<<=",Num(2))));(* 43. EAssign(EBin("shl")) *)
  test("0=1>>=2", Bin(Num(0),"=",Bin(Num(1),">>=",Num(2))));(* 44. EAssign(EBin("shr")) *)
  test("0=1^=2", Bin(Num(0),"=",Bin(Num(1),"^=",Num(2))));(* 45. EAssign(EBin("xor")) *)
  test("0=1|=2", Bin(Num(0),"=",Bin(Num(1),"|=",Num(2))));(* 46. EAssign(EBin("ior")) *)

  fprintf std_formatter "test ok:%d ng:%d all:%d@.@?" (!testcount- !ngcount) !ngcount !testcount