変換イメージ

コンパイラを作る上で大切なのがどんなデータがどう変換されていくかというイメージです。
それがはっきりしていれば、作るのはずっと楽なはず。
そして、それだけを書き出してみるのはいい練習になると思います。
ということで、書いてみました。

ソースコードは一度、構文木にされて、構文木を抽象構文木に変換する。
定数を全てメモリ上においてしまって、
ツリー状になった式を展開する。
変数をメモリに配置して、
アセンブラの文字列に変換してコンパイルは終了する。

    // Example input program used by every stage below: main assigns the result
    // of add(1,2,3) to a and prints it; add returns the sum of its three
    // parameters.
    src = """
def main() { a = add(1,2,3) printInt(a) }
def add(a,b,c) return(a + b + c)
"""
    // Syntax tree for src: nested tuples that still carry every token
    // ("def", parentheses, braces, commas, operators). main's empty parameter
    // list appears as "void". "@" sits between the two statements of main and
    // between the two definitions (presumably an implicit sequencing
    // operator -- confirm against the parser).
    st = (
      ("def","main","(","void",")",
        ("{",
          ("a","=",("add","(",(1,",",(2,",",3)),")")),"@",
          ("printInt","(","a",")"),
        "}")
      ),
      "@",
      ("def","add","(",("a",",",("b",",","c")),")",
        ("return","(",(("a","+","b"),"+","c"),")")
      )
    )

    // Abstract syntax tree: one (name, parameter list, body list) triple per
    // function, with punctuation tokens dropped.
    //   ("mov", value, target)   -- assignment, value first and target last
    //   ("call", name, args)     -- function call
    //   ("ret", expr)            -- return
    //   ("add", lhs, rhs)        -- the "+" operator; a + b + c nests to the left
    asts = List(
      ("main",List(),List(
        ("mov",("call","add",List(1,2,3)),"a"),
        ("call", "printInt", List("a"))
      )),
      ("add",List("a","b","c"),List(
        ("ret", ("add",("add","a","b"),"c"))
      ))
    )
    // Stage that moves constants into named slots: the literals 1, 2, 3 are
    // first stored into v_1..v_3 and the call to add receives those names
    // instead of the literals. Function and variable names from the source
    // now carry a leading underscore (_main, _add, _printInt, _a, _b, _c).
    // Expressions are still nested at this point.
    setmem = List(
      ("_main",List(),List(
        ("mov",1,"v_1"),
        ("mov",2,"v_2"),
        ("mov",3,"v_3"),
        ("mov",("call","_add",List("v_1","v_2","v_3")),"_a"),
        ("call", "_printInt", List("_a"))
      )),
      ("_add",List("_a","_b","_c"),List(
        ("ret", ("add",("add","_a","_b"),"_c"))
      ))
    )
    // Stage that flattens nested expressions into a linear instruction list.
    // Every intermediate result gets its own name, written as the last
    // element of the tuple:
    //   ("call", name, args, result)  -- the result of _add lands in v_4
    //   ("add", lhs, rhs, result)     -- v_5 = _a + _b, v_6 = v_5 + _c
    // After this stage no tuple contains another instruction tuple.
    expand = List(
      ("_main",List(),List(
        ("mov",1,"v_1"),
        ("mov",2,"v_2"),
        ("mov",3,"v_3"),
        ("call", "_add", List("v_1","v_2","v_3"), "v_4"),
        ("mov","v_4","_a"),
        ("call", "_printInt", List("_a"))
      )),
      ("_add",List("_a","_b","_c"),List(
        ("add","_a","_b","v_5"),
        ("add","v_5","_c","v_6"),
        ("ret", "v_6")
      ))
    )
    // Stage that assigns a stack slot to every name: each variable becomes a
    // 4-byte %rbp-relative address and each constant becomes a $-immediate.
    // Tuples keep the (op, source..., destination) order of the earlier
    // stages, so the frame reservation is written ("subq", "$32", "%rsp").
    //   _main: v_1..v_4 -> -4..-16(%rbp), _a -> -20(%rbp)
    //   _add:  _a, _b, _c -> -12, -16, -20(%rbp); v_5 -> -4(%rbp), v_6 -> -8(%rbp)
    // Each function reserves 32 bytes of stack before its first instruction.
    memAlloc = List(
      ("_main",List(),List(
        ("subq","$32","%rsp"),
        ("mov","$1","-4(%rbp)"),
        ("mov","$2","-8(%rbp)"),
        ("mov","$3","-12(%rbp)"),
        ("call", "_add", List("-4(%rbp)","-8(%rbp)","-12(%rbp)"), "-16(%rbp)"),
        ("mov","-16(%rbp)","-20(%rbp)"),
        ("call", "_printInt", List("-20(%rbp)"))
      )),
      ("_add",List("-12(%rbp)","-16(%rbp)","-20(%rbp)"),List(
        ("subq","$32","%rsp"),
        ("add","-12(%rbp)","-16(%rbp)","-4(%rbp)"),
        ("add","-4(%rbp)","-20(%rbp)","-8(%rbp)"),
        ("ret", "-8(%rbp)")
      ))
    )

    // Final stage: the assembly text (x86-64, AT&T/GAS syntax, "op src, dst").
    // Each function opens a frame (push %rbp / movq %rsp,%rbp), reserves 32
    // bytes with "subq $32, %rsp" and tears the frame down with leave/ret.
    // Integer arguments are passed in %edi, %esi, %edx in that order (System V
    // AMD64), and the return value comes back in %eax. x86 has no
    // memory-to-memory mov, so a stack-to-stack copy is routed through %eax.
    // The single argument of _printInt is passed in %edi, the same way _add
    // receives its first argument.
    emit = """
.globl _main
_main:
        push %rbp
        movq %rsp,%rbp
        subq $32, %rsp
        movl $1,-4(%rbp)
        movl $2,-8(%rbp)
        movl $3,-12(%rbp)
        movl -4(%rbp), %edi
        movl -8(%rbp), %esi
        movl -12(%rbp), %edx
        call _add
        movl %eax, -16(%rbp)

        movl -16(%rbp), %eax
        movl %eax, -20(%rbp)
        movl -20(%rbp), %edi
        call _printInt
        leave
        ret

.globl _add
_add:
        push %rbp
        movq %rsp,%rbp
        subq $32, %rsp
        movl %edi, -12(%rbp)
        movl %esi, -16(%rbp)
        movl %edx, -20(%rbp)
        movl -12(%rbp), %eax
        addl -16(%rbp), %eax
        movl %eax, -4(%rbp)
        addl -20(%rbp), %eax
        movl %eax, -8(%rbp)
        movl -8(%rbp), %eax
        leave
        ret
"""