ドカタしながらCOMET II用アセンブラを30日で作る日記(3日目)
今回はいんちきトークナイザーというかレキサーを作って、大分それっぽくなってきました。
いちお、
LD GR0,GR1
LD GR0,1
LD GR0,256,GR2
等がアセンブルできます。
次は、マクロ関連を作っていきます。
そのあと、自作lisp言語パーサを作って、それをかませて、lispでマクロ書いてうんぬんできるようにしたいなと
思っておりますが、いつになるやらわかりましぇん。
以下ソースです。
import std.file;
import std.string;

/**
 * One lexical token.
 *
 * `type` is one of the token-kind constants declared in the anonymous enum
 * below. `string` and `data` share storage (union): `string` is meaningful
 * for ID and STRING tokens, `data` for NUMBER tokens. Reading the other
 * member yields garbage, so always check `type` first.
 */
class Token {
    union {
        char[] string;
        int data;
    }
    int type;

    this() {}
    this(int t) { type = t; }
    this(int t, int n) { type = t; data = n; }
    this(int t, char[] s) { type = t; string = s; }

    /**
     * Returns the source-like text of this token.
     * Kinds that carry no text (OPERAND, COMMENT, unknown) give "".
     */
    char[] toString() {
        switch (type) {
        case SPACE:  return " ";
        case CAMMA:  return ",";
        case NUMBER: return std.string.toString(data);
        case STRING: return string;
        case ID:     return string;
        case LN:     return "\n";
        case MINUS:  return "-";
        default:     return "";
        }
    }
}

/**
 * Token kinds.
 */
enum {
    SPACE,
    OPERAND,
    NUMBER,
    CAMMA,
    LN,
    COMMENT,
    STRING,
    MINUS,
    ID,
}

/**
 * Registers every character in `cs` as starting a token of kind `type`.
 * Returns the updated table (the table is also modified in place).
 */
int[char] typeSet(inout int[char] types, int type, char[] cs ...) {
    foreach (char c; cs) {
        types[c] = type;
    }
    return types;
}

/** True for '0'..'9'. */
bool isDigitChar(char ch) {
    return ch >= '0' && ch <= '9';
}

/** True for characters that may continue an identifier: letters and digits. */
bool isIdentChar(char ch) {
    return (ch >= 'a' && ch <= 'z')
        || (ch >= 'A' && ch <= 'Z')
        || isDigitChar(ch);
}

/**
 * Reads a run of decimal digits from `s` starting at `i`, advancing `i`
 * past them. Returns the value (0 if there is no digit at `i`).
 */
int scanNumber(char[] s, inout size_t i) {
    int value = 0;
    while (i < s.length && isDigitChar(s[i])) {
        value = value * 10 + (s[i] - '0');
        i++;
    }
    return value;
}

/**
 * Splits source text into tokens.
 *
 * - Runs of spaces/tabs collapse into one SPACE token.
 * - '#' starts a comment that runs to the end of the line; the comment is
 *   dropped but the newline that ends it is still reported as LN.
 * - '-' directly followed by digits is a negative NUMBER; a lone '-' is a
 *   MINUS token.
 * - '"' starts a STRING token that ends at the closing '"' or, when
 *   unterminated, at the end of the line. The quotes are not stored.
 * - Any other character starts an ID that continues over letters and digits.
 *
 * Every branch consumes at least one character, so the loop always ends.
 */
Token[] lex(char[] str) {
    Token[] tokens;
    str = str.replace("\r\n", "\n").replace("\r", "\n");

    int[char] types;
    typeSet(types, SPACE, ' ', '\t');
    typeSet(types, LN, '\n');
    typeSet(types, COMMENT, '#');
    typeSet(types, STRING, '"');
    typeSet(types, CAMMA, ',');
    typeSet(types, NUMBER, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9');
    typeSet(types, MINUS, '-');

    size_t i = 0;
    while (i < str.length) {
        int type = (str[i] in types) ? types[str[i]] : -1;
        switch (type) {
        case SPACE:
            while (i < str.length && (str[i] in types) && types[str[i]] == SPACE) {
                i++;
            }
            tokens ~= new Token(SPACE);
            break;

        case COMMENT:
            // Stop ON the newline so the LN case still emits the line break.
            while (i < str.length && str[i] != '\n') {
                i++;
            }
            break;

        case LN:
            i++;
            tokens ~= new Token(LN);
            break;

        case CAMMA:
            i++;
            tokens ~= new Token(CAMMA);
            break;

        case STRING:
            {
                i++; // opening quote
                size_t start = i;
                while (i < str.length && str[i] != '"' && str[i] != '\n') {
                    i++;
                }
                tokens ~= new Token(STRING, str[start .. i].dup);
                if (i < str.length && str[i] == '"') {
                    i++; // closing quote
                }
            }
            break;

        case MINUS:
            i++; // the '-' itself
            if (i < str.length && isDigitChar(str[i])) {
                tokens ~= new Token(NUMBER, -scanNumber(str, i));
            } else {
                tokens ~= new Token(MINUS);
            }
            break;

        case NUMBER:
            tokens ~= new Token(NUMBER, scanNumber(str, i));
            break;

        case -1:
            {
                size_t start = i;
                i++; // first character is accepted whatever it is
                while (i < str.length && isIdentChar(str[i])) {
                    i++;
                }
                tokens ~= new Token(ID, str[start .. i].dup);
            }
            break;

        default:
            i++; // not expected; skip the character to guarantee progress
            break;
        }
    }
    return tokens;
}

/**
 * Splits `tokens` into groups at every token whose kind is `type`.
 * The separators are dropped. A leading separator yields an empty first
 * group and a trailing separator an empty last group. An empty input yields
 * no groups.
 */
Token[][] split(Token[] tokens, int type) {
    Token[][] groups;
    Token[] current;
    bool endedOnSeparator = false;
    foreach (Token t; tokens) {
        if (t.type == type) {
            groups ~= current.dup;
            current = null;
            endedOnSeparator = true;
        } else {
            current ~= t;
            endedOnSeparator = false;
        }
    }
    if (endedOnSeparator || current.length > 0) {
        groups ~= current;
    }
    return groups;
}

/**
 * Lexes `str` and returns it as lines -> whitespace-separated fields -> tokens.
 */
Token[][][] lines(char[] str) {
    Token[] all = lex(str);
    Token[][] perLine = all.split(LN);
    Token[][][] result;
    foreach (Token[] line; perLine) {
        result ~= line.split(SPACE);
    }
    return result;
}

/** Concatenates the text of every token in `ts`. */
char[] toString(Token[] ts) {
    char[] text;
    foreach (Token t; ts) {
        text ~= t.toString();
    }
    return text;
}

/** Renders the fields of one line, separated by single spaces. */
char[] toString(Token[][] tss) {
    char[] text;
    char[] sep = "";
    foreach (Token[] ts; tss) {
        text ~= sep ~ ts.toString();
        sep = " ";
    }
    return text;
}

/**
 * Reads "asm.cas", echoes every non-empty line as a `//` comment and prints
 * the machine code of each recognised instruction as `0x..,` bytes.
 *
 * Field 0 of a line is the label (possibly empty), field 1 the mnemonic and
 * the remaining fields the operands. Lines without a mnemonic field, or with
 * an unknown mnemonic, are echoed but produce no code.
 */
void main() {
    Token[][][] cs = (cast(char[])read("asm.cas")).lines();
    foreach (Token[][] c; cs) {
        if (c.length == 0) continue;
        printf("// %.*s\n", c.toString());
        // Label-only or blank-with-whitespace lines have no opcode field.
        if (c.length < 2 || c[1].length == 0 || c[1][0].type != ID) continue;
        switch (c[1][0].string) {
        case "NOP":  op1(0x00);      break;
        case "LD":   r1r2(0x10, c);  break;
        case "ST":   radrx(0x11, c); break;
        case "LAD":  radrx(0x12, c); break;
        case "ADDA": r1r2(0x20, c);  break;
        case "SUBA": r1r2(0x21, c);  break;
        case "ADDL": r1r2(0x22, c);  break;
        case "SUBL": r1r2(0x23, c);  break;
        case "AND":  r1r2(0x30, c);  break;
        case "OR":   r1r2(0x31, c);  break;
        case "XOR":  r1r2(0x32, c);  break;
        case "CPA":  r1r2(0x40, c);  break;
        case "CPL":  r1r2(0x41, c);  break;
        case "SLA":  radrx(0x50, c); break;
        case "SRA":  radrx(0x51, c); break;
        case "SLL":  radrx(0x52, c); break;
        case "SRL":  radrx(0x53, c); break;
        case "JMI":  adrx(0x61, c);  break;
        case "JNZ":  adrx(0x62, c);  break;
        case "JZE":  adrx(0x63, c);  break;
        case "JUMP": adrx(0x64, c);  break;
        case "JPL":  adrx(0x65, c);  break;
        case "JOV":  adrx(0x66, c);  break;
        case "PUSH": adrx(0x70, c);  break;
        case "POP":  r(0x71, c);     break;
        case "CALL": adrx(0x80, c);  break;
        case "RET":  op1(0x81);      break;
        case "SVC":  adrx(0xF0, c);  break;
        default: continue;
        }
        ln();
    }
}

/**
 * Returns the register number 0..7 when `c` is an ID token naming
 * GR0..GR7, otherwise -1.
 */
int regs(Token c) {
    char[][] names = ["GR0", "GR1", "GR2", "GR3", "GR4", "GR5", "GR6", "GR7"];
    if (c.type != ID) {
        return -1;
    }
    for (int i = 0; i < names.length; i++) {
        if (c.string == names[i]) {
            return i;
        }
    }
    return -1;
}

/** Concatenates all groups into one flat token list. */
Token[] join(Token[][] tss) {
    Token[] flat;
    foreach (Token[] ts; tss) {
        flat ~= ts;
    }
    return flat;
}

/**
 * Returns the comma-separated operands of line `c` (fields 2 and later,
 * re-joined so whitespace around commas does not matter). Empty when the
 * line has no operand field.
 */
Token[][] operands(Token[][] c) {
    if (c.length <= 2) {
        Token[][] none;
        return none;
    }
    return c[2 .. c.length].join().split(CAMMA);
}

/**
 * Returns the register number of operand `i`.
 * Throws: Exception when the operand is missing or is not GR0..GR7.
 */
int regOperand(Token[][] cp, size_t i) {
    if (i >= cp.length || cp[i].length == 0) {
        throw new Exception("missing register operand");
    }
    int n = regs(cp[i][0]);
    if (n == -1) {
        throw new Exception("expected GR0..GR7 but found '" ~ cp[i][0].toString() ~ "'");
    }
    return n;
}

/**
 * Returns the 16-bit address given by operand `i`.
 * Throws: Exception when the operand is missing or is not a NUMBER token
 * (reading `data` from any other token kind would read the union's string).
 */
ushort addrOperand(Token[][] cp, size_t i) {
    if (i >= cp.length || cp[i].length == 0) {
        throw new Exception("missing address operand");
    }
    if (cp[i][0].type != NUMBER) {
        throw new Exception("expected a numeric address but found '" ~ cp[i][0].toString() ~ "'");
    }
    return cast(ushort)(cp[i][0].data);
}

/**
 * Returns the index-register number of the optional operand `i`,
 * or 0 when that operand is absent or empty.
 * Throws: Exception when the operand is present but is not GR0..GR7.
 */
int indexOperand(Token[][] cp, size_t i) {
    if (i >= cp.length || cp[i].length == 0) {
        return 0;
    }
    return regOperand(cp, i);
}

/**
 * Emits an operand-less instruction: the opcode byte followed by a zero
 * byte, so that it occupies two bytes like the other one-word forms
 * emitted by r1r2() and r().
 */
void op1(int b) {
    p(b);
    p(0x00);
}

/**
 * Emits an instruction that has both a register-register form and a
 * register-address form.
 *
 * With exactly two operands that are both registers, the `r1,r2` form
 * (opcode | 0x4, then r1:r2) is emitted; anything else is handed to radrx().
 */
void r1r2(byte b, Token[][] c) {
    Token[][] cp = operands(c);
    if (cp.length == 2 && cp[0].length > 0 && cp[1].length > 0) {
        int r1 = regs(cp[0][0]);
        int r2 = regs(cp[1][0]);
        if (r1 != -1 && r2 != -1) {
            p(b | 0x4);
            p((r1 << 4) | r2);
            return;
        }
    }
    radrx(b, c);
}

/**
 * Emits `op r,adr[,x]`: opcode, r:x, address high byte, address low byte.
 * Throws: Exception on a missing/invalid register or address operand.
 */
void radrx(byte b, Token[][] c) {
    Token[][] cp = operands(c);
    int reg = regOperand(cp, 0);
    ushort a = addrOperand(cp, 1);
    int x = indexOperand(cp, 2);
    p(b);
    p((reg << 4) | x);
    p((a >> 8) & 0xff);
    p(a & 0xff);
}

/**
 * Emits `op adr[,x]`: opcode, 0:x, address high byte, address low byte.
 * The address is the FIRST operand and the optional index the second.
 * Throws: Exception on a missing/invalid address or index operand.
 */
void adrx(ubyte b, Token[][] c) {
    Token[][] cp = operands(c);
    ushort a = addrOperand(cp, 0);
    int x = indexOperand(cp, 1);
    p(b);
    p(x);
    p((a >> 8) & 0xff);
    p(a & 0xff);
}

/**
 * Emits `op r`: opcode, then r in the high nibble.
 * Throws: Exception on a missing/invalid register operand.
 */
void r(byte b, Token[][] c) {
    Token[][] cp = operands(c);
    int reg = regOperand(cp, 0);
    p(b);
    p(reg << 4);
}

/**
 * Prints one byte as `0x..,`.
 */
void p(int a) {
    printf("0x%02x,", a);
}

/**
 * Prints a line break.
 */
void ln() {
    printf("\n");
}