ドカタしながらCOMET II用アセンブラを30日で作る日記(9日目)

調べてみたのですが、昨日移植したものはCASLであって、CASL II ではなかったです。


「移植だと、わかっていないところがわからないままだろうし、作っていたものを改良してみるか。」
ということで、改良してみました。


今日は調子がよかったらしく、バイナリ出力まで出来てます。
シンボルテーブルもついでにファイルにくっつけてみたりしてます。
次はこれを読み込んで、実行するものが必要だろうなと。


以下ソースです。

import std.file;
import std.string;

/**
 * A single lexical token of the assembler source.
 *
 * `type` is one of the token kind constants (SPACE, NUMBER, CAMMA, LN,
 * STRING, ID, ...) and `line` is the line number supplied by the creator.
 * The payload is a union: `data` is only meaningful for tokens built with
 * the (type, line, int) constructor and `string` only for tokens built with
 * the (type, line, char[]) constructor. Reading the other member returns
 * reinterpreted bytes, so callers must check `type` first.
 */
class Token {
	union {
		char[] string; // text payload (STRING and ID tokens)
		int data;      // numeric payload (NUMBER tokens)
	}
	int type;
	int line;
	this()                {}
	this(int t, int l)           { type = t; line = l; }
	this(int t, int l, int n)    { type = t; line = l; data = n; }
	this(int t, int l, char[] s) { type = t; line = l; string = s; }

	/**
	 * Renders the token back into source-like text.
	 *
	 * STRING tokens are re-quoted with embedded quotes doubled. Token types
	 * without a textual form yield a visible placeholder instead of running
	 * off the end of the switch.
	 */
	char[] toString() {
		switch (type) {
		case SPACE:  return " ";
		case CAMMA:  return ",";
		case NUMBER: return std.string.toString(data);
		case STRING: return "'" ~ string.replace("'", "''") ~ "'";
		case ID:     return string;
		case LN:     return "\n";
		default:
			// Previously there was no default, so any other type ended
			// in a runtime switch error while merely printing a listing.
			return "<token:" ~ std.string.toString(type) ~ ">";
		}
	}
}

/**
 * Token kind constants, numbered consecutively from 0.
 * They are used both as Token.type values and as character classes
 * inside the lexer.
 */
enum {
	SPACE,   // run of blanks / tabs
	OPERAND, // not referenced by the visible code
	NUMBER,  // decimal digits
	CAMMA,   // ','
	LN,      // line feed
	COMMENT, // ';' (character class only; comments produce no token)
	STRING,  // single-quoted string
	MINUS,   // '-' (character class only; folded into NUMBER)
	ID,      // identifier / anything else
}

/**
 * Registers every character in `cs` under the class `type` in the
 * character-class table `types` (modified in place).
 *
 * Returns: the updated table.
 */
int[char] typeSet(inout int[char] types, int type , char[] cs ...) {
	for (size_t idx = 0; idx < cs.length; idx++) {
		char key = cs[idx];
		types[key] = type;
	}
	return types;
}

/**
 * Splits assembler source text into a flat list of tokens.
 *
 * Line endings are normalised to '\n' first. The lexer emits:
 *   - SPACE  for each run of blanks/tabs,
 *   - LN     for each line feed (the line counter is advanced afterwards),
 *   - CAMMA  for ',',
 *   - STRING for single-quoted text, where '' inside the quotes means one ',
 *   - NUMBER for decimal digits, optionally preceded by '-',
 *   - ID     for anything else: one arbitrary character followed by
 *            ASCII letters and digits.
 * Text from ';' up to (not including) the end of the line is discarded.
 *
 * Throws: Error when a string is not terminated or when '-' is not
 *         followed by a digit.
 */
Token[] lex(char[] str) {
	Token[] tokens;
	str = str.replace("\r\n", "\n").replace("\r", "\n");
	char[] string;
	int[char] types;
	int data = 0;
	int minus = 1;
	int line = 1;
	typeSet(types, SPACE, ' ', '\t');
	typeSet(types, LN, '\n');
	typeSet(types, COMMENT, ';');
	typeSet(types, STRING, '\'');
	typeSet(types, CAMMA, ',');
	typeSet(types, NUMBER, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9');
	typeSet(types, MINUS, '-');
loop:
	while (str.length != 0) {
		// Characters without a registered class (-1) start an identifier.
		int type = (str[0] in types) ? types[str[0]] : -1;

		switch (type) {
		case SPACE:
			// Collapse the whole run of blanks into one SPACE token.
			str = str[1 .. str.length];
			while (str.length > 0 && (str[0] in types) && types[str[0]] == SPACE) {
				str = str[1 .. str.length];
			}
			tokens ~= new Token(SPACE, line);
			break;
		case COMMENT:
			// Drop the comment but keep the '\n' so the LN token is still emitted.
			while (str.length > 0 && str[0] != '\n') {
				str = str[1 .. str.length];
			}
			break;
		case LN:
			str = str[1 .. str.length];
			tokens ~= new Token(LN, line++);
			break;
		case CAMMA:
			str = str[1 .. str.length];
			tokens ~= new Token(CAMMA, line);
			break;
		case STRING:
			for (int i = 1; i < str.length; i++) {
				if (str[i] == '\'') {
					if (str.length > i + 1 && str[i + 1] == '\'') {
						// Escaped quote: step over BOTH characters. Skipping
						// only one made the second quote of the pair look
						// like the terminator.
						i++;
						continue;
					}
					tokens ~= new Token(STRING, line, str[1 .. i].replace("''", "'"));
					str = str[i + 1 .. str.length];
					continue loop;
				}
			}
			throw new Error("not terminate string");
		case MINUS:
			// Consume the sign; leaving it in place made the outer loop
			// see the same '-' forever.
			str = str[1 .. str.length];
			if (str.length == 0 || str[0] < '0' || str[0] > '9')
				throw new Error("digit expected after '-'");
			minus = -1;
			// fall through into NUMBER
		case NUMBER:
			data = 0;
			// Both bounds must test str[0]; the old upper bound looked at
			// str[1], which read past the end on a trailing digit.
			while (str.length > 0 && str[0] >= '0' && str[0] <= '9') {
				data *= 10;
				data += str[0] - '0';
				str = str[1 .. str.length];
			}
			tokens ~= new Token(NUMBER, line, data * minus);
			minus = 1;
			break;
		case -1:
			string = str[0 .. 1].dup;
			str    = str[1 .. str.length];
			while (str.length > 0 &&
				((str[0] >= 'a' && str[0] <= 'z') ||
				 (str[0] >= 'A' && str[0] <= 'Z') ||
				 (str[0] >= '0' && str[0] <= '9')) ) {
				string ~= str[0];
				str = str[1 .. str.length];
			}
			tokens ~= new Token(ID, line, string);
			break;
		default:
			break;
		}
	}
	return tokens;
}

alias std.string.split split;

/**
 * Splits a token list at every token whose type equals `type`.
 *
 * The delimiter tokens themselves are dropped. Adjacent delimiters produce
 * empty groups, and a trailing delimiter produces a final empty group.
 * An empty input yields an empty result.
 */
Token[][] split(Token[] tokens, int type) {
	Token[][] groups;
	Token[] current;
	bool endedOnDelimiter = false;
	for (int i = 0; i < tokens.length; i++) {
		Token tok = tokens[i];
		endedOnDelimiter = (tok.type == type);
		if (endedOnDelimiter) {
			// Store a copy so later appends to `current` cannot alias it.
			groups ~= current.dup;
			current.length = 0;
		} else {
			current ~= tok;
		}
	}
	if (endedOnDelimiter || current.length > 0)
		groups ~= current;
	return groups;
}

/**
 * Concatenates a list of token groups into one flat token list,
 * preserving order.
 */
Token[] join(Token[][] tss) {
	Token[] flat;
	for (int i = 0; i < tss.length; i++) {
		flat ~= tss[i];
	}
	return flat;
}

/**
 * Tokenises `str` and arranges the tokens as lines -> fields -> tokens:
 * the token stream is split at LN tokens, then every line is split at
 * SPACE tokens.
 */
Token[][][] lines(char[] str) {
	Token[][][] result;
	Token[][] perLine = lex(str).split(LN);
	for (int i = 0; i < perLine.length; i++) {
		result ~= perLine[i].split(SPACE);
	}
	return result;
}

/**
 * Renders a token list by concatenating each token's text form.
 */
char[] toString(Token[] ts) {
	char[] text;
	for (int i = 0; i < ts.length; i++) {
		text ~= ts[i].toString();
	}
	return text;
}

/**
 * Renders a list of token groups, separating the groups with one blank.
 */
char[] toString(Token[][] tss) {
	char[] text;
	for (int i = 0; i < tss.length; i++) {
		// No separator in front of the first group.
		if (i > 0)
			text ~= " ";
		text ~= tss[i].toString();
	}
	return text;
}

/**
 * Converts one hexadecimal digit character to its value.
 *
 * OR-ing with 0x20 lower-cases letters and leaves digits unchanged;
 * after subtracting '0', digits are 0..9 and 'a'..'f' are 49..54, which
 * the modulo by ('W' - '0') == 39 folds down to 10..15.
 */
int htoi(int c) {
	int lowered = c | 0x20;
	int offset  = lowered - '0';
	return offset % ('W' - '0');
}
/**
 * Converts a string of hexadecimal digits to its integer value,
 * most significant digit first. An empty string yields 0.
 */
int htoi(char[] str) {
	int total = 0;
	for (int i = 0; i < str.length; i++) {
		total = total * 16 + htoi(str[i]);
	}
	return total;
}
/**
 * Assembles "asm.cas" into "out.o" in two passes.
 *
 * Pass 1 records every label with the current address and adds up the
 * size reported by each instruction's length function. Pass 2 emits the
 * bytes through the instruction's encoder. Both passes stop after END.
 *
 * Output layout, as written below: the magic "CSL2", the file size as two
 * 16-bit words (high word first), the start address, the code length, the
 * code, then one symbol table entry per label (name bytes, a 0 byte, the
 * address word, the line word) and a final 0 byte. A textual dump of the
 * same bytes is printed along the way.
 *
 * Throws: Error for duplicate labels, non-identifier labels, unknown
 *         instructions and an undefined start label.
 */
int main() {
	// Instruction table: mnemonic, operand format, opcode (hex).
	char[][] ts =
		"NOP  ,p   ,00,"
		"LD   ,rr  ,10,"
		"ST   ,rarx,11,"
		"LAD  ,rarx,12,"
		"ADDA ,rr  ,20,"
		"SUBA ,rr  ,21,"
		"ADDL ,rr  ,22,"
		"SUBL ,rr  ,23,"
		"AND  ,rr  ,30,"
		"OR   ,rr  ,31,"
		"XOR  ,rr  ,32,"
		"CPA  ,rr  ,40,"
		"CPL  ,rr  ,41,"
		"SLA  ,rarx,50,"
		"SRA  ,rarx,51,"
		"SLL  ,rarx,52,"
		"SRL  ,rarx,53,"
		"JMI  ,arx ,61,"
		"JNZ  ,arx ,62,"
		"JZE  ,arx ,63,"
		"JUMP ,arx ,64,"
		"JPL  ,arx ,65,"
		"JOV  ,arx ,66,"
		"PUSH ,arx ,70,"
		"POP  ,r   ,71,"
		"CALL ,arx ,80,"
		"RET  ,p   ,81,"
		"SVC  ,arx ,F0,"
		"START,st  ,00,"
		"END  ,ed  ,00,"
		"DC   ,dc  ,00,"
		"DS   ,ds  ,00,"
		"IN   ,p   ,90,"
		"OUT  ,p   ,91,"
		"RPUSH,p   ,92,"
		"RPOP ,p   ,93".replace(" ","").split(",");
	// Per-mnemonic encoder, length function and opcode byte.
	void function(ubyte, Token[][],int[char[]])[char[]] fs;
	int  function(Token[][])[char[]] fls;
	ubyte[char[]] bs;
	for (int i = 0; i < ts.length; i += 3) {
		switch(ts[i + 1]) {
		case "p":    fs[ts[i]] = &p;    fls[ts[i]] = &pl;    break;
		case "rr":   fs[ts[i]] = &rr;   fls[ts[i]] = &rrl;   break;
		case "arx":  fs[ts[i]] = &arx;  fls[ts[i]] = &arxl;  break;
		case "rarx": fs[ts[i]] = &rarx; fls[ts[i]] = &rarxl; break;
		case "r":    fs[ts[i]] = &r;    fls[ts[i]] = &rl;    break;
		case "st":   fs[ts[i]] = &st;   fls[ts[i]] = &stl;   break;
		case "ed":   fs[ts[i]] = &ed;   fls[ts[i]] = &edl;   break;
		case "dc":   fs[ts[i]] = &dc;   fls[ts[i]] = &dcl;   break;
		case "ds":   fs[ts[i]] = &ds;   fls[ts[i]] = &dsl;   break;
		}
		bs[ts[i]] = htoi(ts[i + 2]);
	}
	Token[][][] cs = (cast(char[])read("asm.cas")).lines();
	int[char[]] symtable;
	int[char[]] symline;
	int address = 0;
	int symbolTableLength = 0;

	// Pass 1: collect labels and compute addresses.
	foreach (Token[][] c; cs) {
		if (c.length == 0) continue;
		if (c[0].length > 0) {
			// Only ID tokens carry a valid `string` payload.
			if (c[0][0].type != ID)
				throw new Error("line " ~ std.string.toString(c[0][0].line)
					~ ": label must be an identifier");
			if (c[0][0].string in symtable)
				throw new Error("Label '" ~ c[0][0].string ~ "' already defined");
			symtable[c[0][0].string] = address;
			symline[c[0][0].string] = c[0][0].line;
			// name bytes + terminating 0 + address word + line word
			symbolTableLength += c[0][0].string.length + 5;
		}
		// A label-only line, or a line whose second field is empty
		// (trailing blanks, indented comment-only line), has no instruction.
		if (c.length < 2 || c[1].length == 0) continue;
		if (c[1][0].type != ID || !(c[1][0].string in fls))
			throw new Error("line " ~ std.string.toString(c[1][0].line)
				~ ": unknown instruction '" ~ c[1].toString() ~ "'");
		char[] s = c[1][0].string;
		address += fls[s](c);
		if (s == "END") break;
	}
	if (!(startLabel in symtable)) {
		throw new Error("Start label '" ~ startLabel ~ "' doesn't defined");
	}

	// Header.
	printf("// file size\n");
	p('C');p('S');p('L');p('2');
	int fileSize = 4 + 4 + 2 + 2 + address + symbolTableLength + 1;
	pw((fileSize>>16) & 0xffff);
	pw(fileSize & 0xffff);
	ln();
	printf("// start address\n");
	pw(symtable[startLabel]);
	ln();
	printf("// length\n");
	pw(address);
	ln();

	// Pass 2: emit code. Every instruction reached here was validated in pass 1.
	foreach (Token[][] c; cs) {
		if (c.length == 0) continue;
		printf("// %.*s\n", c.toString());
		if (c.length < 2 || c[1].length == 0) continue;
		char[] s = c[1][0].string;
		fs[s](bs[s], c, symtable);
		ln();
		if (s == "END") break;
	}

	// Symbol table.
	printf("// SYMBOL TABLE\n");
	foreach (char[] l, int a; symtable) {
		int line = symline[l];
		printf("// %8.*s: %04x (%d)\n", l, a, line);
		foreach(char c; l)p(c);
		p(0);
		pw(a);
		pw(line);
		ln();
	}
	p(0);
	write("out.o", cast(void[])output);
	return 0;
}

/**
 * Maps a register name token to its register number.
 *
 * Returns: 0..7 for an ID token spelled "GR0".."GR7", otherwise -1.
 */
int regs(Token c) {
	// Non-identifier tokens carry no valid text payload.
	if (c.type != ID)
		return -1;
	char[][] names = ["GR0", "GR1", "GR2", "GR3", "GR4", "GR5", "GR6", "GR7"];
	foreach (int number, char[] name; names) {
		if (c.string == name)
			return number;
	}
	return -1;
}

/**
 * Encodes an instruction that has both a register-register form and a
 * register-address form.
 *
 * When the first two operands are both registers, two bytes are emitted:
 * the opcode with bit 0x4 set, then both register numbers packed into one
 * byte. As soon as an operand turns out not to be a register, encoding is
 * delegated to rarx.
 */
void rr(ubyte b, Token[][] c, int[char[]] symtable) {
	Token[][] operands = c[2 .. c.length].join().split(CAMMA);

	int first = regs(operands[0][0]);
	if (first == -1) {
		rarx(b, c, symtable);
		return;
	}
	int second = regs(operands[1][0]);
	if (second == -1) {
		rarx(b, c, symtable);
		return;
	}
	p(b | 0x4);
	p((first << 4) | second);
}

/**
 * Size in bytes of an instruction encoded by rr: 2 when the first two
 * operands are registers, otherwise whatever rarxl reports.
 */
int rrl(Token[][] c) {
	Token[][] operands = c[2 .. c.length].join().split(CAMMA);

	// Checked in order so the second operand is only inspected when the
	// first one is a register.
	if (regs(operands[0][0]) == -1)
		return rarxl(c);
	if (regs(operands[1][0]) == -1)
		return rarxl(c);
	return 2;
}
/**
 * Encodes an instruction of the form `r,adr[,x]` as four bytes: the
 * opcode, a byte packing the register and the index register (0 when
 * absent), and the 16-bit address.
 *
 * Params:
 *   b        = opcode byte
 *   c        = the source line as fields; operands start at field 2
 *   symtable = label -> address map used to resolve `adr`
 *
 * Throws: Error when operands are missing, when `r` or `x` is not a
 *         register, or when `adr` cannot be resolved (see getNumber).
 */
void rarx(ubyte b, Token[][] c, int[char[]] symtable) {
	Token[][] cp = c[2 .. c.length].join().split(CAMMA);
	if (cp.length < 2 || cp[0].length == 0 || cp[1].length == 0)
		throw new Error("operands 'r,adr[,x]' expected");
	int r = regs(cp[0][0]);
	if (r == -1)
		throw new Error("register expected as first operand");
	ushort a = getNumber(cp[1][0], symtable);
	int x = 0;
	// The guard must look at the split operand list (cp), not at the raw
	// field c[2], which is never empty here.
	if (cp.length > 2 && cp[2].length > 0) {
		x = regs(cp[2][0]);
		if (x == -1)
			throw new Error("index register expected as third operand");
	}
	p(b);
	p((r << 4) | x);
	pw(a);
}
/**
 * Size in bytes of an instruction encoded by rarx; always 4.
 */
int rarxl(Token[][] c) {
	const int ENCODED_BYTES = 4; // opcode + register byte + 16-bit address
	return ENCODED_BYTES;
}

/**
 * Resolves an operand token to a numeric value.
 *
 * NUMBER tokens yield their literal value; ID tokens are looked up as
 * labels in `symtable`.
 *
 * Throws: Error when the label is undefined or when the token is neither
 *         a number nor an identifier.
 */
int getNumber(Token c, int[char[]] symtable) {
	if (c.type == NUMBER) return c.data;
	if (c.type == ID) {
		if (c.string in symtable)
			return symtable[c.string];
		throw new Error("label " ~ c.string ~ " is undefined");
	}
	// This used to throw an Error with an empty message, which gave the
	// user nothing to go on.
	throw new Error("line " ~ std.string.toString(c.line)
		~ ": number or label expected");
}

/**
 * Encodes an instruction of the form `adr[,x]` as four bytes: the opcode,
 * a byte whose low nibble is the index register (0 when absent), and the
 * 16-bit address.
 *
 * Params:
 *   b        = opcode byte
 *   c        = the source line as fields; operands start at field 2
 *   symtable = label -> address map used to resolve `adr`
 *
 * Throws: Error when `adr` is missing or cannot be resolved (see
 *         getNumber), or when `x` is not a register.
 */
void arx(ubyte b, Token[][] c, int[char[]] symtable) {
	Token[][] cp = c[2 .. c.length].join().split(CAMMA);
	if (cp.length < 1 || cp[0].length == 0)
		throw new Error("operands 'adr[,x]' expected");
	ushort a = getNumber(cp[0][0], symtable);
	int x = 0;
	// With no leading register operand, the index register is the SECOND
	// operand. The old code looked at the third one, so `JUMP L,GR1`
	// silently lost its index register.
	if (cp.length > 1 && cp[1].length > 0) {
		x = regs(cp[1][0]);
		if (x == -1)
			throw new Error("index register expected as second operand");
	}
	p(b);
	p(x); // the register nibble is always 0 for this format
	pw(a);
}
/**
 * Size in bytes of an instruction encoded by arx; always 4.
 */
int arxl(Token[][] c) {
	const int ENCODED_BYTES = 4; // opcode + index byte + 16-bit address
	return ENCODED_BYTES;
}
/**
 * Encodes an instruction with a single register operand as two bytes:
 * the opcode, then the register number in the high nibble.
 *
 * Throws: Error when the operand is missing or is not a register
 *         (previously -1 was encoded into the byte).
 */
void r(ubyte b, Token[][] c, int[char[]] symtable) {
	Token[][] cp = c[2 .. c.length].join().split(CAMMA);
	if (cp.length < 1 || cp[0].length == 0)
		throw new Error("register operand expected");
	int reg = regs(cp[0][0]);
	if (reg == -1)
		throw new Error("register expected as operand");

	p(b);
	p(reg << 4);
}
/**
 * Size in bytes of an instruction encoded by r; always 2.
 */
int rl(Token[][] c) {
	const int ENCODED_BYTES = 2; // opcode + register byte
	return ENCODED_BYTES;
}
/**
 * Encoder for the START pseudo-instruction.
 * It emits nothing; the operand is handled by stl.
 */
void st(ubyte b, Token[][] c, int[char[]] symtable) {
}
/// Label at which execution starts; set by stl when START is processed.
char[] startLabel;

/**
 * Length function for START: records the start label and reports 0 bytes.
 *
 * The start label is START's first operand when one is given. Without an
 * operand (which used to index an empty array) the label on the START
 * line itself is used.
 *
 * Throws: Error when the operand is not an identifier, or when START has
 *         neither an operand nor a label.
 */
int stl(Token[][] c) {
	Token[][] cp = c[2 .. c.length].join().split(CAMMA);
	if (cp.length > 0 && cp[0].length > 0) {
		if (cp[0][0].type != ID)
			throw new Error("START operand must be a label");
		startLabel = cp[0][0].string;
	} else if (c.length > 0 && c[0].length > 0) {
		startLabel = c[0][0].string;
	} else {
		throw new Error("START needs a label or an operand");
	}
	return 0;
}
/**
 * Encoder for the END pseudo-instruction; emits nothing.
 */
void ed(ubyte b, Token[][] c, int[char[]] symtable) {
	return;
}
/**
 * Length function for END; it occupies no bytes.
 */
int edl(Token[][] c) {
	const int ENCODED_BYTES = 0;
	return ENCODED_BYTES;
}
/**
 * Encoder for DC. Only the first operand is used.
 *
 * A STRING operand is emitted as one 16-bit word per character; any other
 * operand is resolved through getNumber and emitted as a single word.
 */
void dc(ubyte b, Token[][] c, int[char[]] symtable) {
	Token first = c[2 .. c.length].join().split(CAMMA)[0][0];
	if (first.type != STRING) {
		int value = getNumber(first, symtable);
		pw(value);
		return;
	}
	foreach (char ch; first.string) {
		pw(ch);
	}
}
/**
 * Size in bytes of a DC line: two bytes per character for a STRING
 * operand, otherwise two bytes. Only the first operand is considered.
 */
int dcl(Token[][] c) {
	Token first = c[2 .. c.length].join().split(CAMMA)[0][0];
	return (first.type == STRING) ? first.string.length * 2 : 2;
}
/**
 * Encoder for DS: emits as many zero bytes as the numeric operand says.
 *
 * NOTE(review): this reserves one byte per unit, while dc emits two bytes
 * per value — confirm which unit is intended.
 *
 * Throws: Error when the operand is not a NUMBER token.
 */
void ds(ubyte b, Token[][] c, int[char[]] symtable) {
	Token count = c[2 .. c.length].join().split(CAMMA)[0][0];
	if (count.type != NUMBER)
		throw new Error("DS meirei no parameter ha suuji janaito dame desu");
	for (int remaining = count.data; remaining > 0; remaining--) {
		p(0);
	}
}
/**
 * Size in bytes of a DS line: the value of its numeric operand.
 *
 * Throws: Error when the operand is not a NUMBER token.
 */
int dsl(Token[][] c) {
	Token count = c[2 .. c.length].join().split(CAMMA)[0][0];
	if (count.type != NUMBER)
		throw new Error("DS meirei no parameter ha suuji janaito dame desu");
	return count.data;
}
/// Accumulated binary output; main writes it to the object file at the end.
ubyte[] output;

/**
 * Outputs one byte: appends it to `output` and prints it as "0xNN,".
 *
 * The extra parameters are unused; they let this function double as the
 * encoder for instructions that consist of the opcode byte only.
 */
void p(ubyte a, Token[][] c = null, int[char[]] symtable = null) {
	output ~= a;
	printf("0x%02x,", a);
}
/**
 * Size in bytes of an instruction encoded by p; always 1.
 */
int pl(Token[][] c) {
	const int ENCODED_BYTES = 1; // opcode only
	return ENCODED_BYTES;
}
/**
 * Outputs a 16-bit word as two bytes, high byte first.
 */
void pw(ushort w) {
	ubyte high = (w >> 8) & 0xff;
	ubyte low  = w & 0xff;
	p(high);
	p(low);
}
/**
 * Prints a line break to the textual dump.
 * Nothing is appended to the binary output.
 */
void ln() {
	printf("\n");
}