changeset 6:74a6d629b78f

Added assembler. Removed hand-assembled version of hello world example
author Michael Pavone <pavone@retrodev.com>
date Sat, 26 Mar 2016 23:31:08 -0700
parents 18b66690ae13
children 8f9a05e2e425
files Makefile helloworld.bin src/asm.c src/cpu.c src/cpu.h
diffstat 5 files changed, 650 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Sat Mar 26 23:30:50 2016 -0700
+++ b/Makefile	Sat Mar 26 23:31:08 2016 -0700
@@ -12,6 +12,9 @@
 
 $(TARGETDIR)/s16 : $(TARGETDIR)/main.o $(TARGETDIR)/cpu.o
 	$(CC) -o $@ $^ $(LDFLAGS)
+	
+# Assembler binary: links asm.o with cpu.o, which defines the mnemonic tables
+# that asm.c reads through cpu.h.
+$(TARGETDIR)/asm : $(TARGETDIR)/asm.o $(TARGETDIR)/cpu.o
+		$(CC) -o $@ $^ $(LDFLAGS)
 
 $(TARGETDIR)/%.o : src/%.c
 	$(CC) $(CFLAGS) -c -o $@ $<
Binary file helloworld.bin has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/asm.c	Sat Mar 26 23:31:08 2016 -0700
@@ -0,0 +1,633 @@
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include "cpu.h"
+
+// Kind of fix-up to apply at a spot where a label was used before its address
+// was known. The first four patch instruction fields, the DC* kinds patch data
+// emitted by dc.b/dc.w/dc.l.
+typedef enum {
+	IMMED4,  // 4-bit immediate (used when the opcode is neither ldim nor ldimh)
+	IMMED8,  // 8-bit immediate of ldim
+	IMMEDHI, // 8-bit immediate of ldimh; patched with bits 8-15 of the address
+	BCCDST,  // displacement of a bcc instruction
+	DCB,     // byte emitted by dc.b
+	DCW,     // word emitted by dc.w, high byte first
+	DCL      // long emitted by dc.l, high byte first
+} reftype;
+
+// One pending use of a label: where the fix-up goes and how it is encoded.
+typedef struct {
+	uint16_t address; // byte offset in the output buffer of the item to patch
+	reftype  type;
+} reference;
+
+
+// A named address plus the list of places that used it while it was undefined.
+typedef struct {
+	char      *name;             // heap copy of the label text, released by free_labels
+	reference *references;       // NULL until the first add_reference call
+	size_t    num_references;    // only meaningful once references is non-NULL
+	size_t    reference_storage; // allocated capacity of references
+	uint16_t  address;
+	uint8_t   valid;             // non-zero once address is known
+} label;
+
+// Growable table of every label seen so far, defined or merely referenced.
+typedef struct {
+	label  *labels;
+	size_t num_labels;    // entries currently in use
+	size_t label_storage; // allocated capacity of labels, in entries
+} label_meta;
+
+// Encoding recipe for one mnemonic, as returned by find_mnemonic.
+typedef struct {
+	int immed_min;          // allowed range of an immediate first argument;
+	int immed_max;          // min == max means the first argument must be a register
+	uint16_t base;          // opcode bits before operands are OR-ed in; 0xFFFF = unknown mnemonic
+	uint8_t  first_shift;   // bit position of the first operand
+	uint8_t  second_shift;  // bit position of the second operand (a third goes at second_shift+4)
+	uint8_t  expected_args; // exact number of operands the mnemonic takes
+} inst_info;
+
+// Appends a new label to the table and returns a pointer to the new entry.
+//
+// name is copied, so the caller keeps ownership of its buffer. valid tells
+// whether address is already the label's real address (non-zero) or the label
+// is only being recorded because something referenced it (zero).
+// The returned pointer is only good until the next add_label call, which may
+// move the table. Terminates the program if memory cannot be allocated.
+label *add_label(label_meta *labels, char *name, uint16_t address, uint8_t valid)
+{
+	if (labels->num_labels == labels->label_storage) {
+		//grow geometrically; also copes with a table whose capacity starts at 0
+		size_t new_storage = labels->label_storage ? labels->label_storage * 2 : 16;
+		label *grown = realloc(labels->labels, sizeof(label) * new_storage);
+		if (!grown) {
+			fputs("ERROR: Out of memory while adding label\n", stderr);
+			exit(1);
+		}
+		labels->labels = grown;
+		labels->label_storage = new_storage;
+	}
+	label *l = labels->labels + labels->num_labels;
+	l->name = strdup(name);
+	if (!l->name) {
+		fputs("ERROR: Out of memory while adding label\n", stderr);
+		exit(1);
+	}
+	l->references = NULL;
+	//not strictly needed (add_reference sets these up), but avoids leaving
+	//indeterminate values in the table
+	l->num_references = 0;
+	l->reference_storage = 0;
+	l->address = address;
+	l->valid = valid;
+	labels->num_labels++;
+	return l;
+}
+
+// Searches the label table for an entry whose name matches exactly.
+// Returns the entry, or NULL when no such label has been recorded.
+label *find_label(label_meta *meta, char *name)
+{
+	size_t idx = 0;
+	while (idx < meta->num_labels)
+	{
+		label *candidate = &meta->labels[idx];
+		if (strcmp(name, candidate->name) == 0) {
+			return candidate;
+		}
+		idx++;
+	}
+	return NULL;
+}
+
+// Returns the index of the first of the num_entries strings in list that
+// equals str, or num_entries itself when none of them match.
+uint16_t find_string_arr(char ** list, char *str, uint16_t num_entries)
+{
+	uint16_t pos = 0;
+	while (pos != num_entries && strcmp(list[pos], str) != 0)
+	{
+		pos++;
+	}
+	return pos;
+}
+
+// Looks up how to encode a mnemonic.
+//
+// The three mnemonic tables from cpu.h are searched in turn: the main table,
+// the single-source table and the single-register table. On success the
+// result holds the opcode bits, the operand bit positions, the operand count
+// and the range allowed for an immediate first operand (min == max == 0 when
+// the first operand must be a register). An unknown mnemonic is reported by
+// base == 0xFFFF with every other field zero.
+inst_info find_mnemonic(char *mnemonic)
+{
+	//zeroed so the "unknown mnemonic" result carries no indeterminate fields
+	inst_info ret = {0};
+	uint16_t index = find_string_arr(mnemonics, mnemonic, SINGLE_SOURCE);
+	if (index < SINGLE_SOURCE) {
+		ret.base = index;
+		ret.first_shift = 4;
+		if (index == LDIM || index == LDIMH) {
+			ret.second_shift = 12;
+			ret.expected_args = 2;
+			//the field is 8 bits wide: accept both the signed and the unsigned
+			//reading of a byte, but nothing that gets truncated
+			ret.immed_min = -128;
+			ret.immed_max = 255;
+		} else {
+			ret.second_shift = 8;
+			ret.expected_args = 3;
+			ret.immed_min = ret.immed_max = 0;
+		}
+		return ret;
+	}
+	index = find_string_arr(mnemonics_single_src, mnemonic, SINGLE_REG);
+	if (index < SINGLE_REG) {
+		ret.base = index << 4 | SINGLE_SOURCE;
+		ret.first_shift = 8;
+		ret.second_shift = 12;
+		ret.expected_args = 2;
+		if (index >= INI) {
+			if (index >= ADDI) {
+				//process_arg encodes the value 8 as 0 for this range
+				ret.immed_min = -8;
+				ret.immed_max = 8;
+			} else {
+				ret.immed_min = 0;
+				ret.immed_max = 15;
+			}
+		} else {
+			ret.immed_min = ret.immed_max = 0;
+		}
+		return ret;
+	}
+	index = find_string_arr(mnemonics_single_reg, mnemonic, SETENUM+1);
+	if (index > SETENUM) {
+		ret.base = 0xFFFF;
+		return ret;
+	}
+	ret.base = index << 8 | SINGLE_REG << 4 | SINGLE_SOURCE;
+	ret.immed_min = ret.immed_max = 0;
+	ret.first_shift = 12;
+	ret.second_shift = 0;
+	ret.expected_args = 1;
+	return ret;
+}
+
+// Records that the item at byte offset address in the output buffer must be
+// patched with this label's address once it is known; type selects how the
+// address is encoded there.
+// The reference list is created on first use and doubled whenever it is full.
+// Terminates the program if memory cannot be allocated.
+void add_reference(label *label, uint16_t address, reftype type)
+{
+	reference *list = label->references;
+	if (!list) {
+		label->reference_storage = 4;
+		label->num_references = 0;
+		list = malloc(sizeof(reference) * label->reference_storage);
+	} else if (label->num_references == label->reference_storage) {
+		label->reference_storage *= 2;
+		list = realloc(list, sizeof(reference) * label->reference_storage);
+	}
+	if (!list) {
+		fputs("ERROR: Out of memory while recording label reference\n", stderr);
+		exit(1);
+	}
+	label->references = list;
+
+	label->references[label->num_references].address = address;
+	label->references[label->num_references].type = type;
+	label->num_references++;
+}
+
+// Splits the next comma separated argument off the text at *pos.
+//
+// Leading and trailing whitespace is dropped and the argument ends at the
+// first ',', at a ';' (start of a comment) or at the end of the string. The
+// argument is terminated in place, so the buffer is modified. On success the
+// argument is returned and *pos is moved to where the next call should
+// continue; when there is no argument left NULL is returned and *pos is left
+// alone.
+char * get_arg(char **pos)
+{
+	char *linebuf = *pos;
+	//the casts matter: passing a negative char to isspace is undefined
+	while (*linebuf && isspace((unsigned char)*linebuf))
+	{
+		linebuf++;
+	}
+	char * start = linebuf;
+	//one past the last non-blank character seen so far
+	char * end = start;
+	while (*linebuf && *linebuf != ';' && *linebuf != ',')
+	{
+		if (!isspace((unsigned char)*linebuf)) {
+			end = linebuf+1;
+		}
+		linebuf++;
+	}
+	if (start == end) {
+		return NULL;
+	}
+	if (*end) {
+		if (*linebuf == ',') {
+			//continue after the separator next time
+			linebuf++;
+		} else {
+			//comment or end of line: the next call lands on the terminator
+			//written below and reports that nothing is left
+			linebuf = end;
+		}
+		*end = 0;
+	}
+	*pos = linebuf;
+	return start;
+}
+
+// Releases every label's name and reference list, then the table itself.
+// The label_meta structure itself is not freed.
+void free_labels (label_meta *meta)
+{
+	label *table = meta->labels;
+	for (size_t idx = 0; idx != meta->num_labels; idx++)
+	{
+		label *entry = table + idx;
+		free(entry->name);
+		//free(NULL) is a no-op, so labels without references need no special case
+		free(entry->references);
+	}
+	free(table);
+}
+
+// Assembles the operands of a dc.b / dc.w / dc.l directive.
+//
+// size is 'b', 'w' or 'l'. linebuf holds the comma separated operand list and
+// is modified while it is parsed. Each operand is a hex number ($ or 0x
+// prefix), a decimal number, a double quoted string (one item per character)
+// or a label. Items are written to outbuf at *pc, high byte first, and *pc is
+// advanced. A label that is not defined yet is emitted as 0 and registered
+// for fix-up.
+// Returns 1 on success, 0 when ROM space runs out, a string is left
+// unterminated or memory cannot be allocated.
+int handle_dc(char size, char *linebuf, uint8_t *outbuf, uint16_t *pc, label_meta *meta)
+{
+	char *arg;
+	long value;
+	char *start = linebuf;
+	//pristine copy, used to put back characters get_arg overwrote
+	char *orig = strdup(linebuf);
+	if (!orig) {
+		fputs("ERROR: Out of memory\n", stderr);
+		return 0;
+	}
+	int in_string = 0;
+	//inside a string one character is consumed per iteration, with linebuf
+	//pointing at a quote placed directly before the next character
+	while ((arg = in_string ? linebuf : get_arg(&linebuf)))
+	{
+		//TODO: actual error checking
+		if (arg[0] == '$') {
+			//strtol does not know the $ prefix and would stop at it
+			value = strtol(arg + 1, NULL, 16);
+		} else if (arg[0] == '0' && arg[1] == 'x') {
+			value = strtol(arg, NULL, 16);
+		} else if ((arg[0] >= '0' && arg[0] <= '9') || (arg[0] == '-' && arg[1] >= '0' && arg[1] <= '9')) {
+			value = strtol(arg, NULL, 10);
+		} else if (arg[0] == '"') {
+			if (arg[1] == '"') {
+				//empty string or end of string
+				in_string = 0;
+				continue;
+			}
+			if (!arg[1]) {
+				//without this the terminator would be overwritten below and
+				//parsing would run off the end of the line
+				fputs("ERROR: Unterminated string\n", stderr);
+				free(orig);
+				return 0;
+			}
+			if (arg[1] == '\\' && arg[2]) {
+				switch(arg[2])
+				{
+				case 'n':
+					value = '\n';
+					break;
+				case 't':
+					value = '\t';
+					break;
+				case 'r':
+					value = '\r';
+					break;
+				case '"':
+				case '\\':
+					value = arg[2];
+					break;
+				default:
+					fprintf(stderr, "WARNING: Unrecognized escape char %c\n", arg[2]);
+					value = arg[2];
+					break;
+				}
+				//an escape takes up two characters
+				arg++;
+			} else {
+				value = arg[1];
+			}
+			in_string = 1;
+			//move the opening quote forward over the character just consumed
+			arg[1] = '"';
+			linebuf = arg+1;
+			int len = strlen(linebuf);
+			//undo termination done by get_arg
+			linebuf[len] = orig[len + linebuf-start];
+		} else {
+			label *l = find_label(meta, arg);
+			if (!l) {
+				l = add_label(meta, arg, 0, 0);
+			}
+			if (l->valid) {
+				value = l->address;
+			} else {
+				value = 0;
+				add_reference(l, *pc, size == 'b' ? DCB : size == 'w' ? DCW : DCL);
+			}
+		}
+		switch (size)
+		{
+		case 'b':
+			if (value < -128 || value > 255) {
+				fprintf(stderr, "WARNING: %s is too large to fit in a byte\n", arg);
+			}
+			if (*pc >= 48 * 1024) {
+				fputs("ERROR: Hit end of ROM space\n", stderr);
+				free(orig);
+				return 0;
+			}
+			outbuf[(*pc)++] = value;
+			break;
+		case 'w':
+			if (value < -32768 || value > 65535) {
+				fprintf(stderr, "WARNING: %s is too large to fit in a word\n", arg);
+			}
+			if (*pc >= 48 * 1024 - 1) {
+				fputs("ERROR: Hit end of ROM space\n", stderr);
+				free(orig);
+				return 0;
+			}
+			outbuf[(*pc)++] = value >> 8;
+			outbuf[(*pc)++] = value;
+			break;
+		case 'l':
+			if (*pc >= 48 * 1024 - 3) {
+				fputs("ERROR: Hit end of ROM space\n", stderr);
+				free(orig);
+				return 0;
+			}
+			outbuf[(*pc)++] = value >> 24;
+			outbuf[(*pc)++] = value >> 16;
+			outbuf[(*pc)++] = value >> 8;
+			outbuf[(*pc)++] = value;
+			break;
+		}
+	}
+	free(orig);
+	return 1;
+}
+
+// Encodes one instruction operand into *inst at bit position arg_shift.
+//
+// arg may be a register (r0-r15, pc, sr) or, when immed_min != immed_max, an
+// immediate: a hex number ($ or 0x prefix), a decimal number or a label. A
+// label that is not defined yet is encoded as 0 and registered for fix-up at
+// pc. Immediates outside immed_min..immed_max only produce a warning and are
+// truncated to the field.
+// Returns 1 on success, 0 when a register was required but not given.
+int process_arg(uint16_t *inst, char *arg, int arg_shift, int immed_min, int immed_max, label_meta *meta, uint16_t pc)
+{
+	long value;
+	if (arg[0] == 'r' && arg[1] >= '0' && arg[1] <= '9' && (arg[2] == 0 || arg[3] == 0)) {
+		//possible register; the end check rejects names such as "r1x"
+		char *end;
+		value = strtol(arg+1, &end, 10);
+		if (!*end && value >= 0 && value < 16) {
+			*inst |= value << arg_shift;
+			return 1;
+		}
+	}
+	if (!strcmp(arg, "pc")) {
+		*inst |= REG_PC << arg_shift;
+		return 1;
+	}
+	if (!strcmp(arg, "sr")) {
+		*inst |= REG_SR << arg_shift;
+		return 1;
+	}
+	if (immed_min == immed_max) {
+		fprintf(stderr, "ERROR: Non-register argument %s where a register is required\n", arg);
+		return 0;
+	}
+
+	//TODO: actual error checking
+	if (arg[0] == '$') {
+		//strtol does not know the $ prefix and would stop at it
+		value = strtol(arg + 1, NULL, 16);
+	} else if (arg[0] == '0' && arg[1] == 'x') {
+		value = strtol(arg, NULL, 16);
+	} else if ((arg[0] >= '0' && arg[0] <= '9') || (arg[0] == '-' && arg[1] >= '0' && arg[1] <= '9')) {
+		value = strtol(arg, NULL, 10);
+	} else {
+		label *l = find_label(meta, arg);
+		if (!l) {
+			l = add_label(meta, arg, 0, 0);
+		}
+		if (l->valid) {
+			value = l->address;
+		} else {
+			value = 0;
+			//the low nibble of the opcode tells which immediate format is in use
+			add_reference(l, pc, (*inst & 0xF) == LDIMH ? IMMEDHI : (*inst & 0xF) == LDIM ? IMMED8 : IMMED4);
+		}
+	}
+	if (value > immed_max || value < immed_min) {
+		fprintf(stderr, "WARNING: %s is too big to fit in an %s\n", arg, (*inst & 0xF) <= LDIMH ? "byte" : "nibble");
+	}
+	if (immed_max == 8) {
+		//in the -8..8 range the value 8 is encoded as 0
+		if (value == 8) {
+			value = 0;
+		}
+		value &= 0xF;
+	} else {
+		value &= 0xFF;
+	}
+	*inst |= value << arg_shift;
+	return 1;
+}
+
+// Condition code suffixes accepted after 'b', in encoding order: handle_bcc
+// uses the index found here as the condition field and searches the first
+// COND_LEQ+1 entries.
+char * condition_names[] = {
+	"ra",
+	"rn",
+	"eq",
+	"ne",
+	"mi",
+	"pl",
+	"cs",
+	"cc",
+	"gr",
+	"le"
+};
+
+// Assembles a conditional branch.
+//
+// cc is the two letter condition suffix, args the rest of the line (one label
+// operand expected). The instruction is written to outbuf[pc] and
+// outbuf[pc+1]; the caller advances pc. A destination that is not defined yet
+// is encoded with a displacement of 0 and registered for fix-up.
+// Returns 1 on success, 0 after printing an error.
+int handle_bcc(char *cc, char *args, uint8_t *outbuf, uint16_t pc, label_meta *meta)
+{
+	uint16_t intcc = find_string_arr(condition_names, cc, COND_LEQ+1);
+	if (intcc > COND_LEQ) {
+		fprintf(stderr, "ERROR: Invalid condition code %s\n", cc);
+		return 0;
+	}
+	char *dest = get_arg(&args);
+	if (!dest) {
+		fprintf(stderr, "ERROR: Missing argument to b%s\n", cc);
+		return 0;
+	}
+	char *extra = get_arg(&args);
+	if (extra) {
+		fprintf(stderr, "ERROR: Extra argument %s to b%s\n", extra, cc);
+		return 0;
+	}
+	label *l = find_label(meta, dest);
+	if (!l) {
+		l = add_label(meta, dest, 0, 0);
+	}
+	if (!l->valid) {
+		//every branch to a still undefined label needs its own fix-up, not
+		//just the one that created the label entry
+		add_reference(l, pc, BCCDST);
+	}
+	//displacements are relative to the address 4 bytes past the branch
+	uint16_t dest_addr = l->valid ? l->address : pc + 4;
+	if (dest_addr & 1) {
+		fprintf(stderr, "ERROR: Label %s refers to odd address %X which is illegal for b%s\n", dest, dest_addr, cc);
+		return 0;
+	}
+	int32_t diff = dest_addr - (pc + 4);
+	//NOTE(review): the mask below keeps only 8 bits of word displacement; if
+	//the CPU sign extends that field the reachable range is -256..254 and this
+	//check is too wide - confirm against the decoder in cpu.c
+	if (diff < -512 || diff > 510) {
+		fprintf(stderr, "ERROR: Label %s is out of range for b%s\n", dest, cc);
+		return 0;
+	}
+	diff &= 0x1FE;
+	uint16_t inst = BCC | diff << 3 | intcc << 12;
+	outbuf[pc] = inst >> 8;
+	outbuf[pc+1] = inst;
+	return 1;
+}
+
+// Marks lname as defined at pc: creates the label when it has not been seen
+// yet, otherwise completes the entry left behind by an earlier reference.
+static void define_label(label_meta *labels, char *lname, uint16_t pc)
+{
+	label *l = find_label(labels, lname);
+	if (l) {
+		l->address = pc;
+		l->valid = 1;
+	} else {
+		add_label(labels, lname, pc, 1);
+	}
+}
+
+// Patches every recorded forward reference in outbuf with the final address
+// of its label. Returns 1 on success, 0 after printing an error.
+static int resolve_references(label_meta *labels, uint8_t *outbuf)
+{
+	for (size_t i = 0; i < labels->num_labels; i++)
+	{
+		label *l = labels->labels + i;
+		if (!l->references) {
+			continue;
+		}
+		if (!l->valid) {
+			fprintf(stderr, "ERROR: label %s is used but not defined\n", l->name);
+			return 0;
+		}
+		uint16_t address = l->address;
+		for (reference *ref = l->references; ref < l->references + l->num_references; ref++)
+		{
+			//TODO: Warn when addresses don't fit
+			switch(ref->type)
+			{
+			case IMMED4:
+				//8 is encoded as 0; address itself must stay intact for the
+				//label's remaining references
+				outbuf[ref->address] |= (address == 8 ? 0 : address) & 0xF;
+				break;
+			case IMMED8:
+				outbuf[ref->address] |= (address & 0xF0) >> 4;
+				outbuf[ref->address+1] |= (address & 0xF) << 4;
+				break;
+			case IMMEDHI:
+				outbuf[ref->address] |= (address & 0xF000) >> 12;
+				outbuf[ref->address+1] |= (address & 0xF00) >> 4;
+				break;
+			case BCCDST: {
+				if (address & 1) {
+					fprintf(stderr, "ERROR: Label %s refers to odd address %X which is illegal for bcc\n", l->name, address);
+					return 0;
+				}
+				int diff = address - (ref->address + 4);
+				if (diff < -512 || diff > 510) {
+					fprintf(stderr, "ERROR: Label %s has address %X which is out of range of bcc at %X\n", l->name, address, ref->address);
+					return 0;
+				}
+				outbuf[ref->address] |= (diff & 0x1E0) >> 5;
+				outbuf[ref->address+1] |= (diff & 0x01E) << 3;
+				break;
+			}
+			case DCB:
+				outbuf[ref->address] = address;
+				break;
+			case DCW:
+				outbuf[ref->address] = address >> 8;
+				outbuf[ref->address+1] = address;
+				break;
+			case DCL:
+				outbuf[ref->address] = 0;
+				outbuf[ref->address+1] = 0;
+				outbuf[ref->address+2] = address >> 8;
+				outbuf[ref->address+3] = address;
+				break;
+			}
+		}
+	}
+	return 1;
+}
+
+// Assembles the source text read from input and writes the binary to output.
+//
+// Each line has the form "[label[:]] [mnemonic operands] [; comment]", where
+// a label has to start in the first column. Instructions and dc.w/dc.l data
+// are aligned to an even address with a zero byte. References to labels that
+// are defined further down are patched after the whole input has been read.
+// Returns 1 on success, 0 after printing an error to stderr.
+uint8_t assemble_file(FILE *input, FILE *output)
+{
+	//fixed size buffers are lame, but so are lines longer than 4K characters
+	//this is good enough for the really simple first version
+	char linebuf[4096];
+	//maximum program size is 48KB
+	uint8_t outbuf[48*1024];
+	//byte offset in outbuf of the next item to emit
+	uint16_t pc = 0;
+
+	label_meta labels = {
+		.labels = malloc(sizeof(label) * 1024),
+		.label_storage = 1024,
+		.num_labels = 0
+	};
+	if (!labels.labels) {
+		fputs("ERROR: Out of memory\n", stderr);
+		return 0;
+	}
+	int line = 0;
+
+	while (fgets(linebuf, sizeof(linebuf), input))
+	{
+		line++;
+		char *lname = NULL;
+		char *cur = linebuf;
+		//text in the first column is a label, unless it starts a comment
+		if (*cur && !isspace((unsigned char)*cur) && *cur != ';') {
+			lname = cur;
+			while(*cur && *cur != ':' && !isspace((unsigned char)*cur))
+			{
+				cur++;
+			}
+			if (*cur) {
+				*cur = 0;
+				cur++;
+			}
+		}
+		while (*cur && isspace((unsigned char)*cur))
+		{
+			cur++;
+		}
+		if (!*cur || *cur == ';') {
+			//nothing but an optional label on this line
+			if (lname) {
+				define_label(&labels, lname, pc);
+			}
+			continue;
+		}
+		char *mnemonic = cur;
+		while (*cur && !isspace((unsigned char)*cur) && *cur != ';')
+		{
+			cur++;
+		}
+		if (!*cur || *cur == ';') {
+			*cur = 0;
+			fprintf(stderr, "Missing arguments to instruction %s on line %d\n", mnemonic, line);
+			goto error;
+		}
+		*cur = 0;
+		cur++;
+		if (!strncmp(mnemonic, "dc.", 3) && (mnemonic[3] == 'b' || mnemonic[3] == 'w' || mnemonic[3] == 'l')) {
+			//only byte data may sit on an odd address
+			if (mnemonic[3] != 'b' && pc & 1) {
+				outbuf[pc] = 0;
+				pc++;
+			}
+			//the label has to name the aligned address
+			if (lname) {
+				define_label(&labels, lname, pc);
+			}
+			if (!handle_dc(mnemonic[3], cur, outbuf, &pc, &labels)) {
+				goto error;
+			}
+			continue;
+		}
+		//automatically align to word boundary
+		if (pc & 1) {
+			outbuf[pc] = 0;
+			pc++;
+		}
+		if (lname) {
+			define_label(&labels, lname, pc);
+		}
+		//every instruction is two bytes long
+		if (pc > sizeof(outbuf) - 2) {
+			fputs("ERROR: Hit end of ROM space\n", stderr);
+			goto error;
+		}
+		if (mnemonic[0] == 'b' && strlen(mnemonic) == 3) {
+			if (!handle_bcc(mnemonic + 1, cur, outbuf, pc, &labels)) {
+				goto error;
+			}
+			pc+=2;
+			continue;
+		}
+		char *firstarg = get_arg(&cur);
+
+		if (!firstarg) {
+			fprintf(stderr, "Missing arguments to instruction %s on line %d\n", mnemonic, line);
+			goto error;
+		}
+		char *secondarg = get_arg(&cur);
+		char *thirdarg;
+		int num_args;
+		if (secondarg) {
+			thirdarg = get_arg(&cur);
+			num_args = thirdarg ? 3 : 2;
+		} else {
+			thirdarg = NULL;
+			num_args = 1;
+		}
+
+		inst_info inf = find_mnemonic(mnemonic);
+		if (inf.base == 0xFFFF) {
+			fprintf(stderr, "Invalid mnemonic %s on line %d\n", mnemonic, line);
+			goto error;
+		}
+		if (inf.expected_args != num_args) {
+			fprintf(stderr, "Instruction %s expects %d args, but %d were given on line %d\n", mnemonic, inf.expected_args, num_args, line);
+			goto error;
+		}
+
+		uint16_t inst = inf.base;
+		//only the first operand may be an immediate
+		if (!process_arg(&inst, firstarg, inf.first_shift, inf.immed_min, inf.immed_max, &labels, pc)) {
+			goto error;
+		}
+		if (secondarg) {
+			if (!process_arg(&inst, secondarg, inf.second_shift, 0, 0, &labels, pc)) {
+				goto error;
+			}
+			if (thirdarg) {
+				if (!process_arg(&inst, thirdarg, inf.second_shift+4, 0, 0, &labels, pc)) {
+					goto error;
+				}
+			}
+		}
+		outbuf[pc++] = inst >> 8;
+		outbuf[pc++] = inst;
+	}
+	if (!resolve_references(&labels, outbuf)) {
+		goto error;
+	}
+	if (pc == fwrite(outbuf, 1, pc, output)) {
+		free_labels(&labels);
+		return 1;
+	}
+	fputs("Error writing to output file\n", stderr);
+error:
+	free_labels(&labels);
+	return 0;
+}
+
+
+// Entry point: asm INFILE OUTFILE, where "-" selects stdin or stdout.
+// Exits with status 0 when the file was assembled and written, 1 otherwise.
+int main(int argc, char ** argv)
+{
+	if (argc < 3) {
+		fputs("Usage: asm INFILE OUTFILE\n", stderr);
+		return 1;
+	}
+	FILE *infile = strcmp("-", argv[1]) ? fopen(argv[1], "r") : stdin;
+	if (!infile) {
+		fprintf(stderr, "Failed to open %s for reading\n", argv[1]);
+		return 1;
+	}
+	//the output is a raw ROM image, so it must not go through newline translation
+	FILE *outfile = strcmp("-", argv[2]) ? fopen(argv[2], "wb") : stdout;
+	if (!outfile) {
+		fprintf(stderr, "Failed to open %s for writing\n", argv[2]);
+		fclose(infile);
+		return 1;
+	}
+	//assemble_file reports success as 1, the opposite of an exit status
+	int ok = assemble_file(infile, outfile);
+	fclose(infile);
+	//buffered data is only written out here, so a failure means a bad image
+	if (fclose(outfile)) {
+		fputs("Error writing to output file\n", stderr);
+		ok = 0;
+	}
+	return ok ? 0 : 1;
+}
--- a/src/cpu.c	Sat Mar 26 23:30:50 2016 -0700
+++ b/src/cpu.c	Sat Mar 26 23:31:08 2016 -0700
@@ -23,9 +23,6 @@
 #define FLAG_C 8
 #define FLAG_N 16
 
-#define REG_PC 14
-#define REG_SR 15
-
 cpu* alloc_cpu(uint32_t clock_divider, uint32_t num_regions, memory_region *regions)
 {
 	size_t alloc_size = sizeof(cpu) + sizeof(memory_region) * num_regions;
@@ -375,6 +372,14 @@
 	"ldim", "ldimh", "ld8", "ld16", "str8", "str16", "add", "adc", "and", "or", "xor", "lsl", "lsr", "asr", "bcc", "single"
 };
 
+// Names of the single-source instructions, indexed by sub-opcode. The
+// assembler (find_mnemonic in asm.c) searches the first SINGLE_REG entries,
+// i.e. everything in front of the final "single reg" placeholder.
+char * mnemonics_single_src[] = {
+	"mov",  "neg",  "not", "cmp",
+	"call", "swap", "in",  "out",
+	"ini",  "outi", "addi", "andi",
+	"ori",  "lsi",  "cmpi", "single reg"
+};
+
+// Names of the single-register instructions, indexed by sub-opcode.
+// NOTE(review): find_mnemonic in asm.c only searches the first SETENUM+1
+// entries; if SETENUM is the index of "setenum", then "setuer" and "getuer"
+// cannot be assembled yet - confirm against the enum in cpu.h.
+char * mnemonics_single_reg[] = {
+	"reti",   "trap",   "trapi",
+	"getepc", "setepc",
+	"getesr", "setesr",
+	"getenum", "setenum",
+	"setuer", "getuer"
+};
+
 void run_instruction(cpu *context)
 {
 	uint16_t instruction = context->prefetch;
--- a/src/cpu.h	Sat Mar 26 23:30:50 2016 -0700
+++ b/src/cpu.h	Sat Mar 26 23:31:08 2016 -0700
@@ -45,6 +45,9 @@
 
 cpu* alloc_cpu(uint32_t clock_divider, uint32_t num_regions, memory_region *regions);
 void run_cpu(cpu *context, uint32_t target_cycle);
+// Mnemonic name tables defined in cpu.c, exported so the assembler can map
+// names to opcode numbers.
+extern char * mnemonics[];
+extern char * mnemonics_single_src[];
+extern char * mnemonics_single_reg[];
 
 enum {
 	LDIM,
@@ -111,4 +114,7 @@
 	COND_LEQ
 };
 
+// Register numbers that the assembler accepts under the names "pc" and "sr".
+#define REG_PC 14
+#define REG_SR 15
+
 #endif //CPU_H_