changeset 43:6e7bfe83d2b0

Changed the design to vastly simplify the video hardware and support a 23-bit address space on the CPU
author Michael Pavone <pavone@retrodev.com>
date Sat, 27 Aug 2016 22:38:31 -0700
parents a64e11e48a41
children 718aaedc4582
files bgcolor.s16 simple_console.txt src/cpu.c src/cpu.h src/main.c src/mnemonics.c src/vdp.c src/vdp.h
diffstat 8 files changed, 238 insertions(+), 399 deletions(-) [+]
line wrap: on
line diff
--- a/bgcolor.s16	Mon Apr 11 23:35:51 2016 -0700
+++ b/bgcolor.s16	Sat Aug 27 22:38:31 2016 -0700
@@ -8,22 +8,28 @@
 	;r6 = right edge
 	;r7 = counter
 	;r8 = secondary counter
-	ldim 248, r4
+	ldim 1, r0
+	ldim 240, r4
 	ldimh 0, r4
 	;320 == $140
 	ldim $40, r6
 	ldimh 1, r6
 top
-	ldim 0, r0
-	ldimh $FF, r0
 	ldim 0, r1
 	ldim 1, r2
-	ldim 8, r3
+	ldim 5, r3
+	ldimh 1, r3
 	ldim 16, r7
 	ldim 0, r8
 	
+waitfirst
+	ini $C, r5
+	cmp r5, r3
+	bne waitfirst
+	ldim -1, r3
+	bra waithblank
+	
 linestart
-	outi 14, r0
 waitline
 	ini $C, r5
 	cmp r5, r3
@@ -34,7 +40,8 @@
 	cmp r5, r6
 	bgr waithblank
 	
-	outi 15, r1
+	outi 14, r0
+	outi 14, r1
 	add r2, r1, r1
 	addi 2, r3
 	
--- a/simple_console.txt	Mon Apr 11 23:35:51 2016 -0700
+++ b/simple_console.txt	Sat Aug 27 22:38:31 2016 -0700
@@ -43,13 +43,15 @@
 	A = register containing pointer to function
 	D = register that will store PC value
 5: swap
-6: in
-7: out
-8: ini
-9: outi
-A: addi
-B: andi
-C: ori
+6: longjmp
+	A = register containing pointer to function
+	D = register containing new code segment value
+7: ini
+8: outi
+9: addi
+A: andi
+B: ori
+C: xori
 D: ls[lr]i
 	MSB of AAAA determines direction
 	LS 3 bits determines magnitude
@@ -60,21 +62,21 @@
 DDDD OOOO 1111 1111  
 
 0: reti - return from interrupt, D = register to restore from uer
-1: trap
-2: trapi
-3: getepc
-4: setepc
-5: getesr
-6: setesr
-7: getenum
-8: setenum
-9: getuer
-A: setuer
-B: invalid
-C: invalid
-D: invalid
-E: invalid
-F: invalid
+1: trapi
+2: getepc
+3: setepc
+4: getepch
+5: setepch
+6: getesr
+7: setesr
+8: getenum
+9: setenum
+A: getuer
+B: setuer
+C: getvbr
+D: setvbr
+E: getdatabanks
+F: setdatabanks
 
 
 Registers:
@@ -86,9 +88,11 @@
 
 Special Registers
 epc - Exception PC - Stores PC value to resume to when entering an exception handler
+epch - Exception PC High - Stores the high 7 bits of the PC value to resume to when entering an exception handler
 esr - Exception SR - same as above, but for SR
 eur - Exceptuion User Reg - reg for temporary storage of a reg in a handler, intended to be used for the stack pointer
 enum - Exception Number - holds the number of the most recent exception
+databanks - Holds the upper bytes used for data accesses (MSB = byte for upper half of memory space, LSB = byte for lower half of memory space)
 
 IO: Ports
 
@@ -101,7 +105,7 @@
 	Load value for a 16-bit down-counter
 	Polarity of output is switched on transition from 1 to 0
 	Value is loaded on cycles where counter is 0
-	Special case value of 0 in this register forces polarity to positive
+	Special case value of 0 in this register forces polarity to positive, useful for PCM playback
 5: Channel B Freq
 6: Channel C Freq
 7: Channel D Freq
@@ -112,82 +116,46 @@
 A: Timer Freq
 B: "Serial" Debug Port
 
-C: Write Vertical Scroll : Read Vertical Position
-	MMMM MCCC CCCC CFFF
-	C = coarse scroll bit
-	F = fine scroll bit
-	M = mask bit
-		controls which bits come directly from register
-		and which bits come from the sum of the register
-		and the current line number
-		
-D: Write Horizontal Scroll : Read Horizontal Position
-	xxxx xxxC CCCC CFFF
-	C = coarse scroll bit
-	F = fine scroll bit
-	x = unused
-E: Write Dest Address : Read Status
-F: Write Data : Read ???
+C: Framebuffer start offset : Read Vertical Position
+D: Graphics mode : Read Horizontal Position
+	EDVP PSSS SSEE EEEF
+	E = 0 display disabled, 1 display enabled
+	D = 0 4 bpp, 1 8bpp; V = vertical interrupt enable
+	F = 0 front buffer is lower 64K, 1 front buffer is upper 64K
+	P = palette select
+	S = blanked lines at start of frame
+	E = blanked lines at end of frame
+E: CRAM update port : Read Status
+	1st write determines destination and length
+	DDDD DDDD LLLL LLLL
+	Next L writes are written to D and subsequent addresses
+
+F: Reserved
 
 
-Name Table Start Address:
-	0VVV VVVV VHHH HHH0
-	
-	V = Coarse scroll bit from vertical scroll value
-	H = Coarse scroll bit from horizontal scroll value
+Video Hardware:
+	128KB of VRAM organized into two 64KB linear framebuffers
+	Pixels are 4bpp or 8bpp in chunky format
+	256 words of 16-bit CRAM
+	In 4bpp mode, P field of graphics mode selects one of 4, 16 color palettes
+	S and E fields of mode allow hardware letter boxing, effectively increasing the length of vblank
+	F field selects which 64KB framebuffer is used for the active display
+	Active buffer is inaccessible to CPU, but CPU has free rein over inactive buffer
+	H-Counter goes from 0-415 and then wraps back to zero
+	V-Counter goes from 0-261 and then wraps back to zero
+	V-Counter increments when H-Counter wraps
 	
-VDP Memory Map
-0000 - 7FFF = Pattern/Name Table RAM
-8000 - FDFF = unused, returns $FFFF
-FE00 - FEFF = Sprite RAM
-FF00 - FF7F = Palette RAM
-FF80 - FFFF = Palette RAM (mirror)
-
-Another View -
-	MSB determines whether destination is main RAM
-	For MSB 1
-		Next 7 MSB determines which special RAM is the destination
-		Only values of 7E and 7F are valid for these bits at present
-		8 LSB determine offset (1 LSB ignored)
-		
-Writing to Data port puts value into a one word FIFO
-Offset within memory type is increment when word is pulled from FIFO
-Writing to the FIFO when it is full will cause the existing entry to be overwritten
-
+Memory Map
+	23-bit address space
+	$0      - $3FFFFF - ROM
+	$400000 - $4FFFFF - 128K RAM, mirrored every 128KB
+	$500000 - $6FFFFF - Reserved
+	$700000 - $7FFFFF - 64KB VRAM back buffer, mirrored every 64KB
 
-Pattern Format:
-4bpp in an 8x8 tile arrangement
-
-
-Sprite Table Entry:
-
-XXXXXXXX YYYYYYYY
-IXPVHSNN NNNNNNNN
-
-S: size -- 0 = 8x8, 1 = 16x16
-P: Palette selector
-H: Horizontal flip
-V: Vertical flip
-I: Priority
-N: Name
-X: X position (left of screen = 16, right of screen = ?)
-Y: Y position (top of screen = 16, bottom of screen = 240)
-
-
-Name Table Entry:
-
-IPPVHxNN NNNNNNNN
-
-P: Palette selector
-H: Horizontal flip
-V: Vertical flip
-I: Priority
-N: Name
-x: Unused, should be set to 0
 
 26.112 MHZ Clock
 Dot Clock Divider 4
-CPU Clock Divider 20 (assuming 1 cycle/instruction, 5 for 4 cycles/instruction)
+CPU Clock Divider 4 (assuming 1 cycle/instruction, 1 for 4 cycles/instruction)
 Audio Timer Divider 34
 Audio Output Divider 544
 Interrupt Timer Divider 32
@@ -195,19 +163,3 @@
 Alternatively 13.056 Mhz clock and cut the dividers in half
 
 
-
-H-Counter goes from 0-415 and then wraps back to zero
-V-Counter goes from 0-261 and then wraps back to zero
-V-Counter increments when H-Counter wraps
-
-Rendering Process:
-208 main VRAM access slots
-123 slots for background
-160-123 = 37 extra slots during active display
-          48 slots during inactive display
-80 slots for sprite rendering
-5 slots remain for refresh or external access
-		  
-Since sprite rendering needs to intrude on active display period,
-a double buffered line buffer is needed. Useful for VGA compatibility anyway
-
--- a/src/cpu.c	Mon Apr 11 23:35:51 2016 -0700
+++ b/src/cpu.c	Sat Aug 27 22:38:31 2016 -0700
@@ -35,7 +35,7 @@
 	return context;
 }
 
-uint16_t cpu_read_16(cpu *context, uint16_t address)
+uint16_t cpu_read_16(cpu *context, uint32_t address)
 {
 	context->cycles += context->clock_inc;
 	if (address & 1) {
@@ -53,7 +53,7 @@
 	return 0xFFFF;
 }
 
-uint8_t cpu_read_8(cpu *context, uint16_t address)
+uint8_t cpu_read_8(cpu *context, uint32_t address)
 {
 	context->cycles += context->clock_inc;
 	memory_region *cur = context->mem_regions;
@@ -66,7 +66,7 @@
 	return 0xFF;
 }
 
-void cpu_write_16(cpu *context, uint16_t address, uint16_t value)
+void cpu_write_16(cpu *context, uint32_t address, uint16_t value)
 {
 	context->cycles += context->clock_inc;
 	if (address & 1) {
@@ -85,7 +85,7 @@
 	}
 }
 
-void cpu_write_8(cpu *context, uint16_t address, uint8_t value)
+void cpu_write_8(cpu *context, uint32_t address, uint8_t value)
 {
 	context->cycles += context->clock_inc;
 	memory_region *cur = context->mem_regions;
@@ -119,7 +119,7 @@
 
 void fetch_instruction(cpu *context)
 {
-	context->prefetch = cpu_read_16(context, context->regs[REG_PC]);
+	context->prefetch = cpu_read_16(context, context->pc_msb | context->regs[REG_PC]);
 	context->regs[REG_PC] += 2;
 	context->state = STATE_NORMAL;
 }
@@ -127,9 +127,11 @@
 void vector_fetch(cpu *context)
 {
 	context->exception_pc = context->regs[REG_PC] - 2;
+	context->exception_pch = context->pc_msb >> 16;
 	context->exception_sr = context->regs[REG_SR];
 	context->regs[REG_SR] &= ~(STATUS_INT0_ENABLE | STATUS_INT1_ENABLE);
 	context->regs[REG_PC] = cpu_read_16(context, context->vector_base + context->exception * 2);
+	context->pc_msb = 0;
 	context->state = STATE_NEED_FETCH;
 }
 
@@ -236,10 +238,6 @@
 		context->regs[REG_SR] = context->exception_sr;
 		context->state = STATE_NEED_FETCH;
 		return;
-	case TRAP:
-		context->state = STATE_EXCEPTION_START;
-		context->exception = context->regs[dst];
-		return;
 	case TRAPI:
 		context->state = STATE_EXCEPTION_START;
 		context->exception = dst;
@@ -274,6 +272,13 @@
 	case SETVBR:
 		context->vector_base = context->regs[dst];
 		break;
+	case GETDATABANKS:
+		context->regs[dst] = context->data_high_msb >> 7 | context->data_low_msb >> 15;
+		break;
+	case SETDATABANKS:
+		context->data_high_msb = (context->regs[dst] & 0xFF00) << 7;
+		context->data_low_msb = (context->regs[dst] & 0xFF) << 15;
+		break;
 	default:
 		context->state = STATE_EXCEPTION_START;
 		context->exception = EXCEPTION_INVALID_INSTRUCTION;
@@ -321,12 +326,11 @@
 			return;
 		}
 		break;
-	case IN:
-		context->regs[dst] = cpu_read_port(context, context->regs[a]);
+	case LONGJMP:
+		context->regs[REG_PC] = context->regs[a];
+		context->pc_msb = (context->regs[dst] & 0x7F) << 16;
+		context->state = STATE_NEED_FETCH;
 		break;
-	case OUT:
-		cpu_write_port(context, context->regs[a], context->regs[dst]);
-		return;
 	case INI:
 		context->regs[dst] = cpu_read_port(context, a);
 		break;
@@ -346,6 +350,10 @@
 		context->regs[dst] = context->regs[dst] | format_immediate(a);
 		update_flags_bitwise(context, context->regs[dst]);
 		break;
+	case XORI:
+		context->regs[dst] = context->regs[dst] ^ format_immediate(a);
+		update_flags_bitwise(context, context->regs[dst]);
+		break;
 	case LSI:
 		shift = a & 7;
 		if (!shift) {
@@ -377,6 +385,19 @@
 	}
 }
 
+uint32_t get_data_address(cpu *context, uint8_t a, uint8_t b) //returns the full bus address for a load/store whose 16-bit effective address is regs[a] + regs[b]
+{
+	uint32_t address = (uint16_t)(context->regs[a] + context->regs[b]); //wrap to 16 bits first; a carry into bit 16 would otherwise corrupt the bank/segment bits ORed in below
+	if (a == REG_PC || b == REG_PC) {
+		address |= context->pc_msb; //PC-relative accesses use the current code segment
+	} else if (address & 0x8000) {
+		address = (address & 0x7FFF) | context->data_high_msb; //upper half: bit 15 is replaced by the high data bank
+	} else {
+		address |= context->data_low_msb; //lower half: low data bank supplies the upper address bits
+	}
+	return address;
+}
+
 void run_instruction(cpu *context)
 {
 	uint16_t instruction = context->prefetch;
@@ -396,16 +417,16 @@
 		context->regs[dst] |= a << 12 | b << 8;
 		break;
 	case LD8:
-		context->regs[dst] = cpu_read_8(context, context->regs[a] + context->regs[b]);
+		context->regs[dst] = cpu_read_8(context, get_data_address(context, a, b));
 		break;
 	case LD16:
-		context->regs[dst] = cpu_read_16(context, context->regs[a] + context->regs[b]);
+		context->regs[dst] = cpu_read_16(context, get_data_address(context, a, b));
 		break;
 	case STR8:
-		cpu_write_8(context, context->regs[a] + context->regs[b], context->regs[dst]);
+		cpu_write_8(context, get_data_address(context, a, b), context->regs[dst]);
 		return;
 	case STR16:
-		cpu_write_16(context, context->regs[a] + context->regs[b], context->regs[dst]);
+		cpu_write_16(context,get_data_address(context, a, b), context->regs[dst]);
 		return;
 	case ADD:
 		tmp = context->regs[a] + context->regs[b];
--- a/src/cpu.h	Mon Apr 11 23:35:51 2016 -0700
+++ b/src/cpu.h	Sat Aug 27 22:38:31 2016 -0700
@@ -18,8 +18,9 @@
 
 typedef struct {
 	uint8_t  *base;
-	uint16_t start;
-	uint16_t end;
+	uint32_t start;
+	uint32_t end;
+	uint32_t mask;
 	uint8_t  flags;
 } memory_region;
 
@@ -33,9 +34,13 @@
 	uint16_t regs[16];
 	uint16_t exception;
 	uint16_t exception_pc;
+	uint16_t exception_pch;
 	uint16_t exception_sr;
 	uint16_t exception_ur;
 	uint16_t vector_base;
+	uint32_t pc_msb;
+	uint32_t data_low_msb;
+	uint32_t data_high_msb;
 	
 	uint16_t prefetch;
 	
@@ -83,13 +88,13 @@
 	CMP,
 	CALL,
 	SWAP,
-	IN,
-	OUT,
+	LONGJMP,
 	INI,
 	OUTI,
 	ADDI,
 	ANDI,
 	ORI,
+	XORI,
 	LSI,
 	CMPI,
 	SINGLE_REG
@@ -97,10 +102,11 @@
 
 enum {
 	RETI,
-	TRAP,
 	TRAPI,
 	GETEPC,
 	SETEPC,
+	GETEPCH,
+	SETEPCH,
 	GETESR,
 	SETESR,
 	GETEUR,
@@ -108,7 +114,9 @@
 	GETENUM,
 	SETENUM,
 	GETVBR,
-	SETVBR
+	SETVBR,
+	GETDATABANKS,
+	SETDATABANKS,
 };
 
 enum {
--- a/src/main.c	Mon Apr 11 23:35:51 2016 -0700
+++ b/src/main.c	Sat Aug 27 22:38:31 2016 -0700
@@ -12,8 +12,8 @@
 #define CYCLES_PER_FRAME (832*262)
 #define MASTER_CLOCK 13056000
 
-uint8_t rom[48 * 1024];
-uint8_t ram[16 * 1024];
+uint8_t rom[4 * 1024 * 1024];
+uint8_t ram[128 * 1024];
 
 enum {
 	PORT_CONTROLLER_1,
@@ -28,10 +28,10 @@
 	PORT_VOLUME_CD,
 	PORT_TIMER,
 	PORT_SERIAL,
-	PORT_VERTICAL,
-	PORT_HORIZONTAL,
-	PORT_VRAM_ADDRESS,
-	PORT_VRAM_DATA
+	PORT_START_OFFSET,
+	PORT_VIDEO_MODE,
+	PORT_CRAM,
+	RESERVED_3
 };
 
 typedef struct {
@@ -52,55 +52,49 @@
 	return getchar();
 }
 
-void vertical_port_write(cpu *context, uint8_t port, uint16_t value)
+void offset_port_write(cpu *context, uint8_t port, uint16_t value)
 {
 	console *system = context->system;
 	vdp_run(&system->video, context->cycles);
-	system->video.vscroll = value;
+	system->video.start_offset = value;
 }
 
-uint16_t vertical_port_read(cpu *context, uint8_t port)
+uint16_t offset_port_read(cpu *context, uint8_t port)
 {
 	console *system = context->system;
 	vdp_run(&system->video, context->cycles);
 	return system->video.vcounter;
 }
 
-void horizontal_port_write(cpu *context, uint8_t port, uint16_t value)
+void mode_port_write(cpu *context, uint8_t port, uint16_t value)
 {
 	console *system = context->system;
 	vdp_run(&system->video, context->cycles);
-	vdp_write_hscroll(&system->video, value);
+	vdp_write_mode(&system->video, value);
+	context->mem_regions[2].base = vdp_get_back_buffer(&system->video);
 }
 
-uint16_t horizontal_port_read(cpu *context, uint8_t port)
+uint16_t mode_port_read(cpu *context, uint8_t port)
 {
 	console *system = context->system;
 	vdp_run(&system->video, context->cycles);
 	return system->video.hcounter;
 }
 
-void address_port_write(cpu *context, uint8_t port, uint16_t value)
+void cram_port_write(cpu *context, uint8_t port, uint16_t value)
 {
 	console *system = context->system;
 	vdp_run(&system->video, context->cycles);
-	vdp_write_address(&system->video, value);
+	vdp_write_cram(&system->video, value);
 }
 
-uint16_t address_port_read(cpu *context, uint8_t port)
+uint16_t cram_port_read(cpu *context, uint8_t port)
 {
 	console *system = context->system;
 	vdp_run(&system->video, context->cycles);
 	return system->video.status;
 }
 
-void data_port_write(cpu *context, uint8_t port, uint16_t value)
-{
-	console *system = context->system;
-	vdp_run(&system->video, context->cycles);
-	vdp_write_data(&system->video, value);
-}
-
 void frequency_port_write(cpu *context, uint8_t port, uint16_t value)
 {
 	console *system = context->system;
@@ -179,8 +173,9 @@
 }
 
 memory_region regions[] = {
-	{rom, 0, sizeof(rom)-1, MEM_READ},
-	{ram, sizeof(rom), sizeof(rom)-1+sizeof(ram), MEM_READ|MEM_WRITE},
+	{ .base = rom, .start = 0, .end = sizeof(rom)-1, .mask = 0x7FFFFF, .flags = MEM_READ },
+	{ .base = ram, .start = sizeof(rom), .end = 0x4FFFFF, .mask = sizeof(ram)-1, .flags = MEM_READ|MEM_WRITE }, //mask covers all 128KB of ram (was 0xFFFF); NOTE(review): assumes mask is ANDed with the address to index base - confirm in cpu.c
+	{ .base = NULL, .start = 0x700000, .end = 0x7FFFFF, .mask = 0xFFFF, .flags = MEM_READ|MEM_WRITE } //base is NULL here; main and mode_port_write set mem_regions[2].base from vdp_get_back_buffer
 };
 
 void run_console(console *context)
@@ -215,10 +210,12 @@
 	if ((read = fread(rom, 1, sizeof(rom), f)) < sizeof(rom))  {
 		memset(rom + read, 0xFF, sizeof(rom)-read);
 	}
+	fclose(f);
 	console context;
-	context.proc = alloc_cpu(10, sizeof(regions)/sizeof(memory_region), regions);
+	context.proc = alloc_cpu(2, sizeof(regions)/sizeof(memory_region), regions);
 	context.proc->system = &context;
 	vdp_init(&context.video, 2);
+	context.proc->mem_regions[2].base = vdp_get_back_buffer(&context.video);
 	timer_init(&context.timer, 16);
 	controller_init(&context.pads);
 	context.proc->port_handlers[PORT_CONTROLLER_1].read = controller_port_read;
@@ -232,13 +229,12 @@
 	context.proc->port_handlers[PORT_TIMER].write = timer_port_write;
 	context.proc->port_handlers[PORT_SERIAL].write = debug_port_write;
 	context.proc->port_handlers[PORT_SERIAL].read = debug_port_read;
-	context.proc->port_handlers[PORT_VERTICAL].write = vertical_port_write;
-	context.proc->port_handlers[PORT_VERTICAL].read = vertical_port_read;
-	context.proc->port_handlers[PORT_HORIZONTAL].write = horizontal_port_write;
-	context.proc->port_handlers[PORT_HORIZONTAL].read = horizontal_port_read;
-	context.proc->port_handlers[PORT_VRAM_ADDRESS].write = address_port_write;
-	context.proc->port_handlers[PORT_VRAM_ADDRESS].read = address_port_read;
-	context.proc->port_handlers[PORT_VRAM_DATA].write = data_port_write;
+	context.proc->port_handlers[PORT_START_OFFSET].write = offset_port_write;
+	context.proc->port_handlers[PORT_START_OFFSET].read = offset_port_read;
+	context.proc->port_handlers[PORT_VIDEO_MODE].write = mode_port_write;
+	context.proc->port_handlers[PORT_VIDEO_MODE].read = mode_port_read;
+	context.proc->port_handlers[PORT_CRAM].write = cram_port_write;
+	context.proc->port_handlers[PORT_CRAM].read = cram_port_read;
 	
 	if (!system_init(640, 480, 48000)) {
 		return 1;
--- a/src/mnemonics.c	Mon Apr 11 23:35:51 2016 -0700
+++ b/src/mnemonics.c	Sat Aug 27 22:38:31 2016 -0700
@@ -4,9 +4,9 @@
 };
 
 char * mnemonics_single_src[] = {
-	"mov", "neg", "not", "cmp", "call", "swap", "in", "out", "ini", "outi", "addi", "andi", "ori", "lsi", "cmpi", "single reg"
+	"mov", "neg", "not", "cmp", "call", "swap", "longjmp", "ini", "outi", "addi", "andi", "ori", "xori", "lsi", "cmpi", "single reg"
 };
 
 char * mnemonics_single_reg[] = {
-	"reti", "trap", "trapi", "getepc", "setepc", "getesr", "setesr", "getenum", "setenum", "setuer", "getuer", "getvbr", "setvbr"
+	"reti", "trapi", "getepc", "setepc", "getepch", "setepch", "getesr", "setesr", "getenum", "setenum", "getuer", "setuer", "getvbr", "setvbr", "getdatabanks", "setdatabanks"
 };
--- a/src/vdp.c	Mon Apr 11 23:35:51 2016 -0700
+++ b/src/vdp.c	Sat Aug 27 22:38:31 2016 -0700
@@ -4,225 +4,105 @@
 #include "vdp.h"
 #include "system.h"
 
+#define MAX_ACTIVE_LINES 240 //vdp_run writes host framebuffer pixels only while vcounter is below this
+#define TOTAL_LINES 262 //vcounter wraps to 0 on reaching this
+#define ACTIVE_WIDTH 320 //vdp_run writes host framebuffer pixels only while hcounter is below this
+#define TOTAL_WIDTH 416 //hcounter wraps to 0 on reaching this
+
+#define VDP_STATUS_FB_SELECT    1 //set: vdp_run displays the upper 64KB of vram, clear: the lower 64KB
+#define VDP_STATUS_PENDING_VINT 2 //set and cleared by vdp_run; reported by vdp_interrupt_pending
+#define VDP_STATUS_VBLANK       4 //set and cleared together with VDP_STATUS_PENDING_VINT in vdp_run
+#define VDP_STATUS_CRAM_PENDING 8 //set by the first CRAM port write until the requested number of words has been written
+#define VDP_STATUS_VINT_ENABLED 0x2000 //copied from the mode register by vdp_write_mode
+#define VDP_STATUS_DEPTH        0x4000 //set: 8bpp, clear: 4bpp
+#define VDP_STATUS_ENABLED      0x8000 //clear: vdp_run outputs cram[0] for every pixel
+
 void vdp_init(vdp *context, uint32_t clock_div)
 {
 	memset(context, 0, sizeof(vdp));
 	//clock div specifies the pixel clock divider
 	//but our emulation step is half that fast
 	context->clock_inc = clock_div*2;
-	context->drawbuffer = context->linebuffers;
-	context->readbuffer = context->linebuffers+320;
 }
 
 void vdp_run(vdp *context, uint32_t target)
 {
+	uint8_t *current_fb = context->status & VDP_STATUS_FB_SELECT ? context->vram + 64*1024 : context->vram;
+	
 	while (context->cycles < target)
 	{
 		context->hcounter+=2;
-		if (context->hcounter == 416) {
+		if (context->hcounter == TOTAL_WIDTH) {
 			context->hcounter = 0;
 			context->vcounter++;
-			if (context->vcounter == 262) {
+			if (context->vcounter == TOTAL_LINES) {
 				context->vcounter = 0;
 			}
 		}
-		context->status &= ~(VDP_STATUS_VRAM|VDP_STATUS_SRAM);
-		//Render to linebuffer
-		if ((context->status & VDP_STATUS_ENABLED) && context->vcounter > 15 && context->vcounter < 240 && context->hcounter < 406) {
-			if (context->hcounter < 246) {
-				context->status |= VDP_STATUS_VRAM;
-				if (!context->hcounter) {
-					//flip linebuffers
-					if (context->drawbuffer == context->linebuffers) {
-						context->drawbuffer = context->linebuffers + 328;
-						context->readbuffer = context->linebuffers;
-					} else {
-						context->drawbuffer = context->linebuffers;
-						context->readbuffer = context->linebuffers + 328;
-					}
-					context->draw_dest = 0;
-					//enable sprite scanning
-					context->status |= VDP_STATUS_SPRITE_SCAN;
-					context->current_draw = 0;
-				}
-				if (context->draw_counter) {
-					context->draw_counter--;
-					uint16_t pixels = context->vram[context->draw_source++];
-					for (int i = context->hflip ? 0 : 12; i >= 0 && i < 16; i+= context->hflip ? 4 : -4)
-					{
-						uint8_t pixel = ((pixels >> i) & 0xF) | context->palpriority;
-						context->drawbuffer[context->draw_dest ^ (context->hflip << 2)] = pixel;
-						context->draw_dest++;
-					}
-				} else {
-					//00VV VVVV VVHH HHHH
-					uint16_t vpos = (context->vscroll & 0x7FF) + context->vcounter - 16;
-					uint16_t vmask = (context->vscroll >> 2) & 0x3E00;
-					uint16_t vcoarse = (vpos << 3) & 0x3FC0;
-					uint16_t vfine = vpos & 7;
-					uint16_t hcoarse = ((context->hscroll >> 3) + context->hcounter/6) & 0x3F;
-					uint16_t tableaddress = hcoarse | (vcoarse & ~vmask) | ((context->vscroll << 3) & vmask);
-					//printf("VCounter: %X, VScroll: %X, HCounter: %X, Table: %X\n", context->vcounter, context->vscroll, context->hcounter, tableaddress);
-					uint16_t entry = context->vram[tableaddress];
-					context->draw_source = (entry & 0x3FF) * 16;
-					if (entry & 0x1000) {
-						context->draw_source += 14 - vfine * 2;
-					} else {
-						context->draw_source += vfine * 2;
-					}
-					context->palpriority = entry >> 9 & 0x70;
-					context->draw_counter = 2;
-					context->hflip = (entry & 0x800) != 0;
-				}
-				if (context->status & VDP_STATUS_SPRITE_SCAN) {
-					context->status |= VDP_STATUS_SRAM;
-					uint16_t pos = context->sram[context->hcounter];
-					uint16_t y = pos & 0xFF;
-					uint16_t x = pos >> 8;
-					uint16_t atts = context->sram[context->hcounter+1];
-					x |= atts << 2 & 0x100;
-					if (x | y) {
-						uint16_t size = atts & 0x400 ? 16 : 8;
-						if (context->vcounter >= y && context->vcounter < y + size) {
-							uint16_t address = (atts & 0x3F) * 16;
-							if (atts & 0x1000) {
-								address += (size-1) * 2 - (context->vcounter - y) * 2;
-							} else {
-								address += (context->vcounter - y) * 2;
-							}
-							context->sprite_draws[context->current_draw].source = address;
-							context->sprite_draws[context->current_draw].x = x;
-							context->sprite_draws[context->current_draw].hflip = (atts & 0x800) != 0;
-							context->sprite_draws[context->current_draw].palpriority = 0x80 | (atts >> 9 & 0x50);
-							context->current_draw++;
-							if (size == 16) {
-								context->sprite_draws[context->current_draw].source = address + 32;
-								context->sprite_draws[context->current_draw].x = x + 8;
-								context->sprite_draws[context->current_draw].hflip = (atts & 0x800) != 0;
-								context->sprite_draws[context->current_draw].palpriority = 0x80 | (atts >> 9 & 0x50);
-								if (context->sprite_draws[context->current_draw].hflip) {
-									context->sprite_draws[context->current_draw].x -= 8;
-									context->sprite_draws[context->current_draw-1].x += 8;
-								}
-							}
-							context->current_draw++;
-							if (context->current_draw == 40) {
-								//no more rendering capacity
-								context->status &= ~VDP_STATUS_SPRITE_SCAN;
-								context->current_draw = 0;
-							}
-						}
-					} else {
-						//hit sprite list terminator
-						context->status &= ~VDP_STATUS_SPRITE_SCAN;
-						context->current_draw = 0;
-					}
-				}
-			} else {
-				sprite_draw *draw = context->sprite_draws + (context->current_draw >> 1);
-				if (draw->palpriority) {
-					context->status |= VDP_STATUS_VRAM;
-					uint16_t pixels = context->vram[draw->source + (context->current_draw & 1)];
-					uint16_t x = draw->x - 16 + (context->hscroll & 7);
-					for (int i = draw->hflip ? 0 : 12; i >= 0 && i < 16; i+= draw->hflip ? 4 : -4, x++)
-					{
-						uint8_t pixel = (pixels >> i) & 0xF;
-						if (pixel && x < 328 && ((draw->palpriority & 0x40) || !(context->drawbuffer[x] & 0x40))) {
-							context->drawbuffer[x ^ (draw->hflip << 2)] = pixel | draw->palpriority;
-						}
-					}
-					if (context->current_draw & 1) {
-						draw->palpriority = 0;
-					} else {
-						draw->x += 4;
-					}
-				}
-				context->current_draw++;
-			}
-		}
 		//Draw to framebuffer
-		if (context->vcounter > 8 && context->vcounter < 249 && context->hcounter < 320) {
-			if (!context->hcounter && context->vcounter == 9) {
+		if (context->vcounter < MAX_ACTIVE_LINES && context->hcounter < ACTIVE_WIDTH) {
+			if (!context->framebuffer) {
 				context->framebuffer = system_get_framebuffer(&context->pitch);
 				//pitch is in terms of bytes, but we want it in terms of pixels
 				context->pitch /= sizeof(uint16_t);
-				//clear pending interrupt flag since VBlank is over
-				context->status &= ~VDP_STATUS_PENDING_VINT;
 			}
-			uint16_t *dest = context->framebuffer + (context->vcounter - 9) * context->pitch + context->hcounter;
-			if (context->status & VDP_STATUS_ENABLED && context->vcounter > 16 && context->vcounter < 241) {
-				*dest = context->cram[0x3F & context->readbuffer[context->hcounter]];
-				dest++;
-				*dest = context->cram[0x3F & context->readbuffer[context->hcounter+1]];
+			uint16_t *dest = context->framebuffer + context->vcounter * context->pitch + context->hcounter;
+			if (
+				context->status & VDP_STATUS_ENABLED 
+				&& context->vcounter >= context->top_skip 
+				&& context->vcounter < MAX_ACTIVE_LINES - context->bottom_skip
+			) {
+				if (context->status & VDP_STATUS_DEPTH) {
+					uint16_t offset = context->start_offset + (context->vcounter - context->top_skip) * ACTIVE_WIDTH + context->hcounter;
+					//8bpp: one byte per pixel; offset is 16 bits wide so it wraps inside the 64KB framebuffer
+					*(dest++) = context->cram[current_fb[offset++]];
+					*dest = context->cram[current_fb[offset]];
+				} else {
+					//4bpp: two pixels per byte; index is wrapped to 16 bits so it cannot run past the 64KB framebuffer. NOTE(review): start_offset sits inside the shift and is halved too - confirm that is intended
+					uint8_t pixels = current_fb[(uint16_t)((context->start_offset + (context->vcounter - context->top_skip) * ACTIVE_WIDTH + context->hcounter) >> 1)];
+					*(dest++) = context->cram[context->pal_select | pixels >> 4];
+					*dest = context->cram[context->pal_select | (pixels & 0xF)];
+				}
 			} else {
-				//Display is disabled or we're in the border area, draw the background color
-				*dest = *context->cram;
-				dest++;
-				*dest = *context->cram;
+				*(dest++) = context->cram[0];
+				*dest = context->cram[0];
 			}
-		} else if(!context->hcounter && context->vcounter == 249) {
-			if (context->status & VDP_STATUS_ENABLED) {
-				context->status |= VDP_STATUS_PENDING_VINT;
-			}
+		} else if (context->framebuffer && context->hcounter < ACTIVE_WIDTH) {
 			system_framebuffer_updated();
 			context->framebuffer = NULL;
 		}
-		//Handle the FIFO
-		if (context->status & VDP_STATUS_FIFO) {
-			switch (context->fifo_dest)
-			{
-			case FIFO_DEST_VRAM:
-				if (!(context->status & VDP_STATUS_VRAM)) {
-					context->vram[context->dest_offset++] = context->fifo;
-					context->dest_offset &= sizeof(context->vram)/2-1;
-					context->status &= ~VDP_STATUS_FIFO;
-				}
-				break;
-			case FIFO_DEST_SRAM:
-				if (!(context->status & VDP_STATUS_SRAM)) {
-					context->sram[context->dest_offset++] = context->fifo;
-					context->dest_offset &= sizeof(context->sram)/2-1;
-					context->status &= ~VDP_STATUS_FIFO;
-				}
-				break;
-			case FIFO_DEST_CRAM:
-				context->cram[context->dest_offset++] = context->fifo;
-				context->dest_offset &= sizeof(context->cram)/2-1;
-				context->status &= ~VDP_STATUS_FIFO;
-				break;
+		if (!context->hcounter) { //hcounter is 0 only on the first step of a scanline
+			if (context->vcounter == (MAX_ACTIVE_LINES - context->bottom_skip)) { //first line after the displayed area, the same line vdp_next_interrupt targets
+				context->status |= VDP_STATUS_PENDING_VINT | VDP_STATUS_VBLANK;
+			} else if (context->vcounter == context->top_skip) {
+				//clear pending interrupt flag since VBlank is over
+				context->status &= ~(VDP_STATUS_PENDING_VINT | VDP_STATUS_VBLANK);
+			}
+		}
 		context->cycles += context->clock_inc;
 	}
 }
-void vdp_write_address(vdp *context, uint16_t value)
+void vdp_write_mode(vdp *context, uint16_t value)
 {
-	context->status &= ~VDP_STATUS_FIFO;
-	if (!(value & 0x8000)) {
-		context->fifo_dest = FIFO_DEST_VRAM;
-		context->dest_offset = (value & (sizeof(context->vram) -1))/2;
-	} else if ((value & 0xFF00) == 0xFE00) {
-		context->fifo_dest = FIFO_DEST_SRAM;
-		context->dest_offset = (value & (sizeof(context->sram) -1))/2;
-	} else if ((value & 0xFF00) == 0xFF00) {
-		context->fifo_dest = FIFO_DEST_CRAM;
-		context->dest_offset = (value & (sizeof(context->cram) -1))/2;
-	} 
+	uint16_t status_bits = VDP_STATUS_ENABLED | VDP_STATUS_DEPTH | VDP_STATUS_VINT_ENABLED | VDP_STATUS_FB_SELECT; //mode bits that are stored directly in the status word
+	context->status &= ~status_bits;
+	context->status |= value & status_bits;
+	context->pal_select = value >> 7 & 0x30; //mode bits 12-11 moved to bits 5-4; ORed with the 4bpp pixel value when indexing cram
+	context->top_skip = value >> 6 & 0x1F; //mode bits 10-6
+	context->bottom_skip = value >> 1 & 0x1F; //mode bits 5-1
 }
 
-void vdp_write_data(vdp *context, uint16_t value)
+void vdp_write_cram(vdp *context, uint16_t value)
 {
-	context->fifo = value;
-	context->status |= VDP_STATUS_FIFO;
-}
-
-void vdp_write_hscroll(vdp *context, uint16_t value)
-{
-	context->hscroll = value & 0x1FF;
-	if (value & 0x8000) {
-		context->status |= VDP_STATUS_ENABLED;
+	if (context->status & VDP_STATUS_CRAM_PENDING) {
+		context->cram[context->pal_write_index++] = value;
+		if (!(--context->pal_write_count)) {
+			context->status &= ~VDP_STATUS_CRAM_PENDING;
+		}
 	} else {
-		context->status &= ~VDP_STATUS_ENABLED;
+		context->pal_write_count = value;
+		context->pal_write_index = value >> 8;
+		context->status |= VDP_STATUS_CRAM_PENDING;
 	}
 }
 
@@ -232,11 +112,12 @@
 		return 0;
 	} else if (context->status & VDP_STATUS_ENABLED) {
 		uint32_t next_line = context->vcounter + 1;
-		uint32_t next_line_cyc = context->cycles + ((416 - context->hcounter) >> 1) * context->clock_inc;
-		if (context->vcounter < 249) {
-			return next_line_cyc + (249 - next_line) * 832;
+		uint32_t next_line_cyc = context->cycles + ((TOTAL_WIDTH - context->hcounter) >> 1) * context->clock_inc;
+		uint32_t vint_line = (MAX_ACTIVE_LINES - context->bottom_skip);
+		if (context->vcounter < vint_line) {
+			return next_line_cyc + (vint_line - next_line) * (TOTAL_WIDTH >> 1) * context->clock_inc;
 		} else {
-			return next_line_cyc + (249 + 262 - next_line) * 832;
+			return next_line_cyc + (vint_line + TOTAL_LINES - next_line) * (TOTAL_WIDTH >> 1) * context->clock_inc;
 		}
 	} else {
 		return 0xFFFFFFFF;
@@ -252,3 +133,8 @@
 {
 	return (context->status & VDP_STATUS_PENDING_VINT) != 0;
 }
+
+uint8_t *vdp_get_back_buffer(vdp *context)
+{
+	return context->vram + (context->status & VDP_STATUS_FB_SELECT ? 0 : 64*1024); //the 64KB half of vram that vdp_run is not displaying
+}
--- a/src/vdp.h	Mon Apr 11 23:35:51 2016 -0700
+++ b/src/vdp.h	Sat Aug 27 22:38:31 2016 -0700
@@ -2,69 +2,38 @@
 #define VDP_H_
 
 typedef struct {
-	uint16_t source;
-	uint16_t x;
-	uint8_t  hflip;
-	uint8_t  palpriority;
-} sprite_draw;
-
-typedef struct {
 	uint16_t *framebuffer;
-	uint8_t  *drawbuffer;
-	uint8_t  *readbuffer;
 	uint32_t cycles;
 	uint32_t clock_inc;
 	int      pitch;
 	
-	uint16_t fifo;
-	uint16_t dest_offset;
 	uint16_t status;
 	
 	uint16_t vcounter;
 	uint16_t hcounter;
 	
-	uint16_t vscroll;
-	uint16_t hscroll;
 	
-	uint16_t draw_source;
-	uint16_t draw_dest;
+	uint8_t  vram[128*1024];
+	uint16_t cram[256];
 	
+	uint16_t start_offset;
 	
-	uint16_t vram[32*512];
-	uint8_t  linebuffers[328*2];
-	uint16_t sram[64*2];
-	uint16_t cram[64];
+	uint8_t  top_skip;
+	uint8_t  bottom_skip;
+	uint8_t  pal_select;
 	
-	sprite_draw sprite_draws[40];
-	
-	uint8_t  fifo_dest;
-	uint8_t  draw_counter;
-	uint8_t  hflip;
-	uint8_t  palpriority;
-	uint8_t  current_draw;
+	uint8_t  pal_write_index;
+	uint8_t  pal_write_count;
 } vdp;
 
-enum {
-	FIFO_DEST_INVALID,
-	FIFO_DEST_VRAM,
-	FIFO_DEST_SRAM,
-	FIFO_DEST_CRAM
-};
-
-#define VDP_STATUS_FIFO         1
-#define VDP_STATUS_VRAM         2
-#define VDP_STATUS_SRAM         4
-#define VDP_STATUS_ENABLED      8
-#define VDP_STATUS_SPRITE_SCAN  16
-#define VDP_STATUS_PENDING_VINT 32
 
 void vdp_init(vdp *context, uint32_t clock_div);
 void vdp_run(vdp *context, uint32_t target);
-void vdp_write_address(vdp *context, uint16_t value);
-void vdp_write_data(vdp *context, uint16_t value);
-void vdp_write_hscroll(vdp *context, uint16_t value);
+void vdp_write_mode(vdp *context, uint16_t value);
+void vdp_write_cram(vdp *context, uint16_t value);
 uint32_t vdp_next_interrupt(vdp *context);
 void vdp_ack_interrupt(vdp *context);
 uint8_t vdp_interrupt_pending(vdp *context);
+uint8_t *vdp_get_back_buffer(vdp *context);
 
 #endif //VDP_H_