### Some sketches for a stack bytecode interpreter
###
### Syntax: GNU as, AT&T operand order, x86-64.
###
### Register roles, as used by the handlers in this file:
###   %rbp  bytecode program counter (address of the next instruction byte)
###   %rax  opcode of the instruction being executed, zero-extended by
###         `next`; handlers decode operand bits out of it
###   %rcx  cached top of the operand stack
###   %rsi  operand stack pointer: points at the next free slot, and the
###         stack grows toward lower addresses
###   %rdi  return stack pointer, same convention as %rsi
###   %rdx  current activation record (locals start at locals_offset)
###   %r9   c-list of slices
###   %r8   scratch

        .equ    PROC_ID_COUNT, 65536    # call_bytecode reads a 16-bit procedure ID
        .equ    PROC_ENTRY_SIZE, 8      # one 64-bit bytecode entry point per procedure

        .bss
        .p2align 3
        ## One entry per possible procedure ID, so that call_bytecode's
        ## unchecked 16-bit index can never read past the table.
proc_table:
        .fill   PROC_ID_COUNT, PROC_ENTRY_SIZE, 0

        .equ    OPCODE_SLOTS, 256       # `next` indexes the table with a full byte
        .equ    OPCODES_ASSIGNED, 27    # number of handler slots listed below; keep in sync

        .data
        .p2align 3
opcode_table:
        ## 0-7: localfetch, local index in the low three bits
        .quad   localfetch_bytecode, localfetch_bytecode, localfetch_bytecode, localfetch_bytecode
        .quad   localfetch_bytecode, localfetch_bytecode, localfetch_bytecode, localfetch_bytecode
        ## 8-15: localstore, local index in the low three bits
        .quad   localstore_bytecode, localstore_bytecode, localstore_bytecode, localstore_bytecode
        .quad   localstore_bytecode, localstore_bytecode, localstore_bytecode, localstore_bytecode
        ## 16-17: slicefetch, 18-19: current; slice index in the low bit
        .quad   slicefetch_bytecode, slicefetch_bytecode, current_bytecode, current_bytecode
        ## 20-21: advance; slice index in the low bit
        .quad   advance_bytecode, advance_bytecode
        ## 22: add, 23: call, 24: ret, 25: drop
        .quad   add_bytecode, call_bytecode, ret_bytecode, drop_bytecode
        ## 26: and
        .quad   and_bytecode
        ## 27-255: unassigned. Route them to a trap so that a bad opcode
        ## byte cannot jump through whatever happens to follow the table.
        .rept   OPCODE_SLOTS - OPCODES_ASSIGNED
        .quad   .Lbad_opcode
        .endr

        .text

        ## next: fetch the opcode byte at %rbp, step %rbp past it, and
        ## jump to that opcode's handler. Every handler ends with this.
        ## Leaves the zero-extended opcode in %rax; modifies flags.
        ## NOTE(review): opcode_table is addressed absolutely with an
        ## index register, so this presumably needs a non-PIE link --
        ## confirm against the build.
        .macro  next
        movzbl  (%rbp), %eax            # fetch instruction
        inc     %rbp                    # advance program counter
        jmp     *opcode_table(,%rax,8)  # invoke appropriate handler
        .endm

        ## Target of every unassigned opcode_table slot.
.Lbad_opcode:
        ud2                             # undefined instruction: fault instead of running wild

        ## interp: start executing bytecode at %rbp.
        ## NOTE(review): nothing here loads or saves any of the registers
        ## listed at the top of the file, and no handler returns to the
        ## caller; whoever jumps here has to set them up first.
        .globl  interp
interp:
        next

        ## This is not quite right because we need to set up a new
        ## activation record, popping the right number of arguments
        ## into locals. And in that case we might as well store the
        ## return address in it too; we don't need a return stack
        ## pointer, just a frame pointer.
call_bytecode:
        ## Operand: a 16-bit procedure ID following the opcode byte.
        ## Saves the address of the instruction after the call on the
        ## return stack and continues at the entry point in proc_table.
        ## NOTE(review): the ID is not range-checked, so proc_table must
        ## have an entry for every ID that can appear in bytecode.
        movzwl  (%rbp), %eax            # load 16-bit procedure ID from instruction stream
        add     $2, %rbp                # step over the ID; %rbp is now the return address
        mov     %rbp, (%rdi)            # store program counter on return stack
        sub     $8, %rdi                # bump return stack pointer
        mov     proc_table(,%rax,8), %rbp  # load entry point for new bytecode from proc table
        next

ret_bytecode:
        ## Undo call_bytecode: pop the saved program counter.
        add     $8, %rdi
        mov     (%rdi), %rbp
        next

        .equiv  locals_offset, 32       # the position where locals start in the activation record %rdx points to

localstore_bytecode:
        ## Copy the top of the operand stack into local number
        ## (opcode & 7) of the current activation record.
        and     $7, %eax                # low bits of instruction have local index
        mov     %rcx, locals_offset(%rdx,%rax,8)  # store top of stack
        ## don't pop operand stack; an explicit drop bytecode can do that
        next

localfetch_bytecode:
        ## Push local number (opcode & 7) onto the operand stack.
        mov     %rcx, (%rsi)            # push top of operand stack
        sub     $8, %rsi
        and     $7, %eax                # low bits of instruction have local index
        mov     locals_offset(%rdx,%rax,8), %rcx
        next

add_bytecode:
        ## Pop the second operand from memory and add it into the
        ## cached top of stack (plain 64-bit add, no tag check).
        add     $8, %rsi
        add     (%rsi), %rcx
        next

dynamic_add_bytecode:
        ## Tag-checked add: takes the fast path only when the low three
        ## bits of both operands are zero. Has no opcode_table slot yet.
        add     $8, %rsi
        mov     (%rsi), %r8             # second operand
        ## Use %r10 as the scratch register: %rdi is the return stack
        ## pointer that call_bytecode and ret_bytecode rely on, so it
        ## must not be clobbered here.
        mov     %r8, %r10
        or      %rcx, %r10              # merge together tag bits
        test    $7, %r10b               # any nonzero tag bits means pointer
        jnz     slow_arithmetic_path
        add     %r8, %rcx
        next

and_bytecode:
        ## Pop the second operand and AND it into the top of stack.
        ## Full 64-bit operation, like add_bytecode; a 32-bit AND into
        ## %ecx would zero the upper half of the result.
        add     $8, %rsi
        and     (%rsi), %rcx
        next

drop_bytecode:
        ## Discard the top of stack and reload the cache from memory.
        add     $8, %rsi
        mov     (%rsi), %rcx
        next

slow_arithmetic_path:
        ## Now we have the operands in %r8 and %rcx and the arithmetic
        ## bytecode in %rax, so we can do whatever we want to add them
        ## or subtract them or whatever. This has to be within 128 bytes
        ## of all jumps to it if those jumps are to use the short
        ## (8-bit displacement) encoding.
        ## Stub for now: just carries on with the next instruction.
        next

bounds_error:
        ## A similar exception-handling path. XXX TODO
        ## Stub for now: the failed fetch is skipped and the index is
        ## left on top of the stack.
        next

        .equ    SLICE_BOUND, 8          # offset of the second 64-bit word of a c-list entry

slicefetch_bytecode:
        ## We want to fetch a 64-bit word from a slice of 64-bit
        ## words. %r9 points at our c-list of slices, each of which
        ## has a base and a bound, each 64 bits. The low bit of the
        ## opcode is the index into that list.
        ## Replaces the index on top of the stack with the word fetched.
        and     $1, %eax
        shl     $1, %eax                # two 64-bit words per c-list entry
        lea     (%r9,%rax,8), %rax      # pointer to slice
        mov     SLICE_BOUND(%rax), %r8  # bound
        cmp     %rcx, %r8               # is the index small enough?
        jbe     bounds_error            # unsigned bound <= index fails; if we pass this, %rcx is OK XXX
        mov     (%rax), %rax            # now get the base pointer
        mov     (%rax,%rcx,8), %rcx     # fetch the actual result
        next

current_bytecode:
        ## Suppose we guarantee that these slices are never empty?
        ## And represent them as [begin, end) pointer pairs.
        ## Loads the first word (begin) of the selected c-list entry
        ## into the top of stack.
        ## NOTE(review): this overwrites %rcx without pushing the old
        ## top of stack, unlike localfetch_bytecode -- confirm intended.
        and     $1, %eax
        shl     $1, %eax
        lea     (%r9,%rax,8), %rax
        mov     (%rax), %rcx
        next

advance_bytecode:
        ## Step the selected slice's begin pointer forward by one
        ## 64-bit word if it is below end. Pushes the old top of stack;
        ## the new top is 1 if the pointer moved and 0 if not.
        ## NOTE(review): begin can reach end this way, i.e. the slice
        ## can become empty -- check against the never-empty idea above.
        and     $1, %eax
        shl     $1, %eax
        lea     (%r9,%rax,8), %rax
        mov     %rcx, (%rsi)            # push top of operand stack
        sub     $8, %rsi
        mov     (%rax), %r8             # begin
        cmp     SLICE_BOUND(%rax), %r8  # begin below end?
        setb    %cl
        movzx   %cl, %ecx               # %rcx = 1 if so, else 0
        lea     (%r8,%rcx,8), %r8       # begin += 8 * that flag
        mov     %r8, (%rax)
        next

forthcells_bytecode:
        ## Scale the top of stack from 64-bit cells to bytes. Done at
        ## 64 bits so the upper half of the value is not discarded.
        ## Has no opcode_table slot yet.
        shl     $3, %rcx
        next

forthfetch_bytecode:
        ## Replace the address on top of the stack with the 64-bit
        ## word stored there. Has no opcode_table slot yet.
        mov     (%rcx), %rcx
        next