#include #include #include #include #include "com.h" #include "mem.h" #include "chunk.h" #include "ast.h" #include "util.h" #include "prs.h" #include "lib.h" #include "read.h" #define BYTECODE(C) (C->ch->bc) Chunk chunk_new(State *S) { return (Chunk){ 0 }; } Compiler compiler_new(Compiler *outer, Chunk *ch) { return (Compiler){ .S = outer->S, .ch = ch, .stack_cur = 0, .scope = NULL, }; } static void compile_byte(Compiler *C, uint8_t byte); static void compile_opcode(Compiler *C, Op opcode); static size_t compile_constant(Compiler *C, Val v); static int stack_effect_of(Op opcode) { switch (opcode) { case OP_LOADK: case OP_GETGLOBAL: case OP_GETLOCAL: case OP_NIL: case OP_TRUE: case OP_FALSE: case OP_ARRNEW: return 1; case OP_SKIP: case OP_REDO: case OP_SETGLOBAL: case OP_SETLOCAL: case OP_RET: case OP_HALT: case OP_ARRLEN: return 0; case OP_DROP: case OP_0BRANCH: case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_CMP: case OP_EQU: case OP_MOD: case OP_ARRAPPEND: return -1; case OP_SETIDX: return -2; // these ones depend on their argument. handle them specifically case OP_CALL: case OP_TAILCALL: case OP_ENDSCOPE: return 0; default: ERROR("unknown stack effect of opcode %d",opcode); } } // compile that byte directly static void compile_byte(Compiler *C, uint8_t byte) { Chunk *ch = C->ch; if (ch->bc.len == ch->bc.cap) { size_t newsz = (ch->bc.cap == 0 ? 8 : ch->bc.cap * 2); ch->bc.d = RENEW_ARR(C->S, ch->bc.d, uint8_t, ch->bc.cap, newsz); ch->bc.cap = newsz; } size_t ix = ch->bc.len; ch->bc.d[ix] = byte; ch->bc.len ++; } // compile an opcode, keeping track of its stack effect static void compile_opcode(Compiler *C, Op opcode) { int stack_effect = stack_effect_of(opcode); C->stack_cur += stack_effect; compile_byte(C, opcode); } // add a new constant to the constant table, and return its index // (but return the index of any existing identical constant instead of // inserting a duplicate) static size_t compile_constant(Compiler *C, Val v) { Chunk *ch = C->ch; for (int i = 0; i < ch->consts.len; i ++) if (val_equal(v, ch->consts.d[i])) return i; if (ch->consts.len == ch->consts.cap) { size_t newsz = (ch->consts.cap == 0 ? 8 : ch->consts.cap *2); ch->consts.d = RENEW_ARR(C->S, ch->consts.d, Val, ch->consts.cap, newsz); ch->consts.cap = newsz; } size_t ix = ch->consts.len; ch->consts.d[ix] = v; ch->consts.len ++; return ix; } // len is 1 + number of args static void compile_call_instr(Compiler *C, uint8_t len) { compile_opcode(C, OP_CALL); compile_byte(C, len); C->stack_cur -= len - 1; } static void compile_tailcall_instr(Compiler *C, uint8_t len) { compile_opcode(C, OP_TAILCALL); compile_byte(C, len); C->stack_cur -= len - 1; } static void compile_endscope_instr(Compiler *C, uint8_t nlocals) { if (nlocals > 0) { compile_opcode(C, OP_ENDSCOPE); compile_byte(C, nlocals); C->stack_cur -= nlocals; } } static size_t placeholder(Compiler *C) { size_t old_ix = BYTECODE(C).len; compile_byte(C, 0x00); compile_byte(C, 0x00); return old_ix; } static void patch(Compiler *C, size_t addr, uint16_t val) { BYTECODE(C).d[addr] = val & 0xff; BYTECODE(C).d[addr+1] = (val & 0xff00) >> 8; } // ---- enum flags { F_toplevel = 1, F_tail = 2, }; static void compile_node(Compiler *C, AstNode a, int flags); static void begin_scope(Compiler *C); static void end_scope(Compiler *C); typedef void (*form_compiler)(Compiler *C, AstVec l, Op op, int flags); static void compile_body(Compiler *C, AstVec l, int startat, int flags) { begin_scope(C); for (int i = startat; i < l.len - 1; i++) { compile_node(C, l.vals[i], F_toplevel); compile_opcode(C, OP_DROP); } compile_node(C, l.vals[l.len - 1], F_toplevel | (flags & F_tail)); end_scope(C); } void single_form(Compiler *C, AstVec l, Op op) { compile_node(C, l.vals[1], 0); compile_opcode(C, op); } static Local *locate_local(Compiler *C, char *name); void set_form(Compiler *C, AstVec l, Op _, int flags) { AstNode target = l.vals[1]; if (target.ty == AST_IDENT) { // set variable, local or global char *name = target.as.str; Local *loc = locate_local(C, name); if (loc != NULL) { compile_node(C, l.vals[2], 0); compile_opcode(C, OP_SETLOCAL); compile_byte(C, loc->slot); } else { // write global ObjString *o = objstring_copy_cstr(C->S, name); compile_node(C, l.vals[2], 0); compile_opcode(C, OP_SETGLOBAL); compile_byte(C, compile_constant(C, VAL_OBJ(o))); } } else if (target.ty == AST_LIST) { AstVec pair = target.as.list; CHECK(pair.len == 2, "can only set to (arr, ix) 2-pair"); // (value arr ix) <- TOS compile_node(C, l.vals[2], 0); compile_node(C, pair.vals[0], 0); compile_node(C, pair.vals[1], 0); compile_opcode(C, OP_SETIDX); } } void do_form(Compiler *C, AstVec l, Op _, int flags) { compile_body(C, l, 1, flags); } void if_form(Compiler *C, AstVec l, Op _, int flags) { // (if cond if-true if-false) // cond // 0branch ->A // if-true // skip ->B // A: if-false // B: // never toplevel int downflags = flags & ~F_toplevel; int orig_stack_cur = C->stack_cur; compile_node(C, l.vals[1], 0); compile_opcode(C, OP_0BRANCH); size_t ph_a = placeholder(C); compile_node(C, l.vals[2], downflags); compile_opcode(C, OP_SKIP); size_t ph_b = placeholder(C); size_t dest_a = BYTECODE(C).len; int stack_cur_a = C->stack_cur; C->stack_cur = orig_stack_cur; compile_node(C, l.vals[3], downflags); size_t dest_b = BYTECODE(C).len; int stack_cur_b = C->stack_cur; patch(C, ph_a, dest_a - ph_a - 2); patch(C, ph_b, dest_b - ph_b - 2); CHECK(stack_cur_a == stack_cur_b, "this should never happen"); CHECK(stack_cur_a == orig_stack_cur + 1, "this should never happen"); } void while_form(Compiler *C, AstVec l, Op _, int flags) { // (while cond body ...) // A: // cond // 0branch ->B // body .... // redo ->A // B: // nil (while loop always returns nil) size_t dest_a = BYTECODE(C).len; compile_node(C, l.vals[1], 0); compile_opcode(C, OP_0BRANCH); size_t ph_b = placeholder(C); compile_body(C, l, 2, flags & ~F_tail); compile_opcode(C, OP_DROP); compile_opcode(C, OP_REDO); size_t ph_a = placeholder(C); size_t dest_b = BYTECODE(C).len; compile_opcode(C, OP_NIL); patch(C, ph_a, ph_a - dest_a + 2); patch(C, ph_b, dest_b - ph_b - 2); } void arith_form(Compiler *C, AstVec l, Op op, int flags) { compile_node(C, l.vals[1], 0); compile_node(C, l.vals[2], 0); compile_opcode(C, op); } static void begin_scope(Compiler *C) { Scope *sc = malloc(sizeof(Scope)); CHECK(sc != NULL, "memory fail"); memset(sc, 0, sizeof(Scope)); sc->outer = C->scope; C->scope = sc; } static void end_scope(Compiler *C) { Scope *sc = C->scope; CHECK(sc != NULL, "attempt to end nonexistent scope"); C->scope = sc->outer; // printf("ending scope with %d locals, named: \n", sc->nlocals); // for (int i = 0; i < sc->nlocals; i++) { // Local loc = sc->locals[i]; // printf("\t%3d %s\n",loc.slot, loc.name); // } compile_endscope_instr(C, sc->nlocals); free(sc); } // returns slot of declared local static uint8_t declare_local(Compiler *C, char *name) { Scope *sc = C->scope; CHECK(sc != NULL, "can't declare local outside of scope"); Local *l = &sc->locals[sc->nlocals++]; l->name = name; // -1 because local is expected to be already on the stack // ie sitting just below where stack_cur points uint8_t slot = C->stack_cur - 1; l->slot = slot; // printf("declaring local %s at %d, stack_cur is %d\n",l->name, l->slot, C->stack_cur); return slot; } static Local *locate_local(Compiler *C, char *name) { for (Scope *sc = C->scope; sc != NULL; sc = sc->outer) { for (int i = 0; i < sc->nlocals; i++) { Local *loc = &sc->locals[i]; if (0 == strcmp(loc->name, name)) return loc; } } return NULL; } void let_form(Compiler *C, AstVec l, Op _, int flags) { CHECK(l.vals[1].ty == AST_LIST, "let's first argument must be list"); AstVec bindlist = l.vals[1].as.list; CHECK(bindlist.len % 2 == 0, "unmatched binding in let"); int nbinds = bindlist.len / 2; begin_scope(C); for (int i = 0; i < nbinds; i++) { int ix = i * 2; AstNode name = bindlist.vals[ix]; AstNode expr = bindlist.vals[ix+1]; CHECK(name.ty == AST_IDENT, "binding name must be identifier"); compile_node(C, expr, 0); declare_local(C, name.as.str); } compile_body(C, l, 2, flags); end_scope(C); } void for_form(Compiler *C, AstVec l, Op _, int flags) { // (for (x n) ...) CHECK(l.vals[1].ty == AST_LIST, "for needs binding list"); AstVec blist = l.vals[1].as.list; CHECK(blist.len == 2, "for binding list must have length 2"); CHECK(blist.vals[0].ty == AST_IDENT, "can only bind to ident"); char *ivar = blist.vals[0].as.str; begin_scope(C); compile_opcode(C, OP_LOADK); compile_byte(C, compile_constant(C, VAL_NUM(0))); uint8_t islot = declare_local(C, ivar); compile_node(C, blist.vals[1], 0); uint8_t mslot = declare_local(C, "__max__"); // A // getlocal ivar // getlocal max // cmp // 0branch -> B // body ... // incr ivar // redo -> A // B: // nil size_t dest_A = BYTECODE(C).len; compile_opcode(C, OP_GETLOCAL); compile_byte(C, islot); compile_opcode(C, OP_GETLOCAL); compile_byte(C, mslot); compile_opcode(C, OP_CMP); compile_opcode(C, OP_0BRANCH); size_t ph_B = placeholder(C); compile_body(C, l, 2, flags & ~F_tail); compile_opcode(C, OP_DROP); compile_opcode(C, OP_GETLOCAL); compile_byte(C, islot); compile_opcode(C, OP_LOADK); compile_byte(C, compile_constant(C, VAL_NUM(1))); compile_opcode(C, OP_ADD); compile_opcode(C, OP_SETLOCAL); compile_opcode(C, islot); compile_opcode(C, OP_REDO); size_t ph_A = placeholder(C); size_t dest_B = BYTECODE(C).len; compile_opcode(C, OP_NIL); patch(C, ph_A , ph_A - dest_A + 2); patch(C, ph_B ,dest_B - ph_B - 2); end_scope(C); } void each_form(Compiler *C, AstVec l, Op _, int flags) { // (each (x a) ...) // returns nil, for now CHECK(l.vals[1].ty == AST_LIST, "each needs binding list"); AstVec blist = l.vals[1].as.list; CHECK(blist.len == 2, "each binding list must have length 2"); CHECK(blist.vals[0].ty == AST_IDENT, "can only bind to ident"); char *ivar = blist.vals[0].as.str; begin_scope(C); compile_opcode(C, OP_LOADK); compile_byte(C, compile_constant(C, VAL_NUM(0))); uint8_t islot = declare_local(C, "__idx__"); compile_node(C, blist.vals[1], 0); uint8_t aslot = declare_local(C, "__arr__"); compile_opcode(C, OP_GETLOCAL); compile_byte(C, aslot); compile_opcode(C, OP_ARRLEN); uint8_t mslot = declare_local(C, "__max__"); compile_opcode(C, OP_NIL); uint8_t vslot = declare_local(C, ivar); // A // getlocal idx // getlocal max // cmp // 0branch -> B // getlocal arr // getlocal idx // call 2 // setlocal ivar // body ... // incr idx // redo -> A // B: // nil size_t dest_A = BYTECODE(C).len; compile_opcode(C, OP_GETLOCAL); compile_byte(C, islot); compile_opcode(C, OP_GETLOCAL); compile_byte(C, mslot); compile_opcode(C, OP_CMP); compile_opcode(C, OP_0BRANCH); size_t ph_B = placeholder(C); compile_opcode(C, OP_GETLOCAL); compile_byte(C, aslot); compile_opcode(C, OP_GETLOCAL); compile_byte(C, islot); compile_call_instr(C, 2); compile_opcode(C, OP_SETLOCAL); compile_byte(C, vslot); compile_opcode(C, OP_DROP); compile_body(C, l, 2, flags & ~F_tail); compile_opcode(C, OP_DROP); compile_opcode(C, OP_GETLOCAL); compile_byte(C, islot); compile_opcode(C, OP_LOADK); compile_byte(C, compile_constant(C, VAL_NUM(1))); compile_opcode(C, OP_ADD); compile_opcode(C, OP_SETLOCAL); compile_byte(C, islot); compile_opcode(C, OP_DROP); compile_opcode(C, OP_REDO); size_t ph_A = placeholder(C); size_t dest_B = BYTECODE(C).len; compile_opcode(C, OP_NIL); patch(C, ph_A, ph_A - dest_A + 2); patch(C, ph_B, dest_B - ph_B - 2); end_scope(C); } void def_form(Compiler *C, AstVec l, Op _, int flags) { CHECK(l.vals[1].ty == AST_IDENT, "def's first argument must be ident"); CHECK(flags & F_toplevel, "def only allowed at top level"); compile_node(C, l.vals[2], 0); declare_local(C, l.vals[1].as.str); // whatever is calling us will compile an OP_DROP next // or, well. not if we're in tail position. but i can't see // any circumstance where you'd want that anyway compile_opcode(C, OP_NIL); } void fn_form(Compiler *C, AstVec l, Op _, int flags) { // (fn (arg arg arg) body ...) CHECK(l.vals[1].ty == AST_LIST, "fn's first argument must be list"); AstVec arglist = l.vals[1].as.list; CHECK(arglist.len <= 255, "maximum 255 args for function"); uint8_t arity = arglist.len; ObjFunc *func = objfunc_new(C->S, arity); Compiler subcompiler = compiler_new(C, &func->ch); Compiler *SC = &subcompiler; begin_scope(SC); // when called, stack slot 0 contains the function itself, // stack slots 1..n contain passed arguments SC->stack_cur ++; declare_local(SC, "__func__"); for (int i = 0; i < arity; i++) { AstNode argname = arglist.vals[i]; CHECK(argname.ty == AST_IDENT, "argument name must be identifier"); SC->stack_cur ++; declare_local(SC, argname.as.str); } compile_body(SC, l, 2, F_tail); end_scope(SC); compile_opcode(SC, OP_RET); compile_opcode(C, OP_LOADK); compile_byte(C, compile_constant(C, VAL_OBJ(func))); } void defn_form(Compiler *C, AstVec l, Op _, int flags) { // todo: reduce redundancy CHECK(l.vals[1].ty == AST_LIST, "defns first arg must be list"); AstVec blist = l.vals[1].as.list; CHECK(blist.len > 0, "defn needs at least a function name"); CHECK(blist.len <= 256, "maximum 255 args for function"); CHECK(flags & F_toplevel, "defn only allowed at toplevel"); uint8_t arity = blist.len - 1; CHECK(blist.vals[0].ty == AST_IDENT, "func name must be ident"); char *fname = blist.vals[0].as.str; ObjFunc *func = objfunc_new(C->S, arity); Compiler subcompiler = compiler_new(C, &func->ch); Compiler *SC = &subcompiler; begin_scope(SC); SC->stack_cur ++; declare_local(SC, fname); for (int i = 0; i < arity; i++) { AstNode argname = blist.vals[i+1]; CHECK(argname.ty == AST_IDENT, "arg name must be identifier"); SC->stack_cur ++; declare_local(SC, argname.as.str); } compile_body(SC, l, 2, F_tail); end_scope(SC); compile_opcode(SC, OP_RET); compile_opcode(C, OP_LOADK); compile_byte(C, compile_constant(C, VAL_OBJ(func))); declare_local(C, fname); compile_opcode(C, OP_NIL); } typedef struct { char *name; int min_params; bool ellipsis; form_compiler action; Op op; } BuiltinForm; static BuiltinForm builtin_forms[] = { { "set!", 2, false, set_form, 0 }, { "do", 1, true, do_form, 0 }, { "if", 3, false, if_form, 0 }, { "while", 2, true, while_form, 0 }, { "for", 2, true, for_form, 0 }, { "each", 2, true, each_form, 0 }, { "fn", 2, true, fn_form, 0 }, { "let", 2, true, let_form, 0 }, { "def", 2, false, def_form, 0 }, { "defn", 2, true, defn_form, 0 }, #define ARITH_OP(str, op) \ { str, 2, false, arith_form, op }, ARITH_OP("+", OP_ADD) ARITH_OP("-", OP_SUB) ARITH_OP("*", OP_MUL) ARITH_OP("/", OP_DIV) ARITH_OP("=", OP_EQU) ARITH_OP("<", OP_CMP) ARITH_OP("%", OP_MOD) #undef ARITH_OP { 0 }, }; typedef struct { char *name; Op op; } BuiltinIdent; static BuiltinIdent builtin_idents[] = { { "true", OP_TRUE }, { "false", OP_FALSE }, { "nil", OP_NIL }, { 0 }, }; static void compile_node(Compiler *C, AstNode a, int flags) { switch (a.ty) { case AST_IDENT:; char *ident = a.as.str; bool found_builtin = false; for (BuiltinIdent *b = builtin_idents; b->name != NULL; b++) { if (0 == strcmp(b->name, ident)) { compile_opcode(C, b->op); found_builtin = true; break; } } if (!found_builtin) { Local *loc = locate_local(C, ident); if (loc != NULL) { compile_opcode(C, OP_GETLOCAL); compile_byte(C, loc->slot); } else { // read global ObjString *o = objstring_copy_cstr(C->S, a.as.str); compile_opcode(C, OP_GETGLOBAL); compile_byte(C, compile_constant(C, VAL_OBJ(o))); } } break; case AST_NUM: compile_opcode(C, OP_LOADK); compile_byte(C, compile_constant(C, VAL_NUM(a.as.num))); break; case AST_STRING: { ObjString *o = objstring_copy_cstr(C->S, a.as.str); compile_opcode(C, OP_LOADK); compile_byte(C, compile_constant(C, VAL_OBJ(o))); break; } case AST_ARR: { compile_opcode(C, OP_ARRNEW); AstVec v = a.as.list; for (int i = 0; i < v.len; i++) { compile_node(C, v.vals[i], 0); compile_opcode(C, OP_ARRAPPEND); } break; } case AST_LIST: { AstVec l = a.as.list; CHECK(l.len > 0, "can't handle empty list"); BuiltinForm *form = NULL; if (l.vals[0].ty == AST_IDENT) { char *head = l.vals[0].as.str; for (BuiltinForm *b = builtin_forms; b->name != NULL; b++) { if (0 == strcmp(b->name, head)) { form = b; break; } } } if (form != NULL) { size_t nparams = l.len - 1; if (form->ellipsis) CHECK(nparams >= form->min_params, "%s requires at least %d parameters", form->name, form->min_params); else CHECK(nparams == form->min_params, "%s requires exactly %d parameters", form->name, form->min_params); form->action(C, l, form->op, flags); } else { // function call // (f a b c ) CHECK(l.len < 256, "max 255 args in a function call"); for (int i = 0; i < l.len; i++) { compile_node(C, l.vals[i], 0); } if (flags & F_tail) { compile_tailcall_instr(C, l.len); } else { compile_call_instr(C, l.len); } } break; } } } int main(int argc, char **argv) { State st = state_new(); State *S = &st; Chunk ch = chunk_new(S); S->do_disasm = false; S->do_trace = false; char *infile_names[16]; int infile_name_n = 0; for (int i = 1; i < argc; i++) { if (argv[i][0] != '-') { infile_names[infile_name_n++] = argv[i]; CHECK(infile_name_n < 16, "input file count exceeded"); continue; } switch (argv[i][1]) { case 'D': CHECK(strlen(argv[i]) > 1, "unknown option a"); switch (argv[i][2]) { case 'l': S->do_disasm = true; break; case 't': S->do_trace = true; break; default: ERROR("unknown option b"); } break; default: ERROR("unknown option c"); } } Compiler com = (Compiler){ 0 }; com.S = S; com.ch = &ch; if (infile_name_n == 0) infile_names[infile_name_n++] = "-"; for (int i = 0; i < infile_name_n; i++) { char *fname = infile_names[i]; FILE *infile = NULL; if (0 == strcmp(fname, "-")) infile = stdin; else infile = fopen(fname, "r"); if (infile == NULL) { perror("fopen"); exit(1); } AstNode an = { 0 }; AstNode top = astnode_new_list(); pcc_context_t *parser = pcc_create(infile); while (pcc_parse(parser, &an)) { astnode_append(&top, an); } pcc_destroy(parser); compile_body(&com, top.as.list, 0, 0); astnode_free(&top); } compile_opcode(&com, OP_HALT); Thread th = thread_new(S); th.ch = &ch; S->th = &th; load_stdlib(S); return runvm(S); }