#include #include #include #include #include "com.h" #include "mem.h" #include "chunk.h" #include "ast.h" #include "util.h" #include "prs.h" #include "lib.h" #include "read.h" #define BYTECODE(C) (C->ch->bc) enum flags { F_toplevel = 1, F_tail = 2, }; Chunk chunk_new(State *S) { return (Chunk){ 0 }; } Compiler compiler_new(Compiler *outer, Chunk *ch) { return (Compiler){ .S = outer->S, .ch = ch, .stack_cur = 0, .scope = NULL, }; } static int stack_effect_of(Op opcode) { switch (opcode) { case OP_LOADK: case OP_GETGLOBAL: case OP_GETLOCAL: case OP_NIL: case OP_TRUE: case OP_FALSE: case OP_ARRNEW: return 1; case OP_SKIP: case OP_REDO: case OP_SETGLOBAL: case OP_SETLOCAL: case OP_RET: case OP_HALT: case OP_ARRLEN: return 0; case OP_DROP: case OP_0BRANCH: case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_CMP: case OP_EQU: case OP_MOD: case OP_ARRAPPEND: return -1; case OP_SETIDX: return -2; // these ones depend on their argument. handle them specifically case OP_CALL: case OP_TAILCALL: case OP_ENDSCOPE: ERROR("stack effect of opcode %d not constant",opcode); break; default: ERROR("unknown stack effect of opcode %d",opcode); break; } } // compilation of things static void cpl_byte(Compiler *C, uint8_t byte) { Chunk *ch = C->ch; ENSURE_CAP(C->S, ch->bc, uint8_t, ch->bc.len+1); ch->bc.d[ch->bc.len++] = byte; } static void cpl_op(Compiler *C, Op op) { int stack_effect = stack_effect_of(op); C->stack_cur += stack_effect; cpl_byte(C, op); } // particular instructions static uint8_t cpl_const(Compiler *C, Val v) { Chunk *ch = C->ch; for (int i = 0; i < ch->consts.len; i++) { if (val_equal(v, ch-consts.d[i])) { return i; } } CHECK(ch->consts.len < 256, "maximum number of constants per function reached"); ENSURE_CAP(C->S, ch->consts, Val, ch->consts.len+1); uint8_t ix = ch->consts.len; ch->consts.d[ch->consts.len++] = v; return ix; } static void cpl_constop(Compiler *C, Op op, Val v) { cpl_op(C, op); cpl_byte(C, cpl_const(C, v)); } static void cpl_call(Compiler *C, uint8_t len) { cpl_byte(C, OP_CALL); cpl_byte(C, len); C->stack_cur -= len - 1; } static void cpl_tailcall(Compiler *C, uint8_t len) { cpl_byte(C, OP_TAILCALL); cpl_byte(C, len); C->stack_cur -= len - 1; } static void cpl_endscope(Compiler *C, uint8_t nlocals) { if (nlocals > 0) { cpl_byte(C, OP_ENDSCOPE); cpl_byte(C, nlocals); C->stack_cur -= nlocals; } } // jump offsets and things static size_t placeholder(Compiler *C) { size_t old_ix = BYTECODE(C).len; compile_byte(C, 0x00); compile_byte(C, 0x00); return old_ix; } static void patch(Compiler *C, size_t addr, uint16_t val) { BYTECODE(C).d[addr] = val & 0xff; BYTECODE(C).d[addr+1] = (val & 0xff00) >> 8; } // scopes and locals static void begin_scope(Compiler *C) { Scope *sc = malloc(sizeof(Scope)); CHECK(sc != NULL, "memory fail"); memset(sc, 0, sizeof(Scope)); sc->outer = C->scope; C->scope = sc; } static void end_scope(Compiler *C) { Scope *sc = C->scope; CHECK(sc != NULL, "attempt to end nonexistent scope"); C->scope = sc->outer; // printf("ending scope with %d locals, named: \n", sc->nlocals); // for (int i = 0; i < sc->nlocals; i++) { // Local loc = sc->locals[i]; // printf("\t%3d %s\n",loc.slot, loc.name); // } compile_endscope_instr(C, sc->nlocals); free(sc); } // returns slot of declared local static uint8_t declare_local(Compiler *C, char *name) { Scope *sc = C->scope; CHECK(sc != NULL, "can't declare local outside of scope"); Local *l = &sc->locals[sc->nlocals++]; l->name = name; // -1 because local is expected to be already on the stack // ie sitting just below where stack_cur points uint8_t slot = C->stack_cur - 1; l->slot = slot; // printf("declaring local %s at %d, stack_cur is %d\n",l->name, l->slot, C->stack_cur); return slot; } static Local *locate_local(Compiler *C, char *name) { for (Scope *sc = C->scope; sc != NULL; sc = sc->outer) { for (int i = 0; i < sc->nlocals; i++) { Local *loc = &sc->locals[i]; if (0 == strcmp(loc->name, name)) return loc; } } return NULL; } // compiles a body, ie sequence of "toplevel" expressions in which // declarations are allowed static void compile_body(Compiler *C, AstVec l, int startat, int flags) { begin_scope(C); for (int i = startat; i < l.len - 1; i++) { compile_node(C, l.vals[i], F_toplevel); compile_opcode(C, OP_DROP); } compile_node(C, l.vals[l.len - 1], F_toplevel | (flags & F_tail)); end_scope(C); } static void set_form(Compiler *C, AstVec l, Op _, int flags) { AstNode target = l.vals[1]; if (target.ty == AST_IDENT) { // set variable, local or global char *name = target.as.str; Local *loc = locate_local(C, name); if (loc != NULL) { compile_node(C, l.vals[2], 0); compile_opcode(C, OP_SETLOCAL); compile_byte(C, loc->slot); } else { // write global ObjString *o = objstring_copy_cstr(C->S, name); compile_node(C, l.vals[2], 0); compile_opcode(C, OP_SETGLOBAL); compile_byte(C, compile_constant(C, VAL_OBJ(o))); } } else if (target.ty == AST_LIST) { AstVec pair = target.as.list; CHECK(pair.len == 2, "can only set to (arr, ix) 2-pair"); // (value arr ix) <- TOS compile_node(C, l.vals[2], 0); compile_node(C, pair.vals[0], 0); compile_node(C, pair.vals[1], 0); compile_opcode(C, OP_SETIDX); } } static void do_form(Compiler *C, AstVec l, Op _, int flags) { compile_body(C, l, 1, flags); } static void if_form(Compiler *C, AstVec l, Op _, int flags) { // (if cond if-true if-false) // cond // 0branch ->A // if-true // skip ->B // A: if-false // B: // never toplevel int downflags = flags & ~F_toplevel; int orig_stack_cur = C->stack_cur; compile_node(C, l.vals[1], 0); compile_opcode(C, OP_0BRANCH); size_t ph_a = placeholder(C); compile_node(C, l.vals[2], downflags); compile_opcode(C, OP_SKIP); size_t ph_b = placeholder(C); size_t dest_a = BYTECODE(C).len; int stack_cur_a = C->stack_cur; C->stack_cur = orig_stack_cur; compile_node(C, l.vals[3], downflags); size_t dest_b = BYTECODE(C).len; int stack_cur_b = C->stack_cur; patch(C, ph_a, dest_a - ph_a - 2); patch(C, ph_b, dest_b - ph_b - 2); CHECK(stack_cur_a == stack_cur_b, "this should never happen"); CHECK(stack_cur_a == orig_stack_cur + 1, "this should never happen"); } static void while_form(Compiler *C, AstVec l, Op _, int flags) { // (while cond body ...) // A: // cond // 0branch ->B // body .... // redo ->A // B: // nil (while loop always returns nil) size_t dest_a = BYTECODE(C).len; compile_node(C, l.vals[1], 0); compile_opcode(C, OP_0BRANCH); size_t ph_b = placeholder(C); compile_body(C, l, 2, flags & ~F_tail); compile_opcode(C, OP_DROP); compile_opcode(C, OP_REDO); size_t ph_a = placeholder(C); size_t dest_b = BYTECODE(C).len; compile_opcode(C, OP_NIL); patch(C, ph_a, ph_a - dest_a + 2); patch(C, ph_b, dest_b - ph_b - 2); } static void arith_form(Compiler *C, AstVec l, Op op, int flags) { compile_node(C, l.vals[1], 0); compile_node(C, l.vals[2], 0); compile_opcode(C, op); } static void let_form(Compiler *C, AstVec l, Op _, int flags) { CHECK(l.vals[1].ty == AST_LIST, "let's first argument must be list"); AstVec bindlist = l.vals[1].as.list; CHECK(bindlist.len % 2 == 0, "unmatched binding in let"); int nbinds = bindlist.len / 2; begin_scope(C); for (int i = 0; i < nbinds; i++) { int ix = i * 2; AstNode name = bindlist.vals[ix]; AstNode expr = bindlist.vals[ix+1]; CHECK(name.ty == AST_IDENT, "binding name must be identifier"); compile_node(C, expr, 0); declare_local(C, name.as.str); } compile_body(C, l, 2, flags); end_scope(C); } static void for_form(Compiler *C, AstVec l, Op _, int flags) { // (for (x n) ...) CHECK(l.vals[1].ty == AST_LIST, "for needs binding list"); AstVec blist = l.vals[1].as.list; CHECK(blist.len == 2, "for binding list must have length 2"); CHECK(blist.vals[0].ty == AST_IDENT, "can only bind to ident"); char *ivar = blist.vals[0].as.str; begin_scope(C); compile_opcode(C, OP_LOADK); compile_byte(C, compile_constant(C, VAL_NUM(0))); uint8_t islot = declare_local(C, ivar); compile_node(C, blist.vals[1], 0); uint8_t mslot = declare_local(C, "__max__"); // A // getlocal ivar // getlocal max // cmp // 0branch -> B // body ... // incr ivar // redo -> A // B: // nil size_t dest_A = BYTECODE(C).len; compile_opcode(C, OP_GETLOCAL); compile_byte(C, islot); compile_opcode(C, OP_GETLOCAL); compile_byte(C, mslot); compile_opcode(C, OP_CMP); compile_opcode(C, OP_0BRANCH); size_t ph_B = placeholder(C); compile_body(C, l, 2, flags & ~F_tail); compile_opcode(C, OP_DROP); compile_opcode(C, OP_GETLOCAL); compile_byte(C, islot); compile_opcode(C, OP_LOADK); compile_byte(C, compile_constant(C, VAL_NUM(1))); compile_opcode(C, OP_ADD); compile_opcode(C, OP_SETLOCAL); compile_opcode(C, islot); compile_opcode(C, OP_REDO); size_t ph_A = placeholder(C); size_t dest_B = BYTECODE(C).len; compile_opcode(C, OP_NIL); patch(C, ph_A , ph_A - dest_A + 2); patch(C, ph_B ,dest_B - ph_B - 2); end_scope(C); } static void each_form(Compiler *C, AstVec l, Op _, int flags) { // (each (x a) ...) // returns nil, for now CHECK(l.vals[1].ty == AST_LIST, "each needs binding list"); AstVec blist = l.vals[1].as.list; CHECK(blist.len == 2, "each binding list must have length 2"); CHECK(blist.vals[0].ty == AST_IDENT, "can only bind to ident"); char *ivar = blist.vals[0].as.str; begin_scope(C); compile_opcode(C, OP_LOADK); compile_byte(C, compile_constant(C, VAL_NUM(0))); uint8_t islot = declare_local(C, "__idx__"); compile_node(C, blist.vals[1], 0); uint8_t aslot = declare_local(C, "__arr__"); compile_opcode(C, OP_GETLOCAL); compile_byte(C, aslot); compile_opcode(C, OP_ARRLEN); uint8_t mslot = declare_local(C, "__max__"); compile_opcode(C, OP_NIL); uint8_t vslot = declare_local(C, ivar); // A // getlocal idx // getlocal max // cmp // 0branch -> B // getlocal arr // getlocal idx // call 2 // setlocal ivar // body ... // incr idx // redo -> A // B: // nil size_t dest_A = BYTECODE(C).len; compile_opcode(C, OP_GETLOCAL); compile_byte(C, islot); compile_opcode(C, OP_GETLOCAL); compile_byte(C, mslot); compile_opcode(C, OP_CMP); compile_opcode(C, OP_0BRANCH); size_t ph_B = placeholder(C); compile_opcode(C, OP_GETLOCAL); compile_byte(C, aslot); compile_opcode(C, OP_GETLOCAL); compile_byte(C, islot); compile_call_instr(C, 2); compile_opcode(C, OP_SETLOCAL); compile_byte(C, vslot); compile_opcode(C, OP_DROP); compile_body(C, l, 2, flags & ~F_tail); compile_opcode(C, OP_DROP); compile_opcode(C, OP_GETLOCAL); compile_byte(C, islot); compile_opcode(C, OP_LOADK); compile_byte(C, compile_constant(C, VAL_NUM(1))); compile_opcode(C, OP_ADD); compile_opcode(C, OP_SETLOCAL); compile_byte(C, islot); compile_opcode(C, OP_DROP); compile_opcode(C, OP_REDO); size_t ph_A = placeholder(C); size_t dest_B = BYTECODE(C).len; compile_opcode(C, OP_NIL); patch(C, ph_A, ph_A - dest_A + 2); patch(C, ph_B, dest_B - ph_B - 2); end_scope(C); } static void def_form(Compiler *C, AstVec l, Op _, int flags) { CHECK(l.vals[1].ty == AST_IDENT, "def's first argument must be ident"); CHECK(flags & F_toplevel, "def only allowed at top level"); compile_node(C, l.vals[2], 0); declare_local(C, l.vals[1].as.str); // whatever is calling us will compile an OP_DROP next // or, well. not if we're in tail position. but i can't see // any circumstance where you'd want that anyway compile_opcode(C, OP_NIL); } static void fn_form(Compiler *C, AstVec l, Op _, int flags) { // (fn (arg arg arg) body ...) CHECK(l.vals[1].ty == AST_LIST, "fn's first argument must be list"); AstVec arglist = l.vals[1].as.list; CHECK(arglist.len <= 255, "maximum 255 args for function"); uint8_t arity = arglist.len; ObjFunc *func = objfunc_new(C->S, arity); Compiler subcompiler = compiler_new(C, &func->ch); Compiler *SC = &subcompiler; begin_scope(SC); // when called, stack slot 0 contains the function itself, // stack slots 1..n contain passed arguments SC->stack_cur ++; declare_local(SC, "__func__"); for (int i = 0; i < arity; i++) { AstNode argname = arglist.vals[i]; CHECK(argname.ty == AST_IDENT, "argument name must be identifier"); SC->stack_cur ++; declare_local(SC, argname.as.str); } compile_body(SC, l, 2, F_tail); end_scope(SC); compile_opcode(SC, OP_RET); compile_opcode(C, OP_LOADK); compile_byte(C, compile_constant(C, VAL_OBJ(func))); } static void defn_form(Compiler *C, AstVec l, Op _, int flags) { // todo: reduce redundancy CHECK(l.vals[1].ty == AST_LIST, "defns first arg must be list"); AstVec blist = l.vals[1].as.list; CHECK(blist.len > 0, "defn needs at least a function name"); CHECK(blist.len <= 256, "maximum 255 args for function"); CHECK(flags & F_toplevel, "defn only allowed at toplevel"); uint8_t arity = blist.len - 1; CHECK(blist.vals[0].ty == AST_IDENT, "func name must be ident"); char *fname = blist.vals[0].as.str; ObjFunc *func = objfunc_new(C->S, arity); Compiler subcompiler = compiler_new(C, &func->ch); Compiler *SC = &subcompiler; begin_scope(SC); SC->stack_cur ++; declare_local(SC, fname); for (int i = 0; i < arity; i++) { AstNode argname = blist.vals[i+1]; CHECK(argname.ty == AST_IDENT, "arg name must be identifier"); SC->stack_cur ++; declare_local(SC, argname.as.str); } compile_body(SC, l, 2, F_tail); end_scope(SC); compile_opcode(SC, OP_RET); compile_opcode(C, OP_LOADK); compile_byte(C, compile_constant(C, VAL_OBJ(func))); declare_local(C, fname); compile_opcode(C, OP_NIL); } typedef void (*form_compiler)(Compiler *C, AstVec l, Op op, int flags); typedef struct { char *name; int min_params; bool ellipsis; form_compiler action; Op op; } BuiltinForm; static BuiltinForm builtin_forms[] = { { "set!", 2, false, set_form, 0 }, { "do", 1, true, do_form, 0 }, { "if", 3, false, if_form, 0 }, { "while", 2, true, while_form, 0 }, { "for", 2, true, for_form, 0 }, { "each", 2, true, each_form, 0 }, { "fn", 2, true, fn_form, 0 }, { "let", 2, true, let_form, 0 }, { "def", 2, false, def_form, 0 }, { "defn", 2, true, defn_form, 0 }, #define ARITH_OP(str, op) \ { str, 2, false, arith_form, op }, ARITH_OP("+", OP_ADD) ARITH_OP("-", OP_SUB) ARITH_OP("*", OP_MUL) ARITH_OP("/", OP_DIV) ARITH_OP("=", OP_EQU) ARITH_OP("<", OP_CMP) ARITH_OP("%", OP_MOD) #undef ARITH_OP { 0 }, }; static BuiltinForm *find_builtinform(char *name) { for (BuiltinForm *b = builtin_forms; b->name != NULL; b++) if (0 == strcmp(b->name, name)) return b; return NULL; } static void compile_expr(Compiler *C, Val v, int flags) { switch (val_type(v)) { case TY_NUM: case TY_NIL: case TY_BOOL: cpl_constop(C, OP_LOADK, v); break; case OTY_STRING:; Local *loc = locate_local(C, AS_CSTRING(v)); if (loc) { cpl_op(C, OP_GETLOCAL); cpl_byte(C, loc->slot); } else { cpl_constop(C, OP_GETGLOBAL, v); } break; case OTY_ARR:; ObjArr *a = AS_ARR(v); size_t len = a->len; CHECK(len > 0, "can't handle empty array"); Val first = a->d[0]; BuiltinForm *form = NULL; if (IS_STRING(first)) form = find_builtinform(AS_CSTRING(first)); if (form) { size_t nargs = len - 1; if (form->ellipsis) CHECK(nargs >= form->min_params, "%s requires at least %d args", form->name, form->min_params); else CHECK(nargs == form->min_params, "%s requires exactly %d args", form->name, form->min_params); form->action(C, a, form->op, flags); } else { // function call CHECK(len < 256, "max 255 args in a function call"); for (int i = 0; i a->len; i++) compile_expr(C, a->d[i], 0); if (flags & F_tail) cpl_tailcall(C, len); else cpl_call(C, len); } break; } } static char buf[8193]; int main(int argc, char **argv) { State st = state_new(); State *S = &st; Chunk ch = chunk_new(S); S->do_disasm = false; S->do_trace = false; char *infile_names[16]; int infile_name_n = 0; for (int i = 1; i < argc; i++) { if (argv[i][0] != '-') { infile_names[infile_name_n++] = argv[i]; CHECK(infile_name_n < 16, "input file count exceeded"); continue; } switch (argv[i][1]) { case 'D': CHECK(strlen(argv[i]) > 1, "unknown option a"); switch (argv[i][2]) { case 'l': S->do_disasm = true; break; case 't': S->do_trace = true; break; default: ERROR("unknown option b"); } break; default: ERROR("unknown option c"); } } Compiler com = (Compiler){ 0 }; com.S = S; com.ch = &ch; if (infile_name_n == 0) infile_names[infile_name_n++] = "-"; for (int i = 0; i < infile_name_n; i++) { char *fname = infile_names[i]; FILE *infile = NULL; if (0 == strcmp(fname, "-")) infile = stdin; else infile = fopen(fname, "r"); if (infile == NULL) { perror("fopen"); exit(1); } fread(buf, 1, 8192, infile); buf[8192] = '\0'; ObjArr *top = read_exprs(S, buf); println_val(VAL_OBJ(top)); // pcc_destroy(parser); // compile_body(&com, top.as.list, 0, 0); // astnode_free(&top); } compile_opcode(&com, OP_HALT); Thread th = thread_new(S); th.ch = &ch; S->th = &th; load_stdlib(S); return runvm(S); }