diff options
author | ubq323 <ubq323@ubq323.website> | 2024-08-06 16:44:49 +0100 |
---|---|---|
committer | ubq323 <ubq323@ubq323.website> | 2024-08-06 16:48:32 +0100 |
commit | a266b97829ebe698a4256890fdf1214e479fe1de (patch) | |
tree | 61cc3d7aaae0933ffa3081f88f7514c086b14ae6 /com.c | |
parent | 000e8ca43f4968412ed5c8fc514bb6caa3e5c450 (diff) |
rearrange and refactor compiler
part 1/? of making compiler use badthing objects instead of separate ast
Diffstat (limited to 'com.c')
-rw-r--r-- | com.c | 371 |
1 files changed, 154 insertions, 217 deletions
@@ -15,6 +15,11 @@ #define BYTECODE(C) (C->ch->bc) +enum flags { + F_toplevel = 1, + F_tail = 2, +}; + Chunk chunk_new(State *S) { return (Chunk){ 0 }; } @@ -28,10 +33,6 @@ Compiler compiler_new(Compiler *outer, Chunk *ch) { }; } -static void compile_byte(Compiler *C, uint8_t byte); -static void compile_opcode(Compiler *C, Op opcode); -static size_t compile_constant(Compiler *C, Val v); - static int stack_effect_of(Op opcode) { switch (opcode) { case OP_LOADK: @@ -68,73 +69,65 @@ static int stack_effect_of(Op opcode) { case OP_CALL: case OP_TAILCALL: case OP_ENDSCOPE: - return 0; + ERROR("stack effect of opcode %d not constant",opcode); + break; default: ERROR("unknown stack effect of opcode %d",opcode); + break; } } - -// compile that byte directly -static void compile_byte(Compiler *C, uint8_t byte) { +// compilation of things +static void cpl_byte(Compiler *C, uint8_t byte) { Chunk *ch = C->ch; - if (ch->bc.len == ch->bc.cap) { - size_t newsz = (ch->bc.cap == 0 ? 8 : ch->bc.cap * 2); - ch->bc.d = RENEW_ARR(C->S, ch->bc.d, uint8_t, ch->bc.cap, newsz); - ch->bc.cap = newsz; - } - size_t ix = ch->bc.len; - ch->bc.d[ix] = byte; - ch->bc.len ++; + ENSURE_CAP(C->S, ch->bc, uint8_t, ch->bc.len+1); + ch->bc.d[ch->bc.len++] = byte; } -// compile an opcode, keeping track of its stack effect -static void compile_opcode(Compiler *C, Op opcode) { - int stack_effect = stack_effect_of(opcode); +static void cpl_op(Compiler *C, Op op) { + int stack_effect = stack_effect_of(op); C->stack_cur += stack_effect; - compile_byte(C, opcode); + cpl_byte(C, op); } -// add a new constant to the constant table, and return its index -// (but return the index of any existing identical constant instead of -// inserting a duplicate) -static size_t compile_constant(Compiler *C, Val v) { +// particular instructions +static uint8_t cpl_const(Compiler *C, Val v) { Chunk *ch = C->ch; - for (int i = 0; i < ch->consts.len; i ++) - if (val_equal(v, ch->consts.d[i])) return i; - if (ch->consts.len == ch->consts.cap) { - size_t newsz = (ch->consts.cap == 0 ? 8 : ch->consts.cap *2); - ch->consts.d = RENEW_ARR(C->S, ch->consts.d, Val, ch->consts.cap, newsz); - ch->consts.cap = newsz; + for (int i = 0; i < ch->consts.len; i++) { + if (val_equal(v, ch-consts.d[i])) { + return i; + } } - size_t ix = ch->consts.len; - ch->consts.d[ix] = v; - ch->consts.len ++; + CHECK(ch->consts.len < 256, "maximum number of constants per function reached"); + + ENSURE_CAP(C->S, ch->consts, Val, ch->consts.len+1); + uint8_t ix = ch->consts.len; + ch->consts.d[ch->consts.len++] = v; return ix; } - -// len is 1 + number of args -static void compile_call_instr(Compiler *C, uint8_t len) { - compile_opcode(C, OP_CALL); - compile_byte(C, len); +static void cpl_constop(Compiler *C, Op op, Val v) { + cpl_op(C, op); + cpl_byte(C, cpl_const(C, v)); +} +static void cpl_call(Compiler *C, uint8_t len) { + cpl_byte(C, OP_CALL); + cpl_byte(C, len); C->stack_cur -= len - 1; } - -static void compile_tailcall_instr(Compiler *C, uint8_t len) { - compile_opcode(C, OP_TAILCALL); - compile_byte(C, len); +static void cpl_tailcall(Compiler *C, uint8_t len) { + cpl_byte(C, OP_TAILCALL); + cpl_byte(C, len); C->stack_cur -= len - 1; } - -static void compile_endscope_instr(Compiler *C, uint8_t nlocals) { +static void cpl_endscope(Compiler *C, uint8_t nlocals) { if (nlocals > 0) { - compile_opcode(C, OP_ENDSCOPE); - compile_byte(C, nlocals); + cpl_byte(C, OP_ENDSCOPE); + cpl_byte(C, nlocals); C->stack_cur -= nlocals; } } - +// jump offsets and things static size_t placeholder(Compiler *C) { size_t old_ix = BYTECODE(C).len; compile_byte(C, 0x00); @@ -145,20 +138,53 @@ static void patch(Compiler *C, size_t addr, uint16_t val) { BYTECODE(C).d[addr] = val & 0xff; BYTECODE(C).d[addr+1] = (val & 0xff00) >> 8; } +// scopes and locals +static void begin_scope(Compiler *C) { + Scope *sc = malloc(sizeof(Scope)); + CHECK(sc != NULL, "memory fail"); + memset(sc, 0, sizeof(Scope)); + sc->outer = C->scope; + C->scope = sc; +} +static void end_scope(Compiler *C) { + Scope *sc = C->scope; + CHECK(sc != NULL, "attempt to end nonexistent scope"); + C->scope = sc->outer; + // printf("ending scope with %d locals, named: \n", sc->nlocals); + // for (int i = 0; i < sc->nlocals; i++) { + // Local loc = sc->locals[i]; + // printf("\t%3d %s\n",loc.slot, loc.name); + // } -// ---- - -enum flags { - F_toplevel = 1, - F_tail = 2, -}; - -static void compile_node(Compiler *C, AstNode a, int flags); -static void begin_scope(Compiler *C); -static void end_scope(Compiler *C); - -typedef void (*form_compiler)(Compiler *C, AstVec l, Op op, int flags); + compile_endscope_instr(C, sc->nlocals); + free(sc); +} +// returns slot of declared local +static uint8_t declare_local(Compiler *C, char *name) { + Scope *sc = C->scope; + CHECK(sc != NULL, "can't declare local outside of scope"); + Local *l = &sc->locals[sc->nlocals++]; + l->name = name; + // -1 because local is expected to be already on the stack + // ie sitting just below where stack_cur points + uint8_t slot = C->stack_cur - 1; + l->slot = slot; + // printf("declaring local %s at %d, stack_cur is %d\n",l->name, l->slot, C->stack_cur); + return slot; +} +static Local *locate_local(Compiler *C, char *name) { + for (Scope *sc = C->scope; sc != NULL; sc = sc->outer) { + for (int i = 0; i < sc->nlocals; i++) { + Local *loc = &sc->locals[i]; + if (0 == strcmp(loc->name, name)) + return loc; + } + } + return NULL; +} +// compiles a body, ie sequence of "toplevel" expressions in which +// declarations are allowed static void compile_body(Compiler *C, AstVec l, int startat, int flags) { begin_scope(C); for (int i = startat; i < l.len - 1; i++) { @@ -169,14 +195,11 @@ static void compile_body(Compiler *C, AstVec l, int startat, int flags) { end_scope(C); } -void single_form(Compiler *C, AstVec l, Op op) { - compile_node(C, l.vals[1], 0); - compile_opcode(C, op); -} -static Local *locate_local(Compiler *C, char *name); -void set_form(Compiler *C, AstVec l, Op _, int flags) { + + +static void set_form(Compiler *C, AstVec l, Op _, int flags) { AstNode target = l.vals[1]; if (target.ty == AST_IDENT) { // set variable, local or global @@ -205,11 +228,11 @@ void set_form(Compiler *C, AstVec l, Op _, int flags) { } } -void do_form(Compiler *C, AstVec l, Op _, int flags) { +static void do_form(Compiler *C, AstVec l, Op _, int flags) { compile_body(C, l, 1, flags); } -void if_form(Compiler *C, AstVec l, Op _, int flags) { +static void if_form(Compiler *C, AstVec l, Op _, int flags) { // (if cond if-true if-false) // cond // 0branch ->A @@ -246,7 +269,7 @@ void if_form(Compiler *C, AstVec l, Op _, int flags) { CHECK(stack_cur_a == orig_stack_cur + 1, "this should never happen"); } -void while_form(Compiler *C, AstVec l, Op _, int flags) { +static void while_form(Compiler *C, AstVec l, Op _, int flags) { // (while cond body ...) // A: // cond @@ -272,59 +295,15 @@ void while_form(Compiler *C, AstVec l, Op _, int flags) { -void arith_form(Compiler *C, AstVec l, Op op, int flags) { +static void arith_form(Compiler *C, AstVec l, Op op, int flags) { compile_node(C, l.vals[1], 0); compile_node(C, l.vals[2], 0); compile_opcode(C, op); } -static void begin_scope(Compiler *C) { - Scope *sc = malloc(sizeof(Scope)); - CHECK(sc != NULL, "memory fail"); - memset(sc, 0, sizeof(Scope)); - sc->outer = C->scope; - C->scope = sc; -} -static void end_scope(Compiler *C) { - Scope *sc = C->scope; - CHECK(sc != NULL, "attempt to end nonexistent scope"); - C->scope = sc->outer; - // printf("ending scope with %d locals, named: \n", sc->nlocals); - // for (int i = 0; i < sc->nlocals; i++) { - // Local loc = sc->locals[i]; - // printf("\t%3d %s\n",loc.slot, loc.name); - // } - compile_endscope_instr(C, sc->nlocals); - - free(sc); -} -// returns slot of declared local -static uint8_t declare_local(Compiler *C, char *name) { - Scope *sc = C->scope; - CHECK(sc != NULL, "can't declare local outside of scope"); - Local *l = &sc->locals[sc->nlocals++]; - l->name = name; - // -1 because local is expected to be already on the stack - // ie sitting just below where stack_cur points - uint8_t slot = C->stack_cur - 1; - l->slot = slot; - // printf("declaring local %s at %d, stack_cur is %d\n",l->name, l->slot, C->stack_cur); - return slot; -} -static Local *locate_local(Compiler *C, char *name) { - for (Scope *sc = C->scope; sc != NULL; sc = sc->outer) { - for (int i = 0; i < sc->nlocals; i++) { - Local *loc = &sc->locals[i]; - if (0 == strcmp(loc->name, name)) - return loc; - } - } - return NULL; -} - -void let_form(Compiler *C, AstVec l, Op _, int flags) { +static void let_form(Compiler *C, AstVec l, Op _, int flags) { CHECK(l.vals[1].ty == AST_LIST, "let's first argument must be list"); AstVec bindlist = l.vals[1].as.list; CHECK(bindlist.len % 2 == 0, "unmatched binding in let"); @@ -346,7 +325,7 @@ void let_form(Compiler *C, AstVec l, Op _, int flags) { end_scope(C); } -void for_form(Compiler *C, AstVec l, Op _, int flags) { +static void for_form(Compiler *C, AstVec l, Op _, int flags) { // (for (x n) ...) CHECK(l.vals[1].ty == AST_LIST, "for needs binding list"); AstVec blist = l.vals[1].as.list; @@ -402,7 +381,7 @@ void for_form(Compiler *C, AstVec l, Op _, int flags) { end_scope(C); } -void each_form(Compiler *C, AstVec l, Op _, int flags) { +static void each_form(Compiler *C, AstVec l, Op _, int flags) { // (each (x a) ...) // returns nil, for now CHECK(l.vals[1].ty == AST_LIST, "each needs binding list"); @@ -487,7 +466,7 @@ void each_form(Compiler *C, AstVec l, Op _, int flags) { } -void def_form(Compiler *C, AstVec l, Op _, int flags) { +static void def_form(Compiler *C, AstVec l, Op _, int flags) { CHECK(l.vals[1].ty == AST_IDENT, "def's first argument must be ident"); CHECK(flags & F_toplevel, "def only allowed at top level"); compile_node(C, l.vals[2], 0); @@ -498,7 +477,7 @@ void def_form(Compiler *C, AstVec l, Op _, int flags) { compile_opcode(C, OP_NIL); } -void fn_form(Compiler *C, AstVec l, Op _, int flags) { +static void fn_form(Compiler *C, AstVec l, Op _, int flags) { // (fn (arg arg arg) body ...) CHECK(l.vals[1].ty == AST_LIST, "fn's first argument must be list"); AstVec arglist = l.vals[1].as.list; @@ -530,7 +509,7 @@ void fn_form(Compiler *C, AstVec l, Op _, int flags) { compile_byte(C, compile_constant(C, VAL_OBJ(func))); } -void defn_form(Compiler *C, AstVec l, Op _, int flags) { +static void defn_form(Compiler *C, AstVec l, Op _, int flags) { // todo: reduce redundancy CHECK(l.vals[1].ty == AST_LIST, "defns first arg must be list"); AstVec blist = l.vals[1].as.list; @@ -564,6 +543,7 @@ void defn_form(Compiler *C, AstVec l, Op _, int flags) { compile_opcode(C, OP_NIL); } +typedef void (*form_compiler)(Compiler *C, AstVec l, Op op, int flags); typedef struct { char *name; int min_params; @@ -596,110 +576,66 @@ static BuiltinForm builtin_forms[] = { { 0 }, }; -typedef struct { - char *name; - Op op; -} BuiltinIdent; -static BuiltinIdent builtin_idents[] = { - { "true", OP_TRUE }, - { "false", OP_FALSE }, - { "nil", OP_NIL }, - { 0 }, -}; - - +static BuiltinForm *find_builtinform(char *name) { + for (BuiltinForm *b = builtin_forms; b->name != NULL; b++) + if (0 == strcmp(b->name, name)) return b; + return NULL; +} -static void compile_node(Compiler *C, AstNode a, int flags) { - switch (a.ty) { - case AST_IDENT:; - char *ident = a.as.str; - bool found_builtin = false; - for (BuiltinIdent *b = builtin_idents; b->name != NULL; b++) { - if (0 == strcmp(b->name, ident)) { - compile_opcode(C, b->op); - found_builtin = true; - break; - } - } - if (!found_builtin) { - Local *loc = locate_local(C, ident); - if (loc != NULL) { - compile_opcode(C, OP_GETLOCAL); - compile_byte(C, loc->slot); - } else { - // read global - ObjString *o = objstring_copy_cstr(C->S, a.as.str); - compile_opcode(C, OP_GETGLOBAL); - compile_byte(C, compile_constant(C, VAL_OBJ(o))); - } - } - break; - case AST_NUM: - compile_opcode(C, OP_LOADK); - compile_byte(C, compile_constant(C, VAL_NUM(a.as.num))); - break; - case AST_STRING: { - ObjString *o = objstring_copy_cstr(C->S, a.as.str); - compile_opcode(C, OP_LOADK); - compile_byte(C, compile_constant(C, VAL_OBJ(o))); +static void compile_expr(Compiler *C, Val v, int flags) { + switch (val_type(v)) { + case TY_NUM: case TY_NIL: case TY_BOOL: + cpl_constop(C, OP_LOADK, v); break; - } - case AST_ARR: { - compile_opcode(C, OP_ARRNEW); - AstVec v = a.as.list; - for (int i = 0; i < v.len; i++) { - compile_node(C, v.vals[i], 0); - compile_opcode(C, OP_ARRAPPEND); + case OTY_STRING:; + Local *loc = locate_local(C, AS_CSTRING(v)); + if (loc) { + cpl_op(C, OP_GETLOCAL); + cpl_byte(C, loc->slot); + } else { + cpl_constop(C, OP_GETGLOBAL, v); } break; - } - - case AST_LIST: { - AstVec l = a.as.list; - - CHECK(l.len > 0, "can't handle empty list"); - + case OTY_ARR:; + ObjArr *a = AS_ARR(v); + size_t len = a->len; + CHECK(len > 0, "can't handle empty array"); + Val first = a->d[0]; BuiltinForm *form = NULL; + if (IS_STRING(first)) + form = find_builtinform(AS_CSTRING(first)); - if (l.vals[0].ty == AST_IDENT) { - char *head = l.vals[0].as.str; - for (BuiltinForm *b = builtin_forms; b->name != NULL; b++) { - if (0 == strcmp(b->name, head)) { - form = b; - break; - } - } - } - - if (form != NULL) { - size_t nparams = l.len - 1; - if (form->ellipsis) - CHECK(nparams >= form->min_params, "%s requires at least %d parameters", + if (form) { + size_t nargs = len - 1; + if (form->ellipsis) + CHECK(nargs >= form->min_params, "%s requires at least %d args", form->name, form->min_params); - else - CHECK(nparams == form->min_params, "%s requires exactly %d parameters", + else + CHECK(nargs == form->min_params, "%s requires exactly %d args", form->name, form->min_params); - - form->action(C, l, form->op, flags); + form->action(C, a, form->op, flags); } else { // function call - // (f a b c ) - CHECK(l.len < 256, "max 255 args in a function call"); - for (int i = 0; i < l.len; i++) { - compile_node(C, l.vals[i], 0); - } - if (flags & F_tail) { - compile_tailcall_instr(C, l.len); - } else { - compile_call_instr(C, l.len); - } + CHECK(len < 256, "max 255 args in a function call"); + for (int i = 0; i a->len; i++) + compile_expr(C, a->d[i], 0); + if (flags & F_tail) + cpl_tailcall(C, len); + else + cpl_call(C, len); } - break; } - } } + + + + + + +static char buf[8193]; + int main(int argc, char **argv) { State st = state_new(); State *S = &st; @@ -748,15 +684,16 @@ int main(int argc, char **argv) { exit(1); } - AstNode an = { 0 }; - AstNode top = astnode_new_list(); - pcc_context_t *parser = pcc_create(infile); - while (pcc_parse(parser, &an)) { - astnode_append(&top, an); - } - pcc_destroy(parser); - compile_body(&com, top.as.list, 0, 0); - astnode_free(&top); + + fread(buf, 1, 8192, infile); + buf[8192] = '\0'; + ObjArr *top = read_exprs(S, buf); + + println_val(VAL_OBJ(top)); + + // pcc_destroy(parser); + // compile_body(&com, top.as.list, 0, 0); + // astnode_free(&top); } |