From a266b97829ebe698a4256890fdf1214e479fe1de Mon Sep 17 00:00:00 2001 From: ubq323 Date: Tue, 6 Aug 2024 16:44:49 +0100 Subject: rearrange and refactor compiler part 1/? of making compiler use badthing objects instead of separate ast --- com.c | 371 +++++++++++++++++++++++++++-------------------------------------- mem.h | 15 +++ read.c | 57 ++++++---- read.h | 1 + 4 files changed, 209 insertions(+), 235 deletions(-) diff --git a/com.c b/com.c index 10774c8..38a4196 100644 --- a/com.c +++ b/com.c @@ -15,6 +15,11 @@ #define BYTECODE(C) (C->ch->bc) +enum flags { + F_toplevel = 1, + F_tail = 2, +}; + Chunk chunk_new(State *S) { return (Chunk){ 0 }; } @@ -28,10 +33,6 @@ Compiler compiler_new(Compiler *outer, Chunk *ch) { }; } -static void compile_byte(Compiler *C, uint8_t byte); -static void compile_opcode(Compiler *C, Op opcode); -static size_t compile_constant(Compiler *C, Val v); - static int stack_effect_of(Op opcode) { switch (opcode) { case OP_LOADK: @@ -68,73 +69,65 @@ static int stack_effect_of(Op opcode) { case OP_CALL: case OP_TAILCALL: case OP_ENDSCOPE: - return 0; + ERROR("stack effect of opcode %d not constant",opcode); + break; default: ERROR("unknown stack effect of opcode %d",opcode); + break; } } - -// compile that byte directly -static void compile_byte(Compiler *C, uint8_t byte) { +// compilation of things +static void cpl_byte(Compiler *C, uint8_t byte) { Chunk *ch = C->ch; - if (ch->bc.len == ch->bc.cap) { - size_t newsz = (ch->bc.cap == 0 ? 8 : ch->bc.cap * 2); - ch->bc.d = RENEW_ARR(C->S, ch->bc.d, uint8_t, ch->bc.cap, newsz); - ch->bc.cap = newsz; - } - size_t ix = ch->bc.len; - ch->bc.d[ix] = byte; - ch->bc.len ++; + ENSURE_CAP(C->S, ch->bc, uint8_t, ch->bc.len+1); + ch->bc.d[ch->bc.len++] = byte; } -// compile an opcode, keeping track of its stack effect -static void compile_opcode(Compiler *C, Op opcode) { - int stack_effect = stack_effect_of(opcode); +static void cpl_op(Compiler *C, Op op) { + int stack_effect = stack_effect_of(op); C->stack_cur += stack_effect; - compile_byte(C, opcode); + cpl_byte(C, op); } -// add a new constant to the constant table, and return its index -// (but return the index of any existing identical constant instead of -// inserting a duplicate) -static size_t compile_constant(Compiler *C, Val v) { +// particular instructions +static uint8_t cpl_const(Compiler *C, Val v) { Chunk *ch = C->ch; - for (int i = 0; i < ch->consts.len; i ++) - if (val_equal(v, ch->consts.d[i])) return i; - if (ch->consts.len == ch->consts.cap) { - size_t newsz = (ch->consts.cap == 0 ? 8 : ch->consts.cap *2); - ch->consts.d = RENEW_ARR(C->S, ch->consts.d, Val, ch->consts.cap, newsz); - ch->consts.cap = newsz; + for (int i = 0; i < ch->consts.len; i++) { + if (val_equal(v, ch-consts.d[i])) { + return i; + } } - size_t ix = ch->consts.len; - ch->consts.d[ix] = v; - ch->consts.len ++; + CHECK(ch->consts.len < 256, "maximum number of constants per function reached"); + + ENSURE_CAP(C->S, ch->consts, Val, ch->consts.len+1); + uint8_t ix = ch->consts.len; + ch->consts.d[ch->consts.len++] = v; return ix; } - -// len is 1 + number of args -static void compile_call_instr(Compiler *C, uint8_t len) { - compile_opcode(C, OP_CALL); - compile_byte(C, len); +static void cpl_constop(Compiler *C, Op op, Val v) { + cpl_op(C, op); + cpl_byte(C, cpl_const(C, v)); +} +static void cpl_call(Compiler *C, uint8_t len) { + cpl_byte(C, OP_CALL); + cpl_byte(C, len); C->stack_cur -= len - 1; } - -static void compile_tailcall_instr(Compiler *C, uint8_t len) { - compile_opcode(C, OP_TAILCALL); - compile_byte(C, len); +static void cpl_tailcall(Compiler *C, uint8_t len) { + cpl_byte(C, OP_TAILCALL); + cpl_byte(C, len); C->stack_cur -= len - 1; } - -static void compile_endscope_instr(Compiler *C, uint8_t nlocals) { +static void cpl_endscope(Compiler *C, uint8_t nlocals) { if (nlocals > 0) { - compile_opcode(C, OP_ENDSCOPE); - compile_byte(C, nlocals); + cpl_byte(C, OP_ENDSCOPE); + cpl_byte(C, nlocals); C->stack_cur -= nlocals; } } - +// jump offsets and things static size_t placeholder(Compiler *C) { size_t old_ix = BYTECODE(C).len; compile_byte(C, 0x00); @@ -145,20 +138,53 @@ static void patch(Compiler *C, size_t addr, uint16_t val) { BYTECODE(C).d[addr] = val & 0xff; BYTECODE(C).d[addr+1] = (val & 0xff00) >> 8; } +// scopes and locals +static void begin_scope(Compiler *C) { + Scope *sc = malloc(sizeof(Scope)); + CHECK(sc != NULL, "memory fail"); + memset(sc, 0, sizeof(Scope)); + sc->outer = C->scope; + C->scope = sc; +} +static void end_scope(Compiler *C) { + Scope *sc = C->scope; + CHECK(sc != NULL, "attempt to end nonexistent scope"); + C->scope = sc->outer; + // printf("ending scope with %d locals, named: \n", sc->nlocals); + // for (int i = 0; i < sc->nlocals; i++) { + // Local loc = sc->locals[i]; + // printf("\t%3d %s\n",loc.slot, loc.name); + // } -// ---- - -enum flags { - F_toplevel = 1, - F_tail = 2, -}; - -static void compile_node(Compiler *C, AstNode a, int flags); -static void begin_scope(Compiler *C); -static void end_scope(Compiler *C); - -typedef void (*form_compiler)(Compiler *C, AstVec l, Op op, int flags); + compile_endscope_instr(C, sc->nlocals); + free(sc); +} +// returns slot of declared local +static uint8_t declare_local(Compiler *C, char *name) { + Scope *sc = C->scope; + CHECK(sc != NULL, "can't declare local outside of scope"); + Local *l = &sc->locals[sc->nlocals++]; + l->name = name; + // -1 because local is expected to be already on the stack + // ie sitting just below where stack_cur points + uint8_t slot = C->stack_cur - 1; + l->slot = slot; + // printf("declaring local %s at %d, stack_cur is %d\n",l->name, l->slot, C->stack_cur); + return slot; +} +static Local *locate_local(Compiler *C, char *name) { + for (Scope *sc = C->scope; sc != NULL; sc = sc->outer) { + for (int i = 0; i < sc->nlocals; i++) { + Local *loc = &sc->locals[i]; + if (0 == strcmp(loc->name, name)) + return loc; + } + } + return NULL; +} +// compiles a body, ie sequence of "toplevel" expressions in which +// declarations are allowed static void compile_body(Compiler *C, AstVec l, int startat, int flags) { begin_scope(C); for (int i = startat; i < l.len - 1; i++) { @@ -169,14 +195,11 @@ static void compile_body(Compiler *C, AstVec l, int startat, int flags) { end_scope(C); } -void single_form(Compiler *C, AstVec l, Op op) { - compile_node(C, l.vals[1], 0); - compile_opcode(C, op); -} -static Local *locate_local(Compiler *C, char *name); -void set_form(Compiler *C, AstVec l, Op _, int flags) { + + +static void set_form(Compiler *C, AstVec l, Op _, int flags) { AstNode target = l.vals[1]; if (target.ty == AST_IDENT) { // set variable, local or global @@ -205,11 +228,11 @@ void set_form(Compiler *C, AstVec l, Op _, int flags) { } } -void do_form(Compiler *C, AstVec l, Op _, int flags) { +static void do_form(Compiler *C, AstVec l, Op _, int flags) { compile_body(C, l, 1, flags); } -void if_form(Compiler *C, AstVec l, Op _, int flags) { +static void if_form(Compiler *C, AstVec l, Op _, int flags) { // (if cond if-true if-false) // cond // 0branch ->A @@ -246,7 +269,7 @@ void if_form(Compiler *C, AstVec l, Op _, int flags) { CHECK(stack_cur_a == orig_stack_cur + 1, "this should never happen"); } -void while_form(Compiler *C, AstVec l, Op _, int flags) { +static void while_form(Compiler *C, AstVec l, Op _, int flags) { // (while cond body ...) // A: // cond @@ -272,59 +295,15 @@ void while_form(Compiler *C, AstVec l, Op _, int flags) { -void arith_form(Compiler *C, AstVec l, Op op, int flags) { +static void arith_form(Compiler *C, AstVec l, Op op, int flags) { compile_node(C, l.vals[1], 0); compile_node(C, l.vals[2], 0); compile_opcode(C, op); } -static void begin_scope(Compiler *C) { - Scope *sc = malloc(sizeof(Scope)); - CHECK(sc != NULL, "memory fail"); - memset(sc, 0, sizeof(Scope)); - sc->outer = C->scope; - C->scope = sc; -} -static void end_scope(Compiler *C) { - Scope *sc = C->scope; - CHECK(sc != NULL, "attempt to end nonexistent scope"); - C->scope = sc->outer; - // printf("ending scope with %d locals, named: \n", sc->nlocals); - // for (int i = 0; i < sc->nlocals; i++) { - // Local loc = sc->locals[i]; - // printf("\t%3d %s\n",loc.slot, loc.name); - // } - compile_endscope_instr(C, sc->nlocals); - - free(sc); -} -// returns slot of declared local -static uint8_t declare_local(Compiler *C, char *name) { - Scope *sc = C->scope; - CHECK(sc != NULL, "can't declare local outside of scope"); - Local *l = &sc->locals[sc->nlocals++]; - l->name = name; - // -1 because local is expected to be already on the stack - // ie sitting just below where stack_cur points - uint8_t slot = C->stack_cur - 1; - l->slot = slot; - // printf("declaring local %s at %d, stack_cur is %d\n",l->name, l->slot, C->stack_cur); - return slot; -} -static Local *locate_local(Compiler *C, char *name) { - for (Scope *sc = C->scope; sc != NULL; sc = sc->outer) { - for (int i = 0; i < sc->nlocals; i++) { - Local *loc = &sc->locals[i]; - if (0 == strcmp(loc->name, name)) - return loc; - } - } - return NULL; -} - -void let_form(Compiler *C, AstVec l, Op _, int flags) { +static void let_form(Compiler *C, AstVec l, Op _, int flags) { CHECK(l.vals[1].ty == AST_LIST, "let's first argument must be list"); AstVec bindlist = l.vals[1].as.list; CHECK(bindlist.len % 2 == 0, "unmatched binding in let"); @@ -346,7 +325,7 @@ void let_form(Compiler *C, AstVec l, Op _, int flags) { end_scope(C); } -void for_form(Compiler *C, AstVec l, Op _, int flags) { +static void for_form(Compiler *C, AstVec l, Op _, int flags) { // (for (x n) ...) CHECK(l.vals[1].ty == AST_LIST, "for needs binding list"); AstVec blist = l.vals[1].as.list; @@ -402,7 +381,7 @@ void for_form(Compiler *C, AstVec l, Op _, int flags) { end_scope(C); } -void each_form(Compiler *C, AstVec l, Op _, int flags) { +static void each_form(Compiler *C, AstVec l, Op _, int flags) { // (each (x a) ...) // returns nil, for now CHECK(l.vals[1].ty == AST_LIST, "each needs binding list"); @@ -487,7 +466,7 @@ void each_form(Compiler *C, AstVec l, Op _, int flags) { } -void def_form(Compiler *C, AstVec l, Op _, int flags) { +static void def_form(Compiler *C, AstVec l, Op _, int flags) { CHECK(l.vals[1].ty == AST_IDENT, "def's first argument must be ident"); CHECK(flags & F_toplevel, "def only allowed at top level"); compile_node(C, l.vals[2], 0); @@ -498,7 +477,7 @@ void def_form(Compiler *C, AstVec l, Op _, int flags) { compile_opcode(C, OP_NIL); } -void fn_form(Compiler *C, AstVec l, Op _, int flags) { +static void fn_form(Compiler *C, AstVec l, Op _, int flags) { // (fn (arg arg arg) body ...) CHECK(l.vals[1].ty == AST_LIST, "fn's first argument must be list"); AstVec arglist = l.vals[1].as.list; @@ -530,7 +509,7 @@ void fn_form(Compiler *C, AstVec l, Op _, int flags) { compile_byte(C, compile_constant(C, VAL_OBJ(func))); } -void defn_form(Compiler *C, AstVec l, Op _, int flags) { +static void defn_form(Compiler *C, AstVec l, Op _, int flags) { // todo: reduce redundancy CHECK(l.vals[1].ty == AST_LIST, "defns first arg must be list"); AstVec blist = l.vals[1].as.list; @@ -564,6 +543,7 @@ void defn_form(Compiler *C, AstVec l, Op _, int flags) { compile_opcode(C, OP_NIL); } +typedef void (*form_compiler)(Compiler *C, AstVec l, Op op, int flags); typedef struct { char *name; int min_params; @@ -596,110 +576,66 @@ static BuiltinForm builtin_forms[] = { { 0 }, }; -typedef struct { - char *name; - Op op; -} BuiltinIdent; -static BuiltinIdent builtin_idents[] = { - { "true", OP_TRUE }, - { "false", OP_FALSE }, - { "nil", OP_NIL }, - { 0 }, -}; - - +static BuiltinForm *find_builtinform(char *name) { + for (BuiltinForm *b = builtin_forms; b->name != NULL; b++) + if (0 == strcmp(b->name, name)) return b; + return NULL; +} -static void compile_node(Compiler *C, AstNode a, int flags) { - switch (a.ty) { - case AST_IDENT:; - char *ident = a.as.str; - bool found_builtin = false; - for (BuiltinIdent *b = builtin_idents; b->name != NULL; b++) { - if (0 == strcmp(b->name, ident)) { - compile_opcode(C, b->op); - found_builtin = true; - break; - } - } - if (!found_builtin) { - Local *loc = locate_local(C, ident); - if (loc != NULL) { - compile_opcode(C, OP_GETLOCAL); - compile_byte(C, loc->slot); - } else { - // read global - ObjString *o = objstring_copy_cstr(C->S, a.as.str); - compile_opcode(C, OP_GETGLOBAL); - compile_byte(C, compile_constant(C, VAL_OBJ(o))); - } - } - break; - case AST_NUM: - compile_opcode(C, OP_LOADK); - compile_byte(C, compile_constant(C, VAL_NUM(a.as.num))); - break; - case AST_STRING: { - ObjString *o = objstring_copy_cstr(C->S, a.as.str); - compile_opcode(C, OP_LOADK); - compile_byte(C, compile_constant(C, VAL_OBJ(o))); +static void compile_expr(Compiler *C, Val v, int flags) { + switch (val_type(v)) { + case TY_NUM: case TY_NIL: case TY_BOOL: + cpl_constop(C, OP_LOADK, v); break; - } - case AST_ARR: { - compile_opcode(C, OP_ARRNEW); - AstVec v = a.as.list; - for (int i = 0; i < v.len; i++) { - compile_node(C, v.vals[i], 0); - compile_opcode(C, OP_ARRAPPEND); + case OTY_STRING:; + Local *loc = locate_local(C, AS_CSTRING(v)); + if (loc) { + cpl_op(C, OP_GETLOCAL); + cpl_byte(C, loc->slot); + } else { + cpl_constop(C, OP_GETGLOBAL, v); } break; - } - - case AST_LIST: { - AstVec l = a.as.list; - - CHECK(l.len > 0, "can't handle empty list"); - + case OTY_ARR:; + ObjArr *a = AS_ARR(v); + size_t len = a->len; + CHECK(len > 0, "can't handle empty array"); + Val first = a->d[0]; BuiltinForm *form = NULL; + if (IS_STRING(first)) + form = find_builtinform(AS_CSTRING(first)); - if (l.vals[0].ty == AST_IDENT) { - char *head = l.vals[0].as.str; - for (BuiltinForm *b = builtin_forms; b->name != NULL; b++) { - if (0 == strcmp(b->name, head)) { - form = b; - break; - } - } - } - - if (form != NULL) { - size_t nparams = l.len - 1; - if (form->ellipsis) - CHECK(nparams >= form->min_params, "%s requires at least %d parameters", + if (form) { + size_t nargs = len - 1; + if (form->ellipsis) + CHECK(nargs >= form->min_params, "%s requires at least %d args", form->name, form->min_params); - else - CHECK(nparams == form->min_params, "%s requires exactly %d parameters", + else + CHECK(nargs == form->min_params, "%s requires exactly %d args", form->name, form->min_params); - - form->action(C, l, form->op, flags); + form->action(C, a, form->op, flags); } else { // function call - // (f a b c ) - CHECK(l.len < 256, "max 255 args in a function call"); - for (int i = 0; i < l.len; i++) { - compile_node(C, l.vals[i], 0); - } - if (flags & F_tail) { - compile_tailcall_instr(C, l.len); - } else { - compile_call_instr(C, l.len); - } + CHECK(len < 256, "max 255 args in a function call"); + for (int i = 0; i a->len; i++) + compile_expr(C, a->d[i], 0); + if (flags & F_tail) + cpl_tailcall(C, len); + else + cpl_call(C, len); } - break; } - } } + + + + + + +static char buf[8193]; + int main(int argc, char **argv) { State st = state_new(); State *S = &st; @@ -748,15 +684,16 @@ int main(int argc, char **argv) { exit(1); } - AstNode an = { 0 }; - AstNode top = astnode_new_list(); - pcc_context_t *parser = pcc_create(infile); - while (pcc_parse(parser, &an)) { - astnode_append(&top, an); - } - pcc_destroy(parser); - compile_body(&com, top.as.list, 0, 0); - astnode_free(&top); + + fread(buf, 1, 8192, infile); + buf[8192] = '\0'; + ObjArr *top = read_exprs(S, buf); + + println_val(VAL_OBJ(top)); + + // pcc_destroy(parser); + // compile_body(&com, top.as.list, 0, 0); + // astnode_free(&top); } diff --git a/mem.h b/mem.h index d13e02f..e7edb82 100644 --- a/mem.h +++ b/mem.h @@ -13,6 +13,21 @@ void *M(State *S, void *ptr, size_t old, size_t new); #define RENEW_ARR(S,p,ty,old,new) (ty*)M(S, (p), (old)*sizeof(ty), (new)*sizeof(ty)) #define NEW_OBJ(S,ty, oty) (ty*)alloc_obj(S, sizeof(ty), oty) +// needs len,cap,d fields +#define ENSURE_CAP(S, darr, type, needed) \ + if (darr.cap < needed) { \ + size_t __newsz = next_pwrof2(needed); \ + if (__newsz < 8) __newsz = 8; + darr.d = RENEW_ARR(S, darr.d, type, darr.cap, __newsz); \ + darr.cap = __newsz; \ + } + +inline size_t next_pwrof2(size_t x) { + size_t p = 1; + while (p < x) p <<= 1; + return p; +} + #define FREE(S,p,ty) M(S, p, sizeof(ty), 0) #define FREE_ARR(S,p,ty,old) M(S, p, (old)*sizeof(ty), 0) diff --git a/read.c b/read.c index 047dfb9..b3fc5a3 100644 --- a/read.c +++ b/read.c @@ -4,12 +4,16 @@ #include typedef struct _reader { - char *c; State *S; + int len; + char *c; } Reader; -static void next(Reader *R) { CHECK(*R->c != '\0',"end of string reached"); R->c++; } +static void next(Reader *R) { + if(*R->c == '\0') + ERROR("end of string reached"); + R->c++; } static bool is_ws(char x) { return x == ' ' || x == '\t' || x == '\n';} static bool is_num(char x) { return '0' <= x && x <= '9'; } static bool is_special(char x) { @@ -28,7 +32,14 @@ static Val symbol(Reader *R); static Val string(Reader *R); static Val list(Reader *R, char closer); -#define STR(S, s) (VAL_OBJ(objstring_copy_cstr(S, s))) +// en("quote", x) -> (quote x) etc +// i guess in the good universe this is just cons or whatver +static Val en(Reader *R, char *sym, Val v) { + ObjArr *a = objarr_new(R->S); + objarr_append(R->S, a, VAL_OBJ(objstring_copy_cstr(R->S, sym))); + objarr_append(R->S, a, v); + return VAL_OBJ(a); +} static Val expr(Reader *R) { skipws(R); @@ -41,9 +52,11 @@ static Val expr(Reader *R) { break; case '[': next(R); - ObjArr *a = AS_ARR(list(R, ']')); - objarr_insert(R->S, a, 0, STR(R->S, "arr")); - return VAL_OBJ(a); + return en(R, "arrlit", list(R, ']')); + break; + case '\'': + next(R); + return en(R, "quote", expr(R)); break; default: return symbol(R); break; } @@ -53,17 +66,24 @@ static Val symbol(Reader *R) { int len = 0; bool num = true; while (is_normal(*R->c)) { num &= is_num(*R->c); len += 1; next(R); } + + // i LOVE 0-terminated strings + char *tmp = malloc(len+1); + memcpy(tmp, start, len); + tmp[len] = 0; + if (num) { - // c moment - char *tmp = malloc(len+1); - memcpy(tmp, start, len); - tmp[len] = 0; int x = atoi(tmp); free(tmp); return VAL_NUM((double)x); } else { - ObjString *o = objstring_copy(R->S, start, len); - return VAL_OBJ(o); + Val v; + if (0 == strcmp(tmp, "nil")) v = VAL_NIL; + else if (0 == strcmp(tmp, "true")) v = VAL_TRUE; + else if (0 == strcmp(tmp, "false")) v = VAL_FALSE; + else v = VAL_OBJ(objstring_copy(R->S, start, len)); + free(tmp); + return v; } } static Val string(Reader *R) { @@ -73,17 +93,14 @@ static Val string(Reader *R) { while (*R->c != '"') { len += 1; next(R); } next(R); ObjString *s = objstring_copy(R->S, start, len); - ObjArr *a = objarr_new(R->S); - objarr_append(R->S, a, STR(R->S, "quote")); - objarr_append(R->S, a, VAL_OBJ(s)); - return VAL_OBJ(a); + return en(R, "quote", VAL_OBJ(s)); } static Val list(Reader *R, char closer) { ObjArr *a = objarr_new(R->S); while (true) { skipws(R); if (*R->c == closer) { - next(R); + if (closer != '\0') next(R); return VAL_OBJ(a); } objarr_append(R->S, a, expr(R)); @@ -91,8 +108,12 @@ static Val list(Reader *R, char closer) { } +// both need null terminated strings Val read_expr(State *S, char *str) { Reader r = (Reader){.S=S, .c=str}; return expr(&r); } - +ObjArr *read_exprs(State *S, char *str) { + Reader r = (Reader){.S=S, .c=str}; + return AS_ARR(list(&r, '\0')); +} diff --git a/read.h b/read.h index d4c54a2..2da946f 100644 --- a/read.h +++ b/read.h @@ -4,5 +4,6 @@ #include "state.h" #include "val.h" Val read_expr(State *S, char *str); +ObjArr *read_exprs(State *S, char *str); #endif -- cgit v1.2.3