summaryrefslogtreecommitdiff
path: root/com.c
diff options
context:
space:
mode:
authorubq323 <ubq323@ubq323.website>2024-08-06 16:44:49 +0100
committerubq323 <ubq323@ubq323.website>2024-08-06 16:48:32 +0100
commita266b97829ebe698a4256890fdf1214e479fe1de (patch)
tree61cc3d7aaae0933ffa3081f88f7514c086b14ae6 /com.c
parent000e8ca43f4968412ed5c8fc514bb6caa3e5c450 (diff)
rearrange and refactor compiler
part 1/? of making compiler use badthing objects instead of separate ast
Diffstat (limited to 'com.c')
-rw-r--r--com.c371
1 files changed, 154 insertions, 217 deletions
diff --git a/com.c b/com.c
index 10774c8..38a4196 100644
--- a/com.c
+++ b/com.c
@@ -15,6 +15,11 @@
#define BYTECODE(C) (C->ch->bc)
+enum flags {
+ F_toplevel = 1,
+ F_tail = 2,
+};
+
Chunk chunk_new(State *S) {
return (Chunk){ 0 };
}
@@ -28,10 +33,6 @@ Compiler compiler_new(Compiler *outer, Chunk *ch) {
};
}
-static void compile_byte(Compiler *C, uint8_t byte);
-static void compile_opcode(Compiler *C, Op opcode);
-static size_t compile_constant(Compiler *C, Val v);
-
static int stack_effect_of(Op opcode) {
switch (opcode) {
case OP_LOADK:
@@ -68,73 +69,65 @@ static int stack_effect_of(Op opcode) {
case OP_CALL:
case OP_TAILCALL:
case OP_ENDSCOPE:
- return 0;
+ ERROR("stack effect of opcode %d not constant",opcode);
+ break;
default:
ERROR("unknown stack effect of opcode %d",opcode);
+ break;
}
}
-
-// compile that byte directly
-static void compile_byte(Compiler *C, uint8_t byte) {
+// compilation of things
+static void cpl_byte(Compiler *C, uint8_t byte) {
Chunk *ch = C->ch;
- if (ch->bc.len == ch->bc.cap) {
- size_t newsz = (ch->bc.cap == 0 ? 8 : ch->bc.cap * 2);
- ch->bc.d = RENEW_ARR(C->S, ch->bc.d, uint8_t, ch->bc.cap, newsz);
- ch->bc.cap = newsz;
- }
- size_t ix = ch->bc.len;
- ch->bc.d[ix] = byte;
- ch->bc.len ++;
+ ENSURE_CAP(C->S, ch->bc, uint8_t, ch->bc.len+1);
+ ch->bc.d[ch->bc.len++] = byte;
}
-// compile an opcode, keeping track of its stack effect
-static void compile_opcode(Compiler *C, Op opcode) {
- int stack_effect = stack_effect_of(opcode);
+static void cpl_op(Compiler *C, Op op) {
+ int stack_effect = stack_effect_of(op);
C->stack_cur += stack_effect;
- compile_byte(C, opcode);
+ cpl_byte(C, op);
}
-// add a new constant to the constant table, and return its index
-// (but return the index of any existing identical constant instead of
-// inserting a duplicate)
-static size_t compile_constant(Compiler *C, Val v) {
+// particular instructions
+static uint8_t cpl_const(Compiler *C, Val v) {
Chunk *ch = C->ch;
- for (int i = 0; i < ch->consts.len; i ++)
- if (val_equal(v, ch->consts.d[i])) return i;
- if (ch->consts.len == ch->consts.cap) {
- size_t newsz = (ch->consts.cap == 0 ? 8 : ch->consts.cap *2);
- ch->consts.d = RENEW_ARR(C->S, ch->consts.d, Val, ch->consts.cap, newsz);
- ch->consts.cap = newsz;
+ for (int i = 0; i < ch->consts.len; i++) {
+ if (val_equal(v, ch-consts.d[i])) {
+ return i;
+ }
}
- size_t ix = ch->consts.len;
- ch->consts.d[ix] = v;
- ch->consts.len ++;
+ CHECK(ch->consts.len < 256, "maximum number of constants per function reached");
+
+ ENSURE_CAP(C->S, ch->consts, Val, ch->consts.len+1);
+ uint8_t ix = ch->consts.len;
+ ch->consts.d[ch->consts.len++] = v;
return ix;
}
-
-// len is 1 + number of args
-static void compile_call_instr(Compiler *C, uint8_t len) {
- compile_opcode(C, OP_CALL);
- compile_byte(C, len);
+static void cpl_constop(Compiler *C, Op op, Val v) {
+ cpl_op(C, op);
+ cpl_byte(C, cpl_const(C, v));
+}
+static void cpl_call(Compiler *C, uint8_t len) {
+ cpl_byte(C, OP_CALL);
+ cpl_byte(C, len);
C->stack_cur -= len - 1;
}
-
-static void compile_tailcall_instr(Compiler *C, uint8_t len) {
- compile_opcode(C, OP_TAILCALL);
- compile_byte(C, len);
+static void cpl_tailcall(Compiler *C, uint8_t len) {
+ cpl_byte(C, OP_TAILCALL);
+ cpl_byte(C, len);
C->stack_cur -= len - 1;
}
-
-static void compile_endscope_instr(Compiler *C, uint8_t nlocals) {
+static void cpl_endscope(Compiler *C, uint8_t nlocals) {
if (nlocals > 0) {
- compile_opcode(C, OP_ENDSCOPE);
- compile_byte(C, nlocals);
+ cpl_byte(C, OP_ENDSCOPE);
+ cpl_byte(C, nlocals);
C->stack_cur -= nlocals;
}
}
-
+// jump offsets and things
static size_t placeholder(Compiler *C) {
size_t old_ix = BYTECODE(C).len;
compile_byte(C, 0x00);
@@ -145,20 +138,53 @@ static void patch(Compiler *C, size_t addr, uint16_t val) {
BYTECODE(C).d[addr] = val & 0xff;
BYTECODE(C).d[addr+1] = (val & 0xff00) >> 8;
}
+// scopes and locals
+static void begin_scope(Compiler *C) {
+ Scope *sc = malloc(sizeof(Scope));
+ CHECK(sc != NULL, "memory fail");
+ memset(sc, 0, sizeof(Scope));
+ sc->outer = C->scope;
+ C->scope = sc;
+}
+static void end_scope(Compiler *C) {
+ Scope *sc = C->scope;
+ CHECK(sc != NULL, "attempt to end nonexistent scope");
+ C->scope = sc->outer;
+ // printf("ending scope with %d locals, named: \n", sc->nlocals);
+ // for (int i = 0; i < sc->nlocals; i++) {
+ // Local loc = sc->locals[i];
+ // printf("\t%3d %s\n",loc.slot, loc.name);
+ // }
-// ----
-
-enum flags {
- F_toplevel = 1,
- F_tail = 2,
-};
-
-static void compile_node(Compiler *C, AstNode a, int flags);
-static void begin_scope(Compiler *C);
-static void end_scope(Compiler *C);
-
-typedef void (*form_compiler)(Compiler *C, AstVec l, Op op, int flags);
+ compile_endscope_instr(C, sc->nlocals);
+ free(sc);
+}
+// returns slot of declared local
+static uint8_t declare_local(Compiler *C, char *name) {
+ Scope *sc = C->scope;
+ CHECK(sc != NULL, "can't declare local outside of scope");
+ Local *l = &sc->locals[sc->nlocals++];
+ l->name = name;
+ // -1 because local is expected to be already on the stack
+ // ie sitting just below where stack_cur points
+ uint8_t slot = C->stack_cur - 1;
+ l->slot = slot;
+ // printf("declaring local %s at %d, stack_cur is %d\n",l->name, l->slot, C->stack_cur);
+ return slot;
+}
+static Local *locate_local(Compiler *C, char *name) {
+ for (Scope *sc = C->scope; sc != NULL; sc = sc->outer) {
+ for (int i = 0; i < sc->nlocals; i++) {
+ Local *loc = &sc->locals[i];
+ if (0 == strcmp(loc->name, name))
+ return loc;
+ }
+ }
+ return NULL;
+}
+// compiles a body, ie sequence of "toplevel" expressions in which
+// declarations are allowed
static void compile_body(Compiler *C, AstVec l, int startat, int flags) {
begin_scope(C);
for (int i = startat; i < l.len - 1; i++) {
@@ -169,14 +195,11 @@ static void compile_body(Compiler *C, AstVec l, int startat, int flags) {
end_scope(C);
}
-void single_form(Compiler *C, AstVec l, Op op) {
- compile_node(C, l.vals[1], 0);
- compile_opcode(C, op);
-}
-static Local *locate_local(Compiler *C, char *name);
-void set_form(Compiler *C, AstVec l, Op _, int flags) {
+
+
+static void set_form(Compiler *C, AstVec l, Op _, int flags) {
AstNode target = l.vals[1];
if (target.ty == AST_IDENT) {
// set variable, local or global
@@ -205,11 +228,11 @@ void set_form(Compiler *C, AstVec l, Op _, int flags) {
}
}
-void do_form(Compiler *C, AstVec l, Op _, int flags) {
+static void do_form(Compiler *C, AstVec l, Op _, int flags) {
compile_body(C, l, 1, flags);
}
-void if_form(Compiler *C, AstVec l, Op _, int flags) {
+static void if_form(Compiler *C, AstVec l, Op _, int flags) {
// (if cond if-true if-false)
// cond
// 0branch ->A
@@ -246,7 +269,7 @@ void if_form(Compiler *C, AstVec l, Op _, int flags) {
CHECK(stack_cur_a == orig_stack_cur + 1, "this should never happen");
}
-void while_form(Compiler *C, AstVec l, Op _, int flags) {
+static void while_form(Compiler *C, AstVec l, Op _, int flags) {
// (while cond body ...)
// A:
// cond
@@ -272,59 +295,15 @@ void while_form(Compiler *C, AstVec l, Op _, int flags) {
-void arith_form(Compiler *C, AstVec l, Op op, int flags) {
+static void arith_form(Compiler *C, AstVec l, Op op, int flags) {
compile_node(C, l.vals[1], 0);
compile_node(C, l.vals[2], 0);
compile_opcode(C, op);
}
-static void begin_scope(Compiler *C) {
- Scope *sc = malloc(sizeof(Scope));
- CHECK(sc != NULL, "memory fail");
- memset(sc, 0, sizeof(Scope));
- sc->outer = C->scope;
- C->scope = sc;
-}
-static void end_scope(Compiler *C) {
- Scope *sc = C->scope;
- CHECK(sc != NULL, "attempt to end nonexistent scope");
- C->scope = sc->outer;
- // printf("ending scope with %d locals, named: \n", sc->nlocals);
- // for (int i = 0; i < sc->nlocals; i++) {
- // Local loc = sc->locals[i];
- // printf("\t%3d %s\n",loc.slot, loc.name);
- // }
- compile_endscope_instr(C, sc->nlocals);
-
- free(sc);
-}
-// returns slot of declared local
-static uint8_t declare_local(Compiler *C, char *name) {
- Scope *sc = C->scope;
- CHECK(sc != NULL, "can't declare local outside of scope");
- Local *l = &sc->locals[sc->nlocals++];
- l->name = name;
- // -1 because local is expected to be already on the stack
- // ie sitting just below where stack_cur points
- uint8_t slot = C->stack_cur - 1;
- l->slot = slot;
- // printf("declaring local %s at %d, stack_cur is %d\n",l->name, l->slot, C->stack_cur);
- return slot;
-}
-static Local *locate_local(Compiler *C, char *name) {
- for (Scope *sc = C->scope; sc != NULL; sc = sc->outer) {
- for (int i = 0; i < sc->nlocals; i++) {
- Local *loc = &sc->locals[i];
- if (0 == strcmp(loc->name, name))
- return loc;
- }
- }
- return NULL;
-}
-
-void let_form(Compiler *C, AstVec l, Op _, int flags) {
+static void let_form(Compiler *C, AstVec l, Op _, int flags) {
CHECK(l.vals[1].ty == AST_LIST, "let's first argument must be list");
AstVec bindlist = l.vals[1].as.list;
CHECK(bindlist.len % 2 == 0, "unmatched binding in let");
@@ -346,7 +325,7 @@ void let_form(Compiler *C, AstVec l, Op _, int flags) {
end_scope(C);
}
-void for_form(Compiler *C, AstVec l, Op _, int flags) {
+static void for_form(Compiler *C, AstVec l, Op _, int flags) {
// (for (x n) ...)
CHECK(l.vals[1].ty == AST_LIST, "for needs binding list");
AstVec blist = l.vals[1].as.list;
@@ -402,7 +381,7 @@ void for_form(Compiler *C, AstVec l, Op _, int flags) {
end_scope(C);
}
-void each_form(Compiler *C, AstVec l, Op _, int flags) {
+static void each_form(Compiler *C, AstVec l, Op _, int flags) {
// (each (x a) ...)
// returns nil, for now
CHECK(l.vals[1].ty == AST_LIST, "each needs binding list");
@@ -487,7 +466,7 @@ void each_form(Compiler *C, AstVec l, Op _, int flags) {
}
-void def_form(Compiler *C, AstVec l, Op _, int flags) {
+static void def_form(Compiler *C, AstVec l, Op _, int flags) {
CHECK(l.vals[1].ty == AST_IDENT, "def's first argument must be ident");
CHECK(flags & F_toplevel, "def only allowed at top level");
compile_node(C, l.vals[2], 0);
@@ -498,7 +477,7 @@ void def_form(Compiler *C, AstVec l, Op _, int flags) {
compile_opcode(C, OP_NIL);
}
-void fn_form(Compiler *C, AstVec l, Op _, int flags) {
+static void fn_form(Compiler *C, AstVec l, Op _, int flags) {
// (fn (arg arg arg) body ...)
CHECK(l.vals[1].ty == AST_LIST, "fn's first argument must be list");
AstVec arglist = l.vals[1].as.list;
@@ -530,7 +509,7 @@ void fn_form(Compiler *C, AstVec l, Op _, int flags) {
compile_byte(C, compile_constant(C, VAL_OBJ(func)));
}
-void defn_form(Compiler *C, AstVec l, Op _, int flags) {
+static void defn_form(Compiler *C, AstVec l, Op _, int flags) {
// todo: reduce redundancy
CHECK(l.vals[1].ty == AST_LIST, "defns first arg must be list");
AstVec blist = l.vals[1].as.list;
@@ -564,6 +543,7 @@ void defn_form(Compiler *C, AstVec l, Op _, int flags) {
compile_opcode(C, OP_NIL);
}
+typedef void (*form_compiler)(Compiler *C, AstVec l, Op op, int flags);
typedef struct {
char *name;
int min_params;
@@ -596,110 +576,66 @@ static BuiltinForm builtin_forms[] = {
{ 0 },
};
-typedef struct {
- char *name;
- Op op;
-} BuiltinIdent;
-static BuiltinIdent builtin_idents[] = {
- { "true", OP_TRUE },
- { "false", OP_FALSE },
- { "nil", OP_NIL },
- { 0 },
-};
-
-
+static BuiltinForm *find_builtinform(char *name) {
+ for (BuiltinForm *b = builtin_forms; b->name != NULL; b++)
+ if (0 == strcmp(b->name, name)) return b;
+ return NULL;
+}
-static void compile_node(Compiler *C, AstNode a, int flags) {
- switch (a.ty) {
- case AST_IDENT:;
- char *ident = a.as.str;
- bool found_builtin = false;
- for (BuiltinIdent *b = builtin_idents; b->name != NULL; b++) {
- if (0 == strcmp(b->name, ident)) {
- compile_opcode(C, b->op);
- found_builtin = true;
- break;
- }
- }
- if (!found_builtin) {
- Local *loc = locate_local(C, ident);
- if (loc != NULL) {
- compile_opcode(C, OP_GETLOCAL);
- compile_byte(C, loc->slot);
- } else {
- // read global
- ObjString *o = objstring_copy_cstr(C->S, a.as.str);
- compile_opcode(C, OP_GETGLOBAL);
- compile_byte(C, compile_constant(C, VAL_OBJ(o)));
- }
- }
- break;
- case AST_NUM:
- compile_opcode(C, OP_LOADK);
- compile_byte(C, compile_constant(C, VAL_NUM(a.as.num)));
- break;
- case AST_STRING: {
- ObjString *o = objstring_copy_cstr(C->S, a.as.str);
- compile_opcode(C, OP_LOADK);
- compile_byte(C, compile_constant(C, VAL_OBJ(o)));
+static void compile_expr(Compiler *C, Val v, int flags) {
+ switch (val_type(v)) {
+ case TY_NUM: case TY_NIL: case TY_BOOL:
+ cpl_constop(C, OP_LOADK, v);
break;
- }
- case AST_ARR: {
- compile_opcode(C, OP_ARRNEW);
- AstVec v = a.as.list;
- for (int i = 0; i < v.len; i++) {
- compile_node(C, v.vals[i], 0);
- compile_opcode(C, OP_ARRAPPEND);
+ case OTY_STRING:;
+ Local *loc = locate_local(C, AS_CSTRING(v));
+ if (loc) {
+ cpl_op(C, OP_GETLOCAL);
+ cpl_byte(C, loc->slot);
+ } else {
+ cpl_constop(C, OP_GETGLOBAL, v);
}
break;
- }
-
- case AST_LIST: {
- AstVec l = a.as.list;
-
- CHECK(l.len > 0, "can't handle empty list");
-
+ case OTY_ARR:;
+ ObjArr *a = AS_ARR(v);
+ size_t len = a->len;
+ CHECK(len > 0, "can't handle empty array");
+ Val first = a->d[0];
BuiltinForm *form = NULL;
+ if (IS_STRING(first))
+ form = find_builtinform(AS_CSTRING(first));
- if (l.vals[0].ty == AST_IDENT) {
- char *head = l.vals[0].as.str;
- for (BuiltinForm *b = builtin_forms; b->name != NULL; b++) {
- if (0 == strcmp(b->name, head)) {
- form = b;
- break;
- }
- }
- }
-
- if (form != NULL) {
- size_t nparams = l.len - 1;
- if (form->ellipsis)
- CHECK(nparams >= form->min_params, "%s requires at least %d parameters",
+ if (form) {
+ size_t nargs = len - 1;
+ if (form->ellipsis)
+ CHECK(nargs >= form->min_params, "%s requires at least %d args",
form->name, form->min_params);
- else
- CHECK(nparams == form->min_params, "%s requires exactly %d parameters",
+ else
+ CHECK(nargs == form->min_params, "%s requires exactly %d args",
form->name, form->min_params);
-
- form->action(C, l, form->op, flags);
+ form->action(C, a, form->op, flags);
} else {
// function call
- // (f a b c )
- CHECK(l.len < 256, "max 255 args in a function call");
- for (int i = 0; i < l.len; i++) {
- compile_node(C, l.vals[i], 0);
- }
- if (flags & F_tail) {
- compile_tailcall_instr(C, l.len);
- } else {
- compile_call_instr(C, l.len);
- }
+ CHECK(len < 256, "max 255 args in a function call");
+ for (int i = 0; i a->len; i++)
+ compile_expr(C, a->d[i], 0);
+ if (flags & F_tail)
+ cpl_tailcall(C, len);
+ else
+ cpl_call(C, len);
}
-
break;
}
- }
}
+
+
+
+
+
+
+static char buf[8193];
+
int main(int argc, char **argv) {
State st = state_new();
State *S = &st;
@@ -748,15 +684,16 @@ int main(int argc, char **argv) {
exit(1);
}
- AstNode an = { 0 };
- AstNode top = astnode_new_list();
- pcc_context_t *parser = pcc_create(infile);
- while (pcc_parse(parser, &an)) {
- astnode_append(&top, an);
- }
- pcc_destroy(parser);
- compile_body(&com, top.as.list, 0, 0);
- astnode_free(&top);
+
+ fread(buf, 1, 8192, infile);
+ buf[8192] = '\0';
+ ObjArr *top = read_exprs(S, buf);
+
+ println_val(VAL_OBJ(top));
+
+ // pcc_destroy(parser);
+ // compile_body(&com, top.as.list, 0, 0);
+ // astnode_free(&top);
}