From b8d0ee2e105727021f9466790ec07ecbfee8dff6 Mon Sep 17 00:00:00 2001 From: ubq323 Date: Thu, 20 Jun 2024 17:23:01 +0100 Subject: string interning and print statement --- com.c | 13 ++++++++++--- ht.c | 32 ++++++++++++++++++++++++++------ ht.h | 3 ++- val.c | 29 +++++++++++++++++++++++------ val.h | 2 ++ 5 files changed, 63 insertions(+), 16 deletions(-) diff --git a/com.c b/com.c index 64aee2e..b97ffc0 100644 --- a/com.c +++ b/com.c @@ -27,9 +27,15 @@ static void compile_node(State *S, Chunk *ch, AstNode a) { case AST_LIST: { AstVec l = a.as.list; #define CK(cond, msg) if (!(cond)) { puts(msg); exit(1); } - CK(l.len == 3, "can only compile binary ops"); - CK(l.vals[0].ty == AST_IDENT, "can only call ops"); - #undef CK + CK(l.vals[0].ty == AST_IDENT, "can only call ops"); + + if (0 == strcmp(l.vals[0].as.str, "print")) { + CK(l.len == 2, "print requires exactly 1 argument"); + compile_node(S, ch, l.vals[1]); + chunk_wbc(S, ch, OP_PRINT); + break; + } + CK(l.len == 3, "can only compile binary ops"); char opchar = l.vals[0].as.str[0]; Op op; switch (opchar) { @@ -47,6 +53,7 @@ static void compile_node(State *S, Chunk *ch, AstNode a) { compile_node(S, ch, l.vals[1]); compile_node(S, ch, l.vals[2]); chunk_wbc(S, ch, op); + #undef CK } } } diff --git a/ht.c b/ht.c index 474361a..09eec3e 100644 --- a/ht.c +++ b/ht.c @@ -10,7 +10,7 @@ #define FNV_PRIME 0x01000193 // fnv-1a -uint32_t hash(char *s, size_t len) { +uint32_t hash_string(char *s, size_t len) { uint32_t h = FNV_BASIS; for(int i = 0; i < len; i++) { h ^= s[i]; @@ -25,8 +25,10 @@ Ht ht_new() { static HtEntry *find(HtEntry *b, size_t cap, ObjString *k) { size_t ix = k->hash % cap; + if (cap == 0) return NULL; for (;;) { HtEntry *ent = &b[ix]; + // XXX tombstones if (ent->k == k || ent->k == NULL) { return ent; } @@ -34,6 +36,25 @@ static HtEntry *find(HtEntry *b, size_t cap, ObjString *k) { } } +ObjString *ht_findstring(State *S, Ht *h, char *s, size_t len, uint32_t hash) { + const size_t cap = h->cap; + if (cap == 0) return NULL; + size_t ix = hash % cap; + + for (;;) { + HtEntry *ent = &h->d[ix]; + // XXX tombstones + if (ent->k == NULL) + return NULL; + else if (ent->k->hash == hash + && ent->k->len == len + && memcmp(ent->k->d, s, len) == 0) + return ent->k; + + ix = (ix+1)%cap; + } +} + static void grow(State *S, Ht *h) { HtEntry *old = h->d; size_t newsz = h->cap == 0 ? 8 : h->cap * 2; @@ -74,12 +95,11 @@ void ht_put(State *S, Ht *h, ObjString *k, Val v) { Val ht_get(State *S, Ht *h, ObjString *k) { HtEntry *ent = find(h->d, h->cap, k); - if (ent->k == NULL) { + if (ent == NULL) + return VAL_NIL; + else if (ent->k == NULL) return VAL_NIL; - } else { + else return ent->v; - } } - - diff --git a/ht.h b/ht.h index e0e6560..36a1d2a 100644 --- a/ht.h +++ b/ht.h @@ -17,10 +17,11 @@ typedef struct _ht { HtEntry *d; } Ht; -uint32_t hash(char *s, size_t len); +uint32_t hash_string(char *s, size_t len); Ht ht_new(); void ht_put(State *S, Ht *h, ObjString *k, Val v); Val ht_get(State *S, Ht *h, ObjString *k); +ObjString *ht_findstring(State *S, Ht *h, char *s, size_t len, uint32_t hash); #endif diff --git a/val.c b/val.c index 22c92ce..40b7c5b 100644 --- a/val.c +++ b/val.c @@ -5,27 +5,44 @@ #include "ht.h" +static ObjString *objstring_create(State*, char*, size_t, uint32_t); + ObjString *objstring_copy(State *S, char *src, size_t len) { + uint32_t hash = hash_string(src, len); + ObjString *interned = ht_findstring(S, &S->strings, src, len, hash); + if (interned != NULL) return interned; + char *d = NEW_ARR(S, char, 1+len); memcpy(d, src, len); d[len] = '\0'; - ObjString *o = NEW_OBJ(S, ObjString, OTY_STRING); - o->len = len; - o->d = d; - o->hash = hash(d, len); - return o; + return objstring_create(S, d, len, hash); } ObjString *objstring_take(State *S, char *src, size_t len) { + uint32_t hash = hash_string(src, len); + ObjString *interned = ht_findstring(S, &S->strings, src, len, hash); + if (interned != NULL) { + FREE_ARR(S, src, char, len+1); + return interned; + }; + + return objstring_create(S, src, len, hash); +} + +static ObjString *objstring_create(State *S, char *src, size_t len, uint32_t hash) { + // assumes already deduplicated + // and data is already owned ObjString *o = NEW_OBJ(S, ObjString, OTY_STRING); o->len = len; o->d = src; - o->hash = hash(src, len); + o->hash = hash; + ht_put(S, &S->strings, o, VAL_TRUE); return o; } + void print_val(Val v) { switch (v.ty) { case TY_NIL: diff --git a/val.h b/val.h index d742c14..4c84fd9 100644 --- a/val.h +++ b/val.h @@ -71,6 +71,8 @@ ObjString *objstring_take(State *S, char *src, size_t len); #define VAL_NIL ((Val){.ty=TY_NIL}) #define VAL_NUM(x) ((Val){.ty=TY_NUM, .as.d=(x) }) #define VAL_BOOL(x) ((Val){.ty=TY_BOOL, .as.b=(x) }) +#define VAL_TRUE VAL_BOOL(1) +#define VAL_FALSE VAL_BOOL(0) #define VAL_OBJ(x) ((Val){.ty=TY_OBJ, .as.o=(Obj*)(x) }) static inline bool is_obj_ty(Val v, ObjTy t) { -- cgit v1.2.3