From 9ee73a8459eb2bb58adc29da02de312b7e4e7dca Mon Sep 17 00:00:00 2001 From: ubq323 Date: Sat, 5 Aug 2023 04:15:17 +0100 Subject: refactor hashtables, and use objstrings for keys doesn't yet work without string interning, which will require further refactoring --- Makefile | 4 +-- com.c | 2 +- ht.c | 103 +++++++++++++++++++++++++++++++++++---------------------------- ht.h | 17 ++++++----- mem.h | 1 + val.c | 14 ++++++++- val.h | 9 ++++-- vm.c | 11 ------- 8 files changed, 90 insertions(+), 71 deletions(-) diff --git a/Makefile b/Makefile index 8d9c882..02a5b97 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -CS=ast.c com.c dis.c mem.c prs.c read.c val.c vm.c -HS=ast.h dis.h mem.h prs.h read.h val.h vm.h +CS=ast.c com.c dis.c ht.c mem.c prs.c read.c val.c vm.c +HS=ast.h dis.h ht.h mem.h prs.h read.h val.h vm.h CFLAGS=-O3 -Wall -Wpedantic -Werror=implicit-function-declaration bþ: $(CS) $(HS) diff --git a/com.c b/com.c index 387d709..e371e6d 100644 --- a/com.c +++ b/com.c @@ -18,7 +18,7 @@ static void compile_node(Chunk *ch, AstNode a) { break; case AST_STRING: { size_t len = strlen(a.as.str); - ObjString *o = objstring_new(a.as.str, len); + ObjString *o = objstring_copy(a.as.str, len); chunk_wbc(ch, OP_LOADK); chunk_wbc(ch, chunk_wconst(ch, VAL_OBJ(o))); break; diff --git a/ht.c b/ht.c index 44132f5..ca93222 100644 --- a/ht.c +++ b/ht.c @@ -4,73 +4,84 @@ #include #include +#include "mem.h" + #define FNV_BASIS 0x811c9dc5 #define FNV_PRIME 0x01000193 // fnv-1a -static uint32_t hash(char *s) { +uint32_t hash(char *s, size_t len) { uint32_t h = FNV_BASIS; - for(;;) { - h ^= *s; + for(int i = 0; i < len; i++) { + h ^= s[i]; h *= FNV_PRIME; - if (*s == '\0') break; - s++; } return h; } Ht ht_new() { - Ht h; - h.len = 0; - for (int i = 0; i < HT_SIZE; i++) { - h.b[i] = (HtEntry){ .k = NULL, .v=0 }; - } - return h; + return (Ht){ + .len = 0, + .cap = 0, + .b = NULL + }; } -void ht_put(Ht *h, char *k, int v) { - uint32_t hv = hash(k); - int start = hv%HT_SIZE; - for (int cur = start; cur != start - 1; cur = (cur+1)%HT_SIZE) { - HtEntry *ent = &h->b[cur]; - if (ent->k == NULL) { - printf(" p %d\n",cur); - ent->k = strdup(k); - ent->v = v; - h->len++; - break; - } else if (0 == strcmp(k, ent->k)) { - printf(" r %d\n",cur); - ent->v = v; - break; - } else { - printf(" n %d\n",cur); +static HtEntry *find(HtEntry *b, size_t cap, ObjString *k) { + size_t ix = k->hash % cap; + for (;;) { + HtEntry *ent = &b[ix]; + if (ent->k == k || ent->k == NULL) { + return ent; } + ix = (ix+1)%cap; } } -int ht_get(Ht *h, char *k, int *v) { - uint32_t hv = hash(k); - int start = hv%HT_SIZE; - for (int cur = start; cur != start - 1; cur = (cur+1)%HT_SIZE) { - HtEntry *ent = &h->b[cur]; - if (ent->k == NULL) { - return 0; // not found - } else if (0 == strncmp(ent->k, k, 80)) { - *v = ent->v; - return 1; +static void grow(Ht *h) { + HtEntry *old = h->b; + size_t newsz = h->cap == 0 ? 8 : h->cap * 2; + HtEntry *new = NEW_ARR(HtEntry, newsz); + for (int i = 0; icap; i++) { + HtEntry *oldent = &old[i]; + if (oldent->k == NULL) continue; + + HtEntry *newent = find(new, newsz, oldent->k); + newent->k = oldent->k; + newent->v = oldent->v; } } - return 0; // exhausted + + h->b = new; + FREE_ARR(old, HtEntry, h->cap); + h->cap = newsz; } -void ht_dump(Ht *h) { - printf("#%d:\n",h->len); - for (int i = 0; i < HT_SIZE; i++) { - HtEntry ent = h->b[i]; - if (ent.k != NULL) { - printf(" %s:%d\n",ent.k,ent.v); - } + +void ht_put(Ht *h, ObjString *k, Val v) { + if (h->cap == 0 || h->len >= (h->cap/2)) { + grow(h); + } + HtEntry *ent = find(h->b, h->cap, k); + if (ent->k == NULL) { + ent->k = k; + h->len++; + } + ent->v = v; +} + +Val ht_get(Ht *h, ObjString *k) { + HtEntry *ent = find(h->b, h->cap, k); + if (ent->k == NULL) { + return VAL_NIL; + } else { + return ent->v; } } diff --git a/ht.h b/ht.h index 5aaffe7..65e9012 100644 --- a/ht.h +++ b/ht.h @@ -1,22 +1,23 @@ #ifndef _ht_h #define _ht_h -#define HT_SIZE 128 +#include "val.h" typedef struct { - char *k; - int v; + ObjString *k; + Val v; } HtEntry; typedef struct { - int len; - HtEntry b[HT_SIZE]; + size_t len; + size_t cap; + HtEntry *b; } Ht; -typedef Ht Env; +uint32_t hash(char *s, size_t len); Ht ht_new(); -void ht_put(Ht *h, char *k, int v); -int ht_get(Ht *h, char *k, int *v); +void ht_put(Ht *h, ObjString *k, Val v); +Val ht_get(Ht *h, ObjString *k); #endif diff --git a/mem.h b/mem.h index 0607b14..8c528b9 100644 --- a/mem.h +++ b/mem.h @@ -12,6 +12,7 @@ void *M(void *p, size_t old, size_t new); #define NEW_OBJ(t, oty) (t*)alloc_obj(sizeof(t), oty) #define FREE(p,t) M(p, sizeof(t), 0) +#define FREE_ARR(p,t,old) M(p, (old)*sizeof(t), 0) Obj *alloc_obj(size_t sz, ObjTy oty); diff --git a/val.c b/val.c index 355aac6..530737d 100644 --- a/val.c +++ b/val.c @@ -2,15 +2,27 @@ #include #include "val.h" #include "mem.h" +#include "ht.h" -ObjString *objstring_new(char *src, size_t len) { +ObjString *objstring_copy(char *src, size_t len) { char *d = NEW_ARR(char, 1+len); memcpy(d, src, len); d[len] = '\0'; ObjString *o = NEW_OBJ(ObjString, OTY_STRING); o->len = len; o->b = d; + o->hash = hash(d, len); + + return o; +} + +ObjString *objstring_take(char *src, size_t len) { + ObjString *o = NEW_OBJ(ObjString, OTY_STRING); + o->len = len; + o->b = src; + o->hash = hash(src, len); + return o; } diff --git a/val.h b/val.h index 6e4cc18..3cf3251 100644 --- a/val.h +++ b/val.h @@ -2,6 +2,9 @@ #define _val_h #include +#include +#include + typedef struct _val Val; typedef struct _obj Obj; @@ -38,10 +41,12 @@ typedef struct _obj { typedef struct { Obj obj; size_t len; + uint32_t hash; char *b; } ObjString; -// copies data -ObjString *objstring_new(char *src, size_t len); + +ObjString *objstring_copy(char *src, size_t len); +ObjString *objstring_take(char *src, size_t len); #define IS_NIL(x) (x.ty == NIL) diff --git a/vm.c b/vm.c index 4933614..acbfaa3 100644 --- a/vm.c +++ b/vm.c @@ -55,17 +55,6 @@ Vm vm_new(Chunk *ch) { void runvm(Chunk *ch) { - // Chunk ch = chunk_new(); - - // chunk_wbc(&ch, OP_LOADK); - // chunk_wbc(&ch, chunk_wconst(&ch, VAL_NUM(10.0))); - // chunk_wbc(&ch, OP_LOADK); - // chunk_wbc(&ch, chunk_wconst(&ch, VAL_NUM(3.0))); - // chunk_wbc(&ch, OP_DIV); - // chunk_wbc(&ch, OP_PRINT); - - // chunk_wbc(&ch, OP_RET); - disasm_chunk(ch); -- cgit v1.2.3