X-Git-Url: https://www.kengrimes.com/gitweb/?p=henge%2Fapc.git;a=blobdiff_plain;f=ston%2Fston_ht.h;h=6ab03e704a0d58f0f6e86626e56622534533c2a7;hp=0864aa7e58c1bbf5caf9448735da73cbe5f9c588;hb=HEAD;hpb=78c78f12412aa19ebdd788a5d0f13eba2ec88bc9 diff --git a/ston/ston_ht.h b/ston/ston_ht.h index 0864aa7..6ab03e7 100644 --- a/ston/ston_ht.h +++ b/ston/ston_ht.h @@ -45,47 +45,57 @@ #ifndef STON_FUNC #define STON_FUNC STON_FUNC_STATIC STON_FUNC_INLINE #endif //STON_FUNC -#ifndef STON_NOSTDIO +#ifdef STON_HT_FREAD #include -#include //memcpy +#include #include -#endif //STON_NOSTDIO +STON_FUNC_STATIC +STON_FUNC_NOINLINE +ston_ht ston_ht32_fread(FILE*,long,void*(*)(size_t)); +STON_FUNC_STATIC +STON_FUNC_NOINLINE +size_t ston_ht32_fwrite(ston_ht,FILE*,long); +#else +#include +#endif //STON_HT_FREAD #include +#include //mem* /* STON Hashtable Structure Hashtables are stored as dynamically sized two dimensional arrays */ typedef struct ston_ht_header_t { uint16_t ht_columns; uint8_t ht_2pow, ht_flags; -}* ston_ht; +}ston_ht_h,* ston_ht; STON_FUNC -size_t ston_up2pow(size_t); -STON_FUNC_STATIC -STON_FUNC_NOINLINE -ston_ht ston_ht32_fread(FILE*,long,void*(*)(size_t)); +uint32_t ston_up2pow(uint32_t); +STON_FUNC +uint8_t ston_trailing0(uint32_t); STON_FUNC -ston_ht ston_ht32_create(uint16_t,size_t,uint8_t,void*(*)(size_t)); +ston_ht ston_ht32_create(uint16_t,uint8_t,uint8_t,void*(*)(size_t)); STON_FUNC uint32_t* ston_ht32_row(ston_ht,uint32_t); STON_FUNC uint32_t ston_ht32_insert(ston_ht,uint32_t,uint16_t,uint32_t); +STON_FUNC +size_t ston_ht32_insertx(ston_ht,uint32_t,uint32_t*,size_t,size_t); -#define ston_ht32_new(_COL,_N,_FN) ston_ht32_create(_COLS,ston_up2pow(_N << 1),0,_FN) -#define ston_ht32_col(_HT,_KEY,_COL) (ston_ht32_row(_HT,_KEY) + _COL) -#define ston_ht32_insertx(_HT,_KEY,_COL,_VAL) *ston_ht32_col(_HT,_KEY,_COL) = _VAL +#define ston_ht32_new(_COL,_N,_F,_FN) (ston_ht32_create(_COL,ston_trailing0(ston_up2pow(_N << 1)),_F,_FN)) +#define ston_ht32_entry(_HT,_KEY,_COL) (ston_ht32_row(_HT,_KEY) + _COL) #define ston_ht_size(_HT) ((_HT)->ht_columns << (_HT)->ht_2pow) #define ston_ht_rows(_HT) (0x1 << (_HT)->ht_2pow) #define ston_ht_cols(_HT) ((_HT)->ht_columns) -#define ston_ht_start(_HT) (((uint8_t*)(_HT)) + sizeof(*(_HT))) +#define ston_ht_start(_HT) ((uint8_t*)((_HT) + 1)) #define ston_ht_keyrow(_HT,_KEY) ((_KEY) & (ston_ht_rows(ht) - 1)) #define ston_ht32_start(_HT) ((uint32_t*)ston_ht_start(_HT)) +#define ston_ht32_end(_HT) (ston_ht32_start(_HT) + ston_ht_size(_HT)) #define ston_ht32_size(_HT) (ston_ht_size(_HT) * sizeof(uint32_t)) /** @see http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */ STON_FUNC -size_t ston_up2pow -( size_t val ) +uint32_t ston_up2pow +( uint32_t val ) { val = (val << 1) - 1; val |= val >> 1; val |= val >> 2; @@ -95,31 +105,49 @@ size_t ston_up2pow return ++val; } +/** @see https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightParallel */ +STON_FUNC +uint8_t ston_trailing0 +( uint32_t v ) +{ uint8_t c = 32; + v &= -(int32_t)v; + if (v) c--; + if (v & 0x0000FFFF) c -= 16; + if (v & 0x00FF00FF) c -= 8; + if (v & 0x0F0F0F0F) c -= 4; + if (v & 0x33333333) c -= 2; + if (v & 0x55555555) c -= 1; + return c; +} + /* Creates a new hash table, provided a memory allocation function that takes a - single size_t bytes, a column count, and a row count which estimates the - number of unique keys held in the table. The provided ht_rows is doubled, - and rounded up to the nearest power of two to create a hash table with - minimal collisions. + single size_t bytes, a column count, and a row count which determines the + size of the table. + + use ston_ht32_new to specify the exact or estimated number of unique keys + held in the table. With ston_ht32_new, the provided ht_rows is doubled, and + rounded up to the nearest power of two to create a hash table with minimal + collisions. */ STON_FUNC ston_ht ston_ht32_create ( uint16_t ht_columns, - size_t ht_rows, + uint8_t ht_2pow, uint8_t ht_flags, void* (*alloc_fn)(size_t) ) -{ size_t ht_size = ston_up2pow(ht_rows << 1) * ht_columns * sizeof(uint32_t); - ston_ht ht = (ston_ht) alloc_fn(sizeof(struct ston_ht_header_t) + ht_size); +{ size_t ht_bytes = (ht_columns << ht_2pow) * sizeof(uint32_t); + ston_ht ht = (ston_ht) alloc_fn(sizeof(ston_ht_h) + ht_bytes); if (ht != NULL) - { for (ht->ht_2pow = 0; ht_size; ht->ht_2pow++) - ht_size = ht_size >> 1; - ht->ht_columns = ht_columns; + { ht->ht_columns = ht_columns; + ht->ht_2pow = ht_2pow; ht->ht_flags = ht_flags; + memset(ht + 1, 0, ht_bytes); } return ht; } -#ifndef STON_NO_STDIO +#ifdef STON_HT_FREAD /* Reads a 32-bit hash table out of the provided file at the provide fpos, into a buffer allocated by alloc_fn. Memory is allocated to the stack until the entire structure is verified, and all file operations are finished. @@ -136,40 +164,68 @@ ston_ht ston_ht32_fread ston_ht stack_ht, ht; long fpos_start; size_t table_size, alloc_size; + int errno_local; if ((fpos_start = ftell(file)) == -1) return NULL; if (fread(&header, sizeof(header), 1, file) != 1) - return NULL; + goto fail_seekback; table_size = ston_ht32_size(&header); alloc_size = sizeof(header) + table_size; stack_ht = (ston_ht) alloca(alloc_size); memcpy(stack_ht, &header, sizeof(header)); if (fread(stack_ht + sizeof(header), table_size, 1, file) != 1) - return NULL; + goto fail_seekback; if (fseek(file, fpos_start, SEEK_SET) != 0) return NULL; ht = (ston_ht) alloc_fn(alloc_size); if (ht != NULL) memcpy(ht, stack_ht, alloc_size); return ht; + fail_seekback: + /* Try to seek the file back to origin without clobbering errno */ + errno_local = errno; + fseek(file, fpos_start, SEEK_SET); + errno = errno_local; + return NULL; +} + +/* Writes a 32-bit hash table from memory into a file at fpos. Returns the + number of bytes written to the file, errno is set on error. */ +STON_FUNC_STATIC +STON_FUNC_NOINLINE +size_t ston_ht32_fwrite +( struct ston_ht_header_t* ht, + FILE* file, + long fpos +) +{ size_t bytes_written; + long fpos_start; + if ((fpos_start = ftell(file)) == NULL + || fseek(file, fpos, SEEK_SET) == 0 + || (bytes_written = fwrite(file, 1, sizeof(ston_ht_h), file)) < sizeof(ston_ht_h) + || (bytes_written += fwrite(file, 1, ston_ht32_bytes(ht), file)) < (sizeof(ston_ht_h) + ston_ht32_bytes(ht)) + || fseek(file, fpos_start, SEEK_SET) == 0) + return 0; + return bytes_written; } #endif /* Returns a pointer to the row of data in the hashtable containing the provided key, inserts if not found. Returns NULL on overflow. */ -STON_FUNC_STATIC +STON_FUNC uint32_t* ston_ht32_row ( struct ston_ht_header_t* ht, uint32_t key ) -{ uint32_t* row,* row_start = ston_ht32_start(ht); +{ uint32_t* row; + uint32_t* row_start = ston_ht32_start(ht); + uint32_t* row_end = ston_ht32_end(ht); uint16_t ht_cols = ston_ht_cols(ht); size_t row_number = ston_ht_keyrow(ht,key); - size_t row_max = ston_ht_rows(ht); uint8_t looped = 0; - next_row: row = row_start + (row_number * ht_cols); + next_row: if (row[0] != 0) goto populated; write_position: @@ -178,13 +234,13 @@ uint32_t* ston_ht32_row populated: if (row[0] == key) goto write_position; - if (row_number < row_max) - row_number++; + if (row + ht_cols < row_end) + row += ht_cols; else if (looped) return NULL; else { looped++; - row_number = 0; + row = row_start; } goto next_row; } @@ -199,34 +255,211 @@ uint32_t ston_ht32_insert uint32_t value ) { uint32_t* value_location, old_value; - value_location = ston_ht32_col(ht,key,column); + value_location = ston_ht32_entry(ht,key,column); old_value = *value_location; *value_location = value; return old_value; } +/* Inserts a row of units into a hashtable, starting with the specified column. + Returns the number of elements that were written. This function will not + overflow internal buffers, but will return a short count (lower than the + provided 'units') when truncation of source data occurs. */ +STON_FUNC +size_t +ston_ht32_insertx +( struct ston_ht_header_t* ht, + uint32_t key, + uint32_t* data_src, + size_t start_column, + size_t units +) +{ uint32_t* data_row = ston_ht32_row(ht,key); + uint32_t* data_limit = data_row + ston_ht_cols(ht); + uint32_t* data_trg = data_row + start_column; + if (data_row == NULL) + return 0; + while (units-- && data_trg < data_limit) + *data_trg++ = *data_src++; + return (size_t)(data_trg - data_row); +} + + /* STON Dynamic Hashtable Structure A dynamic form of the generic hashtable implementation above which uses external allocation. */ typedef struct ston_dht_header_t -{ uint16_t ht_columns; - uint8_t ht_2pow, ht_flags; - void (*ht_alloc)(size_t); - void* ht_pages[]; +{ uint16_t val_bytes; + uint8_t key_bytes; + uint8_t flags; +}ston_dht_h; + +typedef struct ston_dht_t +{ ston_dht_h header; + void* (*ht_alloc)(size_t); + void (*ht_free)(void*); + void (*ht_iter)(void*,void*,void*); + void* ht_user_data; + void* bucket_root; + size_t rowsize, bucketsize; }* ston_dht; +/* STON DHT API + Primary functions for creating hashtables, retrieving pointers to values, + iterating over all keys and values, and destroying hashtables. */ +STON_FUNC +ston_dht ston_dht_new(uint16_t,uint8_t,void*(*)(size_t),void(*)(void*)); +STON_FUNC +void* ston_dht_val(ston_dht,void*); +STON_FUNC +ston_dht ston_dht_free(ston_dht); +STON_FUNC +void ston_dht_iterate(ston_dht,void(*)(void*,void*,void*),void*); +/* Recursive functions intended to be called by other functions, above */ +STON_FUNC_STATIC +STON_FUNC_NOINLINE +void ston_dht_free_bucket(ston_dht,void*); +STON_FUNC_STATIC +STON_FUNC_NOINLINE +void ston_dht_iterate_r(ston_dht,void*); +// Compatibility macros - Deprecated +#define ston_dht32_new(_COL,_ALOC,_FRE) (ston_dht_new(4 * _COL, 4, _ALOC, _FRE)) +#define ston_dht32_row(_HT,_K) ((uint32_t*)((uint8_t*)ston_dht_val(_HT,&(_K)) - 4)) +#define ston_dht32_insertx(_HT,_K,_VP,_OFFS,_N) \ + memcpy((uint32_t*)((uint8_t*)ston_dht_val(_HT,&(_K)) + ((_OFFS - 1) * 4)),_VP,_N * 4) + +/* New dynamic hashtable, provided value bytes, key bytes, allocator function, + and free function. Value bytes and key bytes are respectively constrained to + uint16 and uint8 so they can be aligned to hashtables encoded for + streaming */ +STON_FUNC +ston_dht ston_dht_new +( uint16_t val_bytes, + uint8_t key_bytes, + void* (*ht_alloc)(size_t), + void (*ht_free)(void*) +) +{ ston_dht ht = (ston_dht) ht_alloc(sizeof(struct ston_dht_t)); + if (ht != NULL) + { ht->header.val_bytes = val_bytes; + ht->header.key_bytes = key_bytes; + ht->rowsize = sizeof(void*) + key_bytes + val_bytes; + ht->bucketsize = ht->rowsize * 0x100; + ht->ht_alloc = ht_alloc; + ht->ht_free = ht_free; + ht->bucket_root = ht_alloc(ht->bucketsize); + if (ht->bucket_root == NULL && ht_free != NULL) + ht_free(ht); + else + memset((ht->bucket_root), 0, ht->bucketsize); + } + return ht; +} + -#define ston_dht_size(_HT) (ston_ht_size(_HT)) -#define ston_dht_rows(_HT) (ston_ht_rows(_HT)) -#define ston_dht_cols(_HT) (ston_ht_cols(_HT)) -#define ston_dht_keyrow(_HT,_KEY) (ston_ht_keyrow(_HT,_KEY)) -#define ston_dht_start(_HT) (_HT->ht_pages[0]) -#define ston_dht32_start(_HT) ((_uint32*)ston_dht_start(_HT)) -ston_dht ston_dht32_create(uint16_t,size_t,void*(*)(size_t)); -uint32_t* ston_dht32_row(ston_dht,uint32_t); -#define ston_dht32_col(_HT,_KEY,_COL) (ston_dht32_row(_HT,_KEY) + _COL) -uint32_t ston_dht32_insert(ston_dht,uint32_t,uint16_t,uint32_t); -#define ston_dht32_insertx(_HT,_KEY,_COL,_VAL) *ston_dht32_col(_HT,_KEY,_COL) = _VAL +/* Returns a pointer to the value in the hashtable matching the provided key, + inserting if not found, or NULL if a memory error occurs */ +STON_FUNC +void* ston_dht_val +( struct ston_dht_t* ht, + void* key +) +{ size_t key_bytes = ht->header.key_bytes; + uint8_t* key_byte = (uint8_t*)key; + uint8_t* bucket = (uint8_t*)ht->bucket_root; + uint8_t** bucketp; + uint8_t* row,* a,* b; + uint8_t a_not_empty; + size_t i; + next_row: + row = bucket + (ht->rowsize * (*key_byte)); + a = row + sizeof(void*); + b = (uint8_t*)key; + a_not_empty = 0; + for (i = 0; i < key_bytes; i++) + { a_not_empty |= a[i]; + if (a_not_empty && a[i] != b[i]) + goto next_bucket; + } + if (!a_not_empty) + memcpy(row + sizeof(void*),key,key_bytes); + goto done; + next_bucket: + key_byte++; + bucketp = (uint8_t**)row; + if (*bucketp == NULL) + { if ((*bucketp = ht->ht_alloc(ht->bucketsize)) == NULL) + return NULL; + else + memset(*bucketp,0,ht->bucketsize); + } + bucket = *bucketp; + goto next_row; + done: + return (void*) row + sizeof(void*) + key_bytes; +} +/* Recursively frees all memory stored in the hashtable, and the hashtable + itself */ +STON_FUNC +struct ston_dht_t* ston_dht_free +( struct ston_dht_t* ht ) +{ void (*ht_free)(void*) = ht->ht_free; + if (ht_free == NULL) + return NULL; + ston_dht_free_bucket(ht, ht->bucket_root); + ht_free(ht); + return ht; +} + +/* Recursive free function for nested buckets */ +STON_FUNC_STATIC +STON_FUNC_NOINLINE +void ston_dht_free_bucket +( struct ston_dht_t* ht, + void* bucket +) +{ void** bucket_cur = (void**)((uint8_t*)bucket); + void** bucket_max = (void**)((uint8_t*)bucket_cur + (ht->rowsize * 0x100)); + while (bucket_cur < bucket_max) + { if (*bucket_cur != NULL) + ston_dht_free_bucket(ht, *bucket_cur); + bucket_cur = (void**)((uint8_t*)bucket_cur + ht->rowsize); + } + ht->ht_free(bucket); +} + +/* Iterate over each key/value pair and execut 'fn' with key, value and + user_data as its arguments. user_data may be anything, even NULL, and is + expected to be referenced inside the body of 'fn' as the third argument of + 'fn' */ +STON_FUNC +void ston_dht_iterate +( struct ston_dht_t* ht, + void (*fn)(void*,void*,void*), + void* user_data +) +{ ht->ht_iter = fn; + ht->ht_user_data = user_data; + ston_dht_iterate_r(ht,ht->bucket_root); +} + +/* Recursively iterate through the given bucket belonging to hashtable ht */ +STON_FUNC_STATIC +STON_FUNC_NOINLINE +void ston_dht_iterate_r +( struct ston_dht_t* ht, + void* bucket +) +{ uint8_t* row = (uint8_t*)bucket; + uint8_t* row_max = (row + (ht->rowsize * 0x100)); + while (row < row_max) + { if (*(void**)row != NULL) + ston_dht_iterate_r(ht, *(void**)row); + row += sizeof(void*); + ht->ht_iter((void*)row, (void*)(row + ht->header.key_bytes),ht->ht_user_data); + row += ht->header.key_bytes + ht->header.val_bytes; + } +} #endif //_STON_HT_H_