/*!@file \brief STON Hash Tables \details Aligned general purpose hash functions and memory definitions whose columns are provided, and whose rows, and sizes, are derived. ht_size = header.ht_columns << header.ht_2pow; ht_rows = 0x1 << header.ht_2pow; All generic hashtables in henge must have a power-of-two number of rows. An ht_columns value that is also a power-of-two will result in a power-of-two sized memory imprint for the structure, making it easy to page align. Elements in the columns may be of any arbitrary size. typedef uint32_t my_ht_type; ht_bytes = ht_size * sizeof(my_ht_type); implementation covers only 32-bit unit sizes. \author Ken Grimes \date Feb 2017 ----------------------------------------------------------------------------*/ #ifndef _STON_HT_T_ #define _STON_HT_T_ /* Define STON_NOSTATIC to expose included function symbols */ #ifndef STON_NOSTATIC #define STON_FUNC_STATIC static #else #define STON_FUNC_STATIC #endif //STON_NOSTATIC /* If GNUC is detected, uses attributes to stop inlining */ #ifdef __GNUC__ #define STON_FUNC_NOINLINE __attribute__ ((noinline)) #else #define STON_FUNC_NOINLINE #endif //__GNUC__ /* Define STON_NOINLINE to prevent inline compiler hints */ #ifndef STON_NOINLINE #define STON_FUNC_INLINE inline #else #define STON_FUNC_INLINE #endif //STON_NOINLINE /* Define STON_FUNC to override the default STON Function attributes */ #ifndef STON_FUNC #define STON_FUNC STON_FUNC_STATIC STON_FUNC_INLINE #endif //STON_FUNC #ifdef STON_HT_FREAD #include #include #include STON_FUNC_STATIC STON_FUNC_NOINLINE ston_ht ston_ht32_fread(FILE*,long,void*(*)(size_t)); size_t ston_ht32_fwrite(ston_ht,FILE*,long); #else #include #endif //STON_HT_FREAD #include #include //mem* /* STON Hashtable Structure Hashtables are stored as dynamically sized two dimensional arrays */ typedef struct ston_ht_header_t { uint16_t ht_columns; uint8_t ht_2pow, ht_flags; }ston_ht_h,* ston_ht; STON_FUNC uint32_t ston_up2pow(uint32_t); STON_FUNC uint8_t ston_trailing0(uint32_t); STON_FUNC ston_ht ston_ht32_create(uint16_t,uint8_t,uint8_t,void*(*)(size_t)); STON_FUNC uint32_t* ston_ht32_row(ston_ht,uint32_t); STON_FUNC uint32_t ston_ht32_insert(ston_ht,uint32_t,uint16_t,uint32_t); STON_FUNC size_t ston_ht32_insertx(ston_ht,uint32_t,uint32_t*,size_t,size_t); #define ston_ht32_new(_COL,_N,_F,_FN) (ston_ht32_create(_COL,ston_trailing0(ston_up2pow(_N << 1)),_F,_FN)) #define ston_ht32_entry(_HT,_KEY,_COL) (ston_ht32_row(_HT,_KEY) + _COL) #define ston_ht_size(_HT) ((_HT)->ht_columns << (_HT)->ht_2pow) #define ston_ht_rows(_HT) (0x1 << (_HT)->ht_2pow) #define ston_ht_cols(_HT) ((_HT)->ht_columns) #define ston_ht_start(_HT) ((uint8_t*)((_HT) + 1)) #define ston_ht_keyrow(_HT,_KEY) ((_KEY) & (ston_ht_rows(ht) - 1)) #define ston_ht32_start(_HT) ((uint32_t*)ston_ht_start(_HT)) #define ston_ht32_end(_HT) (ston_ht32_start(_HT) + ston_ht_size(_HT)) #define ston_ht32_size(_HT) (ston_ht_size(_HT) * sizeof(uint32_t)) /** @see http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */ STON_FUNC uint32_t ston_up2pow ( uint32_t val ) { val = (val << 1) - 1; val |= val >> 1; val |= val >> 2; val |= val >> 4; val |= val >> 8; val |= val >> 16; return ++val; } /** @see https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightParallel */ STON_FUNC uint8_t ston_trailing0 ( uint32_t v ) { uint8_t c = 32; v &= -(int32_t)v; if (v) c--; if (v & 0x0000FFFF) c -= 16; if (v & 0x00FF00FF) c -= 8; if (v & 0x0F0F0F0F) c -= 4; if (v & 0x33333333) c -= 2; if (v & 0x55555555) c -= 1; return c; } /* Creates a new hash table, provided a memory allocation function that takes a single size_t bytes, a column count, and a row count which determines the size of the table. use ston_ht32_new to specify the exact or estimated number of unique keys held in the table. With ston_ht32_new, the provided ht_rows is doubled, and rounded up to the nearest power of two to create a hash table with minimal collisions. */ STON_FUNC ston_ht ston_ht32_create ( uint16_t ht_columns, uint8_t ht_2pow, uint8_t ht_flags, void* (*alloc_fn)(size_t) ) { size_t ht_bytes = (ht_columns << ht_2pow) * sizeof(uint32_t); ston_ht ht = (ston_ht) alloc_fn(sizeof(ston_ht_h) + ht_bytes); if (ht != NULL) { ht->ht_columns = ht_columns; ht->ht_2pow = ht_2pow; ht->ht_flags = ht_flags; memset(ht + 1, 0, ht_bytes); } return ht; } #ifdef STON_HT_FREAD /* Reads a 32-bit hash table out of the provided file at the provide fpos, into a buffer allocated by alloc_fn. Memory is allocated to the stack until the entire structure is verified, and all file operations are finished. Returns NULL with properly set errno on failure. */ ston_ht ston_ht32_fread ( FILE* file, long fpos, void* (*alloc_fn)(size_t) ) { struct ston_ht_header_t header; ston_ht stack_ht, ht; long fpos_start; size_t table_size, alloc_size; int errno_local; if ((fpos_start = ftell(file)) == -1) return NULL; if (fread(&header, sizeof(header), 1, file) != 1) goto fail_seekback; table_size = ston_ht32_size(&header); alloc_size = sizeof(header) + table_size; stack_ht = (ston_ht) alloca(alloc_size); memcpy(stack_ht, &header, sizeof(header)); if (fread(stack_ht + sizeof(header), table_size, 1, file) != 1) goto fail_seekback; if (fseek(file, fpos_start, SEEK_SET) != 0) return NULL; ht = (ston_ht) alloc_fn(alloc_size); if (ht != NULL) memcpy(ht, stack_ht, alloc_size); return ht; fail_seekback: /* Try to seek the file back to origin without clobbering errno */ errno_local = errno; fseek(file, fpos_start, SEEK_SET); errno = errno_local; return NULL; } /* Writes a 32-bit hash table from memory into a file at fpos. Returns the number of bytes written to the file, errno is set on error. */ size_t ston_ht32_fwrite ( struct ston_ht_header_t* ht, FILE* file, long fpos ) { size_t bytes_written; long fpos_start; if ((fpos_start = ftell(file)) == NULL || (bytes_written = fwrite(file, 1, sizeof(ston_ht_h), file)) < sizeof(ston_ht_h) || (bytes_written += fwrite(file, 1, ston_ht32_bytes(ht), file)) < (sizeof(ston_ht_h) + ston_ht32_bytes(ht))) return 0; return bytes_written; } #endif /* Returns a pointer to the row of data in the hashtable containing the provided key, inserts if not found. Returns NULL on overflow. */ STON_FUNC uint32_t* ston_ht32_row ( struct ston_ht_header_t* ht, uint32_t key ) { uint32_t* row; uint32_t* row_start = ston_ht32_start(ht); uint32_t* row_end = ston_ht32_end(ht); uint16_t ht_cols = ston_ht_cols(ht); size_t row_number = ston_ht_keyrow(ht,key); uint8_t looped = 0; row = row_start + (row_number * ht_cols); next_row: if (row[0] != 0) goto populated; write_position: row[0] = key; return row; populated: if (row[0] == key) goto write_position; if (row + ht_cols < row_end) row += ht_cols; else if (looped) return NULL; else { looped++; row = row_start; } goto next_row; } /* Inserts a value into a hashtable at the specified column, returning the previous value */ STON_FUNC uint32_t ston_ht32_insert ( struct ston_ht_header_t* ht, uint32_t key, uint16_t column, uint32_t value ) { uint32_t* value_location, old_value; value_location = ston_ht32_entry(ht,key,column); old_value = *value_location; *value_location = value; return old_value; } /* Inserts a row of units into a hashtable, starting with the specified column. Returns the number of elements that were written. This function will not overflow internal buffers, but will return a short count (lower than the provided 'units') when truncation of source data occurs. */ STON_FUNC size_t ston_ht32_insertx ( struct ston_ht_header_t* ht, uint32_t key, uint32_t* data_src, size_t start_column, size_t units ) { uint32_t* data_row = ston_ht32_row(ht,key); uint32_t* data_limit = data_row + ston_ht_cols(ht); uint32_t* data_trg = data_row + start_column; if (data_row == NULL) return 0; while (units-- && data_trg < data_limit) *data_trg++ = *data_src++; return (size_t)(data_trg - data_row); } #ifndef STON_DHT_SIZE #define STON_DHT_SIZE 4096 #endif /* STON Dynamic Hashtable Structure A dynamic form of the generic hashtable implementation above which uses external allocation. */ typedef struct ston_dht_header_t { uint16_t columns; uint8_t unit_bytes; uint8_t start_depth; }ston_dht_h; typedef struct ston_dht_t { ston_dht_h header; void* pages[sizeof(void*) * 8]; void* (*ht_alloc)(size_t); void (*ht_free)(void*); }* ston_dht; STON_FUNC ston_dht ston_dht_create(uint16_t,uint8_t,uint8_t,void*(*)(size_t),void(*)(void*)); STON_FUNC uint32_t* ston_dht32_row(ston_dht,uint32_t); STON_FUNC uint32_t ston_dht32_insert(ston_dht,uint32_t,uint16_t,uint32_t); STON_FUNC size_t ston_dht32_insertx(ston_dht,uint32_t,uint32_t*,uint16_t,size_t); STON_FUNC ston_dht ston_dht_free(ston_dht); #define ston_dht_units(_HT,_DEPTH) ((_HT)->header.columns << _DEPTH) #define ston_dht_bytes(_HT,_DEPTH) (ston_dht_units(_HT,_DEPTH) * (_HT)->header.unit_bytes) #define ston_dht_new(_COL,_ALOC,_FRE) (ston_dht_create(_COL,3,sizeof(int),_ALOC,_FRE)) #define ston_dht_sized(_COL,_N,_ALOC,_FRE) (ston_dht_create(_COL,ston_trailing0(ston_up2pow(_N),sizeof(int),_ALOC,_FRE))) #define ston_dht32_entry(_HT,_KEY,_COL) (ston_dht32_row(_HT,_KEY) + _COL) #define ston_dht32_new(_COL,_ALOC,_FRE) (ston_dht_create(_COL,0,sizeof(uint32_t),_ALOC,_FRE)) #define ston_dht32_sized(_COL,_N,_ALOC,_FRE) (ston_dht_create(_COL,ston_trailing0(ston_up2pow(_N)),sizeof(uint32_t),_ALOC,_FRE)) /* Creates a new bucketted hash table, provided a memory allocation function that takes a single size_t bytes, a memory free function, a column count, and a row count which determines the size of the buckets. */ STON_FUNC ston_dht ston_dht_create ( uint16_t columns, uint8_t start_depth, uint8_t unit_bytes, void* (*ht_alloc)(size_t), void (*ht_free)(void*) ) { ston_dht ht = (ston_dht) ht_alloc(sizeof(struct ston_dht_t)); if (ht != NULL) { ht->header.columns = columns; ht->header.start_depth = start_depth; ht->header.unit_bytes = unit_bytes; memset(ht->pages, 0, sizeof(void*) * sizeof(void*) * 8); ht->pages[start_depth] = ht_alloc(ston_dht_bytes(ht, start_depth)); ht->ht_alloc = ht_alloc; ht->ht_free = ht_free; if (ht->pages[start_depth] == NULL && ht_free != NULL) ht_free(ht); else memset(ht->pages[start_depth], 0, ston_dht_bytes(ht, start_depth)); } return ht; } /* Returns a pointer to the row of data in the hashtable containing the provided key, inserts if not found. Returns NULL on overflow. */ STON_FUNC uint32_t* ston_dht32_row ( struct ston_dht_t* ht, uint32_t key ) { uint16_t columns = ht->header.columns; uint8_t depth = ht->header.start_depth; uint32_t mask = ((0x1 << depth) - 1) >> 1; void* page; uint32_t* row; uint32_t row_key; next_page: if (ht->pages[depth] == NULL) { ht->pages[depth] = ht->ht_alloc(ston_dht_bytes(ht, depth)); if (ht->pages[depth] == NULL) return NULL; memset(ht->pages[depth], 0, ston_dht_bytes(ht, depth)); } page = ht->pages[depth]; row = (uint32_t*)page + ((key & mask) * columns); row_key = *row; if (row_key == key || row_key == 0) { row[0] = key; return row; } depth++; mask = (mask << 1) | 0x1; goto next_page; } /* Inserts a value into a hashtable at the specified column, returning the previous value */ STON_FUNC uint32_t ston_dht32_insert ( struct ston_dht_t* ht, uint32_t key, uint16_t column, uint32_t value ) { uint32_t* value_location, old_value; value_location = ston_dht32_entry(ht,key,column); old_value = *value_location; *value_location = value; return old_value; } /* Insert multiple values, returning the number of bytes written */ STON_FUNC size_t ston_dht32_insertx ( struct ston_dht_t* ht, uint32_t key, uint32_t* data_src, uint16_t start_column, size_t units ) { uint32_t* data_row = ston_dht32_row(ht,key); uint32_t* data_limit = data_row + ht->header.columns; uint32_t* data_trg = data_row + start_column; if (data_row == NULL) return 0; while (units-- && data_trg < data_limit) *data_trg++ = *data_src++; return (size_t)(data_trg - data_row); } /* Free the dynamic hash table */ STON_FUNC struct ston_dht_t* ston_dht_free ( struct ston_dht_t* ht ) { void (*ht_free)(void*) = ht->ht_free; uint8_t depth = ht->header.start_depth; void** pages = ht->pages; if (ht_free != NULL) { while (pages[depth] != NULL) ht_free(pages[depth++]); ht_free(ht); return NULL; } return ht; } /******************************************************************************** ********************************************************************************* ********************************************************************************* ********************************************************************************/ typedef struct ston_dht2_header_t { uint16_t val_bytes; uint8_t key_bytes; uint8_t flags; }ston_dht2_h; typedef struct ston_dht2_bucket_t { uint8_t depth; void* page; uint32_t count; }ston_dht2_bucket_h,* ston_dht2_bucket; #define STON_DHT_BUCKETS_SIZE (sizeof(void*) * 8) typedef struct ston_dht2_t { ston_dht2_h header; ston_dht2_bucket_h buckets[1 + STON_DHT_BUCKETS_SIZE]; ston_dht2_bucket bsp; uint32_t row_bytes; uint8_t buckets_len; void* (*ht_alloc)(size_t); void (*ht_free)(void*); }* ston_dht2; STON_FUNC ston_dht2 ston_dht2_create(uint16_t,uint8_t,void*(*)(size_t),void(*)(void*)); STON_FUNC uint32_t* ston_dht232_row(ston_dht2,uint32_t); STON_FUNC uint32_t ston_dht232_insert(ston_dht2,uint32_t,uint16_t,uint32_t); STON_FUNC size_t ston_dht232_insertx(ston_dht2,uint32_t,uint32_t*,uint16_t,size_t); STON_FUNC ston_dht2 ston_dht2_free(ston_dht2); #define ston_dht2_bytes(_HT,_DEPTH) ((_HT)->row_bytes << (_DEPTH)) #define ston_dht2_new(_COL,_ALOC,_FRE) (ston_dht2_create(_COL,sizeof(int),_ALOC,_FRE)) #define ston_dht232_new(_COL,_ALOC,_FRE) (ston_dht2_create(_COL,sizeof(uint32_t),_ALOC,_FRE)) #define ston_dht232_entry(_HT,_KEY,_COL) (ston_dht232_row(_HT,_KEY) + _COL) /* Creates a new bucketted hash table, provided a memory allocation function that takes a single size_t bytes, a memory free function, a column count, and a row count which determines the size of the buckets. */ static ston_dht2_bucket_h dummy_bucket = { (uint8_t)-1, NULL, (uint32_t)-1 }; STON_FUNC ston_dht2 ston_dht2_create ( uint16_t val_bytes, uint8_t key_bytes, void* (*ht_alloc)(size_t), void (*ht_free)(void*) ) { ston_dht2 ht = (ston_dht2) ht_alloc(sizeof(struct ston_dht2_t)); if (ht != NULL) { ht->header.val_bytes = val_bytes; ht->header.key_bytes = key_bytes; ht->row_bytes = val_bytes + key_bytes; ht->ht_alloc = ht_alloc; ht->ht_free = ht_free; size_t i; for (i = 0; i <= STON_DHT_BUCKETS_SIZE; i++) ht->buckets[i] = dummy_bucket; ht->bsp = ht->buckets + STON_DHT_BUCKETS_SIZE - 1; ht->bsp->page = ht_alloc(ston_dht2_bytes(ht, 4)); if (ht->bsp->page == NULL && ht_free != NULL) ht_free(ht); else { memset((ht->bsp->page), 0, ston_dht2_bytes(ht,4)); ht->bsp->depth = 4; ht->bsp->count = 0; ht->buckets_len = 1; } } return ht; } /* Returns a pointer to the row of data in the hashtable containing the provided key, inserting if not found, or NULL if a memory error occurs */ STON_FUNC uint32_t* ston_dht232_row ( struct ston_dht2_t* ht, uint32_t key ) { int8_t bucket_no = (int8_t)ht->buckets_len - 1; uint32_t* row, row_key, mask; size_t bytes; int8_t zero_bucket = (int8_t)-1; ston_dht2_bucket bsp = ht->bsp; ston_dht2_bucket_h bucket; next_bucket: bucket = bsp[bucket_no]; /* Find until out of allocated pages, then insert at last empty bucket position */ if (bucket.page == NULL) { if (zero_bucket != (int8_t)-1) { bucket = bsp[zero_bucket]; mask = (0x1 << bucket.depth) - 1; row = (uint32_t*)bucket.page + (key & mask); *row = key; bucket.count++; /* Swap the buckets up a level if the count has exceeded its parent's count */ if (bucket.count > bsp[zero_bucket + 1].count) { bsp[zero_bucket] = bsp[zero_bucket + 1]; bsp[zero_bucket + 1] = bucket; } else bsp[zero_bucket].count = bucket.count; return row; } /* No buckets with a slot, shift the key right by depth, try again add a new bucket */ bsp--; bucket.depth = 4 + (ht->buckets_len >> 1); bucket.count = 1; bytes = ston_dht2_bytes(ht,bucket.depth); if ((bucket.page = ht->ht_alloc(bytes)) == NULL) { printf("Failed to allocate %lu bytes, bucket %i at with len %i\n", bytes, bucket_no, ht->buckets_len); return NULL; } memset(bucket.page,0,bytes); *bsp = bucket; ht->bsp = bsp; ht->buckets_len++; mask = (0x1 << bucket.depth) - 1; row = (uint32_t*)bucket.page + (key & mask); *row = key; return row; } /* Compute mask, and use it to find the row in the page */ mask = (0x1 << bucket.depth) - 1; row = (uint32_t*)bucket.page + (key & mask); /* Look at the key at row[0], branch */ row_key = *row; if (row_key == key) return row; if (row_key == 0) zero_bucket = bucket_no; bucket_no--; goto next_bucket; } /* Inserts a value into a hashtable at the specified column, returning the previous value */ STON_FUNC uint32_t ston_dht232_insert ( struct ston_dht2_t* ht, uint32_t key, uint16_t column, uint32_t value ) { uint32_t* value_location, old_value; value_location = ston_dht232_entry(ht,key,column); old_value = *value_location; *value_location = value; return old_value; } /* Insert multiple values, returning the number of bytes written */ STON_FUNC size_t ston_dht232_insertx ( struct ston_dht2_t* ht, uint32_t key, uint32_t* data_src, uint16_t start_column, size_t units ) { uint32_t* data_row = ston_dht232_row(ht,key); uint32_t* data_limit = data_row + ht->row_bytes; uint32_t* data_trg = data_row + start_column; if (data_row == NULL) return 0; while (units-- && data_trg < data_limit) *data_trg++ = *data_src++; return (size_t)(data_trg - data_row); } /* Free the dynamic hash table */ STON_FUNC struct ston_dht2_t* ston_dht2_free ( struct ston_dht2_t* ht ) { void (*ht_free)(void*) = ht->ht_free; uint8_t bucket = ht->buckets_len; if (ht_free != NULL) { while (bucket--) ht_free(ht->buckets[bucket].page); ht_free(ht); return NULL; } return ht; } #endif //_STON_HT_H_