comments updated
[henge/apc.git] / ston / ston_ht.h
1 /*!@file
2 \brief STON Hash Tables
3 \details Aligned general purpose hash functions and memory definitions
4 whose columns are provided, and whose rows, and sizes, are derived.
5
6 ht_size = header.ht_columns << header.ht_2pow;
7 ht_rows = 0x1 << header.ht_2pow;
8
9 All generic hashtables in henge must have a power-of-two number of
10 rows. An ht_columns value that is also a power-of-two will result in
11 a power-of-two sized memory imprint for the structure, making it easy
12 to page align.
13
14 Elements in the columns may be of any arbitrary size.
15
16 typedef uint32_t my_ht_type;
17 ht_bytes = ht_size * sizeof(my_ht_type);
18
19 implementation covers only 32-bit unit sizes.
20
21 \author Ken Grimes
22 \date Feb 2017
23 ----------------------------------------------------------------------------*/
24 #ifndef _STON_HT_T_
25 #define _STON_HT_T_
26 /* Define STON_NOSTATIC to expose included function symbols */
27 #ifndef STON_NOSTATIC
28 #define STON_FUNC_STATIC static
29 #else
30 #define STON_FUNC_STATIC
31 #endif //STON_NOSTATIC
32 /* If GNUC is detected, uses attributes to stop inlining */
33 #ifdef __GNUC__
34 #define STON_FUNC_NOINLINE __attribute__ ((noinline))
35 #else
36 #define STON_FUNC_NOINLINE
37 #endif //__GNUC__
38 /* Define STON_NOINLINE to prevent inline compiler hints */
39 #ifndef STON_NOINLINE
40 #define STON_FUNC_INLINE inline
41 #else
42 #define STON_FUNC_INLINE
43 #endif //STON_NOINLINE
44 /* Define STON_FUNC to override the default STON Function attributes */
45 #ifndef STON_FUNC
46 #define STON_FUNC STON_FUNC_STATIC STON_FUNC_INLINE
47 #endif //STON_FUNC
48 #ifdef STON_HT_FREAD
49 #include <stdio.h>
50 #include <errno.h>
51 #include <alloca.h>
52 STON_FUNC_STATIC
53 STON_FUNC_NOINLINE
54 ston_ht ston_ht32_fread(FILE*,long,void*(*)(size_t));
55 STON_FUNC_STATIC
56 STON_FUNC_NOINLINE
57 size_t ston_ht32_fwrite(ston_ht,FILE*,long);
58 #else
59 #include <stddef.h>
60 #endif //STON_HT_FREAD
61 #include <stdint.h>
62 #include <string.h> //mem*
63 /* STON Hashtable Structure
64 Hashtables are stored as dynamically sized two dimensional arrays
65 */
66 typedef struct ston_ht_header_t
67 { uint16_t ht_columns;
68 uint8_t ht_2pow, ht_flags;
69 }ston_ht_h,* ston_ht;
70
71 STON_FUNC
72 uint32_t ston_up2pow(uint32_t);
73 STON_FUNC
74 uint8_t ston_trailing0(uint32_t);
75 STON_FUNC
76 ston_ht ston_ht32_create(uint16_t,uint8_t,uint8_t,void*(*)(size_t));
77 STON_FUNC
78 uint32_t* ston_ht32_row(ston_ht,uint32_t);
79 STON_FUNC
80 uint32_t ston_ht32_insert(ston_ht,uint32_t,uint16_t,uint32_t);
81 STON_FUNC
82 size_t ston_ht32_insertx(ston_ht,uint32_t,uint32_t*,size_t,size_t);
83
84 #define ston_ht32_new(_COL,_N,_F,_FN) (ston_ht32_create(_COL,ston_trailing0(ston_up2pow(_N << 1)),_F,_FN))
85 #define ston_ht32_entry(_HT,_KEY,_COL) (ston_ht32_row(_HT,_KEY) + _COL)
86 #define ston_ht_size(_HT) ((_HT)->ht_columns << (_HT)->ht_2pow)
87 #define ston_ht_rows(_HT) (0x1 << (_HT)->ht_2pow)
88 #define ston_ht_cols(_HT) ((_HT)->ht_columns)
89 #define ston_ht_start(_HT) ((uint8_t*)((_HT) + 1))
90 #define ston_ht_keyrow(_HT,_KEY) ((_KEY) & (ston_ht_rows(ht) - 1))
91 #define ston_ht32_start(_HT) ((uint32_t*)ston_ht_start(_HT))
92 #define ston_ht32_end(_HT) (ston_ht32_start(_HT) + ston_ht_size(_HT))
93 #define ston_ht32_size(_HT) (ston_ht_size(_HT) * sizeof(uint32_t))
94
95 /** @see http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
96 STON_FUNC
97 uint32_t ston_up2pow
98 ( uint32_t val )
99 { val = (val << 1) - 1;
100 val |= val >> 1;
101 val |= val >> 2;
102 val |= val >> 4;
103 val |= val >> 8;
104 val |= val >> 16;
105 return ++val;
106 }
107
108 /** @see https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightParallel */
109 STON_FUNC
110 uint8_t ston_trailing0
111 ( uint32_t v )
112 { uint8_t c = 32;
113 v &= -(int32_t)v;
114 if (v) c--;
115 if (v & 0x0000FFFF) c -= 16;
116 if (v & 0x00FF00FF) c -= 8;
117 if (v & 0x0F0F0F0F) c -= 4;
118 if (v & 0x33333333) c -= 2;
119 if (v & 0x55555555) c -= 1;
120 return c;
121 }
122
123 /* Creates a new hash table, provided a memory allocation function that takes a
124 single size_t bytes, a column count, and a row count which determines the
125 size of the table.
126
127 use ston_ht32_new to specify the exact or estimated number of unique keys
128 held in the table. With ston_ht32_new, the provided ht_rows is doubled, and
129 rounded up to the nearest power of two to create a hash table with minimal
130 collisions.
131 */
132 STON_FUNC
133 ston_ht ston_ht32_create
134 ( uint16_t ht_columns,
135 uint8_t ht_2pow,
136 uint8_t ht_flags,
137 void* (*alloc_fn)(size_t)
138 )
139 { size_t ht_bytes = (ht_columns << ht_2pow) * sizeof(uint32_t);
140 ston_ht ht = (ston_ht) alloc_fn(sizeof(ston_ht_h) + ht_bytes);
141 if (ht != NULL)
142 { ht->ht_columns = ht_columns;
143 ht->ht_2pow = ht_2pow;
144 ht->ht_flags = ht_flags;
145 memset(ht + 1, 0, ht_bytes);
146 }
147 return ht;
148 }
149
150 #ifdef STON_HT_FREAD
151 /* Reads a 32-bit hash table out of the provided file at the provide fpos, into
152 a buffer allocated by alloc_fn. Memory is allocated to the stack until the
153 entire structure is verified, and all file operations are finished.
154 Returns NULL with properly set errno on failure.
155 */
156 STON_FUNC_STATIC
157 STON_FUNC_NOINLINE
158 ston_ht ston_ht32_fread
159 ( FILE* file,
160 long fpos,
161 void* (*alloc_fn)(size_t)
162 )
163 { struct ston_ht_header_t header;
164 ston_ht stack_ht, ht;
165 long fpos_start;
166 size_t table_size, alloc_size;
167 int errno_local;
168 if ((fpos_start = ftell(file)) == -1)
169 return NULL;
170 if (fread(&header, sizeof(header), 1, file) != 1)
171 goto fail_seekback;
172 table_size = ston_ht32_size(&header);
173 alloc_size = sizeof(header) + table_size;
174 stack_ht = (ston_ht) alloca(alloc_size);
175 memcpy(stack_ht, &header, sizeof(header));
176 if (fread(stack_ht + sizeof(header), table_size, 1, file) != 1)
177 goto fail_seekback;
178 if (fseek(file, fpos_start, SEEK_SET) != 0)
179 return NULL;
180 ht = (ston_ht) alloc_fn(alloc_size);
181 if (ht != NULL)
182 memcpy(ht, stack_ht, alloc_size);
183 return ht;
184 fail_seekback:
185 /* Try to seek the file back to origin without clobbering errno */
186 errno_local = errno;
187 fseek(file, fpos_start, SEEK_SET);
188 errno = errno_local;
189 return NULL;
190 }
191
192 /* Writes a 32-bit hash table from memory into a file at fpos. Returns the
193 number of bytes written to the file, errno is set on error. */
194 STON_FUNC_STATIC
195 STON_FUNC_NOINLINE
196 size_t ston_ht32_fwrite
197 ( struct ston_ht_header_t* ht,
198 FILE* file,
199 long fpos
200 )
201 { size_t bytes_written;
202 long fpos_start;
203 if ((fpos_start = ftell(file)) == NULL
204 || fseek(file, fpos, SEEK_SET) == 0
205 || (bytes_written = fwrite(file, 1, sizeof(ston_ht_h), file)) < sizeof(ston_ht_h)
206 || (bytes_written += fwrite(file, 1, ston_ht32_bytes(ht), file)) < (sizeof(ston_ht_h) + ston_ht32_bytes(ht))
207 || fseek(file, fpos_start, SEEK_SET) == 0)
208 return 0;
209 return bytes_written;
210 }
211 #endif
212
213 /* Returns a pointer to the row of data in the hashtable containing the provided
214 key, inserts if not found. Returns NULL on overflow.
215 */
216 STON_FUNC
217 uint32_t* ston_ht32_row
218 ( struct ston_ht_header_t* ht,
219 uint32_t key
220 )
221 { uint32_t* row;
222 uint32_t* row_start = ston_ht32_start(ht);
223 uint32_t* row_end = ston_ht32_end(ht);
224 uint16_t ht_cols = ston_ht_cols(ht);
225 size_t row_number = ston_ht_keyrow(ht,key);
226 uint8_t looped = 0;
227 row = row_start + (row_number * ht_cols);
228 next_row:
229 if (row[0] != 0)
230 goto populated;
231 write_position:
232 row[0] = key;
233 return row;
234 populated:
235 if (row[0] == key)
236 goto write_position;
237 if (row + ht_cols < row_end)
238 row += ht_cols;
239 else if (looped)
240 return NULL;
241 else
242 { looped++;
243 row = row_start;
244 }
245 goto next_row;
246 }
247
248 /* Inserts a value into a hashtable at the specified column, returning the
249 previous value */
250 STON_FUNC
251 uint32_t ston_ht32_insert
252 ( struct ston_ht_header_t* ht,
253 uint32_t key,
254 uint16_t column,
255 uint32_t value
256 )
257 { uint32_t* value_location, old_value;
258 value_location = ston_ht32_entry(ht,key,column);
259 old_value = *value_location;
260 *value_location = value;
261 return old_value;
262 }
263
264 /* Inserts a row of units into a hashtable, starting with the specified column.
265 Returns the number of elements that were written. This function will not
266 overflow internal buffers, but will return a short count (lower than the
267 provided 'units') when truncation of source data occurs. */
268 STON_FUNC
269 size_t
270 ston_ht32_insertx
271 ( struct ston_ht_header_t* ht,
272 uint32_t key,
273 uint32_t* data_src,
274 size_t start_column,
275 size_t units
276 )
277 { uint32_t* data_row = ston_ht32_row(ht,key);
278 uint32_t* data_limit = data_row + ston_ht_cols(ht);
279 uint32_t* data_trg = data_row + start_column;
280 if (data_row == NULL)
281 return 0;
282 while (units-- && data_trg < data_limit)
283 *data_trg++ = *data_src++;
284 return (size_t)(data_trg - data_row);
285 }
286
287
288 /* STON Dynamic Hashtable Structure
289 A dynamic form of the generic hashtable implementation above which uses
290 external allocation.
291 */
292 typedef struct ston_dht_header_t
293 { uint16_t val_bytes;
294 uint8_t key_bytes;
295 uint8_t flags;
296 }ston_dht_h;
297
298 typedef struct ston_dht_t
299 { ston_dht_h header;
300 void* (*ht_alloc)(size_t);
301 void (*ht_free)(void*);
302 void (*ht_iter)(void*,void*,void*);
303 void* ht_user_data;
304 void* bucket_root;
305 size_t rowsize, bucketsize;
306 }* ston_dht;
307 /* STON DHT API
308 Primary functions for creating hashtables, retrieving pointers to values,
309 iterating over all keys and values, and destroying hashtables. */
310 STON_FUNC
311 ston_dht ston_dht_new(uint16_t,uint8_t,void*(*)(size_t),void(*)(void*));
312 STON_FUNC
313 void* ston_dht_val(ston_dht,void*);
314 STON_FUNC
315 ston_dht ston_dht_free(ston_dht);
316 STON_FUNC
317 void ston_dht_iterate(ston_dht,void(*)(void*,void*,void*),void*);
318 /* Recursive functions intended to be called by other functions, above */
319 STON_FUNC_STATIC
320 STON_FUNC_NOINLINE
321 void ston_dht_free_bucket(ston_dht,void*);
322 STON_FUNC_STATIC
323 STON_FUNC_NOINLINE
324 void ston_dht_iterate_r(ston_dht,void*);
325 // Compatibility macros - Deprecated
326 #define ston_dht32_new(_COL,_ALOC,_FRE) (ston_dht_new(4 * _COL, 4, _ALOC, _FRE))
327 #define ston_dht32_row(_HT,_K) ((uint32_t*)((uint8_t*)ston_dht_val(_HT,&(_K)) - 4))
328 #define ston_dht32_insertx(_HT,_K,_VP,_OFFS,_N) \
329 memcpy((uint32_t*)((uint8_t*)ston_dht_val(_HT,&(_K)) + ((_OFFS - 1) * 4)),_VP,_N * 4)
330
331 /* New dynamic hashtable, provided value bytes, key bytes, allocator function,
332 and free function. Value bytes and key bytes are respectively constrained to
333 uint16 and uint8 so they can be aligned to hashtables encoded for
334 streaming */
335 STON_FUNC
336 ston_dht ston_dht_new
337 ( uint16_t val_bytes,
338 uint8_t key_bytes,
339 void* (*ht_alloc)(size_t),
340 void (*ht_free)(void*)
341 )
342 { ston_dht ht = (ston_dht) ht_alloc(sizeof(struct ston_dht_t));
343 if (ht != NULL)
344 { ht->header.val_bytes = val_bytes;
345 ht->header.key_bytes = key_bytes;
346 ht->rowsize = sizeof(void*) + key_bytes + val_bytes;
347 ht->bucketsize = ht->rowsize * 0x100;
348 ht->ht_alloc = ht_alloc;
349 ht->ht_free = ht_free;
350 ht->bucket_root = ht_alloc(ht->bucketsize);
351 if (ht->bucket_root == NULL && ht_free != NULL)
352 ht_free(ht);
353 else
354 memset((ht->bucket_root), 0, ht->bucketsize);
355 }
356 return ht;
357 }
358
359
360 /* Returns a pointer to the value in the hashtable matching the provided key,
361 inserting if not found, or NULL if a memory error occurs */
362 STON_FUNC
363 void* ston_dht_val
364 ( struct ston_dht_t* ht,
365 void* key
366 )
367 { size_t key_bytes = ht->header.key_bytes;
368 uint8_t* key_byte = (uint8_t*)key;
369 uint8_t* bucket = (uint8_t*)ht->bucket_root;
370 uint8_t** bucketp;
371 uint8_t* row,* a,* b;
372 uint8_t a_not_empty;
373 size_t i;
374 next_row:
375 row = bucket + (ht->rowsize * (*key_byte));
376 a = row + sizeof(void*);
377 b = (uint8_t*)key;
378 a_not_empty = 0;
379 for (i = 0; i < key_bytes; i++)
380 { a_not_empty |= a[i];
381 if (a_not_empty && a[i] != b[i])
382 goto next_bucket;
383 }
384 if (!a_not_empty)
385 memcpy(row + sizeof(void*),key,key_bytes);
386 goto done;
387 next_bucket:
388 key_byte++;
389 bucketp = (uint8_t**)row;
390 if (*bucketp == NULL)
391 { if ((*bucketp = ht->ht_alloc(ht->bucketsize)) == NULL)
392 return NULL;
393 else
394 memset(*bucketp,0,ht->bucketsize);
395 }
396 bucket = *bucketp;
397 goto next_row;
398 done:
399 return (void*) row + sizeof(void*) + key_bytes;
400 }
401
402 /* Recursively frees all memory stored in the hashtable, and the hashtable
403 itself */
404 STON_FUNC
405 struct ston_dht_t* ston_dht_free
406 ( struct ston_dht_t* ht )
407 { void (*ht_free)(void*) = ht->ht_free;
408 if (ht_free == NULL)
409 return NULL;
410 ston_dht_free_bucket(ht, ht->bucket_root);
411 ht_free(ht);
412 return ht;
413 }
414
415 /* Recursive free function for nested buckets */
416 STON_FUNC_STATIC
417 STON_FUNC_NOINLINE
418 void ston_dht_free_bucket
419 ( struct ston_dht_t* ht,
420 void* bucket
421 )
422 { void** bucket_cur = (void**)((uint8_t*)bucket);
423 void** bucket_max = (void**)((uint8_t*)bucket_cur + (ht->rowsize * 0x100));
424 while (bucket_cur < bucket_max)
425 { if (*bucket_cur != NULL)
426 ston_dht_free_bucket(ht, *bucket_cur);
427 bucket_cur = (void**)((uint8_t*)bucket_cur + ht->rowsize);
428 }
429 ht->ht_free(bucket);
430 }
431
432 /* Iterate over each key/value pair and execut 'fn' with key, value and
433 user_data as its arguments. user_data may be anything, even NULL, and is
434 expected to be referenced inside the body of 'fn' as the third argument of
435 'fn' */
436 STON_FUNC
437 void ston_dht_iterate
438 ( struct ston_dht_t* ht,
439 void (*fn)(void*,void*,void*),
440 void* user_data
441 )
442 { ht->ht_iter = fn;
443 ht->ht_user_data = user_data;
444 ston_dht_iterate_r(ht,ht->bucket_root);
445 }
446
447 /* Recursively iterate through the given bucket belonging to hashtable ht */
448 STON_FUNC_STATIC
449 STON_FUNC_NOINLINE
450 void ston_dht_iterate_r
451 ( struct ston_dht_t* ht,
452 void* bucket
453 )
454 { uint8_t* row = (uint8_t*)bucket;
455 uint8_t* row_max = (row + (ht->rowsize * 0x100));
456 while (row < row_max)
457 { if (*(void**)row != NULL)
458 ston_dht_iterate_r(ht, *(void**)row);
459 row += sizeof(void*);
460 ht->ht_iter((void*)row, (void*)(row + ht->header.key_bytes),ht->ht_user_data);
461 row += ht->header.key_bytes + ht->header.val_bytes;
462 }
463 }
464
465 #endif //_STON_HT_H_