dynamic hashtables optimized
[henge/apc.git] / ston / ston_ht.h
1 /*!@file
2 \brief STON Hash Tables
3 \details Aligned general purpose hash functions and memory definitions
4 whose columns are provided, and whose rows, and sizes, are derived.
5
6 ht_size = header.ht_columns << header.ht_2pow;
7 ht_rows = 0x1 << header.ht_2pow;
8
9 All generic hashtables in henge must have a power-of-two number of
10 rows. An ht_columns value that is also a power-of-two will result in
11 a power-of-two sized memory imprint for the structure, making it easy
12 to page align.
13
14 Elements in the columns may be of any arbitrary size.
15
16 typedef uint32_t my_ht_type;
17 ht_bytes = ht_size * sizeof(my_ht_type);
18
19 implementation covers only 32-bit unit sizes.
20
21 \author Ken Grimes
22 \date Feb 2017
23 ----------------------------------------------------------------------------*/
24 #ifndef _STON_HT_T_
25 #define _STON_HT_T_
/* Linkage: STON functions are declared 'static' unless STON_NOSTATIC is
   defined, in which case the function symbols are exposed. */
#ifdef STON_NOSTATIC
#define STON_FUNC_STATIC
#else
#define STON_FUNC_STATIC static
#endif /* STON_NOSTATIC */

/* When a GNU C compatible compiler is detected, an attribute is used to stop
   inlining; for any other compiler the macro expands to nothing. */
#if defined(__GNUC__)
#define STON_FUNC_NOINLINE __attribute__ ((noinline))
#else
#define STON_FUNC_NOINLINE
#endif /* __GNUC__ */

/* Inline hint: omitted when STON_NOINLINE is defined. */
#ifdef STON_NOINLINE
#define STON_FUNC_INLINE
#else
#define STON_FUNC_INLINE inline
#endif /* STON_NOINLINE */

/* Default STON function attributes; define STON_FUNC beforehand to override
   them. */
#if !defined(STON_FUNC)
#define STON_FUNC STON_FUNC_STATIC STON_FUNC_INLINE
#endif /* STON_FUNC */
/* Optional file reader: define STON_HT_FREAD before including this header to
   pull in the stdio based loader. */
#ifdef STON_HT_FREAD
#include <stdio.h>
#include <errno.h>
#include <alloca.h>
/* The 'ston_ht' typedef is declared further down in this header, so this early
   prototype names the struct tag directly. 'struct ston_ht_header_t*' is the
   same type that 'ston_ht' aliases, which keeps this declaration compatible
   with the function definition. */
struct ston_ht_header_t;
STON_FUNC_STATIC
STON_FUNC_NOINLINE
struct ston_ht_header_t* ston_ht32_fread(FILE*,long,void*(*)(size_t));
#else
#include <stddef.h>
#endif //STON_HT_FREAD
58 #include <stdint.h>
59 #include <string.h> //mem*
/* STON Hashtable Structure
   Hashtables are stored as dynamically sized two dimensional arrays. The
   header below is placed in front of the cell storage; the extent of the
   array is derived from the header fields.
*/
struct ston_ht_header_t
{ uint16_t ht_columns; /* number of cells in each row */
  uint8_t  ht_2pow;    /* the table has (1 << ht_2pow) rows */
  uint8_t  ht_flags;   /* stored as given by the creator of the table */
};
typedef struct ston_ht_header_t  ston_ht_h; /* header by value */
typedef struct ston_ht_header_t* ston_ht;   /* handle: pointer to a header */
67
68 STON_FUNC
69 uint32_t ston_up2pow(uint32_t);
70 STON_FUNC
71 uint8_t ston_trailing0(uint32_t);
72 STON_FUNC
73 ston_ht ston_ht32_create(uint16_t,uint8_t,uint8_t,void*(*)(size_t));
74 STON_FUNC
75 uint32_t* ston_ht32_row(ston_ht,uint32_t);
76 STON_FUNC
77 uint32_t ston_ht32_insert(ston_ht,uint32_t,uint16_t,uint32_t);
78 STON_FUNC
79 size_t ston_ht32_insertx(ston_ht,uint32_t,uint32_t*,size_t,size_t);
80
/* Table accessor macros. Every macro argument is parenthesized inside the
   expansion so that compound expressions may be passed, and each macro refers
   only to its own arguments (never to a variable of the calling scope).
   Arguments may be evaluated more than once; avoid side effects in them. */

/* Create a table sized for _N keys: _N is doubled, then run through
   ston_up2pow and ston_trailing0 to obtain the ht_2pow argument. */
#define ston_ht32_new(_COL,_N,_F,_FN) (ston_ht32_create((_COL),ston_trailing0(ston_up2pow((_N) << 1)),(_F),(_FN)))
/* Address of cell _COL in the row holding _KEY (see ston_ht32_row). */
#define ston_ht32_entry(_HT,_KEY,_COL) (ston_ht32_row((_HT),(_KEY)) + (_COL))
/* Total number of cells in the table. */
#define ston_ht_size(_HT) ((_HT)->ht_columns << (_HT)->ht_2pow)
/* Number of rows in the table. */
#define ston_ht_rows(_HT) (0x1 << (_HT)->ht_2pow)
/* Number of cells per row. */
#define ston_ht_cols(_HT) ((_HT)->ht_columns)
/* First byte after the header, where the cell storage begins. */
#define ston_ht_start(_HT) ((uint8_t*)((_HT) + 1))
/* Row index selected by the low bits of _KEY. */
#define ston_ht_keyrow(_HT,_KEY) ((_KEY) & (ston_ht_rows(_HT) - 1))
/* First 32-bit cell, and one past the last 32-bit cell. */
#define ston_ht32_start(_HT) ((uint32_t*)ston_ht_start(_HT))
#define ston_ht32_end(_HT) (ston_ht32_start(_HT) + ston_ht_size(_HT))
/* Size in bytes of the cell storage of a 32-bit table (header excluded). */
#define ston_ht32_size(_HT) (ston_ht_size(_HT) * sizeof(uint32_t))
91
92 /** @see http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
93 STON_FUNC
94 uint32_t ston_up2pow
95 ( uint32_t val )
96 { val = (val << 1) - 1;
97 val |= val >> 1;
98 val |= val >> 2;
99 val |= val >> 4;
100 val |= val >> 8;
101 val |= val >> 16;
102 return ++val;
103 }
104
105 /** @see https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightParallel */
106 STON_FUNC
107 uint8_t ston_trailing0
108 ( uint32_t v )
109 { uint8_t c = 32;
110 v &= -(int32_t)v;
111 if (v) c--;
112 if (v & 0x0000FFFF) c -= 16;
113 if (v & 0x00FF00FF) c -= 8;
114 if (v & 0x0F0F0F0F) c -= 4;
115 if (v & 0x33333333) c -= 2;
116 if (v & 0x55555555) c -= 1;
117 return c;
118 }
119
120 /* Creates a new hash table, provided a memory allocation function that takes a
121 single size_t bytes, a column count, and a row count which determines the
122 size of the table.
123
124 use ston_ht32_new to specify the exact or estimated number of unique keys
125 held in the table. With ston_ht32_new, the provided ht_rows is doubled, and
126 rounded up to the nearest power of two to create a hash table with minimal
127 collisions.
128 */
129 STON_FUNC
130 ston_ht ston_ht32_create
131 ( uint16_t ht_columns,
132 uint8_t ht_2pow,
133 uint8_t ht_flags,
134 void* (*alloc_fn)(size_t)
135 )
136 { size_t ht_bytes = (ht_columns << ht_2pow) * sizeof(uint32_t);
137 ston_ht ht = (ston_ht) alloc_fn(sizeof(ston_ht_h) + ht_bytes);
138 if (ht != NULL)
139 { ht->ht_columns = ht_columns;
140 ht->ht_2pow = ht_2pow;
141 ht->ht_flags = ht_flags;
142 memset(ht + 1, 0, ht_bytes);
143 }
144 return ht;
145 }
146
#ifdef STON_HT_FREAD
/* Reads a 32-bit hash table out of the provided file at the provided fpos,
   into a buffer allocated by alloc_fn. The table is staged on the stack until
   the header is validated and all file operations are finished, because no
   matching free function is available to release an alloc_fn buffer.

   The stream position found on entry is restored before returning, on success
   and on failure alike.

   Returns the table, or NULL on failure. errno is left as set by the failing
   library call; a header with an unrepresentable geometry sets EINVAL. Note
   that fread is not required to set errno on a short read.

   NOTE(review): the staging buffer comes from alloca, so a large table can
   exhaust the stack; confirm the expected table sizes with the callers.
*/
ston_ht ston_ht32_fread
( FILE* file,
  long  fpos,
  void* (*alloc_fn)(size_t)
)
{ struct ston_ht_header_t header;
  ston_ht stack_ht, ht;
  long    fpos_start;
  size_t  table_units, table_size, alloc_size;
  int     errno_local;
  if ((fpos_start = ftell(file)) == -1)
    return NULL;
  if (fseek(file, fpos, SEEK_SET) != 0)
    goto fail_seekback;
  if (fread(&header, sizeof(header), 1, file) != 1)
    goto fail_seekback;
  /* The header comes from the file: check the shift count and the byte size
     before computing with them */
  if (header.ht_2pow > 30)
  { errno = EINVAL;
    goto fail_seekback;
  }
  table_units = (size_t)header.ht_columns << header.ht_2pow;
  if ((table_units >> header.ht_2pow) != header.ht_columns
  ||  table_units > (SIZE_MAX - sizeof(header)) / sizeof(uint32_t))
  { errno = EINVAL;
    goto fail_seekback;
  }
  table_size = table_units * sizeof(uint32_t);
  alloc_size = sizeof(header) + table_size;
  stack_ht = (ston_ht) alloca(alloc_size);
  memcpy(stack_ht, &header, sizeof(header));
  /* 'stack_ht + 1' is the first byte after the header; pointer arithmetic on
     a ston_ht advances in whole headers, not in bytes */
  if (fread(stack_ht + 1, table_size, 1, file) != 1)
    goto fail_seekback;
  if (fseek(file, fpos_start, SEEK_SET) != 0)
    return NULL;
  ht = (ston_ht) alloc_fn(alloc_size);
  if (ht != NULL)
    memcpy(ht, stack_ht, alloc_size);
  return ht;
 fail_seekback:
  /* Try to seek the file back to origin without clobbering errno */
  errno_local = errno;
  fseek(file, fpos_start, SEEK_SET);
  errno = errno_local;
  return NULL;
}
#endif
187
188 /* Returns a pointer to the row of data in the hashtable containing the provided
189 key, inserts if not found. Returns NULL on overflow.
190 */
191 STON_FUNC
192 uint32_t* ston_ht32_row
193 ( struct ston_ht_header_t* ht,
194 uint32_t key
195 )
196 { uint32_t* row;
197 uint32_t* row_start = ston_ht32_start(ht);
198 uint32_t* row_end = ston_ht32_end(ht);
199 uint16_t ht_cols = ston_ht_cols(ht);
200 size_t row_number = ston_ht_keyrow(ht,key);
201 uint8_t looped = 0;
202 row = row_start + (row_number * ht_cols);
203 next_row:
204 if (row[0] != 0)
205 goto populated;
206 write_position:
207 row[0] = key;
208 return row;
209 populated:
210 if (row[0] == key)
211 goto write_position;
212 if (row + ht_cols < row_end)
213 row += ht_cols;
214 else if (looped)
215 return NULL;
216 else
217 { looped++;
218 row = row_start;
219 }
220 goto next_row;
221 }
222
223 /* Inserts a value into a hashtable at the specified column, returning the
224 previous value */
225 STON_FUNC
226 uint32_t ston_ht32_insert
227 ( struct ston_ht_header_t* ht,
228 uint32_t key,
229 uint16_t column,
230 uint32_t value
231 )
232 { uint32_t* value_location, old_value;
233 value_location = ston_ht32_entry(ht,key,column);
234 old_value = *value_location;
235 *value_location = value;
236 return old_value;
237 }
238
239 /* Inserts a row of units into a hashtable, starting with the specified column.
240 Returns the number of elements that were written. This function will not
241 overflow internal buffers, but will return a short count (lower than the
242 provided 'units') when truncation of source data occurs. */
243 STON_FUNC
244 size_t
245 ston_ht32_insertx
246 ( struct ston_ht_header_t* ht,
247 uint32_t key,
248 uint32_t* data_src,
249 size_t start_column,
250 size_t units
251 )
252 { uint32_t* data_row = ston_ht32_row(ht,key);
253 uint32_t* data_limit = data_row + ston_ht_cols(ht);
254 uint32_t* data_trg = data_row + start_column;
255 if (data_row == NULL)
256 return 0;
257 while (units-- && data_trg < data_limit)
258 *data_trg++ = *data_src++;
259 return (size_t)(data_trg - data_row);
260 }
261
262
#if !defined(STON_DHT_SIZE)
#define STON_DHT_SIZE 4096
#endif

/* STON Dynamic Hashtable Structure
   A dynamic form of the generic hashtable implementation above which uses
   external allocation. Storage is held as separately allocated pages, one
   slot per depth, obtained through the stored allocation function.
*/
struct ston_dht_header_t
{ uint16_t columns;     /* cells per row */
  uint8_t  unit_bytes;  /* size of one cell, in bytes */
  uint8_t  start_depth; /* depth (pages[] index) of the first page */
};
typedef struct ston_dht_header_t ston_dht_h;

struct ston_dht_t
{ ston_dht_h header;
  void*      pages[sizeof(void*) * 8]; /* page per depth; NULL until allocated */
  void*    (*ht_alloc)(size_t);        /* allocates the pages */
  void     (*ht_free)(void*);          /* releases pages and the table; may be NULL */
};
typedef struct ston_dht_t* ston_dht;
283
284 STON_FUNC
285 ston_dht ston_dht_create(uint16_t,uint8_t,uint8_t,void*(*)(size_t),void(*)(void*));
286 STON_FUNC
287 uint32_t* ston_dht32_row(ston_dht,uint32_t);
288 STON_FUNC
289 uint32_t ston_dht32_insert(ston_dht,uint32_t,uint16_t,uint32_t);
290 STON_FUNC
291 size_t ston_dht32_insertx(ston_dht,uint32_t,uint32_t*,uint16_t,size_t);
292 STON_FUNC
293 ston_dht ston_dht_free(ston_dht);
294
/* Dynamic hashtable helper macros. Every macro argument is parenthesized
   inside the expansion so that compound expressions may be passed. */

/* Number of cells, and number of bytes, in the page at _DEPTH. The count is
   computed as a size_t so that deep pages do not overflow an int. */
#define ston_dht_units(_HT,_DEPTH) ((size_t)(_HT)->header.columns << (_DEPTH))
#define ston_dht_bytes(_HT,_DEPTH) (ston_dht_units(_HT,_DEPTH) * (_HT)->header.unit_bytes)
/* Tables with int sized cells: default start depth of 3, or a start depth
   derived from _N through ston_up2pow and ston_trailing0. */
#define ston_dht_new(_COL,_ALOC,_FRE) (ston_dht_create((_COL),3,sizeof(int),(_ALOC),(_FRE)))
#define ston_dht_sized(_COL,_N,_ALOC,_FRE) (ston_dht_create((_COL),ston_trailing0(ston_up2pow(_N)),sizeof(int),(_ALOC),(_FRE)))
/* Address of cell _COL in the row holding _KEY (see ston_dht32_row). */
#define ston_dht32_entry(_HT,_KEY,_COL) (ston_dht32_row((_HT),(_KEY)) + (_COL))
/* Tables with 32-bit cells: start depth of 0, or a start depth derived from
   _N through ston_up2pow and ston_trailing0. */
#define ston_dht32_new(_COL,_ALOC,_FRE) (ston_dht_create((_COL),0,sizeof(uint32_t),(_ALOC),(_FRE)))
#define ston_dht32_sized(_COL,_N,_ALOC,_FRE) (ston_dht_create((_COL),ston_trailing0(ston_up2pow(_N)),sizeof(uint32_t),(_ALOC),(_FRE)))
302
303
304 /* Creates a new bucketted hash table, provided a memory allocation function
305 that takes a single size_t bytes, a memory free function, a column count, and
306 a row count which determines the size of the buckets.
307 */
308 STON_FUNC
309 ston_dht ston_dht_create
310 ( uint16_t columns,
311 uint8_t start_depth,
312 uint8_t unit_bytes,
313 void* (*ht_alloc)(size_t),
314 void (*ht_free)(void*)
315 )
316 { ston_dht ht = (ston_dht) ht_alloc(sizeof(struct ston_dht_t));
317 if (ht != NULL)
318 { ht->header.columns = columns;
319 ht->header.start_depth = start_depth;
320 ht->header.unit_bytes = unit_bytes;
321 memset(ht->pages, 0, sizeof(void*) * sizeof(void*) * 8);
322 ht->pages[start_depth] = ht_alloc(ston_dht_bytes(ht, start_depth));
323 ht->ht_alloc = ht_alloc;
324 ht->ht_free = ht_free;
325 if (ht->pages[start_depth] == NULL && ht_free != NULL)
326 ht_free(ht);
327 else
328 memset(ht->pages[start_depth], 0, ston_dht_bytes(ht, start_depth));
329 }
330 return ht;
331 }
332
333 /* Returns a pointer to the row of data in the hashtable containing the provided
334 key, inserts if not found. Returns NULL on overflow.
335 */
336 STON_FUNC
337 uint32_t* ston_dht32_row
338 ( struct ston_dht_t* ht,
339 uint32_t key
340 )
341 { uint16_t columns = ht->header.columns;
342 uint8_t depth = ht->header.start_depth;
343 uint32_t mask = ((0x1 << depth) - 1) >> 1;
344 void* page;
345 uint32_t* row;
346 uint32_t row_key;
347 next_page:
348 if (ht->pages[depth] == NULL)
349 { ht->pages[depth] = ht->ht_alloc(ston_dht_bytes(ht, depth));
350 if (ht->pages[depth] == NULL)
351 return NULL;
352 memset(ht->pages[depth], 0, ston_dht_bytes(ht, depth));
353 }
354 page = ht->pages[depth];
355 row = (uint32_t*)page + ((key & mask) * columns);
356 row_key = *row;
357 if (row_key == key || row_key == 0)
358 { row[0] = key;
359 return row;
360 }
361 depth++;
362 mask = (mask << 1) | 0x1;
363 goto next_page;
364 }
365
366 /* Inserts a value into a hashtable at the specified column, returning the
367 previous value */
368 STON_FUNC
369 uint32_t ston_dht32_insert
370 ( struct ston_dht_t* ht,
371 uint32_t key,
372 uint16_t column,
373 uint32_t value
374 )
375 { uint32_t* value_location, old_value;
376 value_location = ston_dht32_entry(ht,key,column);
377 old_value = *value_location;
378 *value_location = value;
379 return old_value;
380 }
381
382 /* Insert multiple values, returning the number of bytes written */
383 STON_FUNC
384 size_t
385 ston_dht32_insertx
386 ( struct ston_dht_t* ht,
387 uint32_t key,
388 uint32_t* data_src,
389 uint16_t start_column,
390 size_t units
391 )
392 { uint32_t* data_row = ston_dht32_row(ht,key);
393 uint32_t* data_limit = data_row + ht->header.columns;
394 uint32_t* data_trg = data_row + start_column;
395 if (data_row == NULL)
396 return 0;
397 while (units-- && data_trg < data_limit)
398 *data_trg++ = *data_src++;
399 return (size_t)(data_trg - data_row);
400 }
401
402 /* Free the dynamic hash table */
403 STON_FUNC
404 struct ston_dht_t* ston_dht_free
405 ( struct ston_dht_t* ht )
406 { void (*ht_free)(void*) = ht->ht_free;
407 uint8_t depth = ht->header.start_depth;
408 void** pages = ht->pages;
409 if (ht_free != NULL)
410 { while (pages[depth] != NULL)
411 ht_free(pages[depth++]);
412 ht_free(ht);
413 return NULL;
414 }
415 return ht;
416 }
417
418
#endif //_STON_HT_T_