2 \brief STON Hash Tables
3 \details Aligned general purpose hash functions and memory definitions
4 whose columns are provided, and whose rows, and sizes, are derived.
6 ht_size = header.ht_columns << header.ht_2pow;
7 ht_rows = 0x1 << header.ht_2pow;
9 All generic hashtables in henge must have a power-of-two number of
10 rows. An ht_columns value that is also a power-of-two will result in
11 a power-of-two sized memory imprint for the structure, making it easy
14 Elements in the columns may be of any arbitrary size.
16 typedef uint32_t my_ht_type;
17 ht_bytes = ht_size * sizeof(my_ht_type);
19 implementation covers only 32-bit unit sizes.
23 ----------------------------------------------------------------------------*/
26 /* Define STON_NOSTATIC to expose included function symbols */
28 #define STON_FUNC_STATIC static
30 #define STON_FUNC_STATIC
31 #endif //STON_NOSTATIC
32 /* If GNUC is detected, uses attributes to stop inlining */
34 #define STON_FUNC_NOINLINE __attribute__ ((noinline))
36 #define STON_FUNC_NOINLINE
38 /* Define STON_NOINLINE to prevent inline compiler hints */
40 #define STON_FUNC_INLINE inline
42 #define STON_FUNC_INLINE
43 #endif //STON_NOINLINE
44 /* Define STON_FUNC to override the default STON Function attributes */
46 #define STON_FUNC STON_FUNC_STATIC STON_FUNC_INLINE
54 ston_ht
ston_ht32_fread(FILE*,long,void*(*)(size_t));
55 size_t ston_ht32_fwrite(ston_ht
,FILE*,long);
58 #endif //STON_HT_FREAD
60 #include <string.h> //mem*
61 /* STON Hashtable Structure
62 Hashtables are stored as dynamically sized two dimensional arrays
64 typedef struct ston_ht_header_t
65 { uint16_t ht_columns
;
66 uint8_t ht_2pow
, ht_flags
;
70 uint32_t ston_up2pow(uint32_t);
72 uint8_t ston_trailing0(uint32_t);
74 ston_ht
ston_ht32_create(uint16_t,uint8_t,uint8_t,void*(*)(size_t));
76 uint32_t* ston_ht32_row(ston_ht
,uint32_t);
78 uint32_t ston_ht32_insert(ston_ht
,uint32_t,uint16_t,uint32_t);
80 size_t ston_ht32_insertx(ston_ht
,uint32_t,uint32_t*,size_t,size_t);
/* Convenience macros for the fixed-size 32-bit hash table.
   _HT is a pointer to a table header. Every macro argument is parenthesized
   so that compound expressions (e.g. `a | b`) expand as the caller intended. */

/* Creates a table from a key-count estimate: _N is doubled, rounded up with
   ston_up2pow, and converted to a shift amount with ston_trailing0. */
#define ston_ht32_new(_COL,_N,_F,_FN) (ston_ht32_create((_COL),ston_trailing0(ston_up2pow((_N) << 1)),(_F),(_FN)))
/* Address of column _COL in the row selected for _KEY.
   NOTE(review): no NULL check is applied to the result of ston_ht32_row. */
#define ston_ht32_entry(_HT,_KEY,_COL) (ston_ht32_row((_HT),(_KEY)) + (_COL))
/* Total number of units in the table: columns * rows. */
#define ston_ht_size(_HT) ((_HT)->ht_columns << (_HT)->ht_2pow)
/* Number of rows, always a power of two (1 << ht_2pow). */
#define ston_ht_rows(_HT) (0x1 << (_HT)->ht_2pow)
/* Number of columns per row. */
#define ston_ht_cols(_HT) ((_HT)->ht_columns)
/* First byte of table data, located immediately after the header. */
#define ston_ht_start(_HT) ((uint8_t*)((_HT) + 1))
/* Row index for _KEY: the key masked by (rows - 1). Uses the _HT argument
   rather than relying on a caller variable that happens to be named `ht`. */
#define ston_ht_keyrow(_HT,_KEY) ((_KEY) & (ston_ht_rows(_HT) - 1))
/* Table data viewed as 32-bit units. */
#define ston_ht32_start(_HT) ((uint32_t*)ston_ht_start(_HT))
/* One past the last 32-bit unit of table data. */
#define ston_ht32_end(_HT) (ston_ht32_start(_HT) + ston_ht_size(_HT))
/* Size of the table data in bytes (header excluded). */
#define ston_ht32_size(_HT) (ston_ht_size(_HT) * sizeof(uint32_t))
93 /** @see http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
97 { val
= (val
<< 1) - 1;
106 /** @see https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightParallel */
108 uint8_t ston_trailing0
113 if (v
& 0x0000FFFF) c
-= 16;
114 if (v
& 0x00FF00FF) c
-= 8;
115 if (v
& 0x0F0F0F0F) c
-= 4;
116 if (v
& 0x33333333) c
-= 2;
117 if (v
& 0x55555555) c
-= 1;
121 /* Creates a new hash table, provided a memory allocation function that takes a
122 single size_t bytes, a column count, and a row count which determines the
125 use ston_ht32_new to specify the exact or estimated number of unique keys
126 held in the table. With ston_ht32_new, the provided ht_rows is doubled, and
127 rounded up to the nearest power of two to create a hash table with minimal
131 ston_ht ston_ht32_create
132 ( uint16_t ht_columns
,
135 void* (*alloc_fn
)(size_t)
137 { size_t ht_bytes
= (ht_columns
<< ht_2pow
) * sizeof(uint32_t);
138 ston_ht ht
= (ston_ht
) alloc_fn(sizeof(ston_ht_h
) + ht_bytes
);
140 { ht
->ht_columns
= ht_columns
;
141 ht
->ht_2pow
= ht_2pow
;
142 ht
->ht_flags
= ht_flags
;
143 memset(ht
+ 1, 0, ht_bytes
);
149 /* Reads a 32-bit hash table out of the provided file at the provided fpos, into
150 a buffer allocated by alloc_fn. Memory is allocated to the stack until the
151 entire structure is verified, and all file operations are finished.
152 Returns NULL with properly set errno on failure.
154 ston_ht ston_ht32_fread
157 void* (*alloc_fn
)(size_t)
159 { struct ston_ht_header_t header
;
160 ston_ht stack_ht
, ht
;
162 size_t table_size
, alloc_size
;
164 if ((fpos_start
= ftell(file
)) == -1)
166 if (fread(&header
, sizeof(header
), 1, file
) != 1)
168 table_size
= ston_ht32_size(&header
);
169 alloc_size
= sizeof(header
) + table_size
;
170 stack_ht
= (ston_ht
) alloca(alloc_size
);
171 memcpy(stack_ht
, &header
, sizeof(header
));
172 if (fread(stack_ht
+ sizeof(header
), table_size
, 1, file
) != 1)
174 if (fseek(file
, fpos_start
, SEEK_SET
) != 0)
176 ht
= (ston_ht
) alloc_fn(alloc_size
);
178 memcpy(ht
, stack_ht
, alloc_size
);
181 /* Try to seek the file back to origin without clobbering errno */
183 fseek(file
, fpos_start
, SEEK_SET
);
188 /* Writes a 32-bit hash table from memory into a file at fpos. Returns the
189 number of bytes written to the file, errno is set on error. */
190 size_t ston_ht32_fwrite
191 ( struct ston_ht_header_t
* ht
,
195 { size_t bytes_written
;
197 if ((fpos_start
= ftell(file
)) == NULL
198 || fseek(file
, fpos
, SEEK_SET
) == 0
199 || (bytes_written
= fwrite(file
, 1, sizeof(ston_ht_h
), file
)) < sizeof(ston_ht_h
)
200 || (bytes_written
+= fwrite(file
, 1, ston_ht32_bytes(ht
), file
)) < (sizeof(ston_ht_h
) + ston_ht32_bytes(ht
))
201 || fseek(file
, fpos_start
, SEEK_SET
) == 0)
203 return bytes_written
;
207 /* Returns a pointer to the row of data in the hashtable containing the provided
208 key, inserts if not found. Returns NULL on overflow.
211 uint32_t* ston_ht32_row
212 ( struct ston_ht_header_t
* ht
,
216 uint32_t* row_start
= ston_ht32_start(ht
);
217 uint32_t* row_end
= ston_ht32_end(ht
);
218 uint16_t ht_cols
= ston_ht_cols(ht
);
219 size_t row_number
= ston_ht_keyrow(ht
,key
);
221 row
= row_start
+ (row_number
* ht_cols
);
231 if (row
+ ht_cols
< row_end
)
242 /* Inserts a value into a hashtable at the specified column, returning the
245 uint32_t ston_ht32_insert
246 ( struct ston_ht_header_t
* ht
,
251 { uint32_t* value_location
, old_value
;
252 value_location
= ston_ht32_entry(ht
,key
,column
);
253 old_value
= *value_location
;
254 *value_location
= value
;
258 /* Inserts a row of units into a hashtable, starting with the specified column.
259 Returns the number of elements that were written. This function will not
260 overflow internal buffers, but will return a short count (lower than the
261 provided 'units') when truncation of source data occurs. */
265 ( struct ston_ht_header_t
* ht
,
271 { uint32_t* data_row
= ston_ht32_row(ht
,key
);
272 uint32_t* data_limit
= data_row
+ ston_ht_cols(ht
);
273 uint32_t* data_trg
= data_row
+ start_column
;
274 if (data_row
== NULL
)
276 while (units
-- && data_trg
< data_limit
)
277 *data_trg
++ = *data_src
++;
278 return (size_t)(data_trg
- data_row
);
282 #ifndef STON_DHT_SIZE
283 #define STON_DHT_SIZE 4096
286 /* STON Dynamic Hashtable Structure
287 A dynamic form of the generic hashtable implementation above which uses
290 typedef struct ston_dht_header_t
296 typedef struct ston_dht_t
298 void* pages
[sizeof(void*) * 8];
299 void* (*ht_alloc
)(size_t);
300 void (*ht_free
)(void*);
304 ston_dht
ston_dht_create(uint16_t,uint8_t,uint8_t,void*(*)(size_t),void(*)(void*));
306 uint32_t* ston_dht32_row(ston_dht
,uint32_t);
308 uint32_t ston_dht32_insert(ston_dht
,uint32_t,uint16_t,uint32_t);
310 size_t ston_dht32_insertx(ston_dht
,uint32_t,uint32_t*,uint16_t,size_t);
312 ston_dht
ston_dht_free(ston_dht
);
/* Convenience macros for the dynamic (paged) hash table.
   _HT is a pointer to a ston_dht_t; _DEPTH is a page depth (shift amount).
   Every macro argument is parenthesized so compound expressions expand safely. */

/* Number of units in a page of the given depth: columns << depth. */
#define ston_dht_units(_HT,_DEPTH) ((_HT)->header.columns << (_DEPTH))
/* Size in bytes of a page of the given depth. */
#define ston_dht_bytes(_HT,_DEPTH) (ston_dht_units(_HT,_DEPTH) * (_HT)->header.unit_bytes)
/* New table of int-sized units with a start depth of 3. */
#define ston_dht_new(_COL,_ALOC,_FRE) (ston_dht_create((_COL),3,sizeof(int),(_ALOC),(_FRE)))
/* New table of int-sized units; the start depth is derived from _N rounded up
   to a power of two. The closing parenthesis of ston_trailing0 now sits right
   after its single argument, so ston_dht_create receives all five arguments. */
#define ston_dht_sized(_COL,_N,_ALOC,_FRE) (ston_dht_create((_COL),ston_trailing0(ston_up2pow(_N)),sizeof(int),(_ALOC),(_FRE)))
/* Address of column _COL in the row selected for _KEY.
   NOTE(review): no NULL check is applied to the result of ston_dht32_row. */
#define ston_dht32_entry(_HT,_KEY,_COL) (ston_dht32_row((_HT),(_KEY)) + (_COL))
/* New table of 32-bit units with a start depth of 0. */
#define ston_dht32_new(_COL,_ALOC,_FRE) (ston_dht_create((_COL),0,sizeof(uint32_t),(_ALOC),(_FRE)))
/* New table of 32-bit units; the start depth is derived from _N rounded up to
   a power of two. */
#define ston_dht32_sized(_COL,_N,_ALOC,_FRE) (ston_dht_create((_COL),ston_trailing0(ston_up2pow(_N)),sizeof(uint32_t),(_ALOC),(_FRE)))
323 /* Creates a new bucketed hash table, provided a memory allocation function
324 that takes a single size_t bytes, a memory free function, a column count, and
325 a row count which determines the size of the buckets.
328 ston_dht ston_dht_create
332 void* (*ht_alloc
)(size_t),
333 void (*ht_free
)(void*)
335 { ston_dht ht
= (ston_dht
) ht_alloc(sizeof(struct ston_dht_t
));
337 { ht
->header
.columns
= columns
;
338 ht
->header
.start_depth
= start_depth
;
339 ht
->header
.unit_bytes
= unit_bytes
;
340 memset(ht
->pages
, 0, sizeof(void*) * sizeof(void*) * 8);
341 ht
->pages
[start_depth
] = ht_alloc(ston_dht_bytes(ht
, start_depth
));
342 ht
->ht_alloc
= ht_alloc
;
343 ht
->ht_free
= ht_free
;
344 if (ht
->pages
[start_depth
] == NULL
&& ht_free
!= NULL
)
347 memset(ht
->pages
[start_depth
], 0, ston_dht_bytes(ht
, start_depth
));
352 /* Returns a pointer to the row of data in the hashtable containing the provided
353 key, inserts if not found. Returns NULL on overflow.
356 uint32_t* ston_dht32_row
357 ( struct ston_dht_t
* ht
,
360 { uint16_t columns
= ht
->header
.columns
;
361 uint8_t depth
= ht
->header
.start_depth
;
362 uint32_t mask
= ((0x1 << depth
) - 1) >> 1;
367 if (ht
->pages
[depth
] == NULL
)
368 { ht
->pages
[depth
] = ht
->ht_alloc(ston_dht_bytes(ht
, depth
));
369 if (ht
->pages
[depth
] == NULL
)
371 memset(ht
->pages
[depth
], 0, ston_dht_bytes(ht
, depth
));
373 page
= ht
->pages
[depth
];
374 row
= (uint32_t*)page
+ ((key
& mask
) * columns
);
376 if (row_key
== key
|| row_key
== 0)
381 mask
= (mask
<< 1) | 0x1;
385 /* Inserts a value into a hashtable at the specified column, returning the
388 uint32_t ston_dht32_insert
389 ( struct ston_dht_t
* ht
,
394 { uint32_t* value_location
, old_value
;
395 value_location
= ston_dht32_entry(ht
,key
,column
);
396 old_value
= *value_location
;
397 *value_location
= value
;
401 /* Insert multiple values, returning the end position of the write within the row, counted in 32-bit units (not bytes) */
405 ( struct ston_dht_t
* ht
,
408 uint16_t start_column
,
411 { uint32_t* data_row
= ston_dht32_row(ht
,key
);
412 uint32_t* data_limit
= data_row
+ ht
->header
.columns
;
413 uint32_t* data_trg
= data_row
+ start_column
;
414 if (data_row
== NULL
)
416 while (units
-- && data_trg
< data_limit
)
417 *data_trg
++ = *data_src
++;
418 return (size_t)(data_trg
- data_row
);
421 /* Free the dynamic hash table */
423 struct ston_dht_t
* ston_dht_free
424 ( struct ston_dht_t
* ht
)
425 { void (*ht_free
)(void*) = ht
->ht_free
;
426 uint8_t depth
= ht
->header
.start_depth
;
427 void** pages
= ht
->pages
;
429 { while (pages
[depth
] != NULL
)
430 ht_free(pages
[depth
++]);
437 /********************************************************************************
438 *********************************************************************************
439 *********************************************************************************
440 ********************************************************************************/
441 typedef struct ston_dht2_header_t
442 { uint16_t val_bytes
;
447 typedef struct ston_dht2_bucket_t
451 }ston_dht2_bucket_h
,* ston_dht2_bucket
;
453 #define STON_DHT_BUCKETS_SIZE (sizeof(void*) * 8)
454 typedef struct ston_dht2_t
455 { ston_dht2_h header
;
456 ston_dht2_bucket_h buckets
[1 + STON_DHT_BUCKETS_SIZE
];
457 ston_dht2_bucket bsp
;
460 void* (*ht_alloc
)(size_t);
461 void (*ht_free
)(void*);
465 ston_dht2
ston_dht2_create(uint16_t,uint8_t,void*(*)(size_t),void(*)(void*));
467 uint32_t* ston_dht232_row(ston_dht2
,uint32_t);
469 uint32_t ston_dht232_insert(ston_dht2
,uint32_t,uint16_t,uint32_t);
471 size_t ston_dht232_insertx(ston_dht2
,uint32_t,uint32_t*,uint16_t,size_t);
473 ston_dht2
ston_dht2_free(ston_dht2
);
/* Convenience macros for the bucketed dynamic hash table (dht2).
   _HT is a pointer to a ston_dht2_t. Every macro argument is parenthesized so
   compound expressions expand safely. */

/* Size in bytes of a page of the given depth: row_bytes << depth. */
#define ston_dht2_bytes(_HT,_DEPTH) ((_HT)->row_bytes << (_DEPTH))
/* New table; _COL is forwarded as the first argument of ston_dht2_create and
   sizeof(int) as the second. */
#define ston_dht2_new(_COL,_ALOC,_FRE) (ston_dht2_create((_COL),sizeof(int),(_ALOC),(_FRE)))
/* New table; as ston_dht2_new but with sizeof(uint32_t) as the second argument. */
#define ston_dht232_new(_COL,_ALOC,_FRE) (ston_dht2_create((_COL),sizeof(uint32_t),(_ALOC),(_FRE)))
/* Address of column _COL in the row selected for _KEY.
   NOTE(review): no NULL check is applied to the result of ston_dht232_row. */
#define ston_dht232_entry(_HT,_KEY,_COL) (ston_dht232_row((_HT),(_KEY)) + (_COL))
481 /* Creates a new bucketed hash table, provided a memory allocation function
482 that takes a single size_t bytes, a memory free function, a column count, and
483 a row count which determines the size of the buckets.
485 static ston_dht2_bucket_h dummy_bucket
= { (uint8_t)-1, NULL
, (uint32_t)-1 };
488 ston_dht2 ston_dht2_create
489 ( uint16_t val_bytes
,
491 void* (*ht_alloc
)(size_t),
492 void (*ht_free
)(void*)
494 { ston_dht2 ht
= (ston_dht2
) ht_alloc(sizeof(struct ston_dht2_t
));
496 { ht
->header
.val_bytes
= val_bytes
;
497 ht
->header
.key_bytes
= key_bytes
;
498 ht
->row_bytes
= val_bytes
+ key_bytes
;
499 ht
->ht_alloc
= ht_alloc
;
500 ht
->ht_free
= ht_free
;
502 for (i
= 0; i
<= STON_DHT_BUCKETS_SIZE
; i
++)
503 ht
->buckets
[i
] = dummy_bucket
;
504 ht
->bsp
= ht
->buckets
+ STON_DHT_BUCKETS_SIZE
- 1;
505 ht
->bsp
->page
= ht_alloc(ston_dht2_bytes(ht
, 4));
506 if (ht
->bsp
->page
== NULL
&& ht_free
!= NULL
)
509 { memset((ht
->bsp
->page
), 0, ston_dht2_bytes(ht
,4));
519 /* Returns a pointer to the row of data in the hashtable containing the provided
520 key, inserting if not found, or NULL if a memory error occurs */
522 uint32_t* ston_dht232_row
523 ( struct ston_dht2_t
* ht
,
526 { int8_t bucket_no
= (int8_t)ht
->buckets_len
- 1;
527 uint32_t* row
, row_key
, mask
;
529 int8_t zero_bucket
= (int8_t)-1;
530 ston_dht2_bucket bsp
= ht
->bsp
;
531 ston_dht2_bucket_h bucket
;
533 bucket
= bsp
[bucket_no
];
534 /* Find until out of allocated pages, then insert at last empty bucket position */
535 if (bucket
.page
== NULL
)
536 { if (zero_bucket
!= (int8_t)-1)
537 { bucket
= bsp
[zero_bucket
];
538 mask
= (0x1 << bucket
.depth
) - 1;
539 row
= (uint32_t*)bucket
.page
+ (key
& mask
);
542 /* Swap the buckets up a level if the count has exceeded its parent's count */
543 if (bucket
.count
> bsp
[zero_bucket
+ 1].count
)
544 { bsp
[zero_bucket
] = bsp
[zero_bucket
+ 1];
545 bsp
[zero_bucket
+ 1] = bucket
;
548 bsp
[zero_bucket
].count
= bucket
.count
;
551 /* No buckets with a slot, shift the key right by depth, try again add a new bucket */
553 bucket
.depth
= 4 + (ht
->buckets_len
>> 1);
555 bytes
= ston_dht2_bytes(ht
,bucket
.depth
);
556 if ((bucket
.page
= ht
->ht_alloc(bytes
)) == NULL
)
557 { printf("Failed to allocate %lu bytes, bucket %i at with len %i\n",
558 bytes
, bucket_no
, ht
->buckets_len
);
561 memset(bucket
.page
,0,bytes
);
565 mask
= (0x1 << bucket
.depth
) - 1;
566 row
= (uint32_t*)bucket
.page
+ (key
& mask
);
570 /* Compute mask, and use it to find the row in the page */
571 mask
= (0x1 << bucket
.depth
) - 1;
572 row
= (uint32_t*)bucket
.page
+ (key
& mask
);
573 /* Look at the key at row[0], branch */
578 zero_bucket
= bucket_no
;
583 /* Inserts a value into a hashtable at the specified column, returning the
586 uint32_t ston_dht232_insert
587 ( struct ston_dht2_t
* ht
,
592 { uint32_t* value_location
, old_value
;
593 value_location
= ston_dht232_entry(ht
,key
,column
);
594 old_value
= *value_location
;
595 *value_location
= value
;
599 /* Insert multiple values, returning the end position of the write within the row, counted in 32-bit units (not bytes) */
603 ( struct ston_dht2_t
* ht
,
606 uint16_t start_column
,
609 { uint32_t* data_row
= ston_dht232_row(ht
,key
);
610 uint32_t* data_limit
= data_row
+ ht
->row_bytes
;
611 uint32_t* data_trg
= data_row
+ start_column
;
612 if (data_row
== NULL
)
614 while (units
-- && data_trg
< data_limit
)
615 *data_trg
++ = *data_src
++;
616 return (size_t)(data_trg
- data_row
);
619 /* Free the dynamic hash table */
621 struct ston_dht2_t
* ston_dht2_free
622 ( struct ston_dht2_t
* ht
)
623 { void (*ht_free
)(void*) = ht
->ht_free
;
624 uint8_t bucket
= ht
->buckets_len
;
627 ht_free(ht
->buckets
[bucket
].page
);