2 \brief STON Hash Tables
3 \details Aligned general purpose hash functions and memory definitions
4 whose columns are provided, and whose rows, and sizes, are derived.
6 ht_size = header.ht_columns << header.ht_2pow;
7 ht_rows = 0x1 << header.ht_2pow;
9 All generic hashtables in henge must have a power-of-two number of
10 rows. An ht_columns value that is also a power-of-two will result in
11 a power-of-two sized memory imprint for the structure, making it easy
14 Elements in the columns may be of any arbitrary size.
16 typedef uint32_t my_ht_type;
17 ht_bytes = ht_size * sizeof(my_ht_type);
19 implementation covers only 32-bit unit sizes.
23 ----------------------------------------------------------------------------*/
26 /* Define STON_NOSTATIC to expose included function symbols */
28 #define STON_FUNC_STATIC static
30 #define STON_FUNC_STATIC
31 #endif //STON_NOSTATIC
32 /* If GNUC is detected, uses attributes to stop inlining */
34 #define STON_FUNC_NOINLINE __attribute__ ((noinline))
36 #define STON_FUNC_NOINLINE
38 /* Define STON_NOINLINE to prevent inline compiler hints */
40 #define STON_FUNC_INLINE inline
42 #define STON_FUNC_INLINE
43 #endif //STON_NOINLINE
44 /* Define STON_FUNC to override the default STON Function attributes */
46 #define STON_FUNC STON_FUNC_STATIC STON_FUNC_INLINE
54 ston_ht
ston_ht32_fread(FILE*,long,void*(*)(size_t));
55 size_t ston_ht32_fwrite(ston_ht
,FILE*,long);
58 #endif //STON_HT_FREAD
60 #include <string.h> //mem*
61 /* STON Hashtable Structure
62 Hashtables are stored as dynamically sized two dimensional arrays
64 typedef struct ston_ht_header_t
65 { uint16_t ht_columns
;
66 uint8_t ht_2pow
, ht_flags
;
70 uint32_t ston_up2pow(uint32_t);
72 uint8_t ston_trailing0(uint32_t);
74 ston_ht
ston_ht32_create(uint16_t,uint8_t,uint8_t,void*(*)(size_t));
76 uint32_t* ston_ht32_row(ston_ht
,uint32_t);
78 uint32_t ston_ht32_insert(ston_ht
,uint32_t,uint16_t,uint32_t);
80 size_t ston_ht32_insertx(ston_ht
,uint32_t,uint32_t*,size_t,size_t);
/* Accessor macros for 32-bit hash tables.
   _HT is a pointer to a table header exposing ht_columns and ht_2pow; the
   table's units are addressed as starting immediately after that header.
   Every macro argument is parenthesized so that arbitrary expressions may be
   passed, and no macro refers to names from the caller's scope. */

/* Creates a table sized for about _N unique keys: _N is doubled, rounded up
   with ston_up2pow, and converted to an exponent with ston_trailing0. */
#define ston_ht32_new(_COL,_N,_F,_FN) (ston_ht32_create((_COL),ston_trailing0(ston_up2pow((_N) << 1)),(_F),(_FN)))
/* Pointer to column _COL of the row returned by ston_ht32_row for _KEY. */
#define ston_ht32_entry(_HT,_KEY,_COL) (ston_ht32_row((_HT),(_KEY)) + (_COL))
/* Total number of units in the table (columns * rows). */
#define ston_ht_size(_HT) ((_HT)->ht_columns << (_HT)->ht_2pow)
/* Number of rows, always a power of two. */
#define ston_ht_rows(_HT) (0x1 << (_HT)->ht_2pow)
/* Number of columns per row. */
#define ston_ht_cols(_HT) ((_HT)->ht_columns)
/* Byte pointer to the first unit, located directly after the header. */
#define ston_ht_start(_HT) ((uint8_t*)((_HT) + 1))
/* Row index that _KEY maps to: the key masked by (rows - 1). */
#define ston_ht_keyrow(_HT,_KEY) ((_KEY) & (ston_ht_rows(_HT) - 1))
/* First 32-bit unit of the table. */
#define ston_ht32_start(_HT) ((uint32_t*)ston_ht_start(_HT))
/* One past the last 32-bit unit of the table. */
#define ston_ht32_end(_HT) (ston_ht32_start(_HT) + ston_ht_size(_HT))
/* Size of the table's unit storage in bytes (header excluded). */
#define ston_ht32_size(_HT) (ston_ht_size(_HT) * sizeof(uint32_t))
93 /** @see http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
97 { val
= (val
<< 1) - 1;
106 /** @see https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightParallel */
108 uint8_t ston_trailing0
113 if (v
& 0x0000FFFF) c
-= 16;
114 if (v
& 0x00FF00FF) c
-= 8;
115 if (v
& 0x0F0F0F0F) c
-= 4;
116 if (v
& 0x33333333) c
-= 2;
117 if (v
& 0x55555555) c
-= 1;
121 /* Creates a new hash table, provided a memory allocation function that takes a
122 single size_t bytes, a column count, and a row count which determines the
125 use ston_ht32_new to specify the exact or estimated number of unique keys
126 held in the table. With ston_ht32_new, the provided ht_rows is doubled, and
127 rounded up to the nearest power of two to create a hash table with minimal
131 ston_ht ston_ht32_create
132 ( uint16_t ht_columns
,
135 void* (*alloc_fn
)(size_t)
137 { size_t ht_bytes
= (ht_columns
<< ht_2pow
) * sizeof(uint32_t);
138 ston_ht ht
= (ston_ht
) alloc_fn(sizeof(ston_ht_h
) + ht_bytes
);
140 { ht
->ht_columns
= ht_columns
;
141 ht
->ht_2pow
= ht_2pow
;
142 ht
->ht_flags
= ht_flags
;
143 memset(ht
+ 1, 0, ht_bytes
);
149 /* Reads a 32-bit hash table out of the provided file at the provided fpos, into
150 a buffer allocated by alloc_fn. Memory is allocated on the stack until the
151 entire structure is verified, and all file operations are finished.
152 Returns NULL with errno properly set on failure.
154 ston_ht ston_ht32_fread
157 void* (*alloc_fn
)(size_t)
159 { struct ston_ht_header_t header
;
160 ston_ht stack_ht
, ht
;
162 size_t table_size
, alloc_size
;
164 if ((fpos_start
= ftell(file
)) == -1)
166 if (fread(&header
, sizeof(header
), 1, file
) != 1)
168 table_size
= ston_ht32_size(&header
);
169 alloc_size
= sizeof(header
) + table_size
;
170 stack_ht
= (ston_ht
) alloca(alloc_size
);
171 memcpy(stack_ht
, &header
, sizeof(header
));
172 if (fread(stack_ht
+ sizeof(header
), table_size
, 1, file
) != 1)
174 if (fseek(file
, fpos_start
, SEEK_SET
) != 0)
176 ht
= (ston_ht
) alloc_fn(alloc_size
);
178 memcpy(ht
, stack_ht
, alloc_size
);
181 /* Try to seek the file back to origin without clobbering errno */
183 fseek(file
, fpos_start
, SEEK_SET
);
188 /* Writes a 32-bit hash table from memory into a file at fpos. Returns the
189 number of bytes written to the file, errno is set on error. */
190 size_t ston_ht32_fwrite
191 ( struct ston_ht_header_t
* ht
,
195 { size_t bytes_written
;
197 if ((fpos_start
= ftell(file
)) == NULL
198 || (bytes_written
= fwrite(file
, 1, sizeof(ston_ht_h
), file
)) < sizeof(ston_ht_h
)
199 || (bytes_written
+= fwrite(file
, 1, ston_ht32_bytes(ht
), file
)) < (sizeof(ston_ht_h
) + ston_ht32_bytes(ht
)))
201 return bytes_written
;
205 /* Returns a pointer to the row of data in the hashtable containing the provided
206 key, inserts if not found. Returns NULL on overflow.
209 uint32_t* ston_ht32_row
210 ( struct ston_ht_header_t
* ht
,
214 uint32_t* row_start
= ston_ht32_start(ht
);
215 uint32_t* row_end
= ston_ht32_end(ht
);
216 uint16_t ht_cols
= ston_ht_cols(ht
);
217 size_t row_number
= ston_ht_keyrow(ht
,key
);
219 row
= row_start
+ (row_number
* ht_cols
);
229 if (row
+ ht_cols
< row_end
)
240 /* Inserts a value into a hashtable at the specified column, returning the
243 uint32_t ston_ht32_insert
244 ( struct ston_ht_header_t
* ht
,
249 { uint32_t* value_location
, old_value
;
250 value_location
= ston_ht32_entry(ht
,key
,column
);
251 old_value
= *value_location
;
252 *value_location
= value
;
256 /* Inserts a row of units into a hashtable, starting with the specified column.
257 Returns the number of elements that were written. This function will not
258 overflow internal buffers, but will return a short count (lower than the
259 provided 'units') when truncation of source data occurs. */
263 ( struct ston_ht_header_t
* ht
,
269 { uint32_t* data_row
= ston_ht32_row(ht
,key
);
270 uint32_t* data_limit
= data_row
+ ston_ht_cols(ht
);
271 uint32_t* data_trg
= data_row
+ start_column
;
272 if (data_row
== NULL
)
274 while (units
-- && data_trg
< data_limit
)
275 *data_trg
++ = *data_src
++;
276 return (size_t)(data_trg
- data_row
);
280 #ifndef STON_DHT_SIZE
281 #define STON_DHT_SIZE 4096
284 /* STON Dynamic Hashtable Structure
285 A dynamic form of the generic hashtable implementation above which uses
288 typedef struct ston_dht_header_t
294 typedef struct ston_dht_t
296 void* pages
[sizeof(void*) * 8];
297 void* (*ht_alloc
)(size_t);
298 void (*ht_free
)(void*);
302 ston_dht
ston_dht_create(uint16_t,uint8_t,uint8_t,void*(*)(size_t),void(*)(void*));
304 uint32_t* ston_dht32_row(ston_dht
,uint32_t);
306 uint32_t ston_dht32_insert(ston_dht
,uint32_t,uint16_t,uint32_t);
308 size_t ston_dht32_insertx(ston_dht
,uint32_t,uint32_t*,uint16_t,size_t);
310 ston_dht
ston_dht_free(ston_dht
);
/* Helper macros for dynamic hash tables.
   _HT is a pointer to a structure whose header exposes columns and
   unit_bytes. Every macro argument is parenthesized so that arbitrary
   expressions may be passed. */

/* Number of units in the page at depth _DEPTH (columns << depth). */
#define ston_dht_units(_HT,_DEPTH) ((_HT)->header.columns << (_DEPTH))
/* Size in bytes of the page at depth _DEPTH. */
#define ston_dht_bytes(_HT,_DEPTH) (ston_dht_units(_HT,_DEPTH) * (_HT)->header.unit_bytes)
/* Creates a table of int-sized units with a start depth of 3. */
#define ston_dht_new(_COL,_ALOC,_FRE) (ston_dht_create((_COL),3,sizeof(int),(_ALOC),(_FRE)))
/* Creates a table of int-sized units whose start depth is derived from _N
   via ston_up2pow and ston_trailing0. The original expansion closed the
   ston_trailing0 call after _FRE, which passed the wrong argument counts
   to both ston_trailing0 and ston_dht_create. */
#define ston_dht_sized(_COL,_N,_ALOC,_FRE) (ston_dht_create((_COL),ston_trailing0(ston_up2pow(_N)),sizeof(int),(_ALOC),(_FRE)))
/* Pointer to column _COL of the row returned by ston_dht32_row for _KEY. */
#define ston_dht32_entry(_HT,_KEY,_COL) (ston_dht32_row((_HT),(_KEY)) + (_COL))
/* Creates a table of 32-bit units with a start depth of 0. */
#define ston_dht32_new(_COL,_ALOC,_FRE) (ston_dht_create((_COL),0,sizeof(uint32_t),(_ALOC),(_FRE)))
/* Creates a table of 32-bit units whose start depth is derived from _N. */
#define ston_dht32_sized(_COL,_N,_ALOC,_FRE) (ston_dht_create((_COL),ston_trailing0(ston_up2pow(_N)),sizeof(uint32_t),(_ALOC),(_FRE)))
321 /* Creates a new bucketed hash table, provided a memory allocation function
322 that takes a single size_t bytes, a memory free function, a column count, and
323 a row count which determines the size of the buckets.
326 ston_dht ston_dht_create
330 void* (*ht_alloc
)(size_t),
331 void (*ht_free
)(void*)
333 { ston_dht ht
= (ston_dht
) ht_alloc(sizeof(struct ston_dht_t
));
335 { ht
->header
.columns
= columns
;
336 ht
->header
.start_depth
= start_depth
;
337 ht
->header
.unit_bytes
= unit_bytes
;
338 memset(ht
->pages
, 0, sizeof(void*) * sizeof(void*) * 8);
339 ht
->pages
[start_depth
] = ht_alloc(ston_dht_bytes(ht
, start_depth
));
340 ht
->ht_alloc
= ht_alloc
;
341 ht
->ht_free
= ht_free
;
342 if (ht
->pages
[start_depth
] == NULL
&& ht_free
!= NULL
)
345 memset(ht
->pages
[start_depth
], 0, ston_dht_bytes(ht
, start_depth
));
350 /* Returns a pointer to the row of data in the hashtable containing the provided
351 key, inserts if not found. Returns NULL on overflow.
354 uint32_t* ston_dht32_row
355 ( struct ston_dht_t
* ht
,
358 { uint16_t columns
= ht
->header
.columns
;
359 uint8_t depth
= ht
->header
.start_depth
;
360 uint32_t mask
= ((0x1 << depth
) - 1) >> 1;
365 if (ht
->pages
[depth
] == NULL
)
366 { ht
->pages
[depth
] = ht
->ht_alloc(ston_dht_bytes(ht
, depth
));
367 if (ht
->pages
[depth
] == NULL
)
369 memset(ht
->pages
[depth
], 0, ston_dht_bytes(ht
, depth
));
371 page
= ht
->pages
[depth
];
372 row
= (uint32_t*)page
+ ((key
& mask
) * columns
);
374 if (row_key
== key
|| row_key
== 0)
379 mask
= (mask
<< 1) | 0x1;
383 /* Inserts a value into a hashtable at the specified column, returning the
386 uint32_t ston_dht32_insert
387 ( struct ston_dht_t
* ht
,
392 { uint32_t* value_location
, old_value
;
393 value_location
= ston_dht32_entry(ht
,key
,column
);
394 old_value
= *value_location
;
395 *value_location
= value
;
399 /* Insert multiple values, returning the position (in 32-bit units from the start of the row, not bytes) just past the last value written */
403 ( struct ston_dht_t
* ht
,
406 uint16_t start_column
,
409 { uint32_t* data_row
= ston_dht32_row(ht
,key
);
410 uint32_t* data_limit
= data_row
+ ht
->header
.columns
;
411 uint32_t* data_trg
= data_row
+ start_column
;
412 if (data_row
== NULL
)
414 while (units
-- && data_trg
< data_limit
)
415 *data_trg
++ = *data_src
++;
416 return (size_t)(data_trg
- data_row
);
419 /* Free the dynamic hash table */
421 struct ston_dht_t
* ston_dht_free
422 ( struct ston_dht_t
* ht
)
423 { void (*ht_free
)(void*) = ht
->ht_free
;
424 uint8_t depth
= ht
->header
.start_depth
;
425 void** pages
= ht
->pages
;
427 { while (pages
[depth
] != NULL
)
428 ht_free(pages
[depth
++]);
435 /********************************************************************************
436 *********************************************************************************
437 *********************************************************************************
438 ********************************************************************************/
439 typedef struct ston_dht2_header_t
440 { uint16_t val_bytes
;
445 typedef struct ston_dht2_bucket_t
449 }ston_dht2_bucket_h
,* ston_dht2_bucket
;
451 #define STON_DHT_BUCKETS_SIZE (sizeof(void*) * 8)
452 typedef struct ston_dht2_t
453 { ston_dht2_h header
;
454 ston_dht2_bucket_h buckets
[1 + STON_DHT_BUCKETS_SIZE
];
455 ston_dht2_bucket bsp
;
458 void* (*ht_alloc
)(size_t);
459 void (*ht_free
)(void*);
463 ston_dht2
ston_dht2_create(uint16_t,uint8_t,void*(*)(size_t),void(*)(void*));
465 uint32_t* ston_dht232_row(ston_dht2
,uint32_t);
467 uint32_t ston_dht232_insert(ston_dht2
,uint32_t,uint16_t,uint32_t);
469 size_t ston_dht232_insertx(ston_dht2
,uint32_t,uint32_t*,uint16_t,size_t);
471 ston_dht2
ston_dht2_free(ston_dht2
);
/* Helper macros for the second dynamic hash table variant.
   _HT is a pointer to a structure exposing row_bytes. Every macro argument is
   parenthesized so that arbitrary expressions may be passed. */

/* Size in bytes of a page at depth _DEPTH (row_bytes << depth). */
#define ston_dht2_bytes(_HT,_DEPTH) ((_HT)->row_bytes << (_DEPTH))
/* Creates a table, forwarding _COL as the first argument of ston_dht2_create
   (named val_bytes there) and sizeof(int) as the second. */
#define ston_dht2_new(_COL,_ALOC,_FRE) (ston_dht2_create((_COL),sizeof(int),(_ALOC),(_FRE)))
/* Same as ston_dht2_new, with sizeof(uint32_t) as the second argument. */
#define ston_dht232_new(_COL,_ALOC,_FRE) (ston_dht2_create((_COL),sizeof(uint32_t),(_ALOC),(_FRE)))
/* Pointer to column _COL of the row returned by ston_dht232_row for _KEY. */
#define ston_dht232_entry(_HT,_KEY,_COL) (ston_dht232_row((_HT),(_KEY)) + (_COL))
479 /* Creates a new bucketed hash table, provided a memory allocation function
480 that takes a single size_t bytes, a memory free function, a column count, and
481 a row count which determines the size of the buckets.
483 static ston_dht2_bucket_h dummy_bucket
= { (uint8_t)-1, NULL
, (uint32_t)-1 };
486 ston_dht2 ston_dht2_create
487 ( uint16_t val_bytes
,
489 void* (*ht_alloc
)(size_t),
490 void (*ht_free
)(void*)
492 { ston_dht2 ht
= (ston_dht2
) ht_alloc(sizeof(struct ston_dht2_t
));
494 { ht
->header
.val_bytes
= val_bytes
;
495 ht
->header
.key_bytes
= key_bytes
;
496 ht
->row_bytes
= val_bytes
+ key_bytes
;
497 ht
->ht_alloc
= ht_alloc
;
498 ht
->ht_free
= ht_free
;
500 for (i
= 0; i
<= STON_DHT_BUCKETS_SIZE
; i
++)
501 ht
->buckets
[i
] = dummy_bucket
;
502 ht
->bsp
= ht
->buckets
+ STON_DHT_BUCKETS_SIZE
- 1;
503 ht
->bsp
->page
= ht_alloc(ston_dht2_bytes(ht
, 4));
504 if (ht
->bsp
->page
== NULL
&& ht_free
!= NULL
)
507 { memset((ht
->bsp
->page
), 0, ston_dht2_bytes(ht
,4));
517 /* Returns a pointer to the row of data in the hashtable containing the provided
518 key, inserting if not found, or NULL if a memory error occurs */
520 uint32_t* ston_dht232_row
521 ( struct ston_dht2_t
* ht
,
524 { int8_t bucket_no
= (int8_t)ht
->buckets_len
- 1;
525 uint32_t* row
, row_key
, mask
;
527 int8_t zero_bucket
= (int8_t)-1;
528 ston_dht2_bucket bsp
= ht
->bsp
;
529 ston_dht2_bucket_h bucket
;
531 bucket
= bsp
[bucket_no
];
532 /* Find until out of allocated pages, then insert at last empty bucket position */
533 if (bucket
.page
== NULL
)
534 { if (zero_bucket
!= (int8_t)-1)
535 { bucket
= bsp
[zero_bucket
];
536 mask
= (0x1 << bucket
.depth
) - 1;
537 row
= (uint32_t*)bucket
.page
+ (key
& mask
);
540 /* Swap the buckets up a level if the count has exceeded its parent's count */
541 if (bucket
.count
> bsp
[zero_bucket
+ 1].count
)
542 { bsp
[zero_bucket
] = bsp
[zero_bucket
+ 1];
543 bsp
[zero_bucket
+ 1] = bucket
;
546 bsp
[zero_bucket
].count
= bucket
.count
;
549 /* No buckets with a slot, shift the key right by depth, try again add a new bucket */
551 bucket
.depth
= 4 + (ht
->buckets_len
>> 1);
553 bytes
= ston_dht2_bytes(ht
,bucket
.depth
);
554 if ((bucket
.page
= ht
->ht_alloc(bytes
)) == NULL
)
555 { printf("Failed to allocate %lu bytes, bucket %i at with len %i\n",
556 bytes
, bucket_no
, ht
->buckets_len
);
559 memset(bucket
.page
,0,bytes
);
563 mask
= (0x1 << bucket
.depth
) - 1;
564 row
= (uint32_t*)bucket
.page
+ (key
& mask
);
568 /* Compute mask, and use it to find the row in the page */
569 mask
= (0x1 << bucket
.depth
) - 1;
570 row
= (uint32_t*)bucket
.page
+ (key
& mask
);
571 /* Look at the key at row[0], branch */
576 zero_bucket
= bucket_no
;
581 /* Inserts a value into a hashtable at the specified column, returning the
584 uint32_t ston_dht232_insert
585 ( struct ston_dht2_t
* ht
,
590 { uint32_t* value_location
, old_value
;
591 value_location
= ston_dht232_entry(ht
,key
,column
);
592 old_value
= *value_location
;
593 *value_location
= value
;
597 /* Insert multiple values, returning the position (in 32-bit units from the start of the row, not bytes) just past the last value written */
601 ( struct ston_dht2_t
* ht
,
604 uint16_t start_column
,
607 { uint32_t* data_row
= ston_dht232_row(ht
,key
);
608 uint32_t* data_limit
= data_row
+ ht
->row_bytes
;
609 uint32_t* data_trg
= data_row
+ start_column
;
610 if (data_row
== NULL
)
612 while (units
-- && data_trg
< data_limit
)
613 *data_trg
++ = *data_src
++;
614 return (size_t)(data_trg
- data_row
);
617 /* Free the dynamic hash table */
619 struct ston_dht2_t
* ston_dht2_free
620 ( struct ston_dht2_t
* ht
)
621 { void (*ht_free
)(void*) = ht
->ht_free
;
622 uint8_t bucket
= ht
->buckets_len
;
625 ht_free(ht
->buckets
[bucket
].page
);