dht implementation first round tests pass
[henge/apc.git] / ston / ston_ht.h
1 /*!@file
2 \brief STON Hash Tables
3 \details Aligned general purpose hash functions and memory definitions
4 whose columns are provided, and whose rows, and sizes, are derived.
5
6 ht_size = header.ht_columns << header.ht_2pow;
7 ht_rows = 0x1 << header.ht_2pow;
8
9 All generic hashtables in henge must have a power-of-two number of
10 rows. An ht_columns value that is also a power-of-two will result in
11 a power-of-two sized memory imprint for the structure, making it easy
12 to page align.
13
14 Elements in the columns may be of any arbitrary size.
15
16 typedef uint32_t my_ht_type;
17 ht_bytes = ht_size * sizeof(my_ht_type);
18
19 implementation covers only 32-bit unit sizes.
20
21 \author Ken Grimes
22 \date Feb 2017
23 ----------------------------------------------------------------------------*/
24 #ifndef _STON_HT_T_
25 #define _STON_HT_T_
/* Define STON_NOSTATIC to expose included function symbols.
   When it is not defined, STON_FUNC_STATIC expands to 'static'. */
#ifndef STON_NOSTATIC
#define STON_FUNC_STATIC static
#else
#define STON_FUNC_STATIC
#endif //STON_NOSTATIC
/* If GNUC is detected, uses attributes to stop inlining.
   On other compilers STON_FUNC_NOINLINE expands to nothing. */
#ifdef __GNUC__
#define STON_FUNC_NOINLINE __attribute__ ((noinline))
#else
#define STON_FUNC_NOINLINE
#endif //__GNUC__
/* Define STON_NOINLINE to prevent inline compiler hints.
   When it is not defined, STON_FUNC_INLINE expands to 'inline'. */
#ifndef STON_NOINLINE
#define STON_FUNC_INLINE inline
#else
#define STON_FUNC_INLINE
#endif //STON_NOINLINE
/* Define STON_FUNC to override the default STON Function attributes.
   The default is the combination of STON_FUNC_STATIC and STON_FUNC_INLINE,
   and is the prefix placed on the declarations and definitions below. */
#ifndef STON_FUNC
#define STON_FUNC STON_FUNC_STATIC STON_FUNC_INLINE
#endif //STON_FUNC
/* Define STON_HT_FREAD to enable ston_ht32_fread, which needs <stdio.h>,
   <errno.h> and alloca.  Without it only <stddef.h> is pulled in here. */
#ifdef STON_HT_FREAD
#include <stdio.h>
#include <errno.h>
#include <alloca.h>
/* The 'ston_ht' typedef is declared further down in this header, so the
   prototype spells out the equivalent struct pointer type instead. */
STON_FUNC_STATIC
STON_FUNC_NOINLINE
struct ston_ht_header_t* ston_ht32_fread(FILE*,long,void*(*)(size_t));
#else
#include <stddef.h>
#endif //STON_HT_FREAD
58 #include <stdint.h>
59 #include <string.h> //mem*
/* STON Hashtable Structure
   Hashtables are stored as dynamically sized two dimensional arrays.
   The table cells start directly after this header in the same allocation
   (see ston_ht_start) and are laid out row by row, ht_columns cells per row.
*/
typedef struct ston_ht_header_t
{ uint16_t ht_columns;        /* number of cells in each row */
  uint8_t ht_2pow, ht_flags;  /* ht_2pow: row count is (1 << ht_2pow);
                                 ht_flags: stored but not interpreted by the
                                 code visible in this header */
}ston_ht_h,* ston_ht;
/* Size in bytes of the header that precedes the table cells */
#define STON_HT_HEADERSIZE (sizeof(struct ston_ht_header_t))
68
69 STON_FUNC
70 uint32_t ston_up2pow(uint32_t);
71 STON_FUNC
72 uint8_t ston_trailing0(uint32_t);
73 STON_FUNC
74 ston_ht ston_ht32_create(struct ston_ht_header_t,void*(*)(size_t));
75 STON_FUNC
76 uint32_t* ston_ht32_row(ston_ht,uint32_t);
77 STON_FUNC
78 uint32_t ston_ht32_insert(ston_ht,uint32_t,uint16_t,uint32_t);
79 STON_FUNC
80 size_t ston_ht32_insertx(ston_ht,uint32_t,uint32_t*,size_t,size_t);
81
/* Accessor macros for the fixed-size hash table.
   Every macro argument is parenthesised so that expressions may be passed.
   _HT is a pointer to a table header; several macros evaluate it more than
   once, so it must not have side effects.

   ston_ht32_new    create a table of _COL columns sized for _N keys
                    (see ston_ht32_create); _F is stored in ht_flags and _FN
                    is the allocation function
   ston_ht32_entry  address of column _COL in the row for _KEY; the row
                    lookup can return NULL, in which case the result must not
                    be dereferenced
   ston_ht_size     total number of cells (columns << ht_2pow)
   ston_ht_rows     number of rows (1 << ht_2pow)
   ston_ht_cols     number of columns per row
   ston_ht_start    first byte after the header
   ston_ht_keyrow   home row index for _KEY (key masked by rows - 1)
   ston_ht32_start  first 32-bit cell
   ston_ht32_end    one past the last 32-bit cell
   ston_ht32_size   size of the cell area in bytes
*/
#define ston_ht32_new(_COL,_N,_F,_FN) (ston_ht32_create((ston_ht_h){(_COL),ston_trailing0(ston_up2pow((_N) << 1)),(_F)},(_FN)))
#define ston_ht32_entry(_HT,_KEY,_COL) (ston_ht32_row((_HT),(_KEY)) + (_COL))
#define ston_ht_size(_HT) ((_HT)->ht_columns << (_HT)->ht_2pow)
#define ston_ht_rows(_HT) (0x1 << (_HT)->ht_2pow)
#define ston_ht_cols(_HT) ((_HT)->ht_columns)
#define ston_ht_start(_HT) ((uint8_t*)((_HT) + 1))
/* Uses the _HT argument; previously this silently relied on the caller
   having a variable literally named 'ht' in scope. */
#define ston_ht_keyrow(_HT,_KEY) ((_KEY) & (ston_ht_rows(_HT) - 1))
#define ston_ht32_start(_HT) ((uint32_t*)ston_ht_start(_HT))
#define ston_ht32_end(_HT) (ston_ht32_start(_HT) + ston_ht_size(_HT))
#define ston_ht32_size(_HT) (ston_ht_size(_HT) * sizeof(uint32_t))
92
/** Returns a power of two derived from val by bit smearing.
    The value is first turned into (2 * val - 1), every bit below its highest
    set bit is filled in, and one is added.  The result is therefore the
    smallest power of two that is >= 2 * val (e.g. 3 -> 8, 4 -> 8, 5 -> 16).
    An input of 0 yields 0.
    NOTE(review): the referenced bit hack starts from (val - 1) and rounds val
    itself up; this variant starts from (val << 1) - 1.  ston_ht32_new also
    doubles its argument before calling this - confirm the extra doubling is
    intended.
    @see http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
STON_FUNC
uint32_t ston_up2pow
( uint32_t val )
{ unsigned shift;
  val = (val << 1) - 1;
  /* Propagate the highest set bit into every lower bit position */
  for (shift = 1; shift < 32; shift <<= 1)
    val |= val >> shift;
  return (uint32_t)(val + 1u);
}
105
/** Counts the zero bits below the lowest set bit of v.
    For a power of two this is its base-2 logarithm.  Returns 32 when v is 0.
    @see https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightParallel */
STON_FUNC
uint8_t ston_trailing0
( uint32_t v )
{ uint8_t c = 32;
  /* Isolate the lowest set bit.  The negation is done in unsigned arithmetic:
     negating (int32_t)0x80000000 would be signed overflow. */
  v &= (uint32_t)(0u - v);
  if (v) c--;
  if (v & 0x0000FFFF) c -= 16;
  if (v & 0x00FF00FF) c -= 8;
  if (v & 0x0F0F0F0F) c -= 4;
  if (v & 0x33333333) c -= 2;
  if (v & 0x55555555) c -= 1;
  return c;
}
120
121 /* Creates a new hash table, provided a memory allocation function that takes a
122 single size_t bytes, a column count, and a row count which determines the
123 size of the table.
124
125 use ston_ht32_new to specify the exact or estimated number of unique keys
126 held in the table. With ston_ht32_new, the provided ht_rows is doubled, and
127 rounded up to the nearest power of two to create a hash table with minimal
128 collisions.
129 */
130 STON_FUNC
131 ston_ht ston_ht32_create
132 ( struct ston_ht_header_t ht_header,
133 void* (*alloc_fn)(size_t)
134 )
135 { size_t ht_bytes = ston_ht32_size(&ht_header);
136 ston_ht ht = (ston_ht) alloc_fn(STON_HT_HEADERSIZE + ht_bytes);
137 if (ht != NULL)
138 { memcpy(ht,&ht_header,STON_HT_HEADERSIZE);
139 memset(ht + 1, 0, ht_bytes);
140 }
141 return ht;
142 }
143
#ifdef STON_HT_FREAD
/* Reads a 32-bit hash table out of the provided file at the provided fpos,
   into a buffer allocated by alloc_fn.

   file      stream to read from
   fpos      absolute offset of the table header within the stream
   alloc_fn  allocation function used for the returned table

   The table is staged on the stack (alloca) until every file operation has
   succeeded, so alloc_fn is only called once the data has been read in full.
   The stream is returned to the position it had on entry, on success and on
   failure alike.

   Returns the table, or NULL on failure.  errno is whatever the failing
   library call left behind; a short read at end of file may not set it.

   NOTE(review): the staging buffer size comes straight from the header read
   out of the file, so a corrupt or hostile header can request an arbitrarily
   large stack allocation.
*/
ston_ht ston_ht32_fread
( FILE* file,
  long fpos,
  void* (*alloc_fn)(size_t)
)
{ struct ston_ht_header_t header;
  ston_ht stack_ht, ht;
  long fpos_start;
  size_t table_size, alloc_size;
  int errno_local;
  if ((fpos_start = ftell(file)) == -1)
    return NULL;
  /* Move to the requested table position before reading anything */
  if (fseek(file, fpos, SEEK_SET) != 0)
    goto fail_seekback;
  if (fread(&header, sizeof(header), 1, file) != 1)
    goto fail_seekback;
  table_size = ston_ht32_size(&header);
  alloc_size = sizeof(header) + table_size;
  stack_ht = (ston_ht) alloca(alloc_size);
  memcpy(stack_ht, &header, sizeof(header));
  /* stack_ht + 1 is the first byte after the header (pointer arithmetic is
     in header-sized units).  A table with no cells has nothing to read. */
  if (table_size != 0 && fread(stack_ht + 1, table_size, 1, file) != 1)
    goto fail_seekback;
  if (fseek(file, fpos_start, SEEK_SET) != 0)
    return NULL;
  ht = (ston_ht) alloc_fn(alloc_size);
  if (ht != NULL)
    memcpy(ht, stack_ht, alloc_size);
  return ht;
 fail_seekback:
  /* Try to seek the file back to origin without clobbering errno */
  errno_local = errno;
  fseek(file, fpos_start, SEEK_SET);
  errno = errno_local;
  return NULL;
}
#endif
184
185 /* Returns a pointer to the row of data in the hashtable containing the provided
186 key, inserts if not found. Returns NULL on overflow.
187 */
188 STON_FUNC
189 uint32_t* ston_ht32_row
190 ( struct ston_ht_header_t* ht,
191 uint32_t key
192 )
193 { uint32_t* row;
194 uint32_t* row_start = ston_ht32_start(ht);
195 uint32_t* row_end = ston_ht32_end(ht);
196 uint16_t ht_cols = ston_ht_cols(ht);
197 size_t row_number = ston_ht_keyrow(ht,key);
198 uint8_t looped = 0;
199 row = row_start + (row_number * ht_cols);
200 next_row:
201 if (row[0] != 0)
202 goto populated;
203 write_position:
204 row[0] = key;
205 return row;
206 populated:
207 if (row[0] == key)
208 goto write_position;
209 if (row + ht_cols < row_end)
210 row += ht_cols;
211 else if (looped)
212 return NULL;
213 else
214 { looped++;
215 row = row_start;
216 }
217 goto next_row;
218 }
219
220 /* Inserts a value into a hashtable at the specified column, returning the
221 previous value */
222 STON_FUNC
223 uint32_t ston_ht32_insert
224 ( struct ston_ht_header_t* ht,
225 uint32_t key,
226 uint16_t column,
227 uint32_t value
228 )
229 { uint32_t* value_location, old_value;
230 value_location = ston_ht32_entry(ht,key,column);
231 old_value = *value_location;
232 *value_location = value;
233 return old_value;
234 }
235
236 /* Inserts a row of units into a hashtable, starting with the specified column.
237 Returns the number of elements that were written. This function will not
238 overflow internal buffers, but will return a short count (lower than the
239 provided 'units') when truncation of source data occurs. */
240 STON_FUNC
241 size_t
242 ston_ht32_insertx
243 ( struct ston_ht_header_t* ht,
244 uint32_t key,
245 uint32_t* data_src,
246 size_t start_column,
247 size_t units
248 )
249 { uint32_t* data_row = ston_ht32_row(ht,key);
250 uint32_t* data_limit = data_row + ston_ht_cols(ht);
251 uint32_t* data_trg = data_row + start_column;
252 if (data_row == NULL)
253 return 0;
254 while (units-- && data_trg < data_limit)
255 *data_trg++ = *data_src++;
256 return (size_t)(data_trg - data_row);
257 }
258
259
/* Size in bytes of the block allocated for a dynamic table's header and its
   array of page pointers (see ston_dht32_create).  May be overridden before
   including this header. */
#ifndef STON_DHT_SIZE
#define STON_DHT_SIZE 4096
#endif

/* STON Dynamic Hashtable Structure
   A dynamic form of the generic hashtable implementation above which uses
   external allocation.

   The first three fields match struct ston_ht_header_t, so the ston_ht_*
   size/row/column macros work on this structure as well.  The remainder of
   the STON_DHT_SIZE-byte block, starting directly after this header, is used
   as an array of pointers to separately allocated pages of cells.
*/
typedef struct ston_dht_header_t
{ uint16_t ht_columns;         /* number of cells in each row */
  uint8_t ht_2pow, ht_flags;   /* rows per page is (1 << ht_2pow) */
  void* (*ht_alloc)(size_t);   /* allocator used for the header and pages */
  void (*ht_free)(void*);      /* matching release function, may be NULL */
  void** page_head;            /* slot of the most recently added page */
}ston_dht_h,* ston_dht;
/* Size in bytes of the header that precedes the page pointer array */
#define STON_DHT_HEADERSIZE (sizeof(struct ston_dht_header_t))
276
277 STON_FUNC
278 ston_dht ston_dht32_create(struct ston_ht_header_t,void*(*)(size_t),void(*)(void*));
279 STON_FUNC
280 uint32_t* ston_dht32_row(ston_dht,uint32_t);
281 STON_FUNC
282 uint32_t ston_dht32_insert(ston_dht,uint32_t,uint16_t,uint32_t);
283 STON_FUNC
284 size_t ston_dht32_insertx(ston_dht,uint32_t,uint32_t*,size_t,size_t);
285 STON_FUNC
286 ston_dht ston_dht32_free(ston_dht);
287
/* Accessor macros for the dynamic hash table.
   Every macro argument is parenthesised so that expressions may be passed.
   _HT is a pointer to a dynamic table header; several macros evaluate it more
   than once, so it must not have side effects.

   ston_dht32_new       create a table of _COL columns with pages sized for
                        _N keys (see ston_dht32_create)
   ston_dht32_entry     address of column _COL in the row for _KEY; the row
                        lookup can return NULL, in which case the result must
                        not be dereferenced
   ston_dht_size/rows/cols  per-page cell count, row count, columns per row
   ston_dht_keyrow      home row index for _KEY (key masked by rows - 1)
   ston_dht_pagestart   first page pointer slot, directly after the header
   ston_dht_pagehead    slot of the most recently added page
   ston_dht_pagemax     last page pointer slot inside the header block
   ston_dht_start       first byte of page number _DEPTH
   ston_dht32_start     first 32-bit cell of page number _DEPTH
   ston_dht32_end       one past the last 32-bit cell of page number _DEPTH
   ston_dht32_size      size of one page in bytes
   ston_dht32_pagepush  advance page_head and store a newly allocated page
                        there; evaluates to the new page pointer, NULL on
                        allocation failure.  The page is NOT cleared and
                        page_head is NOT rolled back on failure.
   ston_dht32_pagepop   release the page at page_head and step page_head back
*/
#define ston_dht32_new(_COL,_N,_F,_ALLOC,_FREE) (ston_dht32_create((ston_ht_h){(_COL),ston_trailing0(ston_up2pow((_N) << 1)),(_F)},(_ALLOC),(_FREE)))
#define ston_dht32_entry(_HT,_KEY,_COL) (ston_dht32_row((_HT),(_KEY)) + (_COL))
#define ston_dht_size(_HT) (ston_ht_size(_HT))
#define ston_dht_rows(_HT) (ston_ht_rows(_HT))
#define ston_dht_cols(_HT) (ston_ht_cols(_HT))
/* Spelled out here so the result depends on _HT only */
#define ston_dht_keyrow(_HT,_KEY) ((_KEY) & (ston_dht_rows(_HT) - 1))
#define ston_dht_pagestart(_HT) ((void**)(((uint8_t*)(_HT)) + STON_DHT_HEADERSIZE))
#define ston_dht_pagehead(_HT) ((_HT)->page_head)
#define ston_dht_pagemax(_HT) ((void**)((uint8_t*)(_HT) + STON_DHT_SIZE - sizeof(void**)))
#define ston_dht_start(_HT,_DEPTH) ((uint8_t*)*(ston_dht_pagestart(_HT) + (_DEPTH)))
#define ston_dht32_start(_HT,_DEPTH) ((uint32_t*)ston_dht_start(_HT,_DEPTH))
#define ston_dht32_end(_HT,_DEPTH) (ston_dht32_start(_HT,_DEPTH) + ston_dht_size(_HT))
#define ston_dht32_size(_HT) (ston_dht_size(_HT) * sizeof(uint32_t))
#define ston_dht32_pagepush(_HT) ((*(++((_HT)->page_head)) = (_HT)->ht_alloc(ston_dht32_size(_HT))))
/* Frees the page stored in the slot, not the slot itself */
#define ston_dht32_pagepop(_HT) ((_HT)->ht_free(*((_HT)->page_head--)))
303
304 /* Creates a new bucketted hash table, provided a memory allocation function
305 that takes a single size_t bytes, a memory free function, a column count, and
306 a row count which determines the size of the buckets.
307 */
308 STON_FUNC
309 ston_dht ston_dht32_create
310 ( struct ston_ht_header_t ht_header,
311 void* (*ht_alloc)(size_t),
312 void (*ht_free)(void*)
313 )
314 { size_t ht_bytes = ston_dht32_size(&ht_header);
315 ston_dht ht = (ston_dht) ht_alloc(STON_DHT_SIZE);
316 if (ht != NULL)
317 { memcpy(ht, &ht_header, sizeof(ht_header));
318 ht->ht_alloc = ht_alloc;
319 ht->ht_free = ht_free;
320 ht->page_head = ston_dht_pagestart(ht);
321 if ((*(ht->page_head) = ht->ht_alloc(ht_bytes)) == NULL)
322 if (ht_free != NULL)
323 ht_free(ht);
324 }
325 return ht;
326 }
327
328 /* Returns a pointer to the row of data in the hashtable containing the provided
329 key, inserts if not found. Returns NULL on overflow.
330 */
331 STON_FUNC
332 uint32_t* ston_dht32_row
333 ( struct ston_dht_header_t* ht,
334 uint32_t key
335 )
336 { uint16_t ht_cols = ston_dht_cols(ht);
337 size_t row_number = ston_dht_keyrow(ht,key);
338 uint32_t** page = (uint32_t**)ston_dht_pagestart(ht);
339 uint32_t** pagemax = (uint32_t**)ston_dht_pagemax(ht);
340 uint8_t loop_x = 0;
341 uint8_t loop_y = 0;
342 uint32_t* row,* row_end;
343 next_page:
344 row = *page + (row_number * ht_cols);
345 row_end = *page + (ston_dht_size(ht) - 1);
346 next_row:
347 if (row[0] != 0)
348 goto populated;
349 write_position:
350 row[0] = key;
351 return row;
352 populated:
353 if (row[0] == key)
354 goto write_position;
355 if (!loop_x)
356 { if (page < pagemax)
357 { if (page == (uint32_t**)ston_dht_pagehead(ht))
358 if (ston_dht32_pagepush(ht) == NULL)
359 { ston_dht32_free(ht);
360 return NULL;
361 }
362 ++page;
363 goto next_row;
364 }
365 loop_x = 1;
366 row_number = (row_number + 1) % ston_dht_rows(ht);
367 page = (uint32_t**)ston_dht_pagestart(ht);
368 goto next_row;
369 }
370 if (row + ht_cols < row_end)
371 { row += ht_cols;
372 goto next_row;
373 }
374 else if (!loop_y)
375 { loop_y = 1;
376 row = *page;
377 goto next_row;
378 }
379 if (page < pagemax)
380 { loop_y = 0;
381 page++;
382 goto next_page;
383 }
384 return NULL;
385 }
386
387 /* Inserts a value into a hashtable at the specified column, returning the
388 previous value */
389 STON_FUNC
390 uint32_t ston_dht32_insert
391 ( struct ston_dht_header_t* ht,
392 uint32_t key,
393 uint16_t column,
394 uint32_t value
395 )
396 { uint32_t* value_location, old_value;
397 value_location = ston_dht32_entry(ht,key,column);
398 old_value = *value_location;
399 *value_location = value;
400 return old_value;
401 }
402
403 /* Free the dynamic hash table */
404 STON_FUNC
405 struct ston_dht_header_t* ston_dht32_free
406 ( struct ston_dht_header_t* ht )
407 { void (*ht_free)(void*) = ht->ht_free;
408 if (ht_free != NULL)
409 { while (ht->page_head >= ston_dht_pagestart(ht))
410 { ht_free(*(ht->page_head));
411 ht->page_head--;
412 }
413 ht_free(ht);
414 return NULL;
415 }
416 return ht;
417 }
418
419 /* Insert multiple values, returning the number of bytes written */
420 STON_FUNC
421 size_t
422 ston_dht32_insertx
423 ( struct ston_dht_header_t* ht,
424 uint32_t key,
425 uint32_t* data_src,
426 size_t start_column,
427 size_t units
428 )
429 { uint32_t* data_row = ston_dht32_row(ht,key);
430 uint32_t* data_limit = data_row + ston_dht_cols(ht);
431 uint32_t* data_trg = data_row + start_column;
432 if (data_row == NULL)
433 return 0;
434 while (units-- && data_trg < data_limit)
435 *data_trg++ = *data_src++;
436 return (size_t)(data_trg - data_row);
437 }
438
439
440 #endif //_STON_HT_H_