Ruby 3.2.3p157 (2024-01-18 revision 52bb2ac0a6971d0391efa2275f7a66bff319087c)
st.c
1/* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
3
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6
7/* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
14
15 The hash table has two arrays called *bins* and *entries*.
16
17 bins:
18 -------
19 | | entries array:
20 |-------| --------------------------------
21 | index | | | entry: | | |
22 |-------| | | | | |
23 | ... | | ... | hash | ... | ... |
24 |-------| | | key | | |
25 | empty | | | record | | |
26 |-------| --------------------------------
27 | ... | ^ ^
28 |-------| |_ entries start |_ entries bound
29 |deleted|
30 -------
31
32 o The entry array contains table entries in the same order as they
33 were inserted.
34
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
39
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
42
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
46
47 The bin array size is always a power of two, which makes mapping very
48 fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash.
50 But the alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers is extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
55
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
64
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
69
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
72
73 o The length of the bin array is at least two times more than the
74 entry array length. This keeps the table load factor healthy.
75 The trigger of rebuilding the table is always a case when we can
76 not insert an entry anymore at the entries bound. We could
77 change the entries bound too in case of deletion, but then we would
78 need special code to count bins with corresponding deleted entries
79 and reset the bin values when there are too many bins
80 corresponding to deleted entries.
81
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
86
87 o To save memory very small tables have no allocated arrays
88 bins. We use a linear search for an access by a key.
89
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
92
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
97
98 This implementation speeds up the Ruby hash table benchmarks
99 by more than 40% on average on an Intel Haswell CPU.
100
101*/
102
103#ifdef NOT_RUBY
104#include "regint.h"
105#include "st.h"
106#else
107#include "internal.h"
108#include "internal/bits.h"
109#include "internal/hash.h"
110#include "internal/sanitizers.h"
111#endif
112
113#include <stdio.h>
114#ifdef HAVE_STDLIB_H
115#include <stdlib.h>
116#endif
117#include <string.h>
118#include <assert.h>
119
/* Compiler hints.  With GCC-compatible compilers PREFETCH and EXPECT
   map onto the corresponding builtins and ATTRIBUTE_UNUSED onto the
   `unused` attribute; elsewhere PREFETCH and ATTRIBUTE_UNUSED expand
   to nothing and EXPECT is just its expression. */
#if defined(__GNUC__)
# define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
# define EXPECT(expr, val) __builtin_expect(expr, val)
# define ATTRIBUTE_UNUSED __attribute__((unused))
#else
# define PREFETCH(addr, write_p)
# define EXPECT(expr, val) (expr)
# define ATTRIBUTE_UNUSED
#endif
129
130/* The type of hashes. */
131typedef st_index_t st_hash_t;
132
134 st_hash_t hash;
135 st_data_t key;
136 st_data_t record;
137};
138
139#define type_numhash st_hashtype_num
140static const struct st_hash_type st_hashtype_num = {
141 st_numcmp,
142 st_numhash,
143};
144
145static int st_strcmp(st_data_t, st_data_t);
146static st_index_t strhash(st_data_t);
147static const struct st_hash_type type_strhash = {
148 st_strcmp,
149 strhash,
150};
151
152static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
153static st_index_t strcasehash(st_data_t);
154static const struct st_hash_type type_strcasehash = {
155 st_locale_insensitive_strcasecmp_i,
156 strcasehash,
157};
158
/* Value used to catch uninitialized entries/bins during debugging.
   There is a possibility for a false alarm, but its probability is
   extremely small.  ST_INIT_VAL_BYTE is the single byte that
   ST_INIT_VAL repeats, so filling memory with it byte by byte (e.g.
   with memset) reproduces ST_INIT_VAL; the previous value 0xafa did
   not fit in a byte and would have been truncated to 0xfa. */
#define ST_INIT_VAL 0xafafafafafafafaf
#define ST_INIT_VAL_BYTE 0xaf
164
/* When RUBY is defined, route every allocation in this file through
   the ruby_x* allocator functions instead of the C library ones. */
#if defined(RUBY)
# undef malloc
# undef realloc
# undef calloc
# undef free
# define malloc ruby_xmalloc
# define calloc ruby_xcalloc
# define realloc ruby_xrealloc
# define free ruby_xfree
#endif
175
/* Keys X and Y of table TAB are equal when they are the same value or
   when the table's compare callback returns 0 for them.  The callback
   is only invoked when the values differ. */
#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)

/* Entry PTR matches when its stored hash equals HASH_VAL and its key
   is EQUAL to KEY_. */
#define PTR_EQUAL(tab, ptr, hash_val, key_) \
    ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))

/* As PTR_EQUAL, but the result is stored in RES, and REBUILT_P is set
   to non-zero if TAB's rebuild counter changed while the comparison
   ran (i.e. the table was rebuilt during it). */
#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
    do { \
        unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
        res = PTR_EQUAL(tab, ptr, hash_val, key); \
        rebuilt_p = (tab)->rebuilds_num != _old_rebuilds_num; \
    } while (0)
188
189/* Features of a table. */
191 /* Power of 2 used for number of allocated entries. */
192 unsigned char entry_power;
193 /* Power of 2 used for number of allocated bins. Depending on the
194 table size, the number of bins is 2-4 times more than the
195 number of entries. */
196 unsigned char bin_power;
197 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
198 unsigned char size_ind;
199 /* Bins are packed in words of type st_index_t. The following is
200 a size of bins counted by words. */
201 st_index_t bins_words;
202};
203
204/* Features of all possible size tables.  The array is indexed by
   the entry power of a table (features[n] describes a table with 2^n
   allocated entries) and each row lists, in struct st_features member
   order, {entry_power, bin_power, size_ind, bins_words}.  MAX_POWER2
   is the largest entry power that has a row.  Two variants exist
   because bins_words is counted in st_index_t words, whose size
   depends on SIZEOF_ST_INDEX_T. */
205#if SIZEOF_ST_INDEX_T == 8
206#define MAX_POWER2 62
/* Variant for an 8-byte st_index_t. */
207static const struct st_features features[] = {
208 {0, 1, 0, 0x0},
209 {1, 2, 0, 0x1},
210 {2, 3, 0, 0x1},
211 {3, 4, 0, 0x2},
212 {4, 5, 0, 0x4},
213 {5, 6, 0, 0x8},
214 {6, 7, 0, 0x10},
215 {7, 8, 0, 0x20},
216 {8, 9, 1, 0x80},
217 {9, 10, 1, 0x100},
218 {10, 11, 1, 0x200},
219 {11, 12, 1, 0x400},
220 {12, 13, 1, 0x800},
221 {13, 14, 1, 0x1000},
222 {14, 15, 1, 0x2000},
223 {15, 16, 1, 0x4000},
224 {16, 17, 2, 0x10000},
225 {17, 18, 2, 0x20000},
226 {18, 19, 2, 0x40000},
227 {19, 20, 2, 0x80000},
228 {20, 21, 2, 0x100000},
229 {21, 22, 2, 0x200000},
230 {22, 23, 2, 0x400000},
231 {23, 24, 2, 0x800000},
232 {24, 25, 2, 0x1000000},
233 {25, 26, 2, 0x2000000},
234 {26, 27, 2, 0x4000000},
235 {27, 28, 2, 0x8000000},
236 {28, 29, 2, 0x10000000},
237 {29, 30, 2, 0x20000000},
238 {30, 31, 2, 0x40000000},
239 {31, 32, 2, 0x80000000},
240 {32, 33, 3, 0x200000000},
241 {33, 34, 3, 0x400000000},
242 {34, 35, 3, 0x800000000},
243 {35, 36, 3, 0x1000000000},
244 {36, 37, 3, 0x2000000000},
245 {37, 38, 3, 0x4000000000},
246 {38, 39, 3, 0x8000000000},
247 {39, 40, 3, 0x10000000000},
248 {40, 41, 3, 0x20000000000},
249 {41, 42, 3, 0x40000000000},
250 {42, 43, 3, 0x80000000000},
251 {43, 44, 3, 0x100000000000},
252 {44, 45, 3, 0x200000000000},
253 {45, 46, 3, 0x400000000000},
254 {46, 47, 3, 0x800000000000},
255 {47, 48, 3, 0x1000000000000},
256 {48, 49, 3, 0x2000000000000},
257 {49, 50, 3, 0x4000000000000},
258 {50, 51, 3, 0x8000000000000},
259 {51, 52, 3, 0x10000000000000},
260 {52, 53, 3, 0x20000000000000},
261 {53, 54, 3, 0x40000000000000},
262 {54, 55, 3, 0x80000000000000},
263 {55, 56, 3, 0x100000000000000},
264 {56, 57, 3, 0x200000000000000},
265 {57, 58, 3, 0x400000000000000},
266 {58, 59, 3, 0x800000000000000},
267 {59, 60, 3, 0x1000000000000000},
268 {60, 61, 3, 0x2000000000000000},
269 {61, 62, 3, 0x4000000000000000},
270 {62, 63, 3, 0x8000000000000000},
271};
272
273#else
274#define MAX_POWER2 30
275
/* Variant for any other st_index_t size; no row uses size_ind 3. */
276static const struct st_features features[] = {
277 {0, 1, 0, 0x1},
278 {1, 2, 0, 0x1},
279 {2, 3, 0, 0x2},
280 {3, 4, 0, 0x4},
281 {4, 5, 0, 0x8},
282 {5, 6, 0, 0x10},
283 {6, 7, 0, 0x20},
284 {7, 8, 0, 0x40},
285 {8, 9, 1, 0x100},
286 {9, 10, 1, 0x200},
287 {10, 11, 1, 0x400},
288 {11, 12, 1, 0x800},
289 {12, 13, 1, 0x1000},
290 {13, 14, 1, 0x2000},
291 {14, 15, 1, 0x4000},
292 {15, 16, 1, 0x8000},
293 {16, 17, 2, 0x20000},
294 {17, 18, 2, 0x40000},
295 {18, 19, 2, 0x80000},
296 {19, 20, 2, 0x100000},
297 {20, 21, 2, 0x200000},
298 {21, 22, 2, 0x400000},
299 {22, 23, 2, 0x800000},
300 {23, 24, 2, 0x1000000},
301 {24, 25, 2, 0x2000000},
302 {25, 26, 2, 0x4000000},
303 {26, 27, 2, 0x8000000},
304 {27, 28, 2, 0x10000000},
305 {28, 29, 2, 0x20000000},
306 {29, 30, 2, 0x40000000},
307 {30, 31, 2, 0x80000000},
308};
309
310#endif
311
312/* The reserved hash value and its substitution. */
313#define RESERVED_HASH_VAL (~(st_hash_t) 0)
314#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
315
316/* Return hash value of KEY for table TAB. */
317static inline st_hash_t
318do_hash(st_data_t key, st_table *tab)
319{
320 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
321
322 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
323 another value. Such mapping should be extremely rare. */
324 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
325}
326
327/* Power of 2 defining the minimal number of allocated entries. */
328#define MINIMAL_POWER2 2
329
330#if MINIMAL_POWER2 < 2
331#error "MINIMAL_POWER2 should be >= 2"
332#endif
333
334/* If the power2 of the allocated `entries` is less than the following
335 value, don't allocate bins and use a linear search. */
336#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
337
338/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
339static int
340get_power2(st_index_t size)
341{
342 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
343 if (n <= MAX_POWER2)
344 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
345#ifndef NOT_RUBY
346 /* Ran out of the table entries */
347 rb_raise(rb_eRuntimeError, "st_table too big");
348#endif
349 /* should raise exception */
350 return -1;
351}
352
353/* Return value of N-th bin in array BINS of table with bins size
354 index S. */
355static inline st_index_t
356get_bin(st_index_t *bins, int s, st_index_t n)
357{
358 return (s == 0 ? ((unsigned char *) bins)[n]
359 : s == 1 ? ((unsigned short *) bins)[n]
360 : s == 2 ? ((unsigned int *) bins)[n]
361 : ((st_index_t *) bins)[n]);
362}
363
364/* Set up N-th bin in array BINS of table with bins size index S to
365 value V. */
366static inline void
367set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
368{
369 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
370 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
371 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
372 else ((st_index_t *) bins)[n] = v;
373}
374
/* Reserved bin values: EMPTY_BIN for a bin that refers to no entry and
   DELETED_BIN for a bin whose entry was deleted.  Neither is ever
   used as an entry index stored in a bin. */
#define EMPTY_BIN 0
#define DELETED_BIN 1
/* Base of a real entry index in the bins: entry I is stored in a bin
   as I + ENTRY_BASE. */
#define ENTRY_BASE 2

/* Results meaning "no such entry" and "no such bin". */
#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
#define UNDEFINED_BIN_IND (~(st_index_t) 0)

/* Results meaning "the table was rebuilt during the search". */
#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)

/* Mark I-th bin of table TAB as empty, in other words not
   corresponding to any entry. */
#define MARK_BIN_EMPTY(tab, i) \
    (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))

/* Mark I-th bin of table TAB as corresponding to a deleted table
   entry.  Only the bin is written; no counters are touched. */
#define MARK_BIN_DELETED(tab, i) \
    do { \
        set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
    } while (0)

/* Predicates on a bin value B: is it the empty marker, the deleted
   marker, or either of the two. */
#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)

/* The same predicates for the bin with index I in table TAB. */
#define IND_EMPTY_BIN_P(tab, i) \
    (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
#define IND_DELETED_BIN_P(tab, i) \
    (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) \
    (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))

/* Mark the entry pointed to by E_PTR as deleted by storing the
   reserved hash value in it, and test for that mark. */
#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
421
422/* Return bin size index of table TAB. */
423static inline unsigned int
424get_size_ind(const st_table *tab)
425{
426 return tab->size_ind;
427}
428
429/* Return the number of allocated bins of table TAB. */
430static inline st_index_t
431get_bins_num(const st_table *tab)
432{
433 return ((st_index_t) 1)<<tab->bin_power;
434}
435
436/* Return mask for a bin index in table TAB. */
437static inline st_index_t
438bins_mask(const st_table *tab)
439{
440 return get_bins_num(tab) - 1;
441}
442
443/* Return the index of table TAB bin corresponding to
444 HASH_VALUE. */
445static inline st_index_t
446hash_bin(st_hash_t hash_value, st_table *tab)
447{
448 return hash_value & bins_mask(tab);
449}
450
451/* Return the number of allocated entries of table TAB. */
452static inline st_index_t
453get_allocated_entries(const st_table *tab)
454{
455 return ((st_index_t) 1)<<tab->entry_power;
456}
457
458/* Return size of the allocated bins of table TAB. */
459static inline st_index_t
460bins_size(const st_table *tab)
461{
462 return features[tab->entry_power].bins_words * sizeof (st_index_t);
463}
464
465/* Mark all bins of table TAB as empty. */
466static void
467initialize_bins(st_table *tab)
468{
469 memset(tab->bins, 0, bins_size(tab));
470}
471
472/* Make table TAB empty. */
473static void
474make_tab_empty(st_table *tab)
475{
476 tab->num_entries = 0;
477 tab->entries_start = tab->entries_bound = 0;
478 if (tab->bins != NULL)
479 initialize_bins(tab);
480}
481
#ifdef HASH_LOG
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
/* Counters of the package statistics: `total` counts searches, `all`
   counts collisions, and `num`, `str` and `strcase` break collisions
   down by hash type. */
static struct {
    int all, total, num, str, strcase;
} collision;

/* Non-zero once the statistics output has been either registered or
   switched off; st_init_table_with_size registers stat_col with
   atexit only while this is still zero. */
static int init_st = 0;

/* Write the overall number of table searches and collisions to the
   file /tmp/col<pid>.  Nothing is written when no search was counted
   or when the file cannot be opened. */
static void
stat_col(void)
{
    char fname[10+sizeof(long)*3];
    FILE *out;

    if (collision.total == 0)
        return;
    snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid());
    out = fopen(fname, "w");
    if (out == NULL)
        return;
    fprintf(out, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
            ((double)collision.all / (collision.total)) * 100);
    fprintf(out, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
    fclose(out);
}
#endif
511
512/* Create and return table with TYPE which can hold at least SIZE
513 entries. The real number of entries which the table can hold is
514 the nearest power of two for SIZE. */
515st_table *
516st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
517{
518 st_table *tab;
519 int n;
520
521#ifdef HASH_LOG
522#if HASH_LOG+0 < 0
523 {
524 const char *e = getenv("ST_HASH_LOG");
525 if (!e || !*e) init_st = 1;
526 }
527#endif
528 if (init_st == 0) {
529 init_st = 1;
530 atexit(stat_col);
531 }
532#endif
533
534 n = get_power2(size);
535#ifndef RUBY
536 if (n < 0)
537 return NULL;
538#endif
539 tab = (st_table *) malloc(sizeof (st_table));
540#ifndef RUBY
541 if (tab == NULL)
542 return NULL;
543#endif
544 tab->type = type;
545 tab->entry_power = n;
546 tab->bin_power = features[n].bin_power;
547 tab->size_ind = features[n].size_ind;
548 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
549 tab->bins = NULL;
550 else {
551 tab->bins = (st_index_t *) malloc(bins_size(tab));
552#ifndef RUBY
553 if (tab->bins == NULL) {
554 free(tab);
555 return NULL;
556 }
557#endif
558 }
559 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
560 * sizeof(st_table_entry));
561#ifndef RUBY
562 if (tab->entries == NULL) {
563 st_free_table(tab);
564 return NULL;
565 }
566#endif
567 make_tab_empty(tab);
568 tab->rebuilds_num = 0;
569 return tab;
570}
571
572size_t
573st_table_size(const struct st_table *tbl)
574{
575 return tbl->num_entries;
576}
577
578/* Create and return table with TYPE which can hold a minimal number
579 of entries (see comments for get_power2). */
580st_table *
581st_init_table(const struct st_hash_type *type)
582{
583 return st_init_table_with_size(type, 0);
584}
585
586/* Create and return table which can hold a minimal number of
587 numbers. */
588st_table *
589st_init_numtable(void)
590{
591 return st_init_table(&type_numhash);
592}
593
594/* Create and return table which can hold SIZE numbers. */
595st_table *
596st_init_numtable_with_size(st_index_t size)
597{
598 return st_init_table_with_size(&type_numhash, size);
599}
600
601/* Create and return table which can hold a minimal number of
602 strings. */
603st_table *
604st_init_strtable(void)
605{
606 return st_init_table(&type_strhash);
607}
608
609/* Create and return table which can hold SIZE strings. */
610st_table *
611st_init_strtable_with_size(st_index_t size)
612{
613 return st_init_table_with_size(&type_strhash, size);
614}
615
616/* Create and return table which can hold a minimal number of strings
617 whose character case is ignored. */
618st_table *
619st_init_strcasetable(void)
620{
621 return st_init_table(&type_strcasehash);
622}
623
624/* Create and return table which can hold SIZE strings whose character
625 case is ignored. */
626st_table *
627st_init_strcasetable_with_size(st_index_t size)
628{
629 return st_init_table_with_size(&type_strcasehash, size);
630}
631
632/* Make table TAB empty. */
633void
634st_clear(st_table *tab)
635{
636 make_tab_empty(tab);
637 tab->rebuilds_num++;
638}
639
640/* Free table TAB space. */
641void
642st_free_table(st_table *tab)
643{
644 if (tab->bins != NULL)
645 free(tab->bins);
646 free(tab->entries);
647 free(tab);
648}
649
650/* Return byte size of memory allocated for table TAB. */
651size_t
652st_memsize(const st_table *tab)
653{
654 return(sizeof(st_table)
655 + (tab->bins == NULL ? 0 : bins_size(tab))
656 + get_allocated_entries(tab) * sizeof(st_table_entry));
657}
658
659static st_index_t
660find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
661
662static st_index_t
663find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
664
665static st_index_t
666find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
667
668static st_index_t
669find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
670 st_data_t key, st_index_t *bin_ind);
671
#ifdef HASH_LOG
/* Count one collision for a table of hash type TYPE: always in the
   overall counter, and additionally in the counter of the built-in
   hash type (numbers, strings, case-insensitive strings) when TYPE is
   one of them.  The string and case-insensitive string counters used
   to be swapped, which made stat_col report them under each other's
   label. */
static void
count_collision(const struct st_hash_type *type)
{
    collision.all++;
    if (type == &type_numhash) {
        collision.num++;
    }
    else if (type == &type_strhash) {
        collision.str++;
    }
    else if (type == &type_strcasehash) {
        collision.strcase++;
    }
}

/* Compile-time switch for the two macros below; while it is 0 neither
   of them counts anything. */
#define collision_check 0
/* COLLISION is placed after each probe step and expects a variable
   `tab` in scope.  FOUND_BIN is placed at the start of each search.
   Both arms of each conditional are of type void, as the conditional
   operator requires. */
#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
#define FOUND_BIN (collision_check ? (void)collision.total++ : (void)0)
#else
#define COLLISION
#define FOUND_BIN
#endif
695
696/* If the number of entries in the table is at least REBUILD_THRESHOLD
697 times less than the entry array length, decrease the table
698 size. */
699#define REBUILD_THRESHOLD 4
700
701#if REBUILD_THRESHOLD < 2
702#error "REBUILD_THRESHOLD should be >= 2"
703#endif
704
705static void rebuild_table_with(st_table *new_tab, st_table *tab);
706
707/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
708 and can change size of the table entries and bins arrays.
709 Rebuilding is implemented by creation of a new table or by
710 compaction of the existing one. */
711static void
712rebuild_table(st_table *tab)
713{
714 if ((2 * tab->num_entries <= get_allocated_entries(tab)
715 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
716 || tab->num_entries < (1 << MINIMAL_POWER2)) {
717 /* Compaction: */
718 tab->num_entries = 0;
719 if (tab->bins != NULL)
720 initialize_bins(tab);
721 rebuild_table_with(tab, tab);
722 }
723 else {
724 st_table *new_tab;
725 /* This allocation could trigger GC and compaction. If tab is the
726 * gen_iv_tbl, then tab could have changed in size due to objects being
727 * freed and/or moved. Do not store attributes of tab before this line. */
728 new_tab = st_init_table_with_size(tab->type,
729 2 * tab->num_entries - 1);
730 rebuild_table_with(new_tab, tab);
731 }
732}
733
734static void
735rebuild_table_with(st_table *new_tab, st_table *tab)
736{
737 st_index_t i, ni;
738 unsigned int size_ind;
739 st_table_entry *new_entries;
740 st_table_entry *curr_entry_ptr;
741 st_index_t *bins;
742 st_index_t bin_ind;
743
744 new_entries = new_tab->entries;
745
746 ni = 0;
747 bins = new_tab->bins;
748 size_ind = get_size_ind(new_tab);
749 st_index_t bound = tab->entries_bound;
750 st_table_entry *entries = tab->entries;
751
752 for (i = tab->entries_start; i < bound; i++) {
753 curr_entry_ptr = &entries[i];
754 PREFETCH(entries + i + 1, 0);
755 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
756 continue;
757 if (&new_entries[ni] != curr_entry_ptr)
758 new_entries[ni] = *curr_entry_ptr;
759 if (EXPECT(bins != NULL, 1)) {
760 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
761 curr_entry_ptr->key);
762 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
763 }
764 new_tab->num_entries++;
765 ni++;
766 }
767 if (new_tab != tab) {
768 tab->entry_power = new_tab->entry_power;
769 tab->bin_power = new_tab->bin_power;
770 tab->size_ind = new_tab->size_ind;
771 if (tab->bins != NULL)
772 free(tab->bins);
773 tab->bins = new_tab->bins;
774 free(tab->entries);
775 tab->entries = new_tab->entries;
776 free(new_tab);
777 }
778 tab->entries_start = 0;
779 tab->entries_bound = tab->num_entries;
780 tab->rebuilds_num++;
781}
782
783/* Return the next secondary hash index for table TAB using previous
784 index IND and PERTERB. Finally modulo of the function becomes a
785 full *cycle linear congruential generator*, in other words it
786 guarantees traversing all table bins in extreme case.
787
788 According the Hull-Dobell theorem a generator
789 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
790 o m and c are relatively prime
791 o a-1 is divisible by all prime factors of m
792 o a-1 is divisible by 4 if m is divisible by 4.
793
794 For our case a is 5, c is 1, and m is a power of two. */
795static inline st_index_t
796secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb)
797{
798 *perterb >>= 11;
799 ind = (ind << 2) + ind + *perterb + 1;
800 return hash_bin(ind, tab);
801}
802
803/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
804 search. Return the index of the found entry in array `entries`.
805 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
806 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
807static inline st_index_t
808find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
809{
810 int eq_p, rebuilt_p;
811 st_index_t i, bound;
812 st_table_entry *entries;
813
814 bound = tab->entries_bound;
815 entries = tab->entries;
816 for (i = tab->entries_start; i < bound; i++) {
817 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
818 if (EXPECT(rebuilt_p, 0))
819 return REBUILT_TABLE_ENTRY_IND;
820 if (eq_p)
821 return i;
822 }
823 return UNDEFINED_ENTRY_IND;
824}
825
826/* Use the quadratic probing. The method has a better data locality
827 but more collisions than the current approach. On average it
828 results in a bit slower search. */
829/*#define QUADRATIC_PROBE*/
830
831/* Return index of entry with HASH_VALUE and KEY in table TAB. If
832 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
833 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
834static st_index_t
835find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
836{
837 int eq_p, rebuilt_p;
838 st_index_t ind;
839#ifdef QUADRATIC_PROBE
840 st_index_t d;
841#else
842 st_index_t peterb;
843#endif
844 st_index_t bin;
845 st_table_entry *entries = tab->entries;
846
847 ind = hash_bin(hash_value, tab);
848#ifdef QUADRATIC_PROBE
849 d = 1;
850#else
851 peterb = hash_value;
852#endif
853 FOUND_BIN;
854 for (;;) {
855 bin = get_bin(tab->bins, get_size_ind(tab), ind);
856 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
857 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
858 if (EXPECT(rebuilt_p, 0))
859 return REBUILT_TABLE_ENTRY_IND;
860 if (eq_p)
861 break;
862 }
863 else if (EMPTY_BIN_P(bin))
864 return UNDEFINED_ENTRY_IND;
865#ifdef QUADRATIC_PROBE
866 ind = hash_bin(ind + d, tab);
867 d++;
868#else
869 ind = secondary_hash(ind, tab, &peterb);
870#endif
871 COLLISION;
872 }
873 return bin;
874}
875
876/* Find and return index of table TAB bin corresponding to an entry
877 with HASH_VALUE and KEY. If there is no such bin, return
878 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
879 return REBUILT_TABLE_BIN_IND. */
880static st_index_t
881find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
882{
883 int eq_p, rebuilt_p;
884 st_index_t ind;
885#ifdef QUADRATIC_PROBE
886 st_index_t d;
887#else
888 st_index_t peterb;
889#endif
890 st_index_t bin;
891 st_table_entry *entries = tab->entries;
892
893 ind = hash_bin(hash_value, tab);
894#ifdef QUADRATIC_PROBE
895 d = 1;
896#else
897 peterb = hash_value;
898#endif
899 FOUND_BIN;
900 for (;;) {
901 bin = get_bin(tab->bins, get_size_ind(tab), ind);
902 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
903 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
904 if (EXPECT(rebuilt_p, 0))
905 return REBUILT_TABLE_BIN_IND;
906 if (eq_p)
907 break;
908 }
909 else if (EMPTY_BIN_P(bin))
910 return UNDEFINED_BIN_IND;
911#ifdef QUADRATIC_PROBE
912 ind = hash_bin(ind + d, tab);
913 d++;
914#else
915 ind = secondary_hash(ind, tab, &peterb);
916#endif
917 COLLISION;
918 }
919 return ind;
920}
921
922/* Find and return index of table TAB bin corresponding to an entry
923 with HASH_VALUE and KEY. The entry should be in the table
924 already. */
925static st_index_t
926find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
927{
928 st_index_t ind;
929#ifdef QUADRATIC_PROBE
930 st_index_t d;
931#else
932 st_index_t peterb;
933#endif
934 st_index_t bin;
935
936 ind = hash_bin(hash_value, tab);
937#ifdef QUADRATIC_PROBE
938 d = 1;
939#else
940 peterb = hash_value;
941#endif
942 FOUND_BIN;
943 for (;;) {
944 bin = get_bin(tab->bins, get_size_ind(tab), ind);
945 if (EMPTY_OR_DELETED_BIN_P(bin))
946 return ind;
947#ifdef QUADRATIC_PROBE
948 ind = hash_bin(ind + d, tab);
949 d++;
950#else
951 ind = secondary_hash(ind, tab, &peterb);
952#endif
953 COLLISION;
954 }
955}
956
957/* Return index of table TAB bin for HASH_VALUE and KEY through
958 BIN_IND and the pointed value as the function result. Reserve the
959 bin for inclusion of the corresponding entry into the table if it
960 is not there yet. We always find such bin as bins array length is
961 bigger entries array. Although we can reuse a deleted bin, the
962 result bin value is always empty if the table has no entry with
963 KEY. Return the entries array index of the found entry or
964 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
965 during the search, return REBUILT_TABLE_ENTRY_IND. */
966static st_index_t
967find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
968 st_data_t key, st_index_t *bin_ind)
969{
970 int eq_p, rebuilt_p;
971 st_index_t ind;
972 st_hash_t curr_hash_value = *hash_value;
973#ifdef QUADRATIC_PROBE
974 st_index_t d;
975#else
976 st_index_t peterb;
977#endif
978 st_index_t entry_index;
979 st_index_t first_deleted_bin_ind;
980 st_table_entry *entries;
981
982 ind = hash_bin(curr_hash_value, tab);
983#ifdef QUADRATIC_PROBE
984 d = 1;
985#else
986 peterb = curr_hash_value;
987#endif
988 FOUND_BIN;
989 first_deleted_bin_ind = UNDEFINED_BIN_IND;
990 entries = tab->entries;
991 for (;;) {
992 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
993 if (EMPTY_BIN_P(entry_index)) {
994 tab->num_entries++;
995 entry_index = UNDEFINED_ENTRY_IND;
996 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
997 /* We can reuse bin of a deleted entry. */
998 ind = first_deleted_bin_ind;
999 MARK_BIN_EMPTY(tab, ind);
1000 }
1001 break;
1002 }
1003 else if (! DELETED_BIN_P(entry_index)) {
1004 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1005 if (EXPECT(rebuilt_p, 0))
1006 return REBUILT_TABLE_ENTRY_IND;
1007 if (eq_p)
1008 break;
1009 }
1010 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1011 first_deleted_bin_ind = ind;
1012#ifdef QUADRATIC_PROBE
1013 ind = hash_bin(ind + d, tab);
1014 d++;
1015#else
1016 ind = secondary_hash(ind, tab, &peterb);
1017#endif
1018 COLLISION;
1019 }
1020 *bin_ind = ind;
1021 return entry_index;
1022}
1023
1024/* Find an entry with KEY in table TAB. Return non-zero if we found
1025 it. Set up *RECORD to the found entry record. */
1026int
1027st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1028{
1029 st_index_t bin;
1030 st_hash_t hash = do_hash(key, tab);
1031
1032 retry:
1033 if (tab->bins == NULL) {
1034 bin = find_entry(tab, hash, key);
1035 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1036 goto retry;
1037 if (bin == UNDEFINED_ENTRY_IND)
1038 return 0;
1039 }
1040 else {
1041 bin = find_table_entry_ind(tab, hash, key);
1042 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1043 goto retry;
1044 if (bin == UNDEFINED_ENTRY_IND)
1045 return 0;
1046 bin -= ENTRY_BASE;
1047 }
1048 if (value != 0)
1049 *value = tab->entries[bin].record;
1050 return 1;
1051}
1052
1053/* Find an entry with KEY in table TAB. Return non-zero if we found
1054 it. Set up *RESULT to the found table entry key. */
1055int
1056st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1057{
1058 st_index_t bin;
1059 st_hash_t hash = do_hash(key, tab);
1060
1061 retry:
1062 if (tab->bins == NULL) {
1063 bin = find_entry(tab, hash, key);
1064 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1065 goto retry;
1066 if (bin == UNDEFINED_ENTRY_IND)
1067 return 0;
1068 }
1069 else {
1070 bin = find_table_entry_ind(tab, hash, key);
1071 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1072 goto retry;
1073 if (bin == UNDEFINED_ENTRY_IND)
1074 return 0;
1075 bin -= ENTRY_BASE;
1076 }
1077 if (result != 0)
1078 *result = tab->entries[bin].key;
1079 return 1;
1080}
1081
1082/* Check the table and rebuild it if it is necessary. */
1083static inline void
1084rebuild_table_if_necessary (st_table *tab)
1085{
1086 st_index_t bound = tab->entries_bound;
1087
1088 if (bound == get_allocated_entries(tab))
1089 rebuild_table(tab);
1090}
1091
1092/* Insert (KEY, VALUE) into table TAB and return zero. If there is
1093 already entry with KEY in the table, return nonzero and update
1094 the value of the found entry. */
1095int
1096st_insert(st_table *tab, st_data_t key, st_data_t value)
1097{
1098 st_table_entry *entry;
1099 st_index_t bin;
1100 st_index_t ind;
1101 st_hash_t hash_value;
1102 st_index_t bin_ind;
1103 int new_p;
1104
1105 hash_value = do_hash(key, tab);
1106 retry:
1107 rebuild_table_if_necessary(tab);
1108 if (tab->bins == NULL) {
1109 bin = find_entry(tab, hash_value, key);
1110 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1111 goto retry;
1112 new_p = bin == UNDEFINED_ENTRY_IND;
1113 if (new_p)
1114 tab->num_entries++;
1115 bin_ind = UNDEFINED_BIN_IND;
1116 }
1117 else {
1118 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1119 key, &bin_ind);
1120 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1121 goto retry;
1122 new_p = bin == UNDEFINED_ENTRY_IND;
1123 bin -= ENTRY_BASE;
1124 }
1125 if (new_p) {
1126 ind = tab->entries_bound++;
1127 entry = &tab->entries[ind];
1128 entry->hash = hash_value;
1129 entry->key = key;
1130 entry->record = value;
1131 if (bin_ind != UNDEFINED_BIN_IND)
1132 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1133 return 0;
1134 }
1135 tab->entries[bin].record = value;
1136 return 1;
1137}
1138
1139/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1140 entry with KEY before the insertion. */
1141static inline void
1142st_add_direct_with_hash(st_table *tab,
1143 st_data_t key, st_data_t value, st_hash_t hash)
1144{
1145 st_table_entry *entry;
1146 st_index_t ind;
1147 st_index_t bin_ind;
1148
1149 rebuild_table_if_necessary(tab);
1150 ind = tab->entries_bound++;
1151 entry = &tab->entries[ind];
1152 entry->hash = hash;
1153 entry->key = key;
1154 entry->record = value;
1155 tab->num_entries++;
1156 if (tab->bins != NULL) {
1157 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1158 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1159 }
1160}
1161
1162/* Insert (KEY, VALUE) into table TAB. The table should not have
1163 entry with KEY before the insertion. */
1164void
1165st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1166{
1167 st_hash_t hash_value;
1168
1169 hash_value = do_hash(key, tab);
1170 st_add_direct_with_hash(tab, key, value, hash_value);
1171}
1172
1173/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1174 there is already entry with KEY in the table, return nonzero and
1175 update the value of the found entry. */
1176int
1177st_insert2(st_table *tab, st_data_t key, st_data_t value,
1178 st_data_t (*func)(st_data_t))
1179{
1180 st_table_entry *entry;
1181 st_index_t bin;
1182 st_index_t ind;
1183 st_hash_t hash_value;
1184 st_index_t bin_ind;
1185 int new_p;
1186
1187 hash_value = do_hash(key, tab);
1188 retry:
1189 rebuild_table_if_necessary (tab);
1190 if (tab->bins == NULL) {
1191 bin = find_entry(tab, hash_value, key);
1192 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1193 goto retry;
1194 new_p = bin == UNDEFINED_ENTRY_IND;
1195 if (new_p)
1196 tab->num_entries++;
1197 bin_ind = UNDEFINED_BIN_IND;
1198 }
1199 else {
1200 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1201 key, &bin_ind);
1202 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1203 goto retry;
1204 new_p = bin == UNDEFINED_ENTRY_IND;
1205 bin -= ENTRY_BASE;
1206 }
1207 if (new_p) {
1208 key = (*func)(key);
1209 ind = tab->entries_bound++;
1210 entry = &tab->entries[ind];
1211 entry->hash = hash_value;
1212 entry->key = key;
1213 entry->record = value;
1214 if (bin_ind != UNDEFINED_BIN_IND)
1215 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1216 return 0;
1217 }
1218 tab->entries[bin].record = value;
1219 return 1;
1220}
1221
1222/* Create and return a copy of table OLD_TAB. */
1223st_table *
1224st_copy(st_table *old_tab)
1225{
1226 st_table *new_tab;
1227
1228 new_tab = (st_table *) malloc(sizeof(st_table));
1229#ifndef RUBY
1230 if (new_tab == NULL)
1231 return NULL;
1232#endif
1233 *new_tab = *old_tab;
1234 if (old_tab->bins == NULL)
1235 new_tab->bins = NULL;
1236 else {
1237 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1238#ifndef RUBY
1239 if (new_tab->bins == NULL) {
1240 free(new_tab);
1241 return NULL;
1242 }
1243#endif
1244 }
1245 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1246 * sizeof(st_table_entry));
1247#ifndef RUBY
1248 if (new_tab->entries == NULL) {
1249 st_free_table(new_tab);
1250 return NULL;
1251 }
1252#endif
1253 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1254 get_allocated_entries(old_tab));
1255 if (old_tab->bins != NULL)
1256 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1257 return new_tab;
1258}
1259
1260/* Update the entries start of table TAB after removing an entry
1261 with index N in the array entries. */
1262static inline void
1263update_range_for_deleted(st_table *tab, st_index_t n)
1264{
1265 /* Do not update entries_bound here. Otherwise, we can fill all
1266 bins by deleted entry value before rebuilding the table. */
1267 if (tab->entries_start == n) {
1268 st_index_t start = n + 1;
1269 st_index_t bound = tab->entries_bound;
1270 st_table_entry *entries = tab->entries;
1271 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1272 tab->entries_start = start;
1273 }
1274}
1275
1276/* Delete entry with KEY from table TAB, set up *VALUE (unless
1277 VALUE is zero) from deleted table entry, and return non-zero. If
1278 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1279 is zero), and return zero. */
1280static int
1281st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1282{
1283 st_table_entry *entry;
1284 st_index_t bin;
1285 st_index_t bin_ind;
1286 st_hash_t hash;
1287
1288 hash = do_hash(*key, tab);
1289 retry:
1290 if (tab->bins == NULL) {
1291 bin = find_entry(tab, hash, *key);
1292 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1293 goto retry;
1294 if (bin == UNDEFINED_ENTRY_IND) {
1295 if (value != 0) *value = 0;
1296 return 0;
1297 }
1298 }
1299 else {
1300 bin_ind = find_table_bin_ind(tab, hash, *key);
1301 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1302 goto retry;
1303 if (bin_ind == UNDEFINED_BIN_IND) {
1304 if (value != 0) *value = 0;
1305 return 0;
1306 }
1307 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1308 MARK_BIN_DELETED(tab, bin_ind);
1309 }
1310 entry = &tab->entries[bin];
1311 *key = entry->key;
1312 if (value != 0) *value = entry->record;
1313 MARK_ENTRY_DELETED(entry);
1314 tab->num_entries--;
1315 update_range_for_deleted(tab, bin);
1316 return 1;
1317}
1318
1319int
1320st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1321{
1322 return st_general_delete(tab, key, value);
1323}
1324
1325/* The function and other functions with suffix '_safe' or '_check'
1326 are originated from the previous implementation of the hash tables.
1327 It was necessary for correct deleting entries during traversing
1328 tables. The current implementation permits deletion during
1329 traversing without a specific way to do this. */
1330int
1331st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1332 st_data_t never ATTRIBUTE_UNUSED)
1333{
1334 return st_general_delete(tab, key, value);
1335}
1336
1337/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1338 return zero. Otherwise, remove the first entry in the table.
1339 Return its key through KEY and its record through VALUE (unless
1340 VALUE is zero). */
1341int
1342st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1343{
1344 st_index_t i, bound;
1345 st_index_t bin;
1346 st_table_entry *entries, *curr_entry_ptr;
1347 st_index_t bin_ind;
1348
1349 entries = tab->entries;
1350 bound = tab->entries_bound;
1351 for (i = tab->entries_start; i < bound; i++) {
1352 curr_entry_ptr = &entries[i];
1353 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1354 st_hash_t entry_hash = curr_entry_ptr->hash;
1355 st_data_t entry_key = curr_entry_ptr->key;
1356
1357 if (value != 0) *value = curr_entry_ptr->record;
1358 *key = entry_key;
1359 retry:
1360 if (tab->bins == NULL) {
1361 bin = find_entry(tab, entry_hash, entry_key);
1362 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1363 entries = tab->entries;
1364 goto retry;
1365 }
1366 curr_entry_ptr = &entries[bin];
1367 }
1368 else {
1369 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1370 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1371 entries = tab->entries;
1372 goto retry;
1373 }
1374 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1375 - ENTRY_BASE];
1376 MARK_BIN_DELETED(tab, bin_ind);
1377 }
1378 MARK_ENTRY_DELETED(curr_entry_ptr);
1379 tab->num_entries--;
1380 update_range_for_deleted(tab, i);
1381 return 1;
1382 }
1383 }
1384 if (value != 0) *value = 0;
1385 return 0;
1386}
1387
1388/* See comments for function st_delete_safe. */
1389void
1390st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1391 st_data_t never ATTRIBUTE_UNUSED)
1392{
1393}
1394
1395/* Find entry with KEY in table TAB, call FUNC with pointers to copies
1396 of the key and the value of the found entry, and non-zero as the
1397 3rd argument. If the entry is not found, call FUNC with a pointer
1398 to KEY, a pointer to zero, and a zero argument. If the call
1399 returns ST_CONTINUE, the table will have an entry with key and
1400 value returned by FUNC through the 1st and 2nd parameters. If the
1401 call of FUNC returns ST_DELETE, the table will not have entry with
1402 KEY. The function returns flag of that the entry with KEY was in
1403 the table before the call. */
1404int
1405st_update(st_table *tab, st_data_t key,
1406 st_update_callback_func *func, st_data_t arg)
1407{
1408 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1409 st_index_t bin = 0; /* Ditto */
1410 st_table_entry *entries;
1411 st_index_t bin_ind;
1412 st_data_t value = 0, old_key;
1413 int retval, existing;
1414 st_hash_t hash = do_hash(key, tab);
1415
1416 retry:
1417 entries = tab->entries;
1418 if (tab->bins == NULL) {
1419 bin = find_entry(tab, hash, key);
1420 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1421 goto retry;
1422 existing = bin != UNDEFINED_ENTRY_IND;
1423 entry = &entries[bin];
1424 bin_ind = UNDEFINED_BIN_IND;
1425 }
1426 else {
1427 bin_ind = find_table_bin_ind(tab, hash, key);
1428 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1429 goto retry;
1430 existing = bin_ind != UNDEFINED_BIN_IND;
1431 if (existing) {
1432 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1433 entry = &entries[bin];
1434 }
1435 }
1436 if (existing) {
1437 key = entry->key;
1438 value = entry->record;
1439 }
1440 old_key = key;
1441 retval = (*func)(&key, &value, arg, existing);
1442 switch (retval) {
1443 case ST_CONTINUE:
1444 if (! existing) {
1445 st_add_direct_with_hash(tab, key, value, hash);
1446 break;
1447 }
1448 if (old_key != key) {
1449 entry->key = key;
1450 }
1451 entry->record = value;
1452 break;
1453 case ST_DELETE:
1454 if (existing) {
1455 if (bin_ind != UNDEFINED_BIN_IND)
1456 MARK_BIN_DELETED(tab, bin_ind);
1457 MARK_ENTRY_DELETED(entry);
1458 tab->num_entries--;
1459 update_range_for_deleted(tab, bin);
1460 }
1461 break;
1462 }
1463 return existing;
1464}
1465
1466/* Traverse all entries in table TAB calling FUNC with current entry
1467 key and value and zero. If the call returns ST_STOP, stop
1468 traversing. If the call returns ST_DELETE, delete the current
1469 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1470 traversing. The function returns zero unless an error is found.
1471 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1472 different for ST_CHECK and when the current element is removed
1473 during traversing. */
1474static inline int
1475st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1476 int check_p)
1477{
1478 st_index_t bin;
1479 st_index_t bin_ind;
1480 st_table_entry *entries, *curr_entry_ptr;
1481 enum st_retval retval;
1482 st_index_t i, rebuilds_num;
1483 st_hash_t hash;
1484 st_data_t key;
1485 int error_p, packed_p = tab->bins == NULL;
1486
1487 entries = tab->entries;
1488 /* The bound can change inside the loop even without rebuilding
1489 the table, e.g. by an entry insertion. */
1490 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1491 curr_entry_ptr = &entries[i];
1492 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1493 continue;
1494 key = curr_entry_ptr->key;
1495 rebuilds_num = tab->rebuilds_num;
1496 hash = curr_entry_ptr->hash;
1497 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1498
1499 if (retval == ST_REPLACE && replace) {
1500 st_data_t value;
1501 value = curr_entry_ptr->record;
1502 retval = (*replace)(&key, &value, arg, TRUE);
1503 curr_entry_ptr->key = key;
1504 curr_entry_ptr->record = value;
1505 }
1506
1507 if (rebuilds_num != tab->rebuilds_num) {
1508 retry:
1509 entries = tab->entries;
1510 packed_p = tab->bins == NULL;
1511 if (packed_p) {
1512 i = find_entry(tab, hash, key);
1513 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1514 goto retry;
1515 error_p = i == UNDEFINED_ENTRY_IND;
1516 }
1517 else {
1518 i = find_table_entry_ind(tab, hash, key);
1519 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1520 goto retry;
1521 error_p = i == UNDEFINED_ENTRY_IND;
1522 i -= ENTRY_BASE;
1523 }
1524 if (error_p && check_p) {
1525 /* call func with error notice */
1526 retval = (*func)(0, 0, arg, 1);
1527 return 1;
1528 }
1529 curr_entry_ptr = &entries[i];
1530 }
1531 switch (retval) {
1532 case ST_REPLACE:
1533 break;
1534 case ST_CONTINUE:
1535 break;
1536 case ST_CHECK:
1537 if (check_p)
1538 break;
1539 case ST_STOP:
1540 return 0;
1541 case ST_DELETE: {
1542 st_data_t key = curr_entry_ptr->key;
1543
1544 again:
1545 if (packed_p) {
1546 bin = find_entry(tab, hash, key);
1547 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1548 goto again;
1549 if (bin == UNDEFINED_ENTRY_IND)
1550 break;
1551 }
1552 else {
1553 bin_ind = find_table_bin_ind(tab, hash, key);
1554 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1555 goto again;
1556 if (bin_ind == UNDEFINED_BIN_IND)
1557 break;
1558 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1559 MARK_BIN_DELETED(tab, bin_ind);
1560 }
1561 curr_entry_ptr = &entries[bin];
1562 MARK_ENTRY_DELETED(curr_entry_ptr);
1563 tab->num_entries--;
1564 update_range_for_deleted(tab, bin);
1565 break;
1566 }
1567 }
1568 }
1569 return 0;
1570}
1571
1572int
1573st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1574{
1575 return st_general_foreach(tab, func, replace, arg, TRUE);
1576}
1577
1578struct functor {
1579 st_foreach_callback_func *func;
1580 st_data_t arg;
1581};
1582
1583static int
1584apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1585{
1586 const struct functor *f = (void *)d;
1587 return f->func(k, v, f->arg);
1588}
1589
1590int
1591st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1592{
1593 const struct functor f = { func, arg };
1594 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1595}
1596
1597/* See comments for function st_delete_safe. */
1598int
1599st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1600 st_data_t never ATTRIBUTE_UNUSED)
1601{
1602 return st_general_foreach(tab, func, 0, arg, TRUE);
1603}
1604
1605/* Set up array KEYS by at most SIZE keys of head table TAB entries.
1606 Return the number of keys set up in array KEYS. */
1607static inline st_index_t
1608st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1609{
1610 st_index_t i, bound;
1611 st_data_t key, *keys_start, *keys_end;
1612 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1613
1614 bound = tab->entries_bound;
1615 keys_start = keys;
1616 keys_end = keys + size;
1617 for (i = tab->entries_start; i < bound; i++) {
1618 if (keys == keys_end)
1619 break;
1620 curr_entry_ptr = &entries[i];
1621 key = curr_entry_ptr->key;
1622 if (! DELETED_ENTRY_P(curr_entry_ptr))
1623 *keys++ = key;
1624 }
1625
1626 return keys - keys_start;
1627}
1628
1629st_index_t
1630st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1631{
1632 return st_general_keys(tab, keys, size);
1633}
1634
1635/* See comments for function st_delete_safe. */
1636st_index_t
1637st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1638 st_data_t never ATTRIBUTE_UNUSED)
1639{
1640 return st_general_keys(tab, keys, size);
1641}
1642
1643/* Set up array VALUES by at most SIZE values of head table TAB
1644 entries. Return the number of values set up in array VALUES. */
1645static inline st_index_t
1646st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1647{
1648 st_index_t i, bound;
1649 st_data_t *values_start, *values_end;
1650 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1651
1652 values_start = values;
1653 values_end = values + size;
1654 bound = tab->entries_bound;
1655 for (i = tab->entries_start; i < bound; i++) {
1656 if (values == values_end)
1657 break;
1658 curr_entry_ptr = &entries[i];
1659 if (! DELETED_ENTRY_P(curr_entry_ptr))
1660 *values++ = curr_entry_ptr->record;
1661 }
1662
1663 return values - values_start;
1664}
1665
1666st_index_t
1667st_values(st_table *tab, st_data_t *values, st_index_t size)
1668{
1669 return st_general_values(tab, values, size);
1670}
1671
1672/* See comments for function st_delete_safe. */
1673st_index_t
1674st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1675 st_data_t never ATTRIBUTE_UNUSED)
1676{
1677 return st_general_values(tab, values, size);
1678}
1679
/* Initial value for the FNV-1a based string hashes in this file. */
#define FNV1_32A_INIT 0x811c9dc5

/*
 * 32 bit magic FNV-1a prime
 */
#define FNV_32_PRIME 0x01000193

/* Unless the build already defined UNALIGNED_WORD_ACCESS, set it to 1
   on the architectures listed below and to 0 everywhere else.
   __POWERPC__ is in the list to accommodate the Darwin case. */
#ifndef UNALIGNED_WORD_ACCESS
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
     defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
     defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
     defined(__mc68020__)
#  define UNALIGNED_WORD_ACCESS 1
# else
#  define UNALIGNED_WORD_ACCESS 0
# endif
#endif
1699
1700/* This hash function is quite simplified MurmurHash3
1701 * Simplification is legal, cause most of magic still happens in finalizator.
1702 * And finalizator is almost the same as in MurmurHash3 */
1703#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1704#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1705
1706#if ST_INDEX_BITS <= 32
1707#define C1 (st_index_t)0xcc9e2d51
1708#define C2 (st_index_t)0x1b873593
1709#else
1710#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1711#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1712#endif
1713NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1714NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1715NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1716
1717static inline st_index_t
1718murmur_step(st_index_t h, st_index_t k)
1719{
1720#if ST_INDEX_BITS <= 32
1721#define r1 (17)
1722#define r2 (11)
1723#else
1724#define r1 (33)
1725#define r2 (24)
1726#endif
1727 k *= C1;
1728 h ^= ROTL(k, r1);
1729 h *= C2;
1730 h = ROTL(h, r2);
1731 return h;
1732}
1733#undef r1
1734#undef r2
1735
1736static inline st_index_t
1737murmur_finish(st_index_t h)
1738{
1739#if ST_INDEX_BITS <= 32
1740#define r1 (16)
1741#define r2 (13)
1742#define r3 (16)
1743 const st_index_t c1 = 0x85ebca6b;
1744 const st_index_t c2 = 0xc2b2ae35;
1745#else
1746/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1747#define r1 (30)
1748#define r2 (27)
1749#define r3 (31)
1750 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1751 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1752#endif
1753#if ST_INDEX_BITS > 64
1754 h ^= h >> 64;
1755 h *= c2;
1756 h ^= h >> 65;
1757#endif
1758 h ^= h >> r1;
1759 h *= c1;
1760 h ^= h >> r2;
1761 h *= c2;
1762 h ^= h >> r3;
1763 return h;
1764}
1765#undef r1
1766#undef r2
1767#undef r3
1768
1769st_index_t
1770st_hash(const void *ptr, size_t len, st_index_t h)
1771{
1772 const char *data = ptr;
1773 st_index_t t = 0;
1774 size_t l = len;
1775
1776#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1777#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1778#if SIZEOF_ST_INDEX_T > 4
1779#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1780#if SIZEOF_ST_INDEX_T > 8
1781#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1782 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1783#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1784#endif
1785#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1786#else
1787#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1788#endif
1789#undef SKIP_TAIL
1790 if (len >= sizeof(st_index_t)) {
1791#if !UNALIGNED_WORD_ACCESS
1792 int align = (int)((st_data_t)data % sizeof(st_index_t));
1793 if (align) {
1794 st_index_t d = 0;
1795 int sl, sr, pack;
1796
1797 switch (align) {
1798#ifdef WORDS_BIGENDIAN
1799# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1800 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1801#else
1802# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1803 t |= data_at(n) << CHAR_BIT*(n)
1804#endif
1805 UNALIGNED_ADD_ALL;
1806#undef UNALIGNED_ADD
1807 }
1808
1809#ifdef WORDS_BIGENDIAN
1810 t >>= (CHAR_BIT * align) - CHAR_BIT;
1811#else
1812 t <<= (CHAR_BIT * align);
1813#endif
1814
1815 data += sizeof(st_index_t)-align;
1816 len -= sizeof(st_index_t)-align;
1817
1818 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1819 sr = CHAR_BIT * align;
1820
1821 while (len >= sizeof(st_index_t)) {
1822 d = *(st_index_t *)data;
1823#ifdef WORDS_BIGENDIAN
1824 t = (t << sr) | (d >> sl);
1825#else
1826 t = (t >> sr) | (d << sl);
1827#endif
1828 h = murmur_step(h, t);
1829 t = d;
1830 data += sizeof(st_index_t);
1831 len -= sizeof(st_index_t);
1832 }
1833
1834 pack = len < (size_t)align ? (int)len : align;
1835 d = 0;
1836 switch (pack) {
1837#ifdef WORDS_BIGENDIAN
1838# define UNALIGNED_ADD(n) case (n) + 1: \
1839 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1840#else
1841# define UNALIGNED_ADD(n) case (n) + 1: \
1842 d |= data_at(n) << CHAR_BIT*(n)
1843#endif
1844 UNALIGNED_ADD_ALL;
1845#undef UNALIGNED_ADD
1846 }
1847#ifdef WORDS_BIGENDIAN
1848 t = (t << sr) | (d >> sl);
1849#else
1850 t = (t >> sr) | (d << sl);
1851#endif
1852
1853 if (len < (size_t)align) goto skip_tail;
1854# define SKIP_TAIL 1
1855 h = murmur_step(h, t);
1856 data += pack;
1857 len -= pack;
1858 }
1859 else
1860#endif
1861#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1862#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1863#else
1864#define aligned_data data
1865#endif
1866 {
1867 do {
1868 h = murmur_step(h, *(st_index_t *)aligned_data);
1869 data += sizeof(st_index_t);
1870 len -= sizeof(st_index_t);
1871 } while (len >= sizeof(st_index_t));
1872 }
1873 }
1874
1875 t = 0;
1876 switch (len) {
1877#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1878 /* in this case byteorder doesn't really matter */
1879#if SIZEOF_ST_INDEX_T > 4
1880 case 7: t |= data_at(6) << 48;
1881 case 6: t |= data_at(5) << 40;
1882 case 5: t |= data_at(4) << 32;
1883 case 4:
1884 t |= (st_index_t)*(uint32_t*)aligned_data;
1885 goto skip_tail;
1886# define SKIP_TAIL 1
1887#endif
1888 case 3: t |= data_at(2) << 16;
1889 case 2: t |= data_at(1) << 8;
1890 case 1: t |= data_at(0);
1891#else
1892#ifdef WORDS_BIGENDIAN
1893# define UNALIGNED_ADD(n) case (n) + 1: \
1894 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1895#else
1896# define UNALIGNED_ADD(n) case (n) + 1: \
1897 t |= data_at(n) << CHAR_BIT*(n)
1898#endif
1899 UNALIGNED_ADD_ALL;
1900#undef UNALIGNED_ADD
1901#endif
1902#ifdef SKIP_TAIL
1903 skip_tail:
1904#endif
1905 h ^= t; h -= ROTL(t, 7);
1906 h *= C2;
1907 }
1908 h ^= l;
1909#undef aligned_data
1910
1911 return murmur_finish(h);
1912}
1913
1914st_index_t
1915st_hash_uint32(st_index_t h, uint32_t i)
1916{
1917 return murmur_step(h, i);
1918}
1919
1920NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1921st_index_t
1922st_hash_uint(st_index_t h, st_index_t i)
1923{
1924 i += h;
1925/* no matter if it is BigEndian or LittleEndian,
1926 * we hash just integers */
1927#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1928 h = murmur_step(h, i >> 8*8);
1929#endif
1930 h = murmur_step(h, i);
1931 return h;
1932}
1933
1934st_index_t
1935st_hash_end(st_index_t h)
1936{
1937 h = murmur_finish(h);
1938 return h;
1939}
1940
1941#undef st_hash_start
1942st_index_t
1943rb_st_hash_start(st_index_t h)
1944{
1945 return h;
1946}
1947
1948static st_index_t
1949strhash(st_data_t arg)
1950{
1951 register const char *string = (const char *)arg;
1952 return st_hash(string, strlen(string), FNV1_32A_INIT);
1953}
1954
/* Compare the NUL-terminated strings S1 and S2, treating the ASCII
   letters 'A'..'Z' as equal to 'a'..'z' regardless of the locale.
   Return 0 when the strings are equal, 1 when S1 sorts after S2 (or
   S2 is a proper prefix of S1), and -1 otherwise.  Characters are
   compared as plain `char' values. */
int
st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
{
    for (;; s1++, s2++) {
        char a = *s1;
        char b = *s2;

        /* One of the strings ended: the shorter one sorts first. */
        if (a == '\0')
            return b == '\0' ? 0 : -1;
        if (b == '\0')
            return 1;

        if (a >= 'A' && a <= 'Z') a += 'a' - 'A';
        if (b >= 'A' && b <= 'Z') b += 'a' - 'A';
        if (a != b)
            return a > b ? 1 : -1;
    }
}
1978
/* Compare at most N characters of the NUL-terminated strings S1 and
   S2, treating the ASCII letters 'A'..'Z' as equal to 'a'..'z'
   regardless of the locale.  Return 0 when the compared prefixes are
   equal, 1 when S1 sorts after S2, and -1 otherwise.  Characters are
   compared as plain `char' values. */
int
st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
{
    size_t remaining;

    for (remaining = n; remaining > 0; remaining--, s1++, s2++) {
        char a = *s1;
        char b = *s2;

        /* One of the strings ended: the shorter one sorts first. */
        if (a == '\0')
            return b == '\0' ? 0 : -1;
        if (b == '\0')
            return 1;

        if (a >= 'A' && a <= 'Z') a += 'a' - 'A';
        if (b >= 'A' && b <= 'Z') b += 'a' - 'A';
        if (a != b)
            return a > b ? 1 : -1;
    }
    return 0;
}
2004
2005static int
2006st_strcmp(st_data_t lhs, st_data_t rhs)
2007{
2008 const char *s1 = (char *)lhs;
2009 const char *s2 = (char *)rhs;
2010 return strcmp(s1, s2);
2011}
2012
2013static int
2014st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2015{
2016 const char *s1 = (char *)lhs;
2017 const char *s2 = (char *)rhs;
2018 return st_locale_insensitive_strcasecmp(s1, s2);
2019}
2020
2021NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2022static st_index_t
2023strcasehash(st_data_t arg)
2024{
2025 register const char *string = (const char *)arg;
2026 register st_index_t hval = FNV1_32A_INIT;
2027
2028 /*
2029 * FNV-1a hash each octet in the buffer
2030 */
2031 while (*string) {
2032 unsigned int c = (unsigned char)*string++;
2033 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2034 hval ^= c;
2035
2036 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2037 hval *= FNV_32_PRIME;
2038 }
2039 return hval;
2040}
2041
2042int
2043st_numcmp(st_data_t x, st_data_t y)
2044{
2045 return x != y;
2046}
2047
2048st_index_t
2049st_numhash(st_data_t n)
2050{
2051 enum {s1 = 11, s2 = 3};
2052 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2053}
2054
2055/* Expand TAB to be suitable for holding SIZ entries in total.
2056 Pre-existing entries remain not deleted inside of TAB, but its bins
2057 are cleared to expect future reconstruction. See rehash below. */
2058static void
2059st_expand_table(st_table *tab, st_index_t siz)
2060{
2061 st_table *tmp;
2062 st_index_t n;
2063
2064 if (siz <= get_allocated_entries(tab))
2065 return; /* enough room already */
2066
2067 tmp = st_init_table_with_size(tab->type, siz);
2068 n = get_allocated_entries(tab);
2069 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2070 free(tab->entries);
2071 if (tab->bins != NULL)
2072 free(tab->bins);
2073 if (tmp->bins != NULL)
2074 free(tmp->bins);
2075 tab->entry_power = tmp->entry_power;
2076 tab->bin_power = tmp->bin_power;
2077 tab->size_ind = tmp->size_ind;
2078 tab->entries = tmp->entries;
2079 tab->bins = NULL;
2080 tab->rebuilds_num++;
2081 free(tmp);
2082}
2083
2084/* Rehash using linear search. Return TRUE if we found that the table
2085 was rebuilt. */
2086static int
2087st_rehash_linear(st_table *tab)
2088{
2089 int eq_p, rebuilt_p;
2090 st_index_t i, j;
2091 st_table_entry *p, *q;
2092 if (tab->bins) {
2093 free(tab->bins);
2094 tab->bins = NULL;
2095 }
2096 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2097 p = &tab->entries[i];
2098 if (DELETED_ENTRY_P(p))
2099 continue;
2100 for (j = i + 1; j < tab->entries_bound; j++) {
2101 q = &tab->entries[j];
2102 if (DELETED_ENTRY_P(q))
2103 continue;
2104 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2105 if (EXPECT(rebuilt_p, 0))
2106 return TRUE;
2107 if (eq_p) {
2108 *p = *q;
2109 MARK_ENTRY_DELETED(q);
2110 tab->num_entries--;
2111 update_range_for_deleted(tab, j);
2112 }
2113 }
2114 }
2115 return FALSE;
2116}
2117
2118/* Rehash using index. Return TRUE if we found that the table was
2119 rebuilt. */
2120static int
2121st_rehash_indexed(st_table *tab)
2122{
2123 int eq_p, rebuilt_p;
2124 st_index_t i;
2125 st_index_t const n = bins_size(tab);
2126 unsigned int const size_ind = get_size_ind(tab);
2127 st_index_t *bins = realloc(tab->bins, n);
2128 tab->bins = bins;
2129 initialize_bins(tab);
2130 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2131 st_table_entry *p = &tab->entries[i];
2132 st_index_t ind;
2133#ifdef QUADRATIC_PROBE
2134 st_index_t d = 1;
2135#else
2136 st_index_t peterb = p->hash;
2137#endif
2138
2139 if (DELETED_ENTRY_P(p))
2140 continue;
2141
2142 ind = hash_bin(p->hash, tab);
2143 for (;;) {
2144 st_index_t bin = get_bin(bins, size_ind, ind);
2145 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2146 /* ok, new room */
2147 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2148 break;
2149 }
2150 else {
2151 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2152 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2153 if (EXPECT(rebuilt_p, 0))
2154 return TRUE;
2155 if (eq_p) {
2156 /* duplicated key; delete it */
2157 q->record = p->record;
2158 MARK_ENTRY_DELETED(p);
2159 tab->num_entries--;
2160 update_range_for_deleted(tab, bin);
2161 break;
2162 }
2163 else {
2164 /* hash collision; skip it */
2165#ifdef QUADRATIC_PROBE
2166 ind = hash_bin(ind + d, tab);
2167 d++;
2168#else
2169 ind = secondary_hash(ind, tab, &peterb);
2170#endif
2171 }
2172 }
2173 }
2174 }
2175 return FALSE;
2176}
2177
2178/* Reconstruct TAB's bins according to TAB's entries. This function
2179 permits conflicting keys inside of entries. No errors are reported
2180 then. All but one of them are discarded silently. */
2181static void
2182st_rehash(st_table *tab)
2183{
2184 int rebuilt_p;
2185
2186 do {
2187 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2188 rebuilt_p = st_rehash_linear(tab);
2189 else
2190 rebuilt_p = st_rehash_indexed(tab);
2191 } while (rebuilt_p);
2192}
2193
2194#ifdef RUBY
2195static st_data_t
2196st_stringify(VALUE key)
2197{
2198 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2199 rb_hash_key_str(key) : key;
2200}
2201
2202static void
2203st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2204{
2205 st_data_t k = st_stringify(key);
2207 e.hash = do_hash(k, tab);
2208 e.key = k;
2209 e.record = val;
2210
2211 tab->entries[tab->entries_bound++] = e;
2212 tab->num_entries++;
2213 RB_OBJ_WRITTEN(hash, Qundef, k);
2214 RB_OBJ_WRITTEN(hash, Qundef, val);
2215}
2216
2217static void
2218st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2219{
2220 long i;
2221
2222 for (i = 0; i < argc; /* */) {
2223 st_data_t k = st_stringify(argv[i++]);
2224 st_data_t v = argv[i++];
2225 st_insert(tab, k, v);
2226 RB_OBJ_WRITTEN(hash, Qundef, k);
2227 RB_OBJ_WRITTEN(hash, Qundef, v);
2228 }
2229}
2230
2231static void
2232st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2233{
2234 long i;
2235
2236 /* push elems */
2237 for (i = 0; i < argc; /* */) {
2238 VALUE key = argv[i++];
2239 VALUE val = argv[i++];
2240 st_insert_single(tab, hash, key, val);
2241 }
2242
2243 /* reindex */
2244 st_rehash(tab);
2245}
2246
2247/* Mimics ruby's { foo => bar } syntax. This function is subpart
2248 of rb_hash_bulk_insert. */
2249void
2250rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2251{
2252 st_index_t n, size = argc / 2;
2253 st_table *tab = RHASH_ST_TABLE(hash);
2254
2255 tab = RHASH_TBL_RAW(hash);
2256 n = tab->entries_bound + size;
2257 st_expand_table(tab, n);
2258 if (UNLIKELY(tab->num_entries))
2259 st_insert_generic(tab, argc, argv, hash);
2260 else if (argc <= 2)
2261 st_insert_single(tab, hash, argv[0], argv[1]);
2262 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2263 st_insert_linear(tab, argc, argv, hash);
2264 else
2265 st_insert_generic(tab, argc, argv, hash);
2266}
2267
2268// to iterate iv_index_tbl
2269st_data_t
2270rb_st_nth_key(st_table *tab, st_index_t index)
2271{
2272 if (LIKELY(tab->entries_start == 0 &&
2273 tab->num_entries == tab->entries_bound &&
2274 index < tab->num_entries)) {
2275 return tab->entries[index].key;
2276 }
2277 else {
2278 rb_bug("unreachable");
2279 }
2280}
2281
2282void
2283rb_st_compact_table(st_table *tab)
2284{
2285 st_index_t num = tab->num_entries;
2286 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2287 /* Compaction: */
2288 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2289 rebuild_table_with(new_tab, tab);
2290 }
2291}
2292
2293#endif
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
Definition fl_type.h:921
#define Qundef
Old name of RUBY_Qundef.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
Definition error.c:3150
void rb_bug(const char *fmt,...)
Interpreter panic switch.
Definition error.c:794
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1089
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:190
VALUE rb_cString
String class.
Definition string.c:79
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
Definition rgengc.h:232
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition memory.h:366
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define _(args)
This was a transition path from K&R to ANSI.
Definition stdarg.h:35
Definition st.c:133
Definition st.h:79
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40