suricata
util-hash-lookup3.c
Go to the documentation of this file.
1/*
2-------------------------------------------------------------------------------
3lookup3.c, by Bob Jenkins, May 2006, Public Domain.
4
5These are functions for producing 32-bit hashes for hash table lookup.
6hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
7are externally useful functions. Routines to test the hash are included
8if SELF_TEST is defined. You can use this free for any purpose. It's in
9the public domain. It has no warranty.
10
11You probably want to use hashlittle(). hashlittle() and hashbig()
12hash byte arrays. hashlittle() is is faster than hashbig() on
13little-endian machines. Intel and AMD are little-endian machines.
14On second thought, you probably want hashlittle2(), which is identical to
15hashlittle() except it returns two 32-bit hashes for the price of one.
16You could implement hashbig2() if you wanted but I haven't bothered here.
17
18If you want to find a hash of, say, exactly 7 integers, do
19 a = i1; b = i2; c = i3;
20 mix(a,b,c);
21 a += i4; b += i5; c += i6;
22 mix(a,b,c);
23 a += i7;
24 final(a,b,c);
25then use c as the hash value. If you have a variable length array of
264-byte integers to hash, use hashword(). If you have a byte array (like
27a character string), use hashlittle(). If you have several byte arrays, or
28a mix of things, see the comments above hashlittle().
29
30Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
31then mix those integers. This is fast (you can do a lot more thorough
32mixing with 12*3 instructions on 3 integers than you can with 3 instructions
33on 1 byte), but shoehorning those bytes into integers efficiently is messy.
34-------------------------------------------------------------------------------
35*/
36//#define SELF_TEST 1
37
38#include <stdio.h> /* defines printf for tests */
39#include <time.h> /* defines time_t for timings in the test */
40#include <stdint.h> /* defines uint32_t etc */
41#include <sys/param.h> /* attempt to define endianness */
42#ifdef linux
43# include <endian.h> /* attempt to define endianness */
44#endif
45#include "util-hash-lookup3.h"
46
47/*
48 * My best guess at if you are big-endian or little-endian. This may
49 * need adjustment.
50 */
51#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
52 __BYTE_ORDER == __LITTLE_ENDIAN) || \
53 (defined(i386) || defined(__i386__) || defined(__i486__) || \
54 defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
55# define HASH_LITTLE_ENDIAN 1
56# define HASH_BIG_ENDIAN 0
57#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
58 __BYTE_ORDER == __BIG_ENDIAN) || \
59 (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
60# define HASH_LITTLE_ENDIAN 0
61# define HASH_BIG_ENDIAN 1
62#else
63# define HASH_LITTLE_ENDIAN 0
64# define HASH_BIG_ENDIAN 0
65#endif
66
67#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
68
69/*
70-------------------------------------------------------------------------------
71mix -- mix 3 32-bit values reversibly.
72
73This is reversible, so any information in (a,b,c) before mix() is
74still in (a,b,c) after mix().
75
76If four pairs of (a,b,c) inputs are run through mix(), or through
77mix() in reverse, there are at least 32 bits of the output that
78are sometimes the same for one pair and different for another pair.
79This was tested for:
80* pairs that differed by one bit, by two bits, in any combination
81 of top bits of (a,b,c), or in any combination of bottom bits of
82 (a,b,c).
83* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
84 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
85 is commonly produced by subtraction) look like a single 1-bit
86 difference.
87* the base values were pseudorandom, all zero but one bit set, or
88 all zero plus a counter that starts at zero.
89
90Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
91satisfy this are
92 4 6 8 16 19 4
93 9 15 3 18 27 15
94 14 9 3 7 17 3
95Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
96for "differ" defined as + with a one-bit base and a two-bit delta. I
97used http://burtleburtle.net/bob/hash/avalanche.html to choose
98the operations, constants, and arrangements of the variables.
99
100This does not achieve avalanche. There are input bits of (a,b,c)
101that fail to affect some output bits of (a,b,c), especially of a. The
102most thoroughly mixed value is c, but it doesn't really even achieve
103avalanche in c.
104
105This allows some parallelism. Read-after-writes are good at doubling
106the number of bits affected, so the goal of mixing pulls in the opposite
107direction as the goal of parallelism. I did what I could. Rotates
108seem to cost as much as shifts on every machine I could lay my hands
109on, and rotates are much kinder to the top and bottom bits, so I used
110rotates.
111-------------------------------------------------------------------------------
112*/
113#define mix(a,b,c) \
114{ \
115 a -= c; a ^= rot(c, 4); c += b; \
116 b -= a; b ^= rot(a, 6); a += c; \
117 c -= b; c ^= rot(b, 8); b += a; \
118 a -= c; a ^= rot(c,16); c += b; \
119 b -= a; b ^= rot(a,19); a += c; \
120 c -= b; c ^= rot(b, 4); b += a; \
121}
122
123/*
124-------------------------------------------------------------------------------
125final -- final mixing of 3 32-bit values (a,b,c) into c
126
127Pairs of (a,b,c) values differing in only a few bits will usually
128produce values of c that look totally different. This was tested for
129* pairs that differed by one bit, by two bits, in any combination
130 of top bits of (a,b,c), or in any combination of bottom bits of
131 (a,b,c).
132* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
133 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
134 is commonly produced by subtraction) look like a single 1-bit
135 difference.
136* the base values were pseudorandom, all zero but one bit set, or
137 all zero plus a counter that starts at zero.
138
139These constants passed:
140 14 11 25 16 4 14 24
141 12 14 25 16 4 14 24
142and these came close:
143 4 8 15 26 3 22 24
144 10 8 15 26 3 22 24
145 11 8 15 26 3 22 24
146-------------------------------------------------------------------------------
147*/
148#define final(a,b,c) \
149{ \
150 c ^= b; c -= rot(b,14); \
151 a ^= c; a -= rot(c,11); \
152 b ^= a; b -= rot(a,25); \
153 c ^= b; c -= rot(b,16); \
154 a ^= c; a -= rot(c,4); \
155 b ^= a; b -= rot(a,14); \
156 c ^= b; c -= rot(b,24); \
157}
158
159/*
160--------------------------------------------------------------------
161 This works on all machines. To be useful, it requires
162 -- that the key be an array of uint32_t's, and
163 -- that the length be the number of uint32_t's in the key
164
165 The function hashword() is identical to hashlittle() on little-endian
166 machines, and identical to hashbig() on big-endian machines,
167 except that the length has to be measured in uint32_ts rather than in
168 bytes. hashlittle() is more complicated than hashword() only because
169 hashlittle() has to dance around fitting the key bytes into registers.
170--------------------------------------------------------------------
171*/
172uint32_t hashword(
173const uint32_t *k, /* the key, an array of uint32_t values */
174size_t length, /* the length of the key, in uint32_ts */
175uint32_t initval) /* the previous hash, or an arbitrary value */
176{
177 uint32_t a,b,c;
178
179 /* Set up the internal state */
180 a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
181
182 /*------------------------------------------------- handle most of the key */
183 while (length > 3)
184 {
185 a += k[0];
186 b += k[1];
187 c += k[2];
188 mix(a,b,c);
189 length -= 3;
190 k += 3;
191 }
192
193 /*------------------------------------------- handle the last 3 uint32_t's */
194 switch(length) /* all the case statements fall through */
195 {
196 case 3 : c+=k[2]; /* fall through */
197 case 2 : b+=k[1]; /* fall through */
198 case 1 : a+=k[0];
199 final(a,b,c); /* fall through */
200 case 0: /* case 0: nothing left to add */
201 break;
202 }
203 /*------------------------------------------------------ report the result */
204 return c;
205}
206
207
208/*
209--------------------------------------------------------------------
210hashword2() -- same as hashword(), but take two seeds and return two
21132-bit values. pc and pb must both be nonnull, and *pc and *pb must
212both be initialized with seeds. If you pass in (*pb)==0, the output
213(*pc) will be the same as the return value from hashword().
214--------------------------------------------------------------------
215*/
217const uint32_t *k, /* the key, an array of uint32_t values */
218size_t length, /* the length of the key, in uint32_ts */
219uint32_t *pc, /* IN: seed OUT: primary hash value */
220uint32_t *pb) /* IN: more seed OUT: secondary hash value */
221{
222 uint32_t a,b,c;
223
224 /* Set up the internal state */
225 a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
226 c += *pb;
227
228 /*------------------------------------------------- handle most of the key */
229 while (length > 3)
230 {
231 a += k[0];
232 b += k[1];
233 c += k[2];
234 mix(a,b,c);
235 length -= 3;
236 k += 3;
237 }
238
239 /*------------------------------------------- handle the last 3 uint32_t's */
240 switch(length) /* all the case statements fall through */
241 {
242 case 3 : c+=k[2]; /* fall through */
243 case 2 : b+=k[1]; /* fall through */
244 case 1 : a+=k[0];
245 final(a,b,c); /* fall through */
246 case 0: /* case 0: nothing left to add */
247 break;
248 }
249 /*------------------------------------------------------ report the result */
250 *pc=c; *pb=b;
251}
252
253
254/*
255-------------------------------------------------------------------------------
256hashlittle() -- hash a variable-length key into a 32-bit value
257 k : the key (the unaligned variable-length array of bytes)
258 length : the length of the key, counting by bytes
259 initval : can be any 4-byte value
260Returns a 32-bit value. Every bit of the key affects every bit of
261the return value. Two keys differing by one or two bits will have
262totally different hash values.
263
264The best hash table sizes are powers of 2. There is no need to do
265mod a prime (mod is sooo slow!). If you need less than 32 bits,
266use a bitmask. For example, if you need only 10 bits, do
267 h = (h & hashmask(10));
268In which case, the hash table should have hashsize(10) elements.
269
270If you are hashing n strings (uint8_t **)k, do it like this:
271 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
272
273By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
274code any way you wish, private, educational, or commercial. It's free.
275
276Use for hash table lookup, or anything where one collision in 2^^32 is
277acceptable. Do NOT use for cryptographic purposes.
278-------------------------------------------------------------------------------
279*/
280
281uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
282{
283 uint32_t a,b,c; /* internal state */
284 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
285
286 /* Set up the internal state */
287 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
288
289 u.ptr = key;
290 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
291 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
292
293 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
294 while (length > 12)
295 {
296 a += k[0];
297 b += k[1];
298 c += k[2];
299 mix(a,b,c);
300 length -= 12;
301 k += 3;
302 }
303
304 /*----------------------------- handle the last (probably partial) block */
305 /*
306 * "k[2]&0xffffff" actually reads beyond the end of the string, but
307 * then masks off the part it's not allowed to read. Because the
308 * string is aligned, the masked-off tail is in the same word as the
309 * rest of the string. Every machine with memory protection I've seen
310 * does it on word boundaries, so is OK with this. But VALGRIND will
311 * still catch it and complain. The masking trick does make the hash
312 * noticeably faster for short strings (like English words).
313 */
314#ifndef VALGRIND
315
316 switch(length)
317 {
318 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
319 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
320 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
321 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
322 case 8 : b+=k[1]; a+=k[0]; break;
323 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
324 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
325 case 5 : b+=k[1]&0xff; a+=k[0]; break;
326 case 4 : a+=k[0]; break;
327 case 3 : a+=k[0]&0xffffff; break;
328 case 2 : a+=k[0]&0xffff; break;
329 case 1 : a+=k[0]&0xff; break;
330 case 0 : return c; /* zero length strings require no mixing */
331 }
332
333#else /* make valgrind happy */
334
335 const uint8_t *k8 = (const uint8_t *)k;
336
337 switch(length)
338 {
339 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
340 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
341 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
342 case 9 : c+=k8[8]; /* fall through */
343 case 8 : b+=k[1]; a+=k[0]; break;
344 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
345 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
346 case 5 : b+=k8[4]; /* fall through */
347 case 4 : a+=k[0]; break;
348 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
349 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
350 case 1 : a+=k8[0]; break;
351 case 0 : return c;
352 }
353
354#endif /* !valgrind */
355
356 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
357 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
358 const uint8_t *k8;
359
360 /*--------------- all but last block: aligned reads and different mixing */
361 while (length > 12)
362 {
363 a += k[0] + (((uint32_t)k[1])<<16);
364 b += k[2] + (((uint32_t)k[3])<<16);
365 c += k[4] + (((uint32_t)k[5])<<16);
366 mix(a,b,c);
367 length -= 12;
368 k += 6;
369 }
370
371 /*----------------------------- handle the last (probably partial) block */
372 k8 = (const uint8_t *)k;
373 switch(length)
374 {
375 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
376 b+=k[2]+(((uint32_t)k[3])<<16);
377 a+=k[0]+(((uint32_t)k[1])<<16);
378 break;
379 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
380 case 10: c+=k[4];
381 b+=k[2]+(((uint32_t)k[3])<<16);
382 a+=k[0]+(((uint32_t)k[1])<<16);
383 break;
384 case 9 : c+=k8[8]; /* fall through */
385 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
386 a+=k[0]+(((uint32_t)k[1])<<16);
387 break;
388 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
389 case 6 : b+=k[2];
390 a+=k[0]+(((uint32_t)k[1])<<16);
391 break;
392 case 5 : b+=k8[4]; /* fall through */
393 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
394 break;
395 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
396 case 2 : a+=k[0];
397 break;
398 case 1 : a+=k8[0];
399 break;
400 case 0 : return c; /* zero length requires no mixing */
401 }
402
403 } else { /* need to read the key one byte at a time */
404 const uint8_t *k = (const uint8_t *)key;
405
406 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
407 while (length > 12)
408 {
409 a += k[0];
410 a += ((uint32_t)k[1])<<8;
411 a += ((uint32_t)k[2])<<16;
412 a += ((uint32_t)k[3])<<24;
413 b += k[4];
414 b += ((uint32_t)k[5])<<8;
415 b += ((uint32_t)k[6])<<16;
416 b += ((uint32_t)k[7])<<24;
417 c += k[8];
418 c += ((uint32_t)k[9])<<8;
419 c += ((uint32_t)k[10])<<16;
420 c += ((uint32_t)k[11])<<24;
421 mix(a,b,c);
422 length -= 12;
423 k += 12;
424 }
425
426 /*-------------------------------- last block: affect all 32 bits of (c) */
427 switch(length) /* all the case statements fall through */
428 {
429 case 12: c+=((uint32_t)k[11])<<24; /* fall through */
430 case 11: c+=((uint32_t)k[10])<<16; /* fall through */
431 case 10: c+=((uint32_t)k[9])<<8; /* fall through */
432 case 9 : c+=k[8]; /* fall through */
433 case 8 : b+=((uint32_t)k[7])<<24; /* fall through */
434 case 7 : b+=((uint32_t)k[6])<<16; /* fall through */
435 case 6 : b+=((uint32_t)k[5])<<8; /* fall through */
436 case 5 : b+=k[4]; /* fall through */
437 case 4 : a+=((uint32_t)k[3])<<24; /* fall through */
438 case 3 : a+=((uint32_t)k[2])<<16; /* fall through */
439 case 2 : a+=((uint32_t)k[1])<<8; /* fall through */
440 case 1 : a+=k[0];
441 break;
442 case 0 : return c;
443 }
444 }
445
446 final(a,b,c);
447 return c;
448}
449
450
451/*
452-------------------------------------------------------------------------------
453hashlittle_safe() -- hash a variable-length key into a 32-bit value
454 k : the key (the unaligned variable-length array of bytes)
455 length : the length of the key, counting by bytes
456 initval : can be any 4-byte value
457Returns a 32-bit value. Every bit of the key affects every bit of
458the return value. Two keys differing by one or two bits will have
459totally different hash values.
460
461The best hash table sizes are powers of 2. There is no need to do
462mod a prime (mod is sooo slow!). If you need less than 32 bits,
463use a bitmask. For example, if you need only 10 bits, do
464 h = (h & hashmask(10));
465In which case, the hash table should have hashsize(10) elements.
466
467If you are hashing n strings (uint8_t **)k, do it like this:
468 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
469
470By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
471code any way you wish, private, educational, or commercial. It's free.
472
473This version has been modified from hashlittle() above to avoid accesses beyond
474the last byte of the key, which causes warnings from Valgrind and Address
475Sanitizer.
476
477Use for hash table lookup, or anything where one collision in 2^^32 is
478acceptable. Do NOT use for cryptographic purposes.
479-------------------------------------------------------------------------------
480*/
481
482uint32_t hashlittle_safe(const void *key, size_t length, uint32_t initval)
483{
484 uint32_t a,b,c; /* internal state */
485 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
486
487 /* Set up the internal state */
488 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
489
490 u.ptr = key;
491 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
492 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
493
494 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
495 while (length > 12)
496 {
497 a += k[0];
498 b += k[1];
499 c += k[2];
500 mix(a,b,c);
501 length -= 12;
502 k += 3;
503 }
504
505 /*----------------------------- handle the last (probably partial) block */
506 /*
507 * Note that unlike hashlittle() above, we use the "safe" version of this
508 * block that is #ifdef VALGRIND above, in order to avoid warnings from
509 * Valgrind or Address Sanitizer.
510 */
511
512 const uint8_t *k8 = (const uint8_t *)k;
513
514 switch(length)
515 {
516 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
517 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
518 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
519 case 9 : c+=k8[8]; /* fall through */
520 case 8 : b+=k[1]; a+=k[0]; break;
521 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
522 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
523 case 5 : b+=k8[4]; /* fall through */
524 case 4 : a+=k[0]; break;
525 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
526 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
527 case 1 : a+=k8[0]; break;
528 case 0 : return c;
529 }
530 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
531 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
532 const uint8_t *k8;
533
534 /*--------------- all but last block: aligned reads and different mixing */
535 while (length > 12)
536 {
537 a += k[0] + (((uint32_t)k[1])<<16);
538 b += k[2] + (((uint32_t)k[3])<<16);
539 c += k[4] + (((uint32_t)k[5])<<16);
540 mix(a,b,c);
541 length -= 12;
542 k += 6;
543 }
544
545 /*----------------------------- handle the last (probably partial) block */
546 k8 = (const uint8_t *)k;
547 switch(length)
548 {
549 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
550 b+=k[2]+(((uint32_t)k[3])<<16);
551 a+=k[0]+(((uint32_t)k[1])<<16);
552 break;
553 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
554 case 10: c+=k[4];
555 b+=k[2]+(((uint32_t)k[3])<<16);
556 a+=k[0]+(((uint32_t)k[1])<<16);
557 break;
558 case 9 : c+=k8[8]; /* fall through */
559 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
560 a+=k[0]+(((uint32_t)k[1])<<16);
561 break;
562 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
563 case 6 : b+=k[2];
564 a+=k[0]+(((uint32_t)k[1])<<16);
565 break;
566 case 5 : b+=k8[4]; /* fall through */
567 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
568 break;
569 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
570 case 2 : a+=k[0];
571 break;
572 case 1 : a+=k8[0];
573 break;
574 case 0 : return c; /* zero length requires no mixing */
575 }
576
577 } else { /* need to read the key one byte at a time */
578 const uint8_t *k = (const uint8_t *)key;
579
580 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
581 while (length > 12)
582 {
583 a += k[0];
584 a += ((uint32_t)k[1])<<8;
585 a += ((uint32_t)k[2])<<16;
586 a += ((uint32_t)k[3])<<24;
587 b += k[4];
588 b += ((uint32_t)k[5])<<8;
589 b += ((uint32_t)k[6])<<16;
590 b += ((uint32_t)k[7])<<24;
591 c += k[8];
592 c += ((uint32_t)k[9])<<8;
593 c += ((uint32_t)k[10])<<16;
594 c += ((uint32_t)k[11])<<24;
595 mix(a,b,c);
596 length -= 12;
597 k += 12;
598 }
599
600 /*-------------------------------- last block: affect all 32 bits of (c) */
601 switch(length) /* all the case statements fall through */
602 {
603 case 12: c+=((uint32_t)k[11])<<24; /* fall through */
604 case 11: c+=((uint32_t)k[10])<<16; /* fall through */
605 case 10: c+=((uint32_t)k[9])<<8; /* fall through */
606 case 9 : c+=k[8]; /* fall through */
607 case 8 : b+=((uint32_t)k[7])<<24; /* fall through */
608 case 7 : b+=((uint32_t)k[6])<<16; /* fall through */
609 case 6 : b+=((uint32_t)k[5])<<8; /* fall through */
610 case 5 : b+=k[4]; /* fall through */
611 case 4 : a+=((uint32_t)k[3])<<24; /* fall through */
612 case 3 : a+=((uint32_t)k[2])<<16; /* fall through */
613 case 2 : a+=((uint32_t)k[1])<<8; /* fall through */
614 case 1 : a+=k[0];
615 break;
616 case 0 : return c;
617 }
618 }
619
620 final(a,b,c);
621 return c;
622}
623
624
625/*
626 * hashlittle2: return 2 32-bit hash values
627 *
628 * This is identical to hashlittle(), except it returns two 32-bit hash
629 * values instead of just one. This is good enough for hash table
630 * lookup with 2^^64 buckets, or if you want a second hash if you're not
631 * happy with the first, or if you want a probably-unique 64-bit ID for
632 * the key. *pc is better mixed than *pb, so use *pc first. If you want
633 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
634 */
636 const void *key, /* the key to hash */
637 size_t length, /* length of the key */
638 uint32_t *pc, /* IN: primary initval, OUT: primary hash */
639 uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
640{
641 uint32_t a,b,c; /* internal state */
642 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
643
644 /* Set up the internal state */
645 a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
646 c += *pb;
647
648 u.ptr = key;
649 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
650 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
651
652 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
653 while (length > 12)
654 {
655 a += k[0];
656 b += k[1];
657 c += k[2];
658 mix(a,b,c);
659 length -= 12;
660 k += 3;
661 }
662
663 /*----------------------------- handle the last (probably partial) block */
664 /*
665 * "k[2]&0xffffff" actually reads beyond the end of the string, but
666 * then masks off the part it's not allowed to read. Because the
667 * string is aligned, the masked-off tail is in the same word as the
668 * rest of the string. Every machine with memory protection I've seen
669 * does it on word boundaries, so is OK with this. But VALGRIND will
670 * still catch it and complain. The masking trick does make the hash
671 * noticeably faster for short strings (like English words).
672 */
673#ifndef VALGRIND
674
675 switch(length)
676 {
677 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
678 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
679 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
680 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
681 case 8 : b+=k[1]; a+=k[0]; break;
682 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
683 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
684 case 5 : b+=k[1]&0xff; a+=k[0]; break;
685 case 4 : a+=k[0]; break;
686 case 3 : a+=k[0]&0xffffff; break;
687 case 2 : a+=k[0]&0xffff; break;
688 case 1 : a+=k[0]&0xff; break;
689 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
690 }
691
692#else /* make valgrind happy */
693
694 const uint8_t *k8 = (const uint8_t *)k;
695 switch(length)
696 {
697 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
698 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
699 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
700 case 9 : c+=k8[8]; /* fall through */
701 case 8 : b+=k[1]; a+=k[0]; break;
702 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
703 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
704 case 5 : b+=k8[4]; /* fall through */
705 case 4 : a+=k[0]; break;
706 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
707 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
708 case 1 : a+=k8[0]; break;
709 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
710 }
711
712#endif /* !valgrind */
713
714 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
715 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
716 const uint8_t *k8;
717
718 /*--------------- all but last block: aligned reads and different mixing */
719 while (length > 12)
720 {
721 a += k[0] + (((uint32_t)k[1])<<16);
722 b += k[2] + (((uint32_t)k[3])<<16);
723 c += k[4] + (((uint32_t)k[5])<<16);
724 mix(a,b,c);
725 length -= 12;
726 k += 6;
727 }
728
729 /*----------------------------- handle the last (probably partial) block */
730 k8 = (const uint8_t *)k;
731 switch(length)
732 {
733 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
734 b+=k[2]+(((uint32_t)k[3])<<16);
735 a+=k[0]+(((uint32_t)k[1])<<16);
736 break;
737 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
738 case 10: c+=k[4];
739 b+=k[2]+(((uint32_t)k[3])<<16);
740 a+=k[0]+(((uint32_t)k[1])<<16);
741 break;
742 case 9 : c+=k8[8]; /* fall through */
743 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
744 a+=k[0]+(((uint32_t)k[1])<<16);
745 break;
746 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
747 case 6 : b+=k[2];
748 a+=k[0]+(((uint32_t)k[1])<<16);
749 break;
750 case 5 : b+=k8[4]; /* fall through */
751 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
752 break;
753 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
754 case 2 : a+=k[0];
755 break;
756 case 1 : a+=k8[0];
757 break;
758 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
759 }
760
761 } else { /* need to read the key one byte at a time */
762 const uint8_t *k = (const uint8_t *)key;
763
764 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
765 while (length > 12)
766 {
767 a += k[0];
768 a += ((uint32_t)k[1])<<8;
769 a += ((uint32_t)k[2])<<16;
770 a += ((uint32_t)k[3])<<24;
771 b += k[4];
772 b += ((uint32_t)k[5])<<8;
773 b += ((uint32_t)k[6])<<16;
774 b += ((uint32_t)k[7])<<24;
775 c += k[8];
776 c += ((uint32_t)k[9])<<8;
777 c += ((uint32_t)k[10])<<16;
778 c += ((uint32_t)k[11])<<24;
779 mix(a,b,c);
780 length -= 12;
781 k += 12;
782 }
783
784 /*-------------------------------- last block: affect all 32 bits of (c) */
785 switch(length) /* all the case statements fall through */
786 {
787 case 12: c+=((uint32_t)k[11])<<24; /* fall through */
788 case 11: c+=((uint32_t)k[10])<<16; /* fall through */
789 case 10: c+=((uint32_t)k[9])<<8; /* fall through */
790 case 9 : c+=k[8]; /* fall through */
791 case 8 : b+=((uint32_t)k[7])<<24; /* fall through */
792 case 7 : b+=((uint32_t)k[6])<<16; /* fall through */
793 case 6 : b+=((uint32_t)k[5])<<8; /* fall through */
794 case 5 : b+=k[4]; /* fall through */
795 case 4 : a+=((uint32_t)k[3])<<24; /* fall through */
796 case 3 : a+=((uint32_t)k[2])<<16; /* fall through */
797 case 2 : a+=((uint32_t)k[1])<<8; /* fall through */
798 case 1 : a+=k[0];
799 break;
800 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
801 }
802 }
803
804 final(a,b,c);
805 *pc=c; *pb=b;
806}
807
808/*
809 * hashlittle2: return 2 32-bit hash values
810 *
811 * This is identical to hashlittle(), except it returns two 32-bit hash
812 * values instead of just one. This is good enough for hash table
813 * lookup with 2^^64 buckets, or if you want a second hash if you're not
814 * happy with the first, or if you want a probably-unique 64-bit ID for
815 * the key. *pc is better mixed than *pb, so use *pc first. If you want
816 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
817 */
818void hashlittle2_safe(const void *key, /* the key to hash */
819 size_t length, /* length of the key */
820 uint32_t *pc, /* IN: primary initval, OUT: primary hash */
821 uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
822{
823 uint32_t a, b, c; /* internal state */
824 union {
825 const void *ptr;
826 size_t i;
827 } u; /* needed for Mac Powerbook G4 */
828
829 /* Set up the internal state */
830 a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
831 c += *pb;
832
833 u.ptr = key;
834 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
835 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
836
837 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
838 while (length > 12) {
839 a += k[0];
840 b += k[1];
841 c += k[2];
842 mix(a, b, c);
843 length -= 12;
844 k += 3;
845 }
846
847 /*----------------------------- handle the last (probably partial) block */
848 /*
849 * Note that unlike hashlittle() above, we use the "safe" version of this
850 * block that is #ifdef VALGRIND above, in order to avoid warnings from
851 * Valgrind or Address Sanitizer.
852 */
853 const uint8_t *k8 = (const uint8_t *)k;
854 switch (length) {
855 case 12:
856 c += k[2];
857 b += k[1];
858 a += k[0];
859 break;
860 case 11:
861 c += ((uint32_t)k8[10]) << 16; /* fall through */
862 case 10:
863 c += ((uint32_t)k8[9]) << 8; /* fall through */
864 case 9:
865 c += k8[8]; /* fall through */
866 case 8:
867 b += k[1];
868 a += k[0];
869 break;
870 case 7:
871 b += ((uint32_t)k8[6]) << 16; /* fall through */
872 case 6:
873 b += ((uint32_t)k8[5]) << 8; /* fall through */
874 case 5:
875 b += k8[4]; /* fall through */
876 case 4:
877 a += k[0];
878 break;
879 case 3:
880 a += ((uint32_t)k8[2]) << 16; /* fall through */
881 case 2:
882 a += ((uint32_t)k8[1]) << 8; /* fall through */
883 case 1:
884 a += k8[0];
885 break;
886 case 0:
887 *pc = c;
888 *pb = b;
889 return; /* zero length strings require no mixing */
890 }
891
892 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
893 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
894 const uint8_t *k8;
895
896 /*--------------- all but last block: aligned reads and different mixing */
897 while (length > 12) {
898 a += k[0] + (((uint32_t)k[1]) << 16);
899 b += k[2] + (((uint32_t)k[3]) << 16);
900 c += k[4] + (((uint32_t)k[5]) << 16);
901 mix(a, b, c);
902 length -= 12;
903 k += 6;
904 }
905
906 /*----------------------------- handle the last (probably partial) block */
907 k8 = (const uint8_t *)k;
908 switch (length) {
909 case 12:
910 c += k[4] + (((uint32_t)k[5]) << 16);
911 b += k[2] + (((uint32_t)k[3]) << 16);
912 a += k[0] + (((uint32_t)k[1]) << 16);
913 break;
914 case 11:
915 c += ((uint32_t)k8[10]) << 16; /* fall through */
916 case 10:
917 c += k[4];
918 b += k[2] + (((uint32_t)k[3]) << 16);
919 a += k[0] + (((uint32_t)k[1]) << 16);
920 break;
921 case 9:
922 c += k8[8]; /* fall through */
923 case 8:
924 b += k[2] + (((uint32_t)k[3]) << 16);
925 a += k[0] + (((uint32_t)k[1]) << 16);
926 break;
927 case 7:
928 b += ((uint32_t)k8[6]) << 16; /* fall through */
929 case 6:
930 b += k[2];
931 a += k[0] + (((uint32_t)k[1]) << 16);
932 break;
933 case 5:
934 b += k8[4]; /* fall through */
935 case 4:
936 a += k[0] + (((uint32_t)k[1]) << 16);
937 break;
938 case 3:
939 a += ((uint32_t)k8[2]) << 16; /* fall through */
940 case 2:
941 a += k[0];
942 break;
943 case 1:
944 a += k8[0];
945 break;
946 case 0:
947 *pc = c;
948 *pb = b;
949 return; /* zero length strings require no mixing */
950 }
951
952 } else { /* need to read the key one byte at a time */
953 const uint8_t *k = (const uint8_t *)key;
954
955 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
956 while (length > 12) {
957 a += k[0];
958 a += ((uint32_t)k[1]) << 8;
959 a += ((uint32_t)k[2]) << 16;
960 a += ((uint32_t)k[3]) << 24;
961 b += k[4];
962 b += ((uint32_t)k[5]) << 8;
963 b += ((uint32_t)k[6]) << 16;
964 b += ((uint32_t)k[7]) << 24;
965 c += k[8];
966 c += ((uint32_t)k[9]) << 8;
967 c += ((uint32_t)k[10]) << 16;
968 c += ((uint32_t)k[11]) << 24;
969 mix(a, b, c);
970 length -= 12;
971 k += 12;
972 }
973
974 /*-------------------------------- last block: affect all 32 bits of (c) */
975 switch (length) /* all the case statements fall through */
976 {
977 case 12:
978 c += ((uint32_t)k[11]) << 24; /* fall through */
979 case 11:
980 c += ((uint32_t)k[10]) << 16; /* fall through */
981 case 10:
982 c += ((uint32_t)k[9]) << 8; /* fall through */
983 case 9:
984 c += k[8]; /* fall through */
985 case 8:
986 b += ((uint32_t)k[7]) << 24; /* fall through */
987 case 7:
988 b += ((uint32_t)k[6]) << 16; /* fall through */
989 case 6:
990 b += ((uint32_t)k[5]) << 8; /* fall through */
991 case 5:
992 b += k[4]; /* fall through */
993 case 4:
994 a += ((uint32_t)k[3]) << 24; /* fall through */
995 case 3:
996 a += ((uint32_t)k[2]) << 16; /* fall through */
997 case 2:
998 a += ((uint32_t)k[1]) << 8; /* fall through */
999 case 1:
1000 a += k[0];
1001 break;
1002 case 0:
1003 *pc = c;
1004 *pb = b;
1005 return; /* zero length strings require no mixing */
1006 }
1007 }
1008
1009 final(a, b, c);
1010 *pc = c;
1011 *pb = b;
1012}
1013
1014/*
1015 * hashbig():
1016 * This is the same as hashword() on big-endian machines. It is different
1017 * from hashlittle() on all machines. hashbig() takes advantage of
1018 * big-endian byte ordering.
1019 */
1020uint32_t hashbig( const void *key, size_t length, uint32_t initval)
1021{
1022 uint32_t a,b,c;
1023 union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
1024
1025 /* Set up the internal state */
1026 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
1027
1028 u.ptr = key;
1029 if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
1030 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
1031
1032 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
1033 while (length > 12)
1034 {
1035 a += k[0];
1036 b += k[1];
1037 c += k[2];
1038 mix(a,b,c);
1039 length -= 12;
1040 k += 3;
1041 }
1042
1043 /*----------------------------- handle the last (probably partial) block */
1044 /*
1045 * "k[2]<<8" actually reads beyond the end of the string, but
1046 * then shifts out the part it's not allowed to read. Because the
1047 * string is aligned, the illegal read is in the same word as the
1048 * rest of the string. Every machine with memory protection I've seen
1049 * does it on word boundaries, so is OK with this. But VALGRIND will
1050 * still catch it and complain. The masking trick does make the hash
1051 * noticeably faster for short strings (like English words).
1052 */
1053#ifndef VALGRIND
1054
1055 switch(length)
1056 {
1057 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
1058 case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
1059 case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
1060 case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
1061 case 8 : b+=k[1]; a+=k[0]; break;
1062 case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
1063 case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
1064 case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
1065 case 4 : a+=k[0]; break;
1066 case 3 : a+=k[0]&0xffffff00; break;
1067 case 2 : a+=k[0]&0xffff0000; break;
1068 case 1 : a+=k[0]&0xff000000; break;
1069 case 0 : return c; /* zero length strings require no mixing */
1070 }
1071
1072#else /* make valgrind happy */
1073
1074 const uint8_t *k8 = (const uint8_t *)k;
1075 switch(length) /* all the case statements fall through */
1076 {
1077 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
1078 case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
1079 case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
1080 case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
1081 case 8 : b+=k[1]; a+=k[0]; break;
1082 case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
1083 case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
1084 case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
1085 case 4 : a+=k[0]; break;
1086 case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
1087 case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
1088 case 1 : a+=((uint32_t)k8[0])<<24; break;
1089 case 0 : return c;
1090 }
1091
1092#endif /* !VALGRIND */
1093
1094 } else { /* need to read the key one byte at a time */
1095 const uint8_t *k = (const uint8_t *)key;
1096
1097 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
1098 while (length > 12)
1099 {
1100 a += ((uint32_t)k[0])<<24;
1101 a += ((uint32_t)k[1])<<16;
1102 a += ((uint32_t)k[2])<<8;
1103 a += ((uint32_t)k[3]);
1104 b += ((uint32_t)k[4])<<24;
1105 b += ((uint32_t)k[5])<<16;
1106 b += ((uint32_t)k[6])<<8;
1107 b += ((uint32_t)k[7]);
1108 c += ((uint32_t)k[8])<<24;
1109 c += ((uint32_t)k[9])<<16;
1110 c += ((uint32_t)k[10])<<8;
1111 c += ((uint32_t)k[11]);
1112 mix(a,b,c);
1113 length -= 12;
1114 k += 12;
1115 }
1116
1117 /*-------------------------------- last block: affect all 32 bits of (c) */
1118 switch(length) /* all the case statements fall through */
1119 {
1120 case 12: c+=k[11]; /* fall through */
1121 case 11: c+=((uint32_t)k[10])<<8; /* fall through */
1122 case 10: c+=((uint32_t)k[9])<<16; /* fall through */
1123 case 9 : c+=((uint32_t)k[8])<<24; /* fall through */
1124 case 8 : b+=k[7]; /* fall through */
1125 case 7 : b+=((uint32_t)k[6])<<8; /* fall through */
1126 case 6 : b+=((uint32_t)k[5])<<16; /* fall through */
1127 case 5 : b+=((uint32_t)k[4])<<24; /* fall through */
1128 case 4 : a+=k[3]; /* fall through */
1129 case 3 : a+=((uint32_t)k[2])<<8; /* fall through */
1130 case 2 : a+=((uint32_t)k[1])<<16; /* fall through */
1131 case 1 : a+=((uint32_t)k[0])<<24;
1132 break;
1133 case 0 : return c;
1134 }
1135 }
1136
1137 final(a,b,c);
1138 return c;
1139}
1140
1141
1142#ifdef SELF_TEST
1143
1144/* used for timings */
1145void driver1(void)
1146{
1147 uint8_t buf[256];
1148 uint32_t i;
1149 uint32_t h=0;
1150 time_t a,z;
1151
1152 time(&a);
1153 for (i=0; i<256; ++i) buf[i] = 'x';
1154 for (i=0; i<1; ++i)
1155 {
1156 h = hashlittle(&buf[0],1,h);
1157 }
1158 time(&z);
1159 if (z-a > 0) printf("time %d %.8x\n", z-a, h);
1160}
1161
1162/* check that every input bit changes every output bit half the time */
1163#define HASHSTATE 1
1164#define HASHLEN 1
1165#define MAXPAIR 60
1166#define MAXLEN 70
1167void driver2(void)
1168{
1169 uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
1170 uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
1171 uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
1172 uint32_t x[HASHSTATE],y[HASHSTATE];
1173 uint32_t hlen;
1174
1175 printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
1176 for (hlen=0; hlen < MAXLEN; ++hlen)
1177 {
1178 z=0;
1179 for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */
1180 {
1181 for (j=0; j<8; ++j) /*------------------------ for each input bit, */
1182 {
1183 for (m = 1; m < 8; ++m) /*------------ for several possible initvals, */
1184 {
1185 for (l = 0; l < HASHSTATE; ++l)
1186 e[l] = f[l] = g[l] = h[l] = x[l] = y[l] = ~((uint32_t)0);
1187
1188 /*---- check that every output bit is affected by that input bit */
1189 for (k = 0; k < MAXPAIR; k += 2) {
1190 uint32_t finished = 1;
1191 /* keys have one bit different */
1192 for (l = 0; l < hlen + 1; ++l) {
1193 a[l] = b[l] = (uint8_t)0;
1194 }
1195 /* have a and b be two keys differing in only one bit */
1196 a[i] ^= (k << j);
1197 a[i] ^= (k >> (8 - j));
1198 c[0] = hashlittle(a, hlen, m);
1199 b[i] ^= ((k + 1) << j);
1200 b[i] ^= ((k + 1) >> (8 - j));
1201 d[0] = hashlittle(b, hlen, m);
1202 /* check every bit is 1, 0, set, and not set at least once */
1203 for (l = 0; l < HASHSTATE; ++l) {
1204 e[l] &= (c[l] ^ d[l]);
1205 f[l] &= ~(c[l] ^ d[l]);
1206 g[l] &= c[l];
1207 h[l] &= ~c[l];
1208 x[l] &= d[l];
1209 y[l] &= ~d[l];
1210 if (e[l] | f[l] | g[l] | h[l] | x[l] | y[l])
1211 finished = 0;
1212 }
1213 if (finished)
1214 break;
1215 }
1216 if (k > z)
1217 z = k;
1218 if (k == MAXPAIR) {
1219 printf("Some bit didn't change: ");
1220 printf("%.8x %.8x %.8x %.8x %.8x %.8x ", e[0], f[0], g[0], h[0], x[0], y[0]);
1221 printf("i %d j %d m %d len %d\n", i, j, m, hlen);
1222 }
1223 if (z == MAXPAIR)
1224 goto done;
1225 }
1226 }
1227 }
1228 done:
1229 if (z < MAXPAIR)
1230 {
1231 printf("Mix success %2d bytes %2d initvals ",i,m);
1232 printf("required %d trials\n", z/2);
1233 }
1234 }
1235 printf("\n");
1236}
1237
1238/* Check for reading beyond the end of the buffer and alignment problems */
1239void driver3(void)
1240{
1241 uint8_t buf[MAXLEN+20], *b;
1242 uint32_t len;
1243 uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
1244 uint32_t h;
1245 uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
1246 uint32_t i;
1247 uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
1248 uint32_t j;
1249 uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
1250 uint32_t ref,x,y;
1251 uint8_t *p;
1252
1253 printf("Endianness. These lines should all be the same (for values filled in):\n");
1254 printf("%.8x %.8x %.8x\n",
1255 hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
1256 hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
1257 hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
1258 p = q;
1259 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
1260 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
1261 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
1262 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
1263 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
1264 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
1265 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
1266 p = &qq[1];
1267 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
1268 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
1269 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
1270 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
1271 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
1272 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
1273 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
1274 p = &qqq[2];
1275 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
1276 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
1277 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
1278 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
1279 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
1280 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
1281 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
1282 p = &qqqq[3];
1283 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
1284 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
1285 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
1286 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
1287 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
1288 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
1289 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
1290 printf("\n");
1291
1292 /* check that hashlittle2 and hashlittle produce the same results */
1293 i=47; j=0;
1294 hashlittle2(q, sizeof(q), &i, &j);
1295 if (hashlittle(q, sizeof(q), 47) != i)
1296 printf("hashlittle2 and hashlittle mismatch\n");
1297
1298 /* check that hashword2 and hashword produce the same results */
1299 len = 0xdeadbeef;
1300 i=47, j=0;
1301 hashword2(&len, 1, &i, &j);
1302 if (hashword(&len, 1, 47) != i)
1303 printf("hashword2 and hashword mismatch %x %x\n",
1304 i, hashword(&len, 1, 47));
1305
1306 /* check hashlittle doesn't read before or after the ends of the string */
1307 for (h=0, b=buf+1; h<8; ++h, ++b)
1308 {
1309 for (i=0; i<MAXLEN; ++i)
1310 {
1311 len = i;
1312 for (j=0; j<i; ++j) *(b+j)=0;
1313
1314 /* these should all be equal */
1315 ref = hashlittle(b, len, (uint32_t)1);
1316 *(b+i)=(uint8_t)~0;
1317 *(b-1)=(uint8_t)~0;
1318 x = hashlittle(b, len, (uint32_t)1);
1319 y = hashlittle(b, len, (uint32_t)1);
1320 if ((ref != x) || (ref != y))
1321 {
1322 printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
1323 h, i);
1324 }
1325 }
1326 }
1327}
1328
1329/* check for problems with nulls */
1330void driver4(void)
1331{
1332 uint8_t buf[1];
1333 uint32_t h,i,state[HASHSTATE];
1334
1335
1336 buf[0] = ~0;
1337 for (i=0; i<HASHSTATE; ++i) state[i] = 1;
1338 printf("These should all be different\n");
1339 for (i=0, h=0; i<8; ++i)
1340 {
1341 h = hashlittle(buf, 0, h);
1342 printf("%2ld 0-byte strings, hash is %.8x\n", i, h);
1343 }
1344}
1345
1346void driver5(void)
1347{
1348 uint32_t b,c;
1349 b=0, c=0, hashlittle2("", 0, &c, &b);
1350 printf("hash is %.8lx %.8lx\n", c, b); /* deadbeef deadbeef */
1351 b=0xdeadbeef, c=0, hashlittle2("", 0, &c, &b);
1352 printf("hash is %.8lx %.8lx\n", c, b); /* bd5b7dde deadbeef */
1353 b=0xdeadbeef, c=0xdeadbeef, hashlittle2("", 0, &c, &b);
1354 printf("hash is %.8lx %.8lx\n", c, b); /* 9c093ccd bd5b7dde */
1355 b=0, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
1356 printf("hash is %.8lx %.8lx\n", c, b); /* 17770551 ce7226e6 */
1357 b=1, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
1358 printf("hash is %.8lx %.8lx\n", c, b); /* e3607cae bd371de4 */
1359 b=0, c=1, hashlittle2("Four score and seven years ago", 30, &c, &b);
1360 printf("hash is %.8lx %.8lx\n", c, b); /* cd628161 6cbea4b3 */
1361 c = hashlittle("Four score and seven years ago", 30, 0);
1362 printf("hash is %.8lx\n", c); /* 17770551 */
1363 c = hashlittle("Four score and seven years ago", 30, 1);
1364 printf("hash is %.8lx\n", c); /* cd628161 */
1365}
1366
1367int main(void)
1368{
1369 driver1(); /* test that the key is hashed: used for timings */
1370 driver2(); /* test that whole key is hashed thoroughly */
1371 driver3(); /* test that nothing but the key is hashed */
1372 driver4(); /* test hashing multiple buffers (all buffers are null) */
1373 driver5(); /* test the hash against known vectors */
1374 return 1;
1375}
1376
1377#endif /* SELF_TEST */
uint8_t len
SCMutex m
Definition flow-hash.h:6
int main(int argc, char **argv)
Definition main.c:20
uint32_t hashbig(const void *key, size_t length, uint32_t initval)
uint32_t hashlittle_safe(const void *key, size_t length, uint32_t initval)
#define HASH_BIG_ENDIAN
void hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *pb)
uint32_t hashword(const uint32_t *k, size_t length, uint32_t initval)
#define HASH_LITTLE_ENDIAN
uint32_t hashlittle(const void *key, size_t length, uint32_t initval)
void hashlittle2_safe(const void *key, size_t length, uint32_t *pc, uint32_t *pb)
void hashword2(const uint32_t *k, size_t length, uint32_t *pc, uint32_t *pb)
#define mix(a, b, c)