On Wed, May 31, 2006 at 11:41:27AM -0700, David Miller ([email protected]) wrote: > > > Worse: he folded the jenkins algorith result with > > > > > > h ^= h >> 16; > > > h ^= h >> 8; > > > > > > Destroying the coverage of the function. > > > > It was done to simulate socket code which uses the same folding. > > Leaving 32bit space is just wrong, consider hash table size with that > > index. > > You absolutely show not do this shifting on the jenkins hash > result, you destroy the distribution entirely. Just mask > it with the hash mask and that's all you need to do. > > Brian is right, this is absolutely critical to using the Jenkins hash > correctly. You're "unmixing" the bits it worked so hard to mix. That is wrong. And I have a code and picture to show that, and you dont - prove me wrong :) Attached code and picture which shows _exactly_ the same distribution for folded and not folded Jenkins hash distribution, and it's artifact compared to XOR hash. Fairly distributed values being XORed produce still fairly distributed value. -- Evgeniy Polyakov
Attachment:
hash_comparison.png
Description: PNG image
#include <stdlib.h> #include <stdio.h> #include <errno.h> typedef unsigned int u32; typedef unsigned short u16; typedef unsigned char u8; typedef unsigned int __u32; typedef unsigned short __u16; typedef unsigned char __u8; #include "jhash.h" struct hash_entry { unsigned long counter; }; static inline num2ip(__u8 a1, __u8 a2, __u8 a3, __u8 a4) { __u32 a = 0; a |= a1; a << 8; a |= a2; a << 8; a |= a3; a << 8; a |= a4; return a; } static inline __u8 get_random_byte(void) { return 1 + (int) (255.0 * (rand() / (RAND_MAX + 1.0))); } static inline __u16 get_random_word(void) { return 1 + (int) (65536.0 * (rand() / (RAND_MAX + 1.0))); } unsigned int hash_addr(__u32 faddr, __u16 fport, __u32 laddr, __u16 lport) { unsigned int h = (laddr ^ lport) ^ (faddr ^ fport); h ^= h >> 16; h ^= h >> 8; return h; } unsigned int hash_addr1(__u32 faddr, __u16 fport, __u32 laddr, __u16 lport) { u32 ports; unsigned int h; ports = lport; ports <<= 16; ports |= fport; h = jhash_2words(faddr, laddr, ports); //h ^= h >> 16; //h ^= h >> 8; return h; } int main() { struct hash_entry *table; __u32 saddr, daddr; __u16 sport, dport; unsigned int hash, i, *results; unsigned int hash_size = 65536, iter_num = 100; table = malloc(hash_size * sizeof(struct hash_entry)); if (!table) return -ENOMEM; results = malloc(hash_size * sizeof(unsigned int)); if (!results) return -ENOMEM; for (i=0; i<hash_size; ++i) { results[i] = 0; table[i].counter = 0; } srand(time(NULL)); daddr = num2ip(192, 168, 0, 1); dport = htons(80); for (i=0; i<hash_size*iter_num; ++i) { saddr = num2ip(get_random_byte(), get_random_byte(), get_random_byte(), get_random_byte()); sport = get_random_word(); hash = hash_addr(saddr, sport, daddr, dport); hash &= (hash_size - 1); table[hash].counter++; } for (i=0; i<hash_size; ++i) results[table[i].counter]++; for (i=0; i<hash_size; ++i) printf("%u %u\n", i, results[i]); //printf("%u %u\n", i, table[i].counter); }
- Follow-Ups:
- Re: Question about tcp hash function tcp_hashfn()
- From: Florian Weimer <[email protected]>
- Re: Question about tcp hash function tcp_hashfn()
- From: "Brian F. G. Bidulock" <[email protected]>
- Re: Question about tcp hash function tcp_hashfn()
- References:
- Re: Question about tcp hash function tcp_hashfn()
- From: Evgeniy Polyakov <[email protected]>
- Re: Question about tcp hash function tcp_hashfn()
- From: "Brian F. G. Bidulock" <[email protected]>
- Re: Question about tcp hash function tcp_hashfn()
- From: Evgeniy Polyakov <[email protected]>
- Re: Question about tcp hash function tcp_hashfn()
- From: David Miller <[email protected]>
- Re: Question about tcp hash function tcp_hashfn()
- Prev by Date: Re: Why must NFS access metadata in synchronous mode?
- Next by Date: Re: Question about tcp hash function tcp_hashfn()
- Previous by thread: Re: Question about tcp hash function tcp_hashfn()
- Next by thread: Re: Question about tcp hash function tcp_hashfn()
- Index(es):