分享一个给字符串计算hash的开源库,谷歌出品。
源代码在:https://github.com/google/cityhash
可以自己下载:git clone https://github.com/google/cityhash.git
但是代码是C++的,编译出来的动态库中的函数符号经过了 C++ 名称修饰(name mangling),不能直接在C语言中调用,需要稍做修改后,才能编译出来C语言可以调用的动态链接库。
可以按照下边 1.1节 的方式修改,也可以直接下载修改完的代码包。
用 tar -zxvf cityhash.tar.gz 的方式解压,然后从 1.2节 开始操作就可以。
编译安装
1.1 修改,使支持C语言调用
只需要修改 src/city.h 文件即可。找到需要在C语言调用的函数,在其前后增加 extern "C" { ... }(注意必须使用英文半角双引号)。修改后的文件如下:
#ifndef CITY_HASH_H_
#define CITY_HASH_H_
#include <stdlib.h> // for size_t.
#include <stdint.h>
#include <utility>
// Short aliases for the fixed-width unsigned integer types from <stdint.h>.
typedef uint8_t uint8;
typedef uint32_t uint32;
typedef uint64_t uint64;

// A 128-bit value represented as a pair of 64-bit halves:
// `first` holds the low half and `second` holds the high half.
typedef std::pair<uint64, uint64> uint128;

// Returns the low 64 bits of x (the pair's `first` member).
inline uint64 Uint128Low64(const uint128& x) {
  const uint64 low_half = x.first;
  return low_half;
}

// Returns the high 64 bits of x (the pair's `second` member).
inline uint64 Uint128High64(const uint128& x) {
  const uint64 high_half = x.second;
  return high_half;
}
// The declarations inside this guard get C linkage when the header is compiled
// as C++, so the exported symbol names are not C++-mangled and can be resolved
// by a linker working on C object files.
// NOTE(review): uint128 is a typedef of std::pair<uint64, uint64>, which is a
// C++ class type. Some compilers warn when such a type is passed or returned
// across an extern "C" boundary, and a C caller has to provide its own
// layout-compatible struct declaration - confirm on the target toolchain.
#ifdef __cplusplus
extern "C" {
#endif
// 64-bit hash of a byte array.
// buf: start of the data to hash; len: presumably its length in bytes.
uint64 CityHash64(const char *buf, size_t len);
// 64-bit hash of a byte array. For convenience, a 64-bit seed is also
// hashed into the result.
uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
// 64-bit hash of a byte array. For convenience, two 64-bit seeds are also
// hashed into the result.
uint64 CityHash64WithSeeds(const char *buf, size_t len,
uint64 seed0, uint64 seed1);
// 128-bit hash of a byte array, returned by value as a uint128.
uint128 CityHash128(const char *s, size_t len);
// 128-bit hash of a byte array. For convenience, a 128-bit seed (passed by
// value) is also hashed into the result.
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
// 32-bit hash of a byte array. Most useful in 32-bit binaries.
uint32 CityHash32(const char *buf, size_t len);
#ifdef __cplusplus
} // extern "C"
#endif
// Reduces a 128-bit value to a 64-bit hash.
// Intended to be a reasonably good hash function; the mixing steps are
// Murmur-inspired (multiply by a fixed odd constant, then xor-shift by 47).
inline uint64 Hash128to64(const uint128& x) {
  const uint64 kMul = 0x9ddfea08eb382d69ULL;
  const uint64 low = Uint128Low64(x);
  const uint64 high = Uint128High64(x);

  // First round: combine both halves.
  uint64 mixed = (low ^ high) * kMul;
  mixed ^= mixed >> 47;

  // Second round: fold the high half back in, then finalize.
  uint64 result = (high ^ mixed) * kMul;
  result ^= result >> 47;
  return result * kMul;
}
#endif // CITY_HASH_H_
1.2 编译和安装
查看CPU是否支持 sse4_2 指令,命令:grep sse4_2 /proc/cpuinfo (有输出则表示支持)。

生成 Makefile 文件,命令:./configure --enable-sse4.2 。
如果cpu不支持 sse4_2 指令,可以不加参数 --enable-sse4.2 。

编译,命令:make all check CXXFLAGS="-g -O3 -msse4.2" 。
如果cpu不支持 sse4_2 指令,可以不加参数 -msse4.2 。

安装,命令:make install
动态库会安装在 /usr/local/lib/ 目录,头文件在 /usr/local/include/ 目录。

1.3 增加C语言可引入的头文件
在 /usr/local/include/ 目录下创建 cityhash.h 文件。内容如下:
/*
 * C-callable declarations for the CityHash functions that city.h exports
 * with C linkage.
 *
 * Do not include this header together with city.h in the same translation
 * unit: both define the names uint8/uint32/uint64/uint128, and the uint128
 * definitions differ.
 */
#ifndef CITY_HASH_C_H_
#define CITY_HASH_C_H_

#include <stddef.h> /* size_t - included here so the header is self-contained */
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

typedef uint8_t uint8;
typedef uint32_t uint32;
typedef uint64_t uint64;

/*
 * C stand-in for the library's 128-bit type (std::pair<uint64, uint64>).
 * NOTE(review): val[0] is expected to overlay pair.first (low 64 bits) and
 * val[1] pair.second (high 64 bits); this relies on the two types having the
 * same layout and calling convention - confirm on the target platform.
 */
typedef struct { uint64_t val[2] ; } uint128;

/* 64-bit hash of the len bytes starting at buf. */
uint64 CityHash64(const char *buf, size_t len);

/* 64-bit hash of a byte array. For convenience, a 64-bit seed is also
 * hashed into the result. */
uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);

/* 64-bit hash of a byte array. For convenience, two seeds are also
 * hashed into the result. */
uint64 CityHash64WithSeeds(const char *buf, size_t len,
                           uint64 seed0, uint64 seed1);

/* 128-bit hash of a byte array. */
uint128 CityHash128(const char *s, size_t len);

/* 128-bit hash of a byte array. For convenience, a 128-bit seed is also
 * hashed into the result. */
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);

/* 32-bit hash of a byte array. Most useful in 32-bit binaries. */
uint32 CityHash32(const char *buf, size_t len);

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* CITY_HASH_C_H_ */
测试
2.1 创建测试文件 testch.c
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "cityhash.h"
/* Defined below: hashes one string with every CityHash variant and prints
 * the results. */
int test(char * data) ;

/*
 * Entry point: runs test() on two sample strings, where the second is the
 * first with "3333" appended. Always returns 0.
 */
int main(int argc, char * argv[]){
    char * samples[] = {
        "aaaaaaaaaaaaaaabbbbbbbbbbbbccccccccccccdddddddddddddeeeeeeeeeeeffffffffffgggggggghhhhhhh",
        "aaaaaaaaaaaaaaabbbbbbbbbbbbccccccccccccdddddddddddddeeeeeeeeeeeffffffffffgggggggghhhhhhh3333"
    } ;
    size_t idx ;

    for (idx = 0 ; idx < sizeof(samples) / sizeof(samples[0]) ; ++idx) {
        test(samples[idx]) ;
    }
    return 0 ;
}
/*
 * Hashes the NUL-terminated string `data` with each CityHash variant declared
 * in cityhash.h and prints every result to stdout.
 *
 * data: NUL-terminated input; its strlen() is used as the byte count.
 * Returns 0 unconditionally.
 */
int test(char * data) {
    /* The length is the same for every call, so compute it once. */
    const size_t len = strlen(data) ;

    /* The PRIu32/PRIu64 macros from <inttypes.h> give the correct printf
     * conversion for uint32_t/uint64_t on every platform; plain %u/%lu is
     * only right where those types happen to be unsigned int/unsigned long. */
    uint32 key32 = CityHash32(data, len) ;
    printf("key32 [%" PRIu32 "]\n", key32) ;

    uint64 key64 = CityHash64(data, len) ;
    printf("key64 [%" PRIu64 "]\n", key64) ;

    key64 = CityHash64WithSeed(data, len, 123123123 ) ;
    printf("key64 [%" PRIu64 "]\n", key64) ;

    key64 = CityHash64WithSeeds(data, len, 123123123, 321321321212121) ;
    printf("key64 [%" PRIu64 "]\n", key64) ;

    uint128 key128 = CityHash128(data, len) ;
    printf("key128 [%" PRIu64 "] [%" PRIu64 "]\n", key128.val[0], key128.val[1]) ;

    uint128 seed ;
    seed.val[0] = 123123123123 ;
    seed.val[1] = 33344455556666 ;
    key128 = CityHash128WithSeed(data, len, seed) ;
    printf("key128 [%" PRIu64 "] [%" PRIu64 "]\n", key128.val[0], key128.val[1]) ;

    return 0 ;
}
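2.2 编译并运行测试程序
编译,命令:gcc testch.c -o testch -lcityhash
运行,命令:./testch
如果运行时提示找不到 libcityhash.so,可以先执行 export LD_LIBRARY_PATH=/usr/local/lib 再运行。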

发表回复