cityhash–对字符串的哈希算法

cityhash–对字符串的哈希算法

分享一个给字符串计算hash的开源库,谷歌出品。

源代码在:https://github.com/google/cityhash

可以自己下载:git clone https://github.com/google/cityhash.git

但是代码是C++的,编译出来的动态库不能在C语言中调用,需要稍做修改后,才能编译出来C语言的动态链接库

可以按照下边 1.1节 的方式修改,也可以直接下载修改完的代码包。

下载后使用 tar -zxvf cityhash.tar.gz 解压,然后从 1.2节 开始操作即可。

编译安装

1.1 修改,使支持C语言调用

只需要修改 src/city.h 文件即可。找到需要在C语言中调用的函数,在其前后增加 extern "C" { ... }。修改后的文件如下:

#ifndef CITY_HASH_H_
#define CITY_HASH_H_

#include <stdlib.h>  // for size_t.
#include <stdint.h>
#include <utility>

// Short aliases for the fixed-width unsigned integer types.
typedef uint8_t uint8;
typedef uint32_t uint32;
typedef uint64_t uint64;

// A 128-bit value stored as a pair of 64-bit halves: `first` is the low half
// and `second` is the high half (see the accessors below).
typedef std::pair<uint64, uint64> uint128;

// Returns the low 64 bits of `x` (the pair's `first` member).
inline uint64 Uint128Low64(const uint128& x) {
  return x.first;
}

// Returns the high 64 bits of `x` (the pair's `second` member).
inline uint64 Uint128High64(const uint128& x) {
  return x.second;
}

// Everything between the two __cplusplus guards is declared with C language
// linkage, so the exported symbol names are not C++-mangled.
//
// NOTE(review): uint128 is a typedef of std::pair<uint64, uint64> in this
// header, i.e. a C++ class type.  CityHash128 and CityHash128WithSeed pass it
// by value across the C-linkage boundary, so a C caller has to declare a
// layout-compatible struct on its own side -- confirm that the calling
// convention really matches on every target platform/compiler.
#ifdef __cplusplus
extern "C" {
#endif

// Hash function for a byte array: returns a 64-bit hash of the `len` bytes
// starting at `buf`.
uint64 CityHash64(const char *buf, size_t len);

// Hash function for a byte array.  For convenience, a 64-bit seed is also
// hashed into the result.
uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);

// Hash function for a byte array.  For convenience, two seeds are also
// hashed into the result.
uint64 CityHash64WithSeeds(const char *buf, size_t len,
                           uint64 seed0, uint64 seed1);

// Hash function for a byte array: returns a 128-bit hash of the `len` bytes
// starting at `s`.
uint128 CityHash128(const char *s, size_t len);

// Hash function for a byte array.  For convenience, a 128-bit seed is also
// hashed into the result.
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);

// Hash function for a byte array.  Most useful in 32-bit binaries.
uint32 CityHash32(const char *buf, size_t len);

#ifdef __cplusplus
}    // extern "C"
#endif

// Reduces a 128-bit value to a 64-bit hash.
// Intended to be a reasonably good hash function; the mixing steps are
// Murmur-inspired (multiply by a fixed odd constant, then fold the top bits
// back in with a 47-bit right shift).
inline uint64 Hash128to64(const uint128& x) {
  const uint64 kMul = 0x9ddfea08eb382d69ULL;
  const uint64 low = Uint128Low64(x);
  const uint64 high = Uint128High64(x);

  // First round: combine both halves.
  uint64 mixed = (low ^ high) * kMul;
  mixed ^= mixed >> 47;

  // Second round: fold the high half in again, then finish with one more
  // multiplication.
  uint64 result = (high ^ mixed) * kMul;
  result ^= result >> 47;
  return result * kMul;
}

#endif  // CITY_HASH_H_

1.2 编译和安装

查看CPU是否支持 sse4_2 指令,命令:grep -c sse4_2 /proc/cpuinfo (Linux 下输出大于 0 表示支持)

生成 Makefile 文件,命令:./configure --enable-sse4.2

如果cpu不支持 sse4_2 指令,可以不加参数 --enable-sse4.2

编译,命令:make all check CXXFLAGS="-g -O3 -msse4.2"

如果cpu不支持 sse4_2 指令,可以不加参数 -msse4.2

安装,命令:make install。动态库会安装在 /usr/local/lib/ 目录,头文件会安装在 /usr/local/include/ 目录。

1.3 增加C语言可引入的头文件

在 /usr/local/include/ 目录下创建 cityhash.h 文件。内容如下:

// C-callable declarations for the CityHash library.
//
// The guard macro deliberately avoids a leading double underscore: such
// identifiers are reserved for the implementation.
#ifndef CITY_HASH_C_H_
#define CITY_HASH_C_H_


#include <stddef.h>  // for size_t, so this header compiles on its own.
#include <stdint.h>

// Keep C linkage if this header is ever pulled into a C++ translation unit;
// otherwise the declarations below would refer to C++-mangled names.
#ifdef __cplusplus
extern "C" {
#endif

typedef uint8_t uint8;
typedef uint32_t uint32;
typedef uint64_t uint64;

// 128-bit value as two 64-bit words.
// NOTE(review): the library side represents this value as a pair of 64-bit
// integers (low half first, high half second), so val[0] is presumably the low
// half and val[1] the high half -- confirm the layout and by-value calling
// convention match on the target platform.
typedef struct { uint64_t val[2] ; } uint128;

// Hash function for a byte array: returns a 64-bit hash of the `len` bytes
// starting at `buf`.
uint64 CityHash64(const char *buf, size_t len);

// Hash function for a byte array.  For convenience, a 64-bit seed is also
// hashed into the result.
uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);

// Hash function for a byte array.  For convenience, two seeds are also
// hashed into the result.
uint64 CityHash64WithSeeds(const char *buf, size_t len,
                           uint64 seed0, uint64 seed1);

// Hash function for a byte array: returns a 128-bit hash of the `len` bytes
// starting at `s`.
uint128 CityHash128(const char *s, size_t len);

// Hash function for a byte array.  For convenience, a 128-bit seed is also
// hashed into the result.
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);

// Hash function for a byte array.  Most useful in 32-bit binaries.
uint32 CityHash32(const char *buf, size_t len);

#ifdef __cplusplus
}    // extern "C"
#endif


#endif  // CITY_HASH_C_H_

测试

2.1 创建测试文件 testch.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "cityhash.h"

/* Defined further down: hashes one string with every CityHash variant. */
int test(char * data) ;


/*
 * Entry point: runs test() on each sample string in order.
 * The command-line arguments are not used.  Always returns 0.
 */
int main(int argc, char * argv[]){
        /* The second sample is the first one with "3333" appended. */
        char * samples[] = {
                "aaaaaaaaaaaaaaabbbbbbbbbbbbccccccccccccdddddddddddddeeeeeeeeeeeffffffffffgggggggghhhhhhh",
                "aaaaaaaaaaaaaaabbbbbbbbbbbbccccccccccccdddddddddddddeeeeeeeeeeeffffffffffgggggggghhhhhhh3333"
        } ;
        const size_t count = sizeof(samples) / sizeof(samples[0]) ;
        size_t idx ;

        for (idx = 0 ; idx < count ; ++idx) {
                test(samples[idx]) ;
        }

        return 0 ;
}


/*
 * Hashes the NUL-terminated string `data` with every CityHash function
 * declared in cityhash.h and prints each result to stdout, one line per call.
 *
 * The length passed to the hash functions comes from strlen(), so the
 * terminating NUL is not part of the hashed bytes.
 *
 * Always returns 0.
 */
int test(char * data) {
        /* Measure the string once instead of re-scanning it for every call. */
        const size_t len = strlen(data) ;

        /*
         * Every value is cast to the exact type its conversion specifier
         * expects.  Passing a uint64_t to "%lu" is only correct where
         * unsigned long is 64 bits wide; the casts keep the output identical
         * there and make it well-defined everywhere else.
         */
        uint32 key32 = CityHash32(data, len) ;
        printf("key32 [%lu]\n", (unsigned long) key32) ;

        uint64 key64 = CityHash64(data, len) ;
        printf("key64 [%llu]\n", (unsigned long long) key64) ;

        key64 = CityHash64WithSeed(data, len, 123123123 ) ;
        printf("key64 [%llu]\n", (unsigned long long) key64) ;

        key64 = CityHash64WithSeeds(data, len, 123123123, 321321321212121ULL) ;
        printf("key64 [%llu]\n", (unsigned long long) key64) ;

        uint128 key128 = CityHash128(data, len) ;
        printf("key128 [%llu] [%llu]\n",
               (unsigned long long) key128.val[0],
               (unsigned long long) key128.val[1]) ;

        /* Arbitrary 128-bit seed for the seeded variant. */
        uint128 seed ;
        seed.val[0] = 123123123123ULL ;
        seed.val[1] = 33344455556666ULL ;
        key128 = CityHash128WithSeed(data, len, seed) ;
        printf("key128 [%llu] [%llu]\n",
               (unsigned long long) key128.val[0],
               (unsigned long long) key128.val[1]) ;

        return 0 ;
}

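2.2 编译并运行测试程序

编译,命令:gcc -o testch testch.c -lcityhash (如果链接时提示缺少 C++ 标准库符号,可再加上 -lstdc++)

运行,命令:LD_LIBRARY_PATH=/usr/local/lib ./testch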

评论

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注