hyperscan是一个高性能的正则表达式匹配库,由 Intel 开发并开源,旨在提供高速、低延迟的模式匹配能力。
代码地址:https://github.com/intel/hyperscan
使用手册在:http://intel.github.io/hyperscan/dev-reference/index.html
官方说明文档:https://www.intel.com/content/www/us/en/developer/articles/technical/introduction-to-hyperscan.html
hyperscan编译:
hyperscan依赖以下几个库和工具,编译前需要确保它们都已安装:CMake、Ragel、Python、Boost(只需要头文件);具体的版本要求请参考上面的使用手册。

第一步:下载最新版本代码
git clone --branch v5.4.2 https://github.com/intel/hyperscan.git
第二步:构造项目
cd hyperscan && mkdir build && cd build && cmake -DBUILD_SHARED_LIBS=ON ..

第三步:编译安装
make -j 6 && make install

测试代码ths.c:
// gcc -Wall -o test ths.c -lhs
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <hs/hs.h>
/* Upper bound on the number of patterns one exptns_t can hold. */
#define PATTERN_MAX 1024

/*
 * A set of patterns stored as three parallel arrays: entry i of each array
 * describes the same pattern. Only the first `num` entries are in use.
 */
typedef struct _exptns {
    char        *exppttns[PATTERN_MAX];  /* expression text of each pattern */
    unsigned int expflgs[PATTERN_MAX];   /* flags of each pattern */
    unsigned int expids[PATTERN_MAX];    /* ID of each pattern */
    int          num;                    /* number of entries currently in use */
} exptns_t;
// Append one pattern to a pattern set.
//
//   exptns  - pattern set to extend.
//   pattern - NUL-terminated expression text; it is copied into a fresh heap
//             buffer, so the caller keeps ownership of its own string.
//   expflg  - flags to store alongside the expression.
//   expid   - ID to store alongside the expression.
//
// Returns 0 on success. Returns -1 when an argument is NULL, the pattern is
// empty, the set already holds PATTERN_MAX entries, or the copy cannot be
// allocated. On failure the set is left unchanged.
int add_pattern(exptns_t *exptns, char * pattern, unsigned int expflg, unsigned int expid) {
    if (exptns == NULL || pattern == NULL) {
        return -1;
    }

    int id = exptns->num;
    size_t len = strlen(pattern);  // measured once, reused below
    if (id < 0 || id >= PATTERN_MAX || len == 0) {
        return -1;
    }

    // 1. Store a private copy of the expression (len + 1 includes the NUL).
    char *copy = malloc(len + 1);
    if (copy == NULL) {
        return -1;
    }
    memcpy(copy, pattern, len + 1);
    exptns->exppttns[id] = copy;

    // 2. Flags for this expression.
    exptns->expflgs[id] = expflg;

    // 3. ID reported for this expression.
    exptns->expids[id] = expid;

    exptns->num++;
    return 0;
}
// Match callback handed to hs_scan.
//
//   id      - ID of the pattern that matched.
//   from/to - start and end offsets of the match.
//   flags   - unused here.
//   context - treated as the start of the scanned text; the matched slice
//             [from, to) is printed from it.
//
// Always returns 0.
int match_cb(unsigned int id, unsigned long long from, unsigned long long to, unsigned int flags, void *context) {
    const char *matched = (char *)context + from;
    int matched_len = (int)to - (int)from;

    printf("match id %u from %llu to %llu %.*s\n", id, from, to, matched_len, matched);
    return 0;
}
// Release the pattern set, the scratch space and the compiled database.
//
// Each argument is optional: pass NULL to skip that resource. For hsdb and
// hsscth the pointed-to handle is released only when it is non-NULL, and is
// then reset to NULL.
void free_hs_data(exptns_t *exptns, hs_database_t ** hsdb, hs_scratch_t ** hsscth){
    // 1. Free the copied expressions and clear their slots.
    if (exptns != NULL) {
        int idx = 0;
        while (idx < exptns->num) {
            char *owned = exptns->exppttns[idx];
            if (owned != NULL) {
                free(owned);
                exptns->exppttns[idx] = NULL;
                exptns->expflgs[idx] = 0;
                exptns->expids[idx] = 0;
            }
            idx++;
        }
        exptns->num = 0;
    }

    // 2. Free the scratch space.
    if (hsscth != NULL && *hsscth != NULL) {
        hs_free_scratch(*hsscth);
        *hsscth = NULL;
    }

    // 3. Free the compiled pattern database.
    if (hsdb != NULL && *hsdb != NULL) {
        hs_free_database(*hsdb);
        *hsdb = NULL;
    }
}
int main(int argc, char *argv[]) {
exptns_t exptns ;
memset(&exptns, 0x00, sizeof(exptns)) ;
// 0、添加 3 个表达式
add_pattern(&exptns, "abcd", HS_FLAG_DOTALL | HS_FLAG_CASELESS | HS_FLAG_SOM_LEFTMOST | HS_FLAG_MULTILINE, 101) ;
add_pattern(&exptns, "opq(rst)u", HS_FLAG_DOTALL | HS_FLAG_CASELESS | HS_FLAG_SOM_LEFTMOST | HS_FLAG_MULTILINE, 102) ;
add_pattern(&exptns, "hij", HS_FLAG_DOTALL | HS_FLAG_CASELESS | HS_FLAG_SOM_LEFTMOST | HS_FLAG_MULTILINE, 103) ;
hs_database_t * hsdb = NULL ; // 用来接收编译后的 hs 库地址
hs_compile_error_t * cmplerr = NULL ;
hs_error_t hs_ret ;
// 1、编译
hs_ret = hs_compile_multi((const char * const *)exptns.exppttns, (const unsigned int*)exptns.expflgs,
(const unsigned int *)exptns.expids, exptns.num,
HS_MODE_BLOCK, NULL, &hsdb, &cmplerr) ;
if( hs_ret != HS_SUCCESS ) {
if(cmplerr->expression < 0 ) {
printf("compile multi failed : %s\n", cmplerr->message) ;
} else {
printf("compile multi failed : %s, error id %d rule %s\n", cmplerr->message,
exptns.expids[cmplerr->expression], exptns.exppttns[cmplerr->expression]) ;
}
hs_free_compile_error(cmplerr) ;
free_hs_data(&exptns, NULL, NULL) ;
return -1 ;
}
hs_scratch_t * hsscth = NULL ;
// 2、获取匹配句柄
hs_ret = hs_alloc_scratch(hsdb, &hsscth) ;
if( hs_ret != HS_SUCCESS ) {
printf("get hs scratch failed !\n") ;
free_hs_data(&exptns, &hsdb, NULL) ;
return -1 ;
}
// 3、匹配
char * str = "xxabcdefghijkLMNOPQrstUVWzyxabCDEFghiijklm" ;
hs_ret = hs_scan(hsdb, str, strlen(str), 0, hsscth, &match_cb, str) ;
if( hs_ret != HS_SUCCESS ) {
printf("get hs scan failed !\n") ;
return -1 ;
}
printf("\n===============================\n") ;
// 测试2
hs_scratch_t * hsscth2 = NULL ;
hs_ret = hs_alloc_scratch(hsdb, &hsscth2) ;
if( hs_ret != HS_SUCCESS ) {
printf("get hs scratch failed !\n") ;
free_hs_data(&exptns, &hsdb, &hsscth) ;
return -1 ;
}
hs_ret = hs_scan(hsdb, str, strlen(str), 0, hsscth2, &match_cb, str) ;
if( hs_ret != HS_SUCCESS ) {
printf("get hs scan failed !\n") ;
return -1 ;
}
printf("\n--------------------------------------------\n") ;
// 测试3
hs_scratch_t * hsscth3 = NULL ;
hs_ret = hs_clone_scratch(hsscth2, &hsscth3) ;
if( hs_ret != HS_SUCCESS ) {
printf("clone hs scratch failed !\n") ;
free_hs_data(&exptns, &hsdb, &hsscth) ;
return -1 ;
}
free_hs_data(NULL, NULL, &hsscth2) ;
hs_ret = hs_scan(hsdb, str, strlen(str), 0, hsscth3, &match_cb, str) ;
if( hs_ret != HS_SUCCESS ) {
printf("get hs scan failed !\n") ;
return -1 ;
}
free_hs_data(NULL, NULL, &hsscth3) ;
// 4、空间释放
free_hs_data(&exptns, &hsdb, &hsscth) ;
return 0 ;
}
编译和测试:gcc -Wall -o test ths.c -lhs && ./test

发表回复