suricata
util-spm-bm.c
Go to the documentation of this file.
1/* Copyright (C) 2007-2014 Open Information Security Foundation
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18/**
19 * \file
20 *
21 * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
22 *
23 * Boyer Moore simple pattern matcher implementation
24 *
25 * Boyer Moore algorithm has a really good performance. It need two arrays
26 * of context for each pattern that hold applicable shifts on the text
27 * to search in, based on characters not available in the pattern
28 * and combinations of characters that start a suffix of the pattern.
29 * If possible, we should store the context of patterns that we are going
30 * to search for multiple times, so we don't spend time on rebuilding them.
31 */
32
33#include "suricata-common.h"
34#include "suricata.h"
35
36#include "util-spm-bm.h"
37#include "util-spm.h"
38#include "util-debug.h"
39#include "util-error.h"
40#include "util-memcpy.h"
41#include "util-validate.h"
42
43static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs);
44static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc);
45static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc);
46static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
47 uint16_t *suff);
48static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs);
49
50/**
51 * \brief Given a BmCtx structure, recreate the pre/suffixes for
52 * nocase
53 *
54 * \retval BmCtx pointer to the already created BmCtx (with BoyerMooreCtxInit())
55 * \param str pointer to the pattern string
56 * \param size length of the string
57 */
58void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len)
59{
60 /* Store the content as lower case to make searching faster */
61 memcpy_tolower(needle, needle, needle_len);
62
63 /* Prepare bad chars with nocase chars */
64 PreBmBcNocase(needle, needle_len, bm_ctx->bmBc);
65
66 /* Prepare good Suffixes with nocase chars */
67 PreBmGsNocase(needle, needle_len, bm_ctx->bmGs);
68}
69
70/**
71 * \brief Setup a Boyer Moore context.
72 *
73 * \param str pointer to the pattern string
74 * \param size length of the string
75 * \retval BmCtx pointer to the newly created Context for the pattern
76 * \initonly BoyerMoore contexts should be created at init
77 */
78BmCtx *BoyerMooreCtxInit(const uint8_t *needle, uint16_t needle_len)
79{
80 BmCtx *new = SCMalloc(sizeof(BmCtx) + sizeof(uint16_t) * (needle_len + 1));
81 if (unlikely(new == NULL)) {
82 FatalError("Fatal error encountered in BoyerMooreCtxInit. Exiting...");
83 }
84
85 /* Prepare bad chars */
86 PreBmBc(needle, needle_len, new->bmBc);
87
88 /* Prepare good Suffixes */
89 if (PreBmGs(needle, needle_len, new->bmGs) == -1) {
90 FatalError("Fatal error encountered in BoyerMoreCtxInit. Exiting...");
91 }
92
93
94 return new;
95}
96
97/**
98 * \brief Setup a Boyer Moore context for nocase search
99 *
100 * \param str pointer to the pattern string
101 * \param size length of the string
102 * \retval BmCtx pointer to the newly created Context for the pattern
103 * \initonly BoyerMoore contexts should be created at init
104 */
105BmCtx *BoyerMooreNocaseCtxInit(uint8_t *needle, uint16_t needle_len)
106{
107 BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needle_len);
108
109 BoyerMooreCtxToNocase(bm_ctx, needle, needle_len);
110
111 return bm_ctx;
112}
113
114/**
115 * \brief Free the memory allocated to Boyer Moore context.
116 *
117 * \param bmCtx pointer to the Context for the pattern
118 */
120{
121 SCEnter();
122 if (bmctx == NULL)
123 SCReturn;
124
125 SCFree(bmctx);
126
127 SCReturn;
128}
129/**
130 * \brief Array setup function for bad characters that split the pattern
131 * Remember that the result array should be the length of ALPHABET_SIZE
132 *
133 * \param str pointer to the pattern string
134 * \param size length of the string
135 * \param result pointer to an empty array that will hold the badchars
136 */
137static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc)
138{
139 uint16_t i;
140
141 for (i = 0; i < 256; ++i) {
142 bmBc[i] = m;
143 }
144 for (i = 0; i < m - 1; ++i) {
145 bmBc[(unsigned char)x[i]] = m - i - 1;
146 }
147}
148
149/**
150 * \brief Array setup function for building prefixes (shift for valid prefixes) for boyermoore context
151 *
152 * \param x pointer to the pattern string
153 * \param m length of the string
154 * \param suff pointer to an empty array that will hold the prefixes (shifts)
155 */
156static void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff)
157{
158 int32_t f = 0, g, i;
159 suff[m - 1] = m;
160 g = m - 1;
161 for (i = m - 2; i >= 0; --i) {
162 if (i > g && suff[i + m - 1 - f] < i - g)
163 suff[i] = suff[i + m - 1 - f];
164 else {
165 if (i < g)
166 g = i;
167 f = i;
168 while (g >= 0 && x[g] == x[g + m - 1 - f])
169 --g;
170 DEBUG_VALIDATE_BUG_ON(f - g < 0 || f - g > UINT16_MAX);
171 suff[i] = (uint16_t)(f - g);
172 }
173 }
174}
175
176/**
177 * \brief Array setup function for building prefixes (shift for valid prefixes) for boyermoore context
178 *
179 * \param x pointer to the pattern string
180 * \param m length of the string
181 * \param bmGs pointer to an empty array that will hold the prefixes (shifts)
182 * \retval 0 ok, -1 failed
183 */
184static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs)
185{
186 int32_t i, j;
187 uint16_t suff[m + 1];
188
189 BoyerMooreSuffixes(x, m, suff);
190
191 for (i = 0; i < m; ++i)
192 bmGs[i] = m;
193
194 j = 0;
195
196 for (i = m - 1; i >= -1; --i)
197 if (i == -1 || suff[i] == i + 1)
198 for (; j < m - 1 - i; ++j)
199 if (bmGs[j] == m)
200 bmGs[j] = (uint16_t)(m - 1 - i);
201
202 for (i = 0; i <= m - 2; ++i)
203 bmGs[m - 1 - suff[i]] = (uint16_t)(m - 1 - i);
204 return 0;
205}
206
207/**
208 * \brief Array setup function for bad characters that split the pattern
209 * Remember that the result array should be the length of ALPHABET_SIZE
210 *
211 * \param str pointer to the pattern string
212 * \param size length of the string
213 * \param result pointer to an empty array that will hold the badchars
214 */
215static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc)
216{
217 uint16_t i;
218
219 for (i = 0; i < 256; ++i) {
220 bmBc[i] = m;
221 }
222 for (i = 0; i < m - 1; ++i) {
223 bmBc[u8_tolower(x[i])] = m - 1 - i;
224 bmBc[u8_toupper(x[i])] = m - 1 - i;
225 }
226}
227
228static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
229 uint16_t *suff)
230{
231 int32_t f = 0, g, i;
232
233 suff[m - 1] = m;
234 g = m - 1;
235 for (i = m - 2; i >= 0; --i) {
236 if (i > g && suff[i + m - 1 - f] < i - g) {
237 suff[i] = suff[i + m - 1 - f];
238 } else {
239 if (i < g) {
240 g = i;
241 }
242 f = i;
243 while (g >= 0 && u8_tolower(x[g]) == u8_tolower(x[g + m - 1 - f])) {
244 --g;
245 }
246 DEBUG_VALIDATE_BUG_ON(f - g < 0 || f - g > UINT16_MAX);
247 suff[i] = (uint16_t)(f - g);
248 }
249 }
250}
251
252/**
253 * \brief Array setup function for building prefixes (shift for valid prefixes)
254 * for boyermoore context case less
255 *
256 * \param x pointer to the pattern string
257 * \param m length of the string
258 * \param bmGs pointer to an empty array that will hold the prefixes (shifts)
259 */
260static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs)
261{
262 uint16_t i, j;
263 uint16_t suff[m + 1];
264
265 BoyerMooreSuffixesNocase(x, m, suff);
266
267 for (i = 0; i < m; ++i) {
268 bmGs[i] = m;
269 }
270 j = 0;
271 for (i = m; i > 0; --i) {
272 if (suff[i - 1] == i) {
273 for (; j < m - i; ++j) {
274 if (bmGs[j] == m) {
275 bmGs[j] = m - i;
276 }
277 }
278 }
279 }
280 for (i = 0; i <= m - 2; ++i) {
281 bmGs[m - 1 - suff[i]] = m - 1 - i;
282 }
283}
284
285/**
286 * \brief Boyer Moore search algorithm
287 * Is better as the pattern length increases and for big buffers to search in.
288 * The algorithm needs a context of two arrays already prepared
289 * by prep_bad_chars() and prep_good_suffix()
290 *
291 * \param y pointer to the buffer to search in
292 * \param n length limit of the buffer
293 * \param x pointer to the pattern we ar searching for
294 * \param m length limit of the needle
295 * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
296 * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
297 *
298 * \retval ptr to start of the match; NULL if no match
299 */
300uint8_t *BoyerMoore(
301 const uint8_t *x, const uint16_t m, const uint8_t *y, const uint32_t n, const BmCtx *bm_ctx)
302{
303 const uint16_t *bmGs = bm_ctx->bmGs;
304 const uint16_t *bmBc = bm_ctx->bmBc;
305
306 int i, j, m1, m2;
307 // force casting to int32_t (if possible)
308 const int32_t int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
309 j = 0;
310 while (j <= int_n - m ) {
311 for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
312
313 if (i < 0) {
314 return (uint8_t *)(y + j);
315 } else {
316 j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i) ? m1 : m2;
317 }
318 }
319 return NULL;
320}
321
322
323/**
324 * \brief Boyer Moore search algorithm
325 * Is better as the pattern length increases and for big buffers to search in.
326 * The algorithm needs a context of two arrays already prepared
327 * by prep_bad_chars() and prep_good_suffix()
328 *
329 * \param y pointer to the buffer to search in
330 * \param n length limit of the buffer
331 * \param x pointer to the pattern we ar searching for
332 * \param m length limit of the needle
333 * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
334 * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
335 *
336 * \retval ptr to start of the match; NULL if no match
337 */
339 const uint8_t *x, const uint16_t m, const uint8_t *y, const uint32_t n, const BmCtx *bm_ctx)
340{
341 const uint16_t *bmGs = bm_ctx->bmGs;
342 const uint16_t *bmBc = bm_ctx->bmBc;
343 int i, j, m1, m2;
344 // force casting to int32_t (if possible)
345 const int32_t int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
346 j = 0;
347 while (j <= int_n - m ) {
348 /* x is stored in lowercase. */
349 for (i = m - 1; i >= 0 && x[i] == u8_tolower(y[i + j]); --i);
350
351 if (i < 0) {
352 return (uint8_t *)(y + j);
353 } else {
354 j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i) ? m1 : m2;
355 }
356 }
357 return NULL;
358}
359
360typedef struct SpmBmCtx_ {
362 uint8_t *needle;
363 uint16_t needle_len;
366
367static SpmCtx *BMInitCtx(const uint8_t *needle, uint16_t needle_len, int nocase,
368 SpmGlobalThreadCtx *global_thread_ctx)
369{
370 SpmCtx *ctx = SCCalloc(1, sizeof(SpmCtx));
371 if (ctx == NULL) {
372 SCLogDebug("Unable to alloc SpmCtx.");
373 return NULL;
374 }
375 ctx->matcher = SPM_BM;
376
377 SpmBmCtx *sctx = SCCalloc(1, sizeof(SpmBmCtx));
378 if (sctx == NULL) {
379 SCLogDebug("Unable to alloc SpmBmCtx.");
380 SCFree(ctx);
381 return NULL;
382 }
383
384 sctx->needle = SCMalloc(needle_len);
385 if (sctx->needle == NULL) {
386 SCLogDebug("Unable to alloc string.");
387 SCFree(sctx);
388 SCFree(ctx);
389 return NULL;
390 }
391 memcpy(sctx->needle, needle, needle_len);
392 sctx->needle_len = needle_len;
393
394 if (nocase) {
395 sctx->bm_ctx = BoyerMooreNocaseCtxInit(sctx->needle, sctx->needle_len);
396 sctx->nocase = 1;
397 } else {
398 sctx->bm_ctx = BoyerMooreCtxInit(sctx->needle, sctx->needle_len);
399 sctx->nocase = 0;
400 }
401
402 ctx->ctx = sctx;
403 return ctx;
404}
405
406static void BMDestroyCtx(SpmCtx *ctx)
407{
408 if (ctx == NULL) {
409 return;
410 }
411
412 SpmBmCtx *sctx = ctx->ctx;
413 if (sctx != NULL) {
415 if (sctx->needle != NULL) {
416 SCFree(sctx->needle);
417 }
418 SCFree(sctx);
419 }
420
421 SCFree(ctx);
422}
423
424static uint8_t *BMScan(const SpmCtx *ctx, SpmThreadCtx *thread_ctx,
425 const uint8_t *haystack, uint32_t haystack_len)
426{
427 const SpmBmCtx *sctx = ctx->ctx;
428
429 if (sctx->nocase) {
430 return BoyerMooreNocase(sctx->needle, sctx->needle_len, haystack,
431 haystack_len, sctx->bm_ctx);
432 } else {
433 return BoyerMoore(sctx->needle, sctx->needle_len, haystack,
434 haystack_len, sctx->bm_ctx);
435 }
436}
437
438static SpmGlobalThreadCtx *BMInitGlobalThreadCtx(void)
439{
440 SpmGlobalThreadCtx *global_thread_ctx = SCCalloc(1, sizeof(SpmGlobalThreadCtx));
441 if (global_thread_ctx == NULL) {
442 SCLogDebug("Unable to alloc SpmThreadCtx.");
443 return NULL;
444 }
445 global_thread_ctx->matcher = SPM_BM;
446 return global_thread_ctx;
447}
448
449static void BMDestroyGlobalThreadCtx(SpmGlobalThreadCtx *global_thread_ctx)
450{
451 if (global_thread_ctx == NULL) {
452 return;
453 }
454 SCFree(global_thread_ctx);
455}
456
457static void BMDestroyThreadCtx(SpmThreadCtx *thread_ctx)
458{
459 if (thread_ctx == NULL) {
460 return;
461 }
462 SCFree(thread_ctx);
463}
464
465static SpmThreadCtx *BMMakeThreadCtx(const SpmGlobalThreadCtx *global_thread_ctx) {
466 SpmThreadCtx *thread_ctx = SCCalloc(1, sizeof(SpmThreadCtx));
467 if (thread_ctx == NULL) {
468 SCLogDebug("Unable to alloc SpmThreadCtx.");
469 return NULL;
470 }
471 thread_ctx->matcher = SPM_BM;
472 return thread_ctx;
473}
474
476{
477 spm_table[SPM_BM].name = "bm";
478 spm_table[SPM_BM].InitGlobalThreadCtx = BMInitGlobalThreadCtx;
479 spm_table[SPM_BM].DestroyGlobalThreadCtx = BMDestroyGlobalThreadCtx;
480 spm_table[SPM_BM].MakeThreadCtx = BMMakeThreadCtx;
481 spm_table[SPM_BM].DestroyThreadCtx = BMDestroyThreadCtx;
482 spm_table[SPM_BM].InitCtx = BMInitCtx;
483 spm_table[SPM_BM].DestroyCtx = BMDestroyCtx;
484 spm_table[SPM_BM].Scan = BMScan;
485}
SCMutex m
Definition flow-hash.h:6
struct Thresholds ctx
uint16_t bmGs[]
Definition util-spm-bm.h:36
uint16_t bmBc[ALPHABET_SIZE]
Definition util-spm-bm.h:34
BmCtx * bm_ctx
uint8_t * needle
uint16_t needle_len
SpmCtx *(* InitCtx)(const uint8_t *needle, uint16_t needle_len, int nocase, SpmGlobalThreadCtx *g_thread_ctx)
Definition util-spm.h:65
SpmGlobalThreadCtx *(* InitGlobalThreadCtx)(void)
Definition util-spm.h:61
void(* DestroyGlobalThreadCtx)(SpmGlobalThreadCtx *g_thread_ctx)
Definition util-spm.h:62
void(* DestroyThreadCtx)(SpmThreadCtx *thread_ctx)
Definition util-spm.h:64
const char * name
Definition util-spm.h:60
uint8_t *(* Scan)(const SpmCtx *ctx, SpmThreadCtx *thread_ctx, const uint8_t *haystack, uint32_t haystack_len)
Definition util-spm.h:68
void(* DestroyCtx)(SpmCtx *)
Definition util-spm.h:67
SpmThreadCtx *(* MakeThreadCtx)(const SpmGlobalThreadCtx *g_thread_ctx)
Definition util-spm.h:63
uint8_t matcher
Definition util-spm.h:55
#define u8_tolower(c)
#define u8_toupper(c)
#define SCEnter(...)
Definition util-debug.h:277
#define FatalError(...)
Definition util-debug.h:510
#define SCLogDebug(...)
Definition util-debug.h:275
#define SCReturn
Definition util-debug.h:279
#define SCMalloc(sz)
Definition util-mem.h:47
#define SCFree(p)
Definition util-mem.h:61
#define SCCalloc(nm, sz)
Definition util-mem.h:53
#define unlikely(expr)
BmCtx * BoyerMooreNocaseCtxInit(uint8_t *needle, uint16_t needle_len)
Setup a Boyer Moore context for nocase search.
uint8_t * BoyerMoore(const uint8_t *x, const uint16_t m, const uint8_t *y, const uint32_t n, const BmCtx *bm_ctx)
Boyer Moore search algorithm Is better as the pattern length increases and for big buffers to search ...
void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len)
Given a BmCtx structure, recreate the pre/suffixes for nocase.
Definition util-spm-bm.c:58
void BoyerMooreCtxDeInit(BmCtx *bmctx)
Free the memory allocated to Boyer Moore context.
struct SpmBmCtx_ SpmBmCtx
uint8_t * BoyerMooreNocase(const uint8_t *x, const uint16_t m, const uint8_t *y, const uint32_t n, const BmCtx *bm_ctx)
Boyer Moore search algorithm Is better as the pattern length increases and for big buffers to search ...
BmCtx * BoyerMooreCtxInit(const uint8_t *needle, uint16_t needle_len)
Setup a Boyer Moore context.
Definition util-spm-bm.c:78
void SpmBMRegister(void)
SpmTableElmt spm_table[SPM_TABLE_SIZE]
Definition util-spm.c:62
@ SPM_BM
Definition util-spm.h:30
#define DEBUG_VALIDATE_BUG_ON(exp)