suricata
util-spm-bs2bm.c
Go to the documentation of this file.
1/* Copyright (C) 2007-2010 Open Information Security Foundation
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18/**
19 * \file
20 *
21 * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
22 *
23 * Bs2Bm use a simple context array to determine the characters
24 * that are not present on the pattern. This way on partial matches
25 * broken by a char not present, we can skip to the next character
26 * making less checks
27 */
28
29#include "suricata-common.h"
30#include "suricata.h"
31
32#include "util-spm-bs2bm.h"
33
34/**
35 * \brief Array setup function for Bs2Bm of bad characters index (not found at the needle)
36 *
37 * \param needle pointer to the pattern we ar searching for
38 * \param needle_len length limit of the needle
39 * \param badchars pointer to an empty array of bachars. The array prepared contains
40 * characters that can't be inside the needle_len. So the skips can be
41 * faster
42 */
43void Bs2BmBadchars(const uint8_t *needle, uint16_t needle_len, uint8_t *badchars)
44{
45 uint32_t i;
46 for (i = 0; i < ALPHABET_SIZE; i++)
47 badchars[i] = 1;
48
49 /* set to 0 the values where index as ascii is present
50 * because they are not badchars
51 */
52 for (i = 0; i < needle_len; i++)
53 badchars[needle[i]] = 0;
54}
55
56/**
57 * \brief Basic search with a bad characters array. The array badchars contains
58 * flags at character's ascii index that can't be inside the needle. So the skips can be
59 * faster
60 *
61 * \param haystack pointer to the buffer to search in
62 * \param haystack_len length limit of the buffer
63 * \param needle pointer to the pattern we ar searching for
64 * \param needle_len length limit of the needle
65 * \param badchars pointer to an array of bachars prepared by Bs2BmBadchars()
66 *
67 * \retval ptr to start of the match; NULL if no match
68 */
69uint8_t *Bs2Bm(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle,
70 uint16_t needle_len, const uint8_t badchars[])
71{
72 const uint8_t *h, *n;
73 const uint8_t *hmax = haystack + haystack_len;
74 const uint8_t *nmax = needle + needle_len;
75
76 if (needle_len == 0 || needle_len > haystack_len)
77 return NULL;
78
79 for (n = needle; nmax - n <= hmax - haystack; haystack++) {
80 if (*haystack != *n) {
81 continue;
82 }
83 /* one byte needles */
84 if (needle_len == 1)
85 return (uint8_t *)haystack;
86
87 for (h = haystack+1, n++; nmax - n <= hmax - haystack; h++, n++) {
88 if (*h != *n) {
89 if (badchars[*h] == 1) {
90 /* skip it! */
91 haystack = h;
92 }
93 break;
94 }
95 /* if we run out of needle we fully matched */
96 if (n == nmax - 1 ) {
97 return (uint8_t *)haystack;
98 }
99 }
100 n = needle;
101 }
102
103 return NULL;
104}
105
106/**
107 * \brief Basic search case less with a bad characters array. The array badchars contains
108 * flags at character's ascii index that can't be inside the needle. So the skips can be
109 * faster
110 *
111 * \param haystack pointer to the buffer to search in
112 * \param haystack_len length limit of the buffer
113 * \param needle pointer to the pattern we ar searching for
114 * \param needle_len length limit of the needle
115 * \param badchars pointer to an array of bachars prepared by Bs2BmBadchars()
116 *
117 * \retval ptr to start of the match; NULL if no match
118 */
119uint8_t *Bs2BmNocase(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle,
120 uint16_t needle_len, const uint8_t badchars[])
121{
122 const uint8_t *h, *n;
123 const uint8_t *hmax = haystack + haystack_len;
124 const uint8_t *nmax = needle + needle_len;
125
126 if (needle_len == 0 || needle_len > haystack_len)
127 return NULL;
128
129 for (n = needle; nmax - n <= hmax - haystack; haystack++) {
130 if (u8_tolower(*haystack) != u8_tolower(*n)) {
131 continue;
132 }
133 /* one byte needles */
134 if (needle_len == 1)
135 return (uint8_t *)haystack;
136
137 for (h = haystack+1, n++; nmax - n <= hmax - haystack; h++, n++) {
138 if (u8_tolower(*h) != u8_tolower(*n)) {
139 if (badchars[u8_tolower(*h)] == 1) {
140 /* skip it! */
141 haystack = h;
142 }
143 break;
144 }
145 /* if we run out of needle we fully matched */
146 if (n == nmax - 1) {
147 return (uint8_t *)haystack;
148 }
149 }
150 n = needle;
151 }
152
153 return NULL;
154}
#define u8_tolower(c)
#define ALPHABET_SIZE
Definition util-spm-bm.h:30
void Bs2BmBadchars(const uint8_t *needle, uint16_t needle_len, uint8_t *badchars)
Array setup function for Bs2Bm of bad characters index (not found at the needle)
uint8_t * Bs2Bm(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint16_t needle_len, const uint8_t badchars[])
Basic search with a bad characters array. The array badchars contains flags at character's ascii inde...
uint8_t * Bs2BmNocase(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint16_t needle_len, const uint8_t badchars[])
Basic search case less with a bad characters array. The array badchars contains flags at character's ...