suricata
util-ebpf.c
Go to the documentation of this file.
1/* Copyright (C) 2018-2021 Open Information Security Foundation
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18/**
19 * \ingroup afppacket
20 *
21 * @{
22 */
23
24/**
25 * \file
26 *
27 * \author Eric Leblond <eric@regit.org>
28 *
29 * eBPF utility
30 *
31 */
32
33#define SC_PCAP_DONT_INCLUDE_PCAP_H 1
34
35#include "suricata-common.h"
36#include "flow-bypass.h"
37
38#ifdef HAVE_PACKET_EBPF
39
40#include <sys/time.h>
41#include <sys/resource.h>
42
43#include "util-ebpf.h"
44#include "util-affinity.h"
45#include "util-cpu.h"
46#include "util-device-private.h"
47
48#include "device-storage.h"
49#include "flow-storage.h"
50#include "flow.h"
51#include "flow-hash.h"
52#include "tm-threads.h"
53
54#include <bpf/libbpf.h>
55#include <bpf/bpf.h>
56#include <net/if.h>
57#include "autoconf.h"
58
/* Maximum number of eBPF maps tracked per interface. */
#define BPF_MAP_MAX_COUNT 16

/* Timeout value (per its name, presumably seconds) for bypassed flows;
 * not referenced in this file's visible code — confirm users elsewhere. */
#define BYPASSED_FLOW_TIMEOUT 60

/* LiveDevice storage slot holding the struct bpf_maps_info for an iface. */
static LiveDevStorageId g_livedev_storage_id = { .id = -1 };
/* Flow storage slot holding the BypassedIfaceList of a bypassed flow. */
static FlowStorageId g_flow_storage_id = { .id = -1 };

/* One loaded eBPF map: owning iface, map name, its fd, and whether the
 * pinned filesystem entry must be unlinked when the item is freed. */
struct bpf_map_item {
    char iface[IFNAMSIZ];
    char * name;
    int fd;
    uint8_t to_unlink;
};

/* Fixed-size collection of the maps attached to one LiveDevice. */
struct bpf_maps_info {
    struct bpf_map_item array[BPF_MAP_MAX_COUNT];
    int last; /* number of used slots in array */
};

/* Linked list of devices a flow has already been bypassed on (IPS mode). */
typedef struct BypassedIfaceList_ {
    LiveDevice *dev;
    struct BypassedIfaceList_ *next;
} BypassedIfaceList;
82
83static void BpfMapsInfoFree(void *bpf)
84{
85 struct bpf_maps_info *bpfinfo = (struct bpf_maps_info *)bpf;
86 int i;
87 for (i = 0; i < bpfinfo->last; i ++) {
88 if (bpfinfo->array[i].name) {
89 if (bpfinfo->array[i].to_unlink) {
90 char pinnedpath[PATH_MAX];
91 int ret = snprintf(pinnedpath, sizeof(pinnedpath),
92 "/sys/fs/bpf/suricata-%s-%s",
93 bpfinfo->array[i].iface,
94 bpfinfo->array[i].name);
95 if (ret > 0) {
96 /* Unlink the pinned entry */
97 ret = unlink(pinnedpath);
98 if (ret == -1) {
99 int error = errno;
101 "Unable to remove %s: %s (%d)", pinnedpath, strerror(error), error);
102 }
103 } else {
104 SCLogWarning("Unable to remove map %s", bpfinfo->array[i].name);
105 }
106 }
107 SCFree(bpfinfo->array[i].name);
108 }
109 }
110 SCFree(bpfinfo);
111}
112
113static void BypassedListFree(void *ifl)
114{
115 BypassedIfaceList *mifl = (BypassedIfaceList *)ifl;
116 BypassedIfaceList *nifl;
117 while (mifl) {
118 nifl = mifl->next;
119 SCFree(mifl);
120 mifl = nifl;
121 }
122}
123
/**
 * Delete an entry from an eBPF map, logging a warning on failure.
 *
 * \param fd file descriptor of the eBPF map
 * \param key pointer to the key to remove
 */
void EBPFDeleteKey(int fd, void *key)
{
    if (bpf_map_delete_elem(fd, key) < 0) {
        SCLogWarning("Unable to delete entry: %s (%d)", strerror(errno), errno);
    }
}
131
132static struct bpf_maps_info *EBPFGetBpfMap(const char *iface)
133{
134 LiveDevice *livedev = LiveGetDevice(iface);
135 if (livedev == NULL)
136 return NULL;
137 void *data = LiveDevGetStorageById(livedev, g_livedev_storage_id);
138
139 return (struct bpf_maps_info *)data;
140}
141
142/**
143 * Get file descriptor of a map in the scope of a interface
144 *
145 * \param iface the interface where the map need to be looked for
146 * \param name the name of the map
147 * \return the file descriptor or -1 in case of error
148 */
149int EBPFGetMapFDByName(const char *iface, const char *name)
150{
151 int i;
152
153 if (iface == NULL || name == NULL)
154 return -1;
155 struct bpf_maps_info *bpf_maps = EBPFGetBpfMap(iface);
156 if (bpf_maps == NULL)
157 return -1;
158
159 for (i = 0; i < BPF_MAP_MAX_COUNT; i++) {
160 if (!bpf_maps->array[i].name)
161 continue;
162 if (!strcmp(bpf_maps->array[i].name, name)) {
163 SCLogDebug("Got fd %d for eBPF map '%s'", bpf_maps->array[i].fd, name);
164 return bpf_maps->array[i].fd;
165 }
166 }
167
168 return -1;
169}
170
171static int EBPFLoadPinnedMapsFile(LiveDevice *livedev, const char *file)
172{
173 char pinnedpath[1024];
174 snprintf(pinnedpath, sizeof(pinnedpath),
175 "/sys/fs/bpf/suricata-%s-%s",
176 livedev->dev,
177 file);
178
179 return bpf_obj_get(pinnedpath);
180}
181
182static int EBPFLoadPinnedMaps(LiveDevice *livedev, struct ebpf_timeout_config *config)
183{
184 int fd_v4 = -1, fd_v6 = -1;
185
186 /* First try to load the eBPF check map and return if found */
187 if (config->pinned_maps_name) {
188 int ret = EBPFLoadPinnedMapsFile(livedev, config->pinned_maps_name);
189 if (ret == 0) {
190 /* pinned maps found, let's just exit as XDP filter is in place */
191 return ret;
192 }
193 }
194
195 if (config->mode == AFP_MODE_XDP_BYPASS) {
196 /* Get flow v4 table */
197 fd_v4 = EBPFLoadPinnedMapsFile(livedev, "flow_table_v4");
198 if (fd_v4 < 0) {
199 return fd_v4;
200 }
201
202 /* Get flow v6 table */
203 fd_v6 = EBPFLoadPinnedMapsFile(livedev, "flow_table_v6");
204 if (fd_v6 < 0) {
205 SCLogWarning("Found a flow_table_v4 map but no flow_table_v6 map");
206 return fd_v6;
207 }
208 }
209
210 struct bpf_maps_info *bpf_map_data = SCCalloc(1, sizeof(*bpf_map_data));
211 if (bpf_map_data == NULL) {
212 SCLogError("Can't allocate bpf map array");
213 return -1;
214 }
215
216 if (config->mode == AFP_MODE_XDP_BYPASS) {
217 bpf_map_data->array[0].fd = fd_v4;
218 bpf_map_data->array[0].name = SCStrdup("flow_table_v4");
219 if (bpf_map_data->array[0].name == NULL) {
220 goto alloc_error;
221 }
222 bpf_map_data->array[1].fd = fd_v6;
223 bpf_map_data->array[1].name = SCStrdup("flow_table_v6");
224 if (bpf_map_data->array[1].name == NULL) {
225 goto alloc_error;
226 }
227 bpf_map_data->last = 2;
228 } else {
229 bpf_map_data->last = 0;
230 }
231
232 /* Load other known maps: cpu_map, cpus_available, tx_peer, tx_peer_int */
233 int fd = EBPFLoadPinnedMapsFile(livedev, "cpu_map");
234 if (fd >= 0) {
235 bpf_map_data->array[bpf_map_data->last].fd = fd;
236 bpf_map_data->array[bpf_map_data->last].name = SCStrdup("cpu_map");
237 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
238 goto alloc_error;
239 }
240 bpf_map_data->last++;
241 }
242 fd = EBPFLoadPinnedMapsFile(livedev, "cpus_available");
243 if (fd >= 0) {
244 bpf_map_data->array[bpf_map_data->last].fd = fd;
245 bpf_map_data->array[bpf_map_data->last].name = SCStrdup("cpus_available");
246 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
247 goto alloc_error;
248 }
249 bpf_map_data->last++;
250 }
251 fd = EBPFLoadPinnedMapsFile(livedev, "tx_peer");
252 if (fd >= 0) {
253 bpf_map_data->array[bpf_map_data->last].fd = fd;
254 bpf_map_data->array[bpf_map_data->last].name = SCStrdup("tx_peer");
255 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
256 goto alloc_error;
257 }
258 bpf_map_data->last++;
259 }
260 fd = EBPFLoadPinnedMapsFile(livedev, "tx_peer_int");
261 if (fd >= 0) {
262 bpf_map_data->array[bpf_map_data->last].fd = fd;
263 bpf_map_data->array[bpf_map_data->last].name = SCStrdup("tx_peer_int");
264 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
265 goto alloc_error;
266 }
267 bpf_map_data->last++;
268 }
269
270 /* Attach the bpf_maps_info to the LiveDevice via the device storage */
271 LiveDevSetStorageById(livedev, g_livedev_storage_id, bpf_map_data);
272 /* Declare that device will use bypass stats */
273 LiveDevUseBypass(livedev);
274
275 return 0;
276
277alloc_error:
278 for (int i = 0; i < bpf_map_data->last; i++) {
279 SCFree(bpf_map_data->array[i].name);
280 }
281 bpf_map_data->last = 0;
282 SCLogError("Can't allocate bpf map name");
283 return -1;
284}
285
/**
 * Load a section of an eBPF file
 *
 * This function loads a section inside an eBPF and return
 * via the parameter val the file descriptor that will be used to
 * inject the eBPF code into the kernel via a syscall.
 *
 * \param iface the interface the program is loaded for (used for map pinning
 *        names and, in HW mode, device offload)
 * \param path the path of the eBPF file to load
 * \param section the section in the eBPF file to load
 * \param val a pointer to an integer that will be the file desc
 * \param config capture configuration: flags select socket-filter vs XDP,
 *        pinned maps usage and HW offload
 * \return -1 in case of error, 0 in case of success, 1 if pinned maps is loaded
 */
int EBPFLoadFile(const char *iface, const char *path, const char * section,
                 int *val, struct ebpf_timeout_config *config)
{
    int err, pfd;
    bool found = false;
    struct bpf_object *bpfobj = NULL;
    struct bpf_program *bpfprog = NULL;
    struct bpf_map *map = NULL;

    if (iface == NULL)
        return -1;
    LiveDevice *livedev = LiveGetDevice(iface);
    if (livedev == NULL)
        return -1;

    if (config->flags & EBPF_XDP_CODE && config->flags & EBPF_PINNED_MAPS) {
        /* We try to get our flow table maps and if we have them we can simply return */
        if (EBPFLoadPinnedMaps(livedev, config) == 0) {
            SCLogInfo("Loaded pinned maps, will use already loaded eBPF filter");
            return 1;
        }
    }

    if (! path) {
        SCLogError("No file defined to load eBPF from");
        return -1;
    }

    /* Sending the eBPF code to the kernel requires a large amount of
     * locked memory so we set it to unlimited to avoid a ENOPERM error */
    struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
    if (setrlimit(RLIMIT_MEMLOCK, &r) != 0) {
        SCLogError("Unable to lock memory: %s (%d)", strerror(errno), errno);
        return -1;
    }

    /* Open the eBPF file and parse it */
    bpfobj = bpf_object__open(path);
    long error = libbpf_get_error(bpfobj);
    if (error) {
        char err_buf[128];
        libbpf_strerror(error, err_buf,
                        sizeof(err_buf));
        SCLogError("Unable to load eBPF objects in '%s': %s", path, err_buf);
        return -1;
    }

    if (config->flags & EBPF_XDP_HW_MODE) {
        /* hardware offload: bind every program and map to the device */
        unsigned int ifindex = if_nametoindex(iface);
        bpf_object__for_each_program(bpfprog, bpfobj) {
            bpf_program__set_ifindex(bpfprog, ifindex);
        }
        bpf_map__for_each(map, bpfobj) {
            bpf_map__set_ifindex(map, ifindex);
        }
    }

    /* Let's check that our section is here */
    bpf_object__for_each_program(bpfprog, bpfobj) {
#ifdef HAVE_BPF_PROGRAM__SECTION_NAME
        const char *title = bpf_program__section_name(bpfprog);
#else
        const char *title = bpf_program__title(bpfprog, 0);
#endif
        if (!strcmp(title, section)) {
            /* set the program type before load, matching the capture mode */
            if (config->flags & EBPF_SOCKET_FILTER) {
#ifdef HAVE_BPF_PROGRAM__SET_TYPE
                bpf_program__set_type(bpfprog, BPF_PROG_TYPE_SOCKET_FILTER);
#else
                /* Fall back to legacy API */
                bpf_program__set_socket_filter(bpfprog);
#endif
            } else {
#ifdef HAVE_BPF_PROGRAM__SET_TYPE
                bpf_program__set_type(bpfprog, BPF_PROG_TYPE_XDP);
#else
                /* Fall back to legacy API */
                bpf_program__set_xdp(bpfprog);
#endif
            }
            found = true;
            break;
        }
    }

    if (!found) {
        SCLogError("No section '%s' in '%s' file. Will not be able to use the file", section, path);
        return -1;
    }

    err = bpf_object__load(bpfobj);
    if (err < 0) {
        if (err == -EPERM) {
            SCLogError("Permission issue when loading eBPF object"
                    " (check libbpf error on stdout)");
        } else {
            char buf[129];
            libbpf_strerror(err, buf, sizeof(buf));
            SCLogError("Unable to load eBPF object: %s (%d)", buf, err);
        }
        return -1;
    }

    /* Kernel and userspace are sharing data via map. Userspace access to the
     * map via a file descriptor. So we need to store the map to fd info. For
     * that we use bpf_maps_info:: */
    struct bpf_maps_info *bpf_map_data = SCCalloc(1, sizeof(*bpf_map_data));
    if (bpf_map_data == NULL) {
        SCLogError("Can't allocate bpf map array");
        return -1;
    }

    /* Store the maps in bpf_maps_info:: */
    bpf_map__for_each(map, bpfobj) {
        if (bpf_map_data->last == BPF_MAP_MAX_COUNT) {
            SCLogError("Too many BPF maps in eBPF files");
            break;
        }
        SCLogDebug("Got a map '%s' with fd '%d'", bpf_map__name(map), bpf_map__fd(map));
        bpf_map_data->array[bpf_map_data->last].fd = bpf_map__fd(map);
        bpf_map_data->array[bpf_map_data->last].name = SCStrdup(bpf_map__name(map));
        snprintf(bpf_map_data->array[bpf_map_data->last].iface, IFNAMSIZ,
                 "%s", iface);
        if (!bpf_map_data->array[bpf_map_data->last].name) {
            SCLogError("Unable to duplicate map name");
            BpfMapsInfoFree(bpf_map_data);
            return -1;
        }
        bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
        if (config->flags & EBPF_PINNED_MAPS) {
            SCLogConfig("Pinning: %d to %s", bpf_map_data->array[bpf_map_data->last].fd,
                    bpf_map_data->array[bpf_map_data->last].name);
            char buf[1024];
            snprintf(buf, sizeof(buf), "/sys/fs/bpf/suricata-%s-%s", iface,
                    bpf_map_data->array[bpf_map_data->last].name);
            int ret = bpf_obj_pin(bpf_map_data->array[bpf_map_data->last].fd, buf);
            if (ret != 0) {
                SCLogWarning("Can not pin: %s", strerror(errno));
            }
            /* Don't unlink pinned maps in XDP mode to avoid a state reset */
            if (config->flags & EBPF_XDP_CODE) {
                bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
            } else {
                bpf_map_data->array[bpf_map_data->last].to_unlink = 1;
            }
        }
        bpf_map_data->last++;
    }

    /* Attach the bpf_maps_info to the LiveDevice via the device storage */
    LiveDevSetStorageById(livedev, g_livedev_storage_id, bpf_map_data);
    LiveDevUseBypass(livedev);

    /* Finally we get the file descriptor for our eBPF program. We will use
     * the fd to attach the program to the socket (eBPF case) or to the device
     * (XDP case). */
    pfd = bpf_program__fd(bpfprog);
    if (pfd == -1) {
        SCLogError("Unable to find %s section", section);
        return -1;
    }

    SCLogInfo("Successfully loaded eBPF file '%s' on '%s'", path, iface);
    *val = pfd;
    return 0;
}
464
/**
 * Attach a XDP program identified by its file descriptor to a device
 *
 * \param iface the name of interface
 * \param fd the eBPF/XDP program file descriptor
 * \param flags flag to pass to attach function, mostly used to set XDP mode
 * \return -1 in case of error, 0 if success
 *
 * When built without HAVE_PACKET_XDP this is a no-op returning 0.
 */
int EBPFSetupXDP(const char *iface, int fd, uint8_t flags)
{
#ifdef HAVE_PACKET_XDP
    unsigned int ifindex = if_nametoindex(iface);
    if (ifindex == 0) {
        SCLogError("Unknown interface '%s'", iface);
        return -1;
    }
#ifdef HAVE_BPF_XDP_ATTACH
    /* preferred libbpf API when available */
    int err = bpf_xdp_attach(ifindex, fd, flags, NULL);
#else
    /* Fall back to legacy API */
    int err = bpf_set_link_xdp_fd(ifindex, fd, flags);
#endif
    if (err != 0) {
        char buf[129];
        libbpf_strerror(err, buf, sizeof(buf));
        SCLogError("Unable to set XDP on '%s': %s (%d)", iface, buf, err);
        return -1;
    }
#endif
    return 0;
}
496
497/**
498 * Create a Flow in the table for a Flowkey
499 *
500 * \return false (this create function never returns true)
501 */
502static bool EBPFCreateFlowForKey(struct flows_stats *flowstats, LiveDevice *dev, void *key,
503 size_t skey, FlowKey *flow_key, struct timespec *ctime,
504 uint64_t pkts_cnt, uint64_t bytes_cnt,
505 int mapfd, int cpus_count)
506{
507 Flow *f = NULL;
508 uint32_t hash = FlowKeyGetHash(flow_key);
509
510 f = FlowGetFromFlowKey(flow_key, ctime, hash);
511 if (f == NULL)
512 return false;
513
514 /* set accounting, we can't know the direction, so let's just start to
515 * serve them if we already have something from server to client. We need
516 * these numbers as we will use it to see if we have new traffic coming
517 * on the flow */
519 if (fc == NULL) {
520 fc = SCCalloc(sizeof(FlowBypassInfo), 1);
521 if (fc) {
522 FlowUpdateState(f, FLOW_STATE_CAPTURE_BYPASSED);
524 fc->BypassUpdate = EBPFBypassUpdate;
525 fc->BypassFree = EBPFBypassFree;
526 fc->todstpktcnt = pkts_cnt;
527 fc->todstbytecnt = bytes_cnt;
528 f->livedev = dev;
529 EBPFBypassData *eb = SCCalloc(1, sizeof(EBPFBypassData));
530 if (eb == NULL) {
531 SCFree(fc);
533 return false;
534 }
535 void *mkey = SCCalloc(1, skey);
536 if (mkey == NULL) {
537 SCFree(fc);
538 SCFree(eb);
540 return false;
541 }
542 memcpy(mkey, key, skey);
543 eb->key[0] = mkey;
544 eb->mapfd = mapfd;
545 eb->cpus_count = cpus_count;
546 fc->bypass_data = eb;
547 flowstats->count++;
548 } else {
550 return false;
551 }
552 } else {
553 EBPFBypassData *eb = (EBPFBypassData *) fc->bypass_data;
554 if (eb == NULL) {
556 return false;
557 }
558 /* if both keys are here, then it is a flow bypassed by this
559 * instance so we ignore it */
560 if (eb->key[0] && eb->key[1]) {
562 return false;
563 }
564 fc->tosrcpktcnt = pkts_cnt;
565 fc->tosrcbytecnt = bytes_cnt;
566 void *mkey = SCCalloc(1, skey);
567 if (mkey == NULL) {
569 return false;
570 }
571 memcpy(mkey, key, skey);
572 eb->key[1] = mkey;
573 }
574 f->livedev = dev;
576 return false;
577}
578
579void EBPFBypassFree(void *data)
580{
581 EBPFBypassData *eb = (EBPFBypassData *)data;
582 if (eb == NULL)
583 return;
584 SCFree(eb->key[0]);
585 if (eb->key[1]) {
586 SCFree(eb->key[1]);
587 }
588 SCFree(eb);
589}
590
/**
 *
 * Compare eBPF half flow to Flow
 *
 * Reads the per-CPU packet/byte counters for one half-flow key from the
 * eBPF map, sums them, and updates the matching direction counters in the
 * FlowBypassInfo when they changed.
 *
 * \param f the flow being checked (currently unused in this body)
 * \param fc bypass accounting attached to the flow; updated on activity
 * \param eb eBPF bypass data holding the map fd and CPU count
 * \param key the half-flow key to look up in the map
 * \param index 0 for the to-destination direction, otherwise to-source
 * \return true if entries have activity, false if not
 */

static bool EBPFBypassCheckHalfFlow(Flow *f, FlowBypassInfo *fc,
                                    EBPFBypassData *eb, void *key,
                                    int index)
{
    int i;
    uint64_t pkts_cnt = 0;
    uint64_t bytes_cnt = 0;
    /* We use a per CPU structure so we will get a array of values. But if nr_cpus
     * is 1 then we have a global hash. */
    BPF_DECLARE_PERCPU(struct pair, values_array, eb->cpus_count);
    memset(values_array, 0, sizeof(values_array));
    int res = bpf_map_lookup_elem(eb->mapfd, key, values_array);
    if (res < 0) {
        /* lookup failure: entry is gone from the map, report no activity */
        SCLogDebug("errno: (%d) %s", errno, strerror(errno));
        return false;
    }
    for (i = 0; i < eb->cpus_count; i++) {
        /* let's start accumulating value so we can compute the counters */
        SCLogDebug("%d: Adding pkts %lu bytes %lu", i,
                   BPF_PERCPU(values_array, i).packets,
                   BPF_PERCPU(values_array, i).bytes);
        pkts_cnt += BPF_PERCPU(values_array, i).packets;
        bytes_cnt += BPF_PERCPU(values_array, i).bytes;
    }
    /* a change of the packet count vs the stored value means new traffic */
    if (index == 0) {
        if (pkts_cnt != fc->todstpktcnt) {
            fc->todstpktcnt = pkts_cnt;
            fc->todstbytecnt = bytes_cnt;
            return true;
        }
    } else {
        if (pkts_cnt != fc->tosrcpktcnt) {
            fc->tosrcpktcnt = pkts_cnt;
            fc->tosrcbytecnt = bytes_cnt;
            return true;
        }
    }

    return false;
}
638
639/** Check both half flows for update
640 *
641 * Update lastts in the flow and do accounting
642 *
643 * */
644bool EBPFBypassUpdate(Flow *f, void *data, time_t tsec)
645{
646 EBPFBypassData *eb = (EBPFBypassData *)data;
647 if (eb == NULL) {
648 return false;
649 }
651 if (fc == NULL) {
652 return false;
653 }
654 bool activity = EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[0], 0);
655 activity |= EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[1], 1);
656 if (!activity) {
657 SCLogDebug("Delete entry: %u (%" PRIu64 ")", FLOW_IS_IPV6(f), FlowGetId(f));
658 /* delete the entries if no time update */
659 EBPFDeleteKey(eb->mapfd, eb->key[0]);
660 EBPFDeleteKey(eb->mapfd, eb->key[1]);
661 SCLogDebug("Done delete entry: %u", FLOW_IS_IPV6(f));
662 } else {
663 f->lastts = SCTIME_FROM_SECS(tsec);
664 return true;
665 }
666 return false;
667}
668
/* Callback type run by the flow-table iterators for each map entry; see
 * EBPFCreateFlowForKey for the parameter semantics. */
typedef bool (*OpFlowForKey)(struct flows_stats * flowstats, LiveDevice*dev, void *key,
                             size_t skey, FlowKey *flow_key, struct timespec *ctime,
                             uint64_t pkts_cnt, uint64_t bytes_cnt,
                             int mapfd, int cpus_count);
673
/**
 * Bypassed flows iterator for IPv4
 *
 * This function iterates on all the flows of the IPv4 table
 * running a callback function on each flow.
 *
 * \param th_v thread vars, checked for the kill flag to allow early exit
 * \param dev capture device whose "flow_table_v4" style map is walked
 * \param name name of the eBPF map to iterate
 * \param ctime current time, forwarded to the callback
 * \param tcfg timeout configuration (mode and CPU count)
 * \param EBPFOpFlowForKey callback run on each map entry
 * \return -1 if the map is unknown, 1 if at least one entry was handled as
 *         dead by the callback, 0 otherwise
 */
static int EBPFForEachFlowV4Table(ThreadVars *th_v, LiveDevice *dev, const char *name,
                                  struct timespec *ctime,
                                  struct ebpf_timeout_config *tcfg,
                                  OpFlowForKey EBPFOpFlowForKey
                                  )
{
    struct flows_stats flowstats = { 0, 0, 0};
    int mapfd = EBPFGetMapFDByName(dev->dev, name);
    if (mapfd == -1)
        return -1;

    struct flowv4_keys key = {}, next_key;
    int found = 0;
    unsigned int i;
    uint64_t hash_cnt = 0;

    if (tcfg->cpus_count == 0) {
        return 0;
    }

    bool dead_flow = false;
    while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
        uint64_t bytes_cnt = 0;
        uint64_t pkts_cnt = 0;
        hash_cnt++;
        /* deletion of the previous key is deferred to here so that
         * bpf_map_get_next_key() above was still able to advance from it */
        if (dead_flow) {
            EBPFDeleteKey(mapfd, &key);
            dead_flow = false;
        }
        /* We use a per CPU structure so we will get a array of values. But if nr_cpus
         * is 1 then we have a global hash. */
        BPF_DECLARE_PERCPU(struct pair, values_array, tcfg->cpus_count);
        memset(values_array, 0, sizeof(values_array));
        int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
        if (res < 0) {
            SCLogDebug("no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
            SCLogDebug("errno: (%d) %s", errno, strerror(errno));
            key = next_key;
            continue;
        }
        for (i = 0; i < tcfg->cpus_count; i++) {
            /* let's start accumulating value so we can compute the counters */
            SCLogDebug("%d: Adding pkts %lu bytes %lu", i,
                       BPF_PERCPU(values_array, i).packets,
                       BPF_PERCPU(values_array, i).bytes);
            pkts_cnt += BPF_PERCPU(values_array, i).packets;
            bytes_cnt += BPF_PERCPU(values_array, i).bytes;
        }
        /* Get the corresponding Flow in the Flow table to compare and update
         * its counters and lastseen if needed */
        FlowKey flow_key;
        /* XDP stores ports/addresses in network order; in other modes they
         * are already host order for ports but network order for addresses */
        if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
            flow_key.sp = ntohs(next_key.port16[0]);
            flow_key.dp = ntohs(next_key.port16[1]);
            flow_key.src.addr_data32[0] = next_key.src;
            flow_key.dst.addr_data32[0] = next_key.dst;
        } else {
            flow_key.sp = next_key.port16[0];
            flow_key.dp = next_key.port16[1];
            flow_key.src.addr_data32[0] = ntohl(next_key.src);
            flow_key.dst.addr_data32[0] = ntohl(next_key.dst);
        }
        flow_key.src.family = AF_INET;
        flow_key.src.addr_data32[1] = 0;
        flow_key.src.addr_data32[2] = 0;
        flow_key.src.addr_data32[3] = 0;
        flow_key.dst.family = AF_INET;
        flow_key.dst.addr_data32[1] = 0;
        flow_key.dst.addr_data32[2] = 0;
        flow_key.dst.addr_data32[3] = 0;
        flow_key.vlan_id[0] = next_key.vlan0;
        flow_key.vlan_id[1] = next_key.vlan1;
        flow_key.vlan_id[2] = next_key.vlan2;
        /* map encodes the protocol as 1 for TCP, anything else is UDP */
        if (next_key.ip_proto == 1) {
            flow_key.proto = IPPROTO_TCP;
        } else {
            flow_key.proto = IPPROTO_UDP;
        }
        flow_key.recursion_level = 0;
        flow_key.livedev_id = dev->id;
        dead_flow = EBPFOpFlowForKey(&flowstats, dev, &next_key, sizeof(next_key), &flow_key,
                                     ctime, pkts_cnt, bytes_cnt,
                                     mapfd, tcfg->cpus_count);
        if (dead_flow) {
            found = 1;
        }

        if (TmThreadsCheckFlag(th_v, THV_KILL)) {
            return 0;
        }

        key = next_key;
    }
    /* handle the deferred deletion of the last visited entry */
    if (dead_flow) {
        EBPFDeleteKey(mapfd, &key);
        found = 1;
    }
    SC_ATOMIC_ADD(dev->bypassed, flowstats.packets);

    LiveDevAddBypassStats(dev, flowstats.count, AF_INET);
    SCLogInfo("IPv4 bypassed flow table size: %" PRIu64, hash_cnt);

    return found;
}
784
/**
 * Bypassed flows iterator for IPv6
 *
 * This function iterates on all the flows of the IPv6 table
 * running a callback function on each flow.
 *
 * \param th_v thread vars, checked for the kill flag to allow early exit
 * \param dev capture device whose "flow_table_v6" style map is walked
 * \param name name of the eBPF map to iterate
 * \param ctime current time, forwarded to the callback
 * \param tcfg timeout configuration (mode and CPU count)
 * \param EBPFOpFlowForKey callback run on each map entry
 * \return -1 if the map is unknown, 1 if at least one entry was handled as
 *         dead by the callback, 0 otherwise
 */
static int EBPFForEachFlowV6Table(ThreadVars *th_v,
                                  LiveDevice *dev, const char *name,
                                  struct timespec *ctime,
                                  struct ebpf_timeout_config *tcfg,
                                  OpFlowForKey EBPFOpFlowForKey
                                  )
{
    struct flows_stats flowstats = { 0, 0, 0};
    int mapfd = EBPFGetMapFDByName(dev->dev, name);
    if (mapfd == -1)
        return -1;

    struct flowv6_keys key = {}, next_key;
    int found = 0;
    unsigned int i;
    uint64_t hash_cnt = 0;

    if (tcfg->cpus_count == 0) {
        SCLogWarning("CPU count should not be 0");
        return 0;
    }

    /* NOTE(review): unlike the v4 variant, the callback's boolean result is
     * stored in pkts_cnt here and doubles as the "dead flow" marker that
     * triggers the deferred key deletion — confirm intended */
    uint64_t pkts_cnt = 0;
    while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
        uint64_t bytes_cnt = 0;
        hash_cnt++;
        /* deletion of the previous key is deferred to here so that
         * bpf_map_get_next_key() above was still able to advance from it */
        if (pkts_cnt > 0) {
            EBPFDeleteKey(mapfd, &key);
        }
        pkts_cnt = 0;
        /* We use a per CPU structure so we will get a array of values. But if nr_cpus
         * is 1 then we have a global hash. */
        BPF_DECLARE_PERCPU(struct pair, values_array, tcfg->cpus_count);
        memset(values_array, 0, sizeof(values_array));
        int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
        if (res < 0) {
            SCLogDebug("no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
            key = next_key;
            continue;
        }
        for (i = 0; i < tcfg->cpus_count; i++) {
            /* let's start accumulating value so we can compute the counters */
            SCLogDebug("%d: Adding pkts %lu bytes %lu", i,
                       BPF_PERCPU(values_array, i).packets,
                       BPF_PERCPU(values_array, i).bytes);
            pkts_cnt += BPF_PERCPU(values_array, i).packets;
            bytes_cnt += BPF_PERCPU(values_array, i).bytes;
        }
        /* Get the corresponding Flow in the Flow table to compare and update
         * its counters and lastseen if needed */
        FlowKey flow_key;
        /* XDP stores ports/addresses in network order; in other modes ports
         * are already host order while addresses are network order */
        if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
            flow_key.sp = ntohs(next_key.port16[0]);
            flow_key.dp = ntohs(next_key.port16[1]);
            flow_key.src.family = AF_INET6;
            flow_key.src.addr_data32[0] = next_key.src[0];
            flow_key.src.addr_data32[1] = next_key.src[1];
            flow_key.src.addr_data32[2] = next_key.src[2];
            flow_key.src.addr_data32[3] = next_key.src[3];
            flow_key.dst.family = AF_INET6;
            flow_key.dst.addr_data32[0] = next_key.dst[0];
            flow_key.dst.addr_data32[1] = next_key.dst[1];
            flow_key.dst.addr_data32[2] = next_key.dst[2];
            flow_key.dst.addr_data32[3] = next_key.dst[3];
        } else {
            flow_key.sp = next_key.port16[0];
            flow_key.dp = next_key.port16[1];
            flow_key.src.family = AF_INET6;
            flow_key.src.addr_data32[0] = ntohl(next_key.src[0]);
            flow_key.src.addr_data32[1] = ntohl(next_key.src[1]);
            flow_key.src.addr_data32[2] = ntohl(next_key.src[2]);
            flow_key.src.addr_data32[3] = ntohl(next_key.src[3]);
            flow_key.dst.family = AF_INET6;
            flow_key.dst.addr_data32[0] = ntohl(next_key.dst[0]);
            flow_key.dst.addr_data32[1] = ntohl(next_key.dst[1]);
            flow_key.dst.addr_data32[2] = ntohl(next_key.dst[2]);
            flow_key.dst.addr_data32[3] = ntohl(next_key.dst[3]);
        }
        flow_key.vlan_id[0] = next_key.vlan0;
        flow_key.vlan_id[1] = next_key.vlan1;
        flow_key.vlan_id[2] = next_key.vlan2;
        /* map encodes the protocol as 1 for TCP, anything else is UDP */
        if (next_key.ip_proto == 1) {
            flow_key.proto = IPPROTO_TCP;
        } else {
            flow_key.proto = IPPROTO_UDP;
        }
        flow_key.recursion_level = 0;
        flow_key.livedev_id = dev->id;
        pkts_cnt = EBPFOpFlowForKey(&flowstats, dev, &next_key, sizeof(next_key), &flow_key,
                                    ctime, pkts_cnt, bytes_cnt,
                                    mapfd, tcfg->cpus_count);
        if (pkts_cnt > 0) {
            found = 1;
        }

        if (TmThreadsCheckFlag(th_v, THV_KILL)) {
            return 0;
        }

        key = next_key;
    }
    /* handle the deferred deletion of the last visited entry */
    if (pkts_cnt > 0) {
        EBPFDeleteKey(mapfd, &key);
        found = 1;
    }
    SC_ATOMIC_ADD(dev->bypassed, flowstats.packets);

    LiveDevAddBypassStats(dev, flowstats.count, AF_INET6);
    SCLogInfo("IPv6 bypassed flow table size: %" PRIu64, hash_cnt);
    return found;
}
902
903
904int EBPFCheckBypassedFlowCreate(ThreadVars *th_v, struct timespec *curtime, void *data)
905{
906 LiveDevice *ldev = NULL, *ndev;
907 struct ebpf_timeout_config *cfg = (struct ebpf_timeout_config *)data;
908 while(LiveDeviceForEach(&ldev, &ndev)) {
909 EBPFForEachFlowV4Table(th_v, ldev, "flow_table_v4",
910 curtime,
911 cfg, EBPFCreateFlowForKey);
912 EBPFForEachFlowV6Table(th_v, ldev, "flow_table_v6",
913 curtime,
914 cfg, EBPFCreateFlowForKey);
915 }
916
917 return 0;
918}
919
/**
 * Register the LiveDevice and Flow storage slots used by the eBPF bypass
 * subsystem, with their respective free callbacks.
 */
void EBPFRegisterExtension(void)
{
    g_livedev_storage_id = LiveDevStorageRegister("bpfmap", sizeof(void *), NULL, BpfMapsInfoFree);
    g_flow_storage_id = FlowStorageRegister("bypassedlist", sizeof(void *), NULL, BypassedListFree);
}
925
926
927#ifdef HAVE_PACKET_XDP
928
/* Number of CPUs added so far to the cpus_available map of the interface
 * currently being configured; also serves as the next free map index. */
static uint32_t g_redirect_iface_cpu_counter = 0;
930
931static int EBPFAddCPUToMap(const char *iface, uint32_t i)
932{
933 int cpumap = EBPFGetMapFDByName(iface, "cpu_map");
934 uint32_t queue_size = 4096;
935 int ret;
936
937 if (cpumap < 0) {
938 SCLogError("Can't find cpu_map");
939 return -1;
940 }
941 ret = bpf_map_update_elem(cpumap, &i, &queue_size, 0);
942 if (ret) {
943 SCLogError("Create CPU entry failed (err:%d)", ret);
944 return -1;
945 }
946 int cpus_available = EBPFGetMapFDByName(iface, "cpus_available");
947 if (cpus_available < 0) {
948 SCLogError("Can't find cpus_available map");
949 return -1;
950 }
951
952 ret = bpf_map_update_elem(cpus_available, &g_redirect_iface_cpu_counter, &i, 0);
953 if (ret) {
954 SCLogError("Create CPU entry failed (err:%d)", ret);
955 return -1;
956 }
957 return 0;
958}
959
960static void EBPFRedirectMapAddCPU(int i, void *data)
961{
962 if (EBPFAddCPUToMap(data, i) < 0) {
963 SCLogError("Unable to add CPU %d to set", i);
964 } else {
965 g_redirect_iface_cpu_counter++;
966 }
967}
968
969void EBPFBuildCPUSet(SCConfNode *node, char *iface)
970{
971 uint32_t key0 = 0;
972 int mapfd = EBPFGetMapFDByName(iface, "cpus_count");
973 if (mapfd < 0) {
974 SCLogError("Unable to find 'cpus_count' map");
975 return;
976 }
977 g_redirect_iface_cpu_counter = 0;
978 if (node == NULL) {
979 bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
980 BPF_ANY);
981 return;
982 }
983 if (BuildCpusetWithCallback("xdp-cpu-redirect", node, EBPFRedirectMapAddCPU, iface) < 0) {
984 SCLogWarning("Failed to parse XDP CPU redirect configuration");
985 return;
986 }
987 bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
988 BPF_ANY);
989}
990
991/**
992 * Setup peer interface in XDP system
993 *
994 * Ths function set up the peer interface in the XDP maps used by the
995 * bypass filter. The first map tx_peer has type device map and is
996 * used to store the peer. The second map tx_peer_int is used by the
997 * code to check if we have a peer defined for this interface.
998 *
999 * As the map are per device we just need maps with one single element.
1000 * In both case, we use the key 0 to enter element so XDP kernel code
1001 * is using the same key.
1002 */
1003int EBPFSetPeerIface(const char *iface, const char *out_iface)
1004{
1005 int mapfd = EBPFGetMapFDByName(iface, "tx_peer");
1006 if (mapfd < 0) {
1007 SCLogError("Unable to find 'tx_peer' map");
1008 return -1;
1009 }
1010 int intmapfd = EBPFGetMapFDByName(iface, "tx_peer_int");
1011 if (intmapfd < 0) {
1012 SCLogError("Unable to find 'tx_peer_int' map");
1013 return -1;
1014 }
1015
1016 int key0 = 0;
1017 unsigned int peer_index = if_nametoindex(out_iface);
1018 if (peer_index == 0) {
1019 SCLogError("No iface '%s'", out_iface);
1020 return -1;
1021 }
1022 int ret = bpf_map_update_elem(mapfd, &key0, &peer_index, BPF_ANY);
1023 if (ret) {
1024 SCLogError("Create peer entry failed (err:%d)", ret);
1025 return -1;
1026 }
1027 ret = bpf_map_update_elem(intmapfd, &key0, &peer_index, BPF_ANY);
1028 if (ret) {
1029 SCLogError("Create peer entry failed (err:%d)", ret);
1030 return -1;
1031 }
1032 return 0;
1033}
1034
1035/**
1036 * Bypass the flow on all ifaces it is seen on. This is used
1037 * in IPS mode.
1038 */
1039
1040int EBPFUpdateFlow(Flow *f, Packet *p, void *data)
1041{
1042 BypassedIfaceList *ifl = (BypassedIfaceList *)FlowGetStorageById(f, g_flow_storage_id);
1043 if (ifl == NULL) {
1044 ifl = SCCalloc(1, sizeof(*ifl));
1045 if (ifl == NULL) {
1046 return 0;
1047 }
1048 ifl->dev = p->livedev;
1049 FlowSetStorageById(f, g_flow_storage_id, ifl);
1050 return 1;
1051 }
1052 /* Look for packet iface in the list */
1053 BypassedIfaceList *ldev = ifl;
1054 while (ldev) {
1055 if (p->livedev == ldev->dev) {
1056 return 1;
1057 }
1058 ldev = ldev->next;
1059 }
1060 /* Call bypass function if ever not in the list */
1061 p->BypassPacketsFlow(p);
1062
1063 /* Add iface to the list */
1064 BypassedIfaceList *nifl = SCCalloc(1, sizeof(*nifl));
1065 if (nifl == NULL) {
1066 return 0;
1067 }
1068 nifl->dev = p->livedev;
1069 nifl->next = ifl;
1070 FlowSetStorageById(f, g_flow_storage_id, nifl);
1071 return 1;
1072}
1073
1074#endif /* HAVE_PACKET_XDP */
1075
1076#endif
struct HtpBodyChunk_ * next
uint8_t flags
Definition decode-gre.h:0
Flow * FlowGetFromFlowKey(FlowKey *key, struct timespec *ttime, const uint32_t hash)
Get or create a Flow using a FlowKey.
Definition flow-hash.c:1097
uint32_t FlowKeyGetHash(FlowKey *fk)
Definition flow-hash.c:314
void * FlowGetStorageById(const Flow *f, FlowStorageId id)
int FlowSetStorageById(Flow *f, FlowStorageId id, void *ptr)
FlowStorageId FlowStorageRegister(const char *name, const unsigned int size, void *(*Alloc)(unsigned int), void(*Free)(void *))
FlowStorageId GetFlowBypassInfoID(void)
Definition flow-util.c:222
void FlowUpdateState(Flow *f, const enum FlowState s)
Definition flow.c:1162
#define FLOW_IS_IPV6(f)
Definition flow.h:172
#define FLOWLOCK_UNLOCK(fb)
Definition flow.h:273
int LiveDevSetStorageById(LiveDevice *d, LiveDevStorageId id, void *ptr)
Store a pointer in a given LiveDevice storage.
void * LiveDevGetStorageById(LiveDevice *d, LiveDevStorageId id)
Get a value from a given LiveDevice storage.
LiveDevStorageId LiveDevStorageRegister(const char *name, const unsigned int size, void *(*Alloc)(unsigned int), void(*Free)(void *))
Register a LiveDevice storage.
char family
Definition decode.h:113
void(* BypassFree)(void *data)
Definition flow.h:531
void * bypass_data
Definition flow.h:532
uint64_t tosrcpktcnt
Definition flow.h:533
uint64_t tosrcbytecnt
Definition flow.h:534
uint64_t todstpktcnt
Definition flow.h:535
bool(* BypassUpdate)(Flow *f, void *data, time_t tsec)
Definition flow.h:530
uint64_t todstbytecnt
Definition flow.h:536
Port sp
Definition flow.h:311
uint8_t recursion_level
Definition flow.h:313
uint16_t vlan_id[VLAN_MAX_LAYERS]
Definition flow.h:315
Address dst
Definition flow.h:310
Port dp
Definition flow.h:311
uint16_t livedev_id
Definition flow.h:314
uint8_t proto
Definition flow.h:312
Address src
Definition flow.h:310
Flow data structure.
Definition flow.h:356
SCTime_t lastts
Definition flow.h:410
struct LiveDevice_ * livedev
Definition flow.h:398
struct LiveDevice_ * livedev
Definition decode.h:618
int(* BypassPacketsFlow)(struct Packet_ *)
Definition decode.h:594
Per thread variable structure.
Definition threadvars.h:58
uint64_t count
Definition flow-bypass.h:30
uint64_t packets
Definition flow-bypass.h:31
#define THV_KILL
Definition threadvars.h:40
int TmThreadsCheckFlag(ThreadVars *tv, uint32_t flag)
Check if a thread flag is set.
Definition tm-threads.c:93
const char * name
int BuildCpusetWithCallback(const char *name, SCConfNode *node, void(*Callback)(int i, void *data), void *data)
#define SC_ATOMIC_ADD(name, val)
add a value to our atomic variable
#define SCLogDebug(...)
Definition util-debug.h:275
#define SCLogWarning(...)
Macro used to log WARNING messages.
Definition util-debug.h:255
#define SCLogInfo(...)
Macro used to log INFORMATIONAL messages.
Definition util-debug.h:225
#define SCLogError(...)
Macro used to log ERROR messages.
Definition util-debug.h:267
#define SCLogConfig(...)
Definition util-debug.h:229
LiveDevice * LiveGetDevice(const char *name)
Get a pointer to the device at idx.
LiveDevice * LiveDeviceForEach(LiveDevice **ldev, LiveDevice **ndev)
void LiveDevAddBypassStats(LiveDevice *dev, uint64_t cnt, int family)
int LiveDevUseBypass(LiveDevice *dev)
#define SCFree(p)
Definition util-mem.h:61
#define SCCalloc(nm, sz)
Definition util-mem.h:53
#define SCStrdup(s)
Definition util-mem.h:56
#define SCTIME_FROM_SECS(s)
Definition util-time.h:69