From 959a913bd685ebbb1789268ded9fcc2f8b529605 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Sun, 8 Dec 2024 15:26:27 +0100 Subject: [PATCH 01/10] runmodes: query the active runmode with a function call --- src/runmodes.c | 10 ++++++++++ src/runmodes.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/src/runmodes.c b/src/runmodes.c index 006199bb94c6..a484aed15135 100644 --- a/src/runmodes.c +++ b/src/runmodes.c @@ -196,6 +196,16 @@ char *RunmodeGetActive(void) return active_runmode; } +bool RunmodeIsWorkers(void) +{ + return (strcmp(RunmodeGetActive(), "workers") == 0); +} + +bool RunmodeIsAutofp(void) +{ + return (strcmp(RunmodeGetActive(), "autofp") == 0); +} + /** * Return the running mode * diff --git a/src/runmodes.h b/src/runmodes.h index cce5fcbbaa42..56bbe76f7f3f 100644 --- a/src/runmodes.h +++ b/src/runmodes.h @@ -75,6 +75,8 @@ extern const char *thread_name_counter_stats; extern const char *thread_name_counter_wakeup; char *RunmodeGetActive(void); +bool RunmodeIsWorkers(void); +bool RunmodeIsAutofp(void); const char *RunModeGetMainMode(void); void RunModeListRunmodes(void); From 5bf2a43b6145cdb6a582409c6dafd6259c9a0267 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Fri, 6 Dec 2024 13:31:13 +0100 Subject: [PATCH 02/10] dpdk: move DPDK socket retrieval to utils --- src/runmode-dpdk.c | 25 ++----------------------- src/util-dpdk.c | 43 +++++++++++++++++++++++++++++++++++++++++++ src/util-dpdk.h | 2 ++ 3 files changed, 47 insertions(+), 23 deletions(-) diff --git a/src/runmode-dpdk.c b/src/runmode-dpdk.c index 5df81f685883..6bbe3c1f2ed6 100644 --- a/src/runmode-dpdk.c +++ b/src/runmode-dpdk.c @@ -1121,27 +1121,6 @@ static void DeviceSetMTU(struct rte_eth_conf *port_conf, uint16_t mtu) #endif } -/** - * \param port_id - queried port - * \param socket_id - socket ID of the queried port - * \return non-negative number on success, negative on failure (errno) - */ -static int32_t DeviceSetSocketID(uint16_t port_id, int32_t *socket_id) -{ - rte_errno = 
0; - int retval = rte_eth_dev_socket_id(port_id); - *socket_id = retval; - -#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0) // DPDK API changed since 22.11 - retval = -rte_errno; -#else - if (retval == SOCKET_ID_ANY) - retval = 0; // DPDK couldn't determine socket ID of a port -#endif - - return retval; -} - static void PortConfSetInterruptMode(const DPDKIfaceConfig *iconf, struct rte_eth_conf *port_conf) { SCLogConfig("%s: interrupt mode is %s", iconf->iface, @@ -1383,7 +1362,7 @@ static int DeviceConfigureIPS(DPDKIfaceConfig *iconf) SCReturnInt(-ENODEV); } int32_t out_port_socket_id; - int retval = DeviceSetSocketID(iconf->out_port_id, &out_port_socket_id); + int retval = DPDKDeviceSetSocketID(iconf->out_port_id, &out_port_socket_id); if (retval < 0) { SCLogError("%s: invalid socket id: %s", iconf->out_iface, rte_strerror(-retval)); SCReturnInt(retval); @@ -1462,7 +1441,7 @@ static int DeviceConfigure(DPDKIfaceConfig *iconf) SCReturnInt(-ENODEV); } - int32_t retval = DeviceSetSocketID(iconf->port_id, &iconf->socket_id); + int32_t retval = DPDKDeviceSetSocketID(iconf->port_id, &iconf->socket_id); if (retval < 0) { SCLogError("%s: invalid socket id: %s", iconf->iface, rte_strerror(-retval)); SCReturnInt(retval); diff --git a/src/util-dpdk.c b/src/util-dpdk.c index b5f46a30a5d8..dc54f7dcd87f 100644 --- a/src/util-dpdk.c +++ b/src/util-dpdk.c @@ -65,6 +65,49 @@ void DPDKFreeDevice(LiveDevice *ldev) #endif } +/** + * \param port_id - queried port + * \param socket_id - socket ID of the queried port + * \return non-negative number on success, negative on failure (errno) + */ +int32_t DPDKDeviceSetSocketID(uint16_t port_id, int32_t *socket_id) +{ +#ifdef HAVE_DPDK + rte_errno = 0; + int retval = rte_eth_dev_socket_id(port_id); + *socket_id = retval; + +#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0) // DPDK API changed since 22.11 + retval = -rte_errno; +#else + if (retval == SOCKET_ID_ANY) + retval = 0; // DPDK couldn't determine socket ID of a port +#endif + + 
return retval; +#endif /* HAVE_DPDK */ + return -ENOTSUP; +} + +/** + * \param iface_name - name of the queried interface + * \param socket_id - socket ID of the queried port + * \return non-negative number on success, negative on failure (errno) + */ +int32_t DPDKDeviceNameSetSocketID(char *iface_name, int32_t *socket_id) +{ +#ifdef HAVE_DPDK + uint16_t port_id = 0; + int r = rte_eth_dev_get_port_by_name(iface_name, &port_id); + if (r < 0) { + SCLogError("%s: interface not found: %s", iface_name, rte_strerror(-r)); + SCReturnInt(r); + } + return DPDKDeviceSetSocketID(port_id, socket_id); +#endif /* HAVE_DPDK */ + return -ENOTSUP; +} + #ifdef HAVE_DPDK /** * Retrieves name of the port from port id diff --git a/src/util-dpdk.h b/src/util-dpdk.h index 1fb3532f5d4d..0c72dfc269d6 100644 --- a/src/util-dpdk.h +++ b/src/util-dpdk.h @@ -121,6 +121,8 @@ void DPDKCleanupEAL(void); void DPDKCloseDevice(LiveDevice *ldev); void DPDKFreeDevice(LiveDevice *ldev); +int32_t DPDKDeviceSetSocketID(uint16_t port_id, int32_t *socket_id); +int32_t DPDKDeviceNameSetSocketID(char *iface_name, int32_t *socket_id); #ifdef HAVE_DPDK const char *DPDKGetPortNameByPortID(uint16_t pid); From 7ea388382a72c28bd49c3b7d59b447b0285c34f7 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Tue, 3 Sep 2024 13:23:44 +0200 Subject: [PATCH 03/10] github-ci: install hwloc as a mandatory dependency --- .github/workflows/builds.yml | 63 ++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/.github/workflows/builds.yml b/.github/workflows/builds.yml index 04a2f1d5102c..77ffe8008bb7 100644 --- a/.github/workflows/builds.yml +++ b/.github/workflows/builds.yml @@ -102,6 +102,8 @@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libtool \ @@ -258,6 +260,8 @@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libtool \ @@ -353,6 +357,8 @@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libtool \ @@ -504,6 +510,8 
@@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libtool \ @@ -601,6 +609,8 @@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libtool \ @@ -693,6 +703,8 @@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-devel \ hiredis-devel \ jansson-devel \ jq \ @@ -791,6 +803,8 @@ jobs: gcc-c++ \ git \ hiredis-devel \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libasan \ @@ -889,6 +903,8 @@ jobs: gcc-c++ \ git \ hiredis-devel \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libasan \ @@ -987,6 +1003,8 @@ jobs: gcc-c++ \ git \ hiredis-devel \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libasan \ @@ -1080,6 +1098,8 @@ jobs: gcc-c++ \ git \ hiredis-devel \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libasan \ @@ -1165,6 +1185,8 @@ jobs: gcc-c++ \ git \ hiredis-devel \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libasan \ @@ -1261,6 +1283,8 @@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-devel \ libasan \ libtool \ libyaml-devel \ @@ -1382,6 +1406,8 @@ jobs: coccinelle \ dpdk-dev \ git \ + hwloc \ + libhwloc-dev \ jq \ libcap-ng-dev \ libevent-dev \ @@ -1454,6 +1480,8 @@ jobs: clang-14 \ curl \ git \ + hwloc \ + libhwloc-dev \ jq \ libtool \ libpcap-dev \ @@ -1591,6 +1619,8 @@ jobs: llvm-14-dev \ clang-14 \ git \ + hwloc \ + libhwloc-dev \ jq \ inetutils-ping \ libc++-dev \ @@ -1696,6 +1726,8 @@ jobs: llvm-14-dev \ clang-14 \ git \ + hwloc \ + libhwloc-dev \ jq \ inetutils-ping \ libc++-dev \ @@ -1835,6 +1867,8 @@ jobs: cbindgen \ clang-18 \ git \ + hwloc \ + libhwloc-dev \ jq \ libc++-dev \ libc++abi-dev \ @@ -1922,6 +1956,8 @@ jobs: cbindgen \ clang-18 \ git \ + hwloc \ + libhwloc-dev \ jq \ inetutils-ping \ libc++-dev \ @@ -2023,6 +2059,8 @@ jobs: llvm-14-dev \ clang-14 \ git \ + hwloc \ + libhwloc-dev \ jq \ libc++-dev \ libc++abi-dev \ @@ -2122,6 +2160,8 @@ jobs: automake \ cargo \ git \ + hwloc \ + libhwloc-dev \ jq \ libtool \ libpcap-dev \ @@ -2205,6 +2245,8 @@ jobs: apt -y install \ build-essential \ curl 
\ + hwloc \ + libhwloc-dev \ libtool \ libpcap-dev \ libnet1-dev \ @@ -2274,6 +2316,8 @@ jobs: automake \ cargo \ git \ + hwloc \ + libhwloc-dev \ jq \ libtool \ libpcap-dev \ @@ -2358,6 +2402,8 @@ jobs: automake \ cargo \ git \ + hwloc \ + libhwloc-dev \ libtool \ libpcap-dev \ libnet1-dev \ @@ -2423,6 +2469,8 @@ jobs: automake \ cargo \ git \ + hwloc \ + libhwloc-dev \ jq \ libtool \ libpcap-dev \ @@ -2562,6 +2610,8 @@ jobs: automake \ cargo \ git \ + hwloc \ + libhwloc-dev \ jq \ libtool \ libpcap-dev \ @@ -2660,6 +2710,8 @@ jobs: curl \ dpdk-dev \ git \ + hwloc \ + libhwloc-dev \ jq \ make \ libpcre3 \ @@ -2763,6 +2815,8 @@ jobs: cmake \ curl \ git \ + hwloc \ + libhwloc-dev \ jq \ make \ libpcre3 \ @@ -2844,6 +2898,8 @@ jobs: curl \ dpdk-dev \ git \ + hwloc \ + libhwloc-dev \ jq \ make \ libpcre3 \ @@ -2928,6 +2984,8 @@ jobs: ccache \ curl \ git \ + hwloc \ + libhwloc-dev \ jq \ libpcre2-dev \ libpcap-dev \ @@ -3001,6 +3059,8 @@ jobs: ccache \ curl \ git \ + hwloc \ + libhwloc-dev \ jq \ libpcre2-dev \ libpcap-dev \ @@ -3065,6 +3125,7 @@ jobs: cbindgen \ curl \ hiredis \ + hwloc \ jansson \ jq \ libmagic \ @@ -3294,6 +3355,8 @@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-devel \ jansson-devel \ libtool \ libyaml-devel \ From b83ee3832defbe87c85f0c1a99d294e8307a71de Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Thu, 19 Dec 2024 21:52:20 +0100 Subject: [PATCH 04/10] actions: test hwloc build --- .github/workflows/builds.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/builds.yml b/.github/workflows/builds.yml index 77ffe8008bb7..e8a1591189b6 100644 --- a/.github/workflows/builds.yml +++ b/.github/workflows/builds.yml @@ -2674,7 +2674,7 @@ jobs: - run: tar xf prep/libhtp.tar.gz - uses: ./.github/actions/install-cbindgen - run: ./autogen.sh - - run: CFLAGS="${DEFAULT_CFLAGS}" ./configure --enable-warnings --enable-dpdk + - run: CFLAGS="${DEFAULT_CFLAGS}" ./configure --enable-warnings --enable-dpdk --enable-hwloc - run: 
make -j ${{ env.CPUS }} - run: make check # IDS config From b856bc9589229a19b8013110054e6f1e28a0bf9b Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Thu, 2 Jan 2025 18:35:52 +0100 Subject: [PATCH 05/10] threading: refactor CPU affinity code Split the code into multiple functions for easier readability. --- src/util-affinity.c | 327 +++++++++++++++++++++++++++----------------- 1 file changed, 200 insertions(+), 127 deletions(-) diff --git a/src/util-affinity.c b/src/util-affinity.c index 8a224711e884..5ec027e69ae0 100644 --- a/src/util-affinity.c +++ b/src/util-affinity.c @@ -33,28 +33,28 @@ ThreadsAffinityType thread_affinity[MAX_CPU_SET] = { { - .name = "receive-cpu-set", - .mode_flag = EXCLUSIVE_AFFINITY, - .prio = PRIO_MEDIUM, - .lcpu = 0, + .name = "receive-cpu-set", + .mode_flag = EXCLUSIVE_AFFINITY, + .prio = PRIO_MEDIUM, + .lcpu = 0, }, { - .name = "worker-cpu-set", - .mode_flag = EXCLUSIVE_AFFINITY, - .prio = PRIO_MEDIUM, - .lcpu = 0, + .name = "worker-cpu-set", + .mode_flag = EXCLUSIVE_AFFINITY, + .prio = PRIO_MEDIUM, + .lcpu = 0, }, { - .name = "verdict-cpu-set", - .mode_flag = BALANCED_AFFINITY, - .prio = PRIO_MEDIUM, - .lcpu = 0, + .name = "verdict-cpu-set", + .mode_flag = BALANCED_AFFINITY, + .prio = PRIO_MEDIUM, + .lcpu = 0, }, { - .name = "management-cpu-set", - .mode_flag = BALANCED_AFFINITY, - .prio = PRIO_MEDIUM, - .lcpu = 0, + .name = "management-cpu-set", + .mode_flag = BALANCED_AFFINITY, + .prio = PRIO_MEDIUM, + .lcpu = 0, }, }; @@ -82,7 +82,7 @@ static void AffinitySetupInit(void) int i, j; int ncpu = UtilCpuGetNumProcessorsConfigured(); - SCLogDebug("Initialize affinity setup\n"); + SCLogDebug("Initialize CPU affinity setup"); /* be conservative relatively to OS: use all cpus by default */ for (i = 0; i < MAX_CPU_SET; i++) { cpu_set_t *cs = &thread_affinity[i].cpu_set; @@ -141,8 +141,9 @@ void BuildCpusetWithCallback(const char *name, ConfNode *node, for (i = a; i<= b; i++) { Callback(i, data); } - if (stop) + if (stop) { break; + } } } @@ 
-155,122 +156,198 @@ static void BuildCpuset(const char *name, ConfNode *node, cpu_set_t *cpu) { BuildCpusetWithCallback(name, node, AffinityCallback, (void *) cpu); } -#endif /* OS_WIN32 and __OpenBSD__ */ /** - * \brief Extract cpu affinity configuration from current config file + * \brief Get the appropriate set name for a given affinity value. + */ +static const char *GetAffinitySetName(const char *val) +{ + if (strcmp(val, "decode-cpu-set") == 0 || strcmp(val, "stream-cpu-set") == 0 || + strcmp(val, "reject-cpu-set") == 0 || strcmp(val, "output-cpu-set") == 0) { + return NULL; + } + + return (strcmp(val, "detect-cpu-set") == 0) ? "worker-cpu-set" : val; +} + +/** + * \brief Set up CPU sets for the given affinity type. + */ +static void SetupCpuSets(ThreadsAffinityType *taf, ConfNode *affinity, const char *setname) +{ + CPU_ZERO(&taf->cpu_set); + + ConfNode *cpu_node = ConfNodeLookupChild(affinity->head.tqh_first, "cpu"); + if (cpu_node != NULL) { + BuildCpuset(setname, cpu_node, &taf->cpu_set); + } else { + SCLogWarning("Unable to find 'cpu' node for set %s", setname); + } +} + +/** + * \brief Build a priority CPU set for the given priority level. + */ +static void BuildPriorityCpuset(ThreadsAffinityType *taf, ConfNode *prio_node, const char *priority, + cpu_set_t *cpuset, const char *setname) +{ + ConfNode *node = ConfNodeLookupChild(prio_node, priority); + if (node != NULL) { + BuildCpuset(setname, node, cpuset); + } else { + SCLogDebug("Unable to find '%s' priority for set %s", priority, setname); + } +} + +/** + * \brief Set up the default priority for the given affinity type. 
*/ +static void SetupDefaultPriority(ThreadsAffinityType *taf, ConfNode *prio_node, const char *setname) +{ + ConfNode *default_node = ConfNodeLookupChild(prio_node, "default"); + if (default_node == NULL) { + return; + } + + if (strcmp(default_node->val, "low") == 0) { + taf->prio = PRIO_LOW; + } else if (strcmp(default_node->val, "medium") == 0) { + taf->prio = PRIO_MEDIUM; + } else if (strcmp(default_node->val, "high") == 0) { + taf->prio = PRIO_HIGH; + } else { + FatalError("Unknown default CPU affinity priority: %s", default_node->val); + } + + SCLogConfig("Using default priority '%s' for set %s", default_node->val, setname); +} + +/** + * \brief Set up priority CPU sets for the given affinity type. + */ +static void SetupAffinityPriority(ThreadsAffinityType *taf, ConfNode *affinity, const char *setname) +{ + CPU_ZERO(&taf->lowprio_cpu); + CPU_ZERO(&taf->medprio_cpu); + CPU_ZERO(&taf->hiprio_cpu); + + ConfNode *prio_node = ConfNodeLookupChild(affinity->head.tqh_first, "prio"); + if (prio_node == NULL) { + return; + } + + BuildPriorityCpuset(taf, prio_node, "low", &taf->lowprio_cpu, setname); + BuildPriorityCpuset(taf, prio_node, "medium", &taf->medprio_cpu, setname); + BuildPriorityCpuset(taf, prio_node, "high", &taf->hiprio_cpu, setname); + + SetupDefaultPriority(taf, prio_node, setname); +} + +/** + * \brief Set up CPU affinity mode for the given affinity type. + */ +static void SetupAffinityMode(ThreadsAffinityType *taf, ConfNode *affinity) +{ + ConfNode *mode_node = ConfNodeLookupChild(affinity->head.tqh_first, "mode"); + if (mode_node == NULL) { + return; + } + + if (strcmp(mode_node->val, "exclusive") == 0) { + taf->mode_flag = EXCLUSIVE_AFFINITY; + } else if (strcmp(mode_node->val, "balanced") == 0) { + taf->mode_flag = BALANCED_AFFINITY; + } else { + FatalError("Unknown CPU affinity mode: %s", mode_node->val); + } +} + +/** + * \brief Set up the number of threads for the given affinity type. 
+ */ +static void SetupAffinityThreads(ThreadsAffinityType *taf, ConfNode *affinity) +{ + ConfNode *threads_node = ConfNodeLookupChild(affinity->head.tqh_first, "threads"); + if (threads_node == NULL) { + return; + } + + if (StringParseUint32(&taf->nb_threads, 10, 0, threads_node->val) < 0 || taf->nb_threads == 0) { + FatalError("Invalid thread count: %s", threads_node->val); + } +} + +static bool AllCPUsUsed(ThreadsAffinityType *taf) +{ + if (taf->lcpu < UtilCpuGetNumProcessorsOnline()) { + return false; + } + return true; +} + +static void ResetCPUs(ThreadsAffinityType *taf) +{ + taf->lcpu = 0; +} + +static uint16_t GetNextAvailableCPU(ThreadsAffinityType *taf) +{ + uint16_t cpu = taf->lcpu; + int attempts = 0; + + while (!CPU_ISSET(cpu, &taf->cpu_set) && attempts < 2) { + cpu = (cpu + 1) % UtilCpuGetNumProcessorsOnline(); + if (cpu == 0) + attempts++; + } + + taf->lcpu = cpu + 1; + if (attempts == 2) { + SCLogError( + "cpu_set does not contain available CPUs, CPU affinity configuration is invalid"); + } + + return cpu; +} +#endif /* OS_WIN32 and __OpenBSD__ */ + +/** + * \brief Extract CPU affinity configuration from current config file + */ void AffinitySetupLoadFromConfig(void) { #if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun - ConfNode *root = ConfGetNode("threading.cpu-affinity"); - ConfNode *affinity; - if (thread_affinity_init_done == 0) { AffinitySetupInit(); thread_affinity_init_done = 1; } - SCLogDebug("Load affinity from config\n"); + SCLogDebug("Loading threading.cpu-affinity from config"); + ConfNode *root = ConfGetNode("threading.cpu-affinity"); if (root == NULL) { - SCLogInfo("can't get cpu-affinity node"); + SCLogInfo("Cannot find threading.cpu-affinity node in config"); return; } + ConfNode *affinity; TAILQ_FOREACH(affinity, &root->head, next) { - if (strcmp(affinity->val, "decode-cpu-set") == 0 || - strcmp(affinity->val, "stream-cpu-set") == 0 || - strcmp(affinity->val, "reject-cpu-set") == 0 || - 
strcmp(affinity->val, "output-cpu-set") == 0) { + const char *setname = GetAffinitySetName(affinity->val); + if (setname == NULL) { continue; } - const char *setname = affinity->val; - if (strcmp(affinity->val, "detect-cpu-set") == 0) - setname = "worker-cpu-set"; - ThreadsAffinityType *taf = GetAffinityTypeFromName(setname); - ConfNode *node = NULL; - ConfNode *nprio = NULL; - if (taf == NULL) { - FatalError("unknown cpu-affinity type"); - } else { - SCLogConfig("Found affinity definition for \"%s\"", setname); - } - - CPU_ZERO(&taf->cpu_set); - node = ConfNodeLookupChild(affinity->head.tqh_first, "cpu"); - if (node == NULL) { - SCLogInfo("unable to find 'cpu'"); - } else { - BuildCpuset(setname, node, &taf->cpu_set); - } - - CPU_ZERO(&taf->lowprio_cpu); - CPU_ZERO(&taf->medprio_cpu); - CPU_ZERO(&taf->hiprio_cpu); - nprio = ConfNodeLookupChild(affinity->head.tqh_first, "prio"); - if (nprio != NULL) { - node = ConfNodeLookupChild(nprio, "low"); - if (node == NULL) { - SCLogDebug("unable to find 'low' prio using default value"); - } else { - BuildCpuset(setname, node, &taf->lowprio_cpu); - } - - node = ConfNodeLookupChild(nprio, "medium"); - if (node == NULL) { - SCLogDebug("unable to find 'medium' prio using default value"); - } else { - BuildCpuset(setname, node, &taf->medprio_cpu); - } - - node = ConfNodeLookupChild(nprio, "high"); - if (node == NULL) { - SCLogDebug("unable to find 'high' prio using default value"); - } else { - BuildCpuset(setname, node, &taf->hiprio_cpu); - } - node = ConfNodeLookupChild(nprio, "default"); - if (node != NULL) { - if (!strcmp(node->val, "low")) { - taf->prio = PRIO_LOW; - } else if (!strcmp(node->val, "medium")) { - taf->prio = PRIO_MEDIUM; - } else if (!strcmp(node->val, "high")) { - taf->prio = PRIO_HIGH; - } else { - FatalError("unknown cpu_affinity prio"); - } - SCLogConfig("Using default prio '%s' for set '%s'", - node->val, setname); - } + FatalError("Unknown CPU affinity type: %s", setname); } - node = 
ConfNodeLookupChild(affinity->head.tqh_first, "mode"); - if (node != NULL) { - if (!strcmp(node->val, "exclusive")) { - taf->mode_flag = EXCLUSIVE_AFFINITY; - } else if (!strcmp(node->val, "balanced")) { - taf->mode_flag = BALANCED_AFFINITY; - } else { - FatalError("unknown cpu_affinity node"); - } - } + SCLogConfig("Found CPU affinity definition for \"%s\"", setname); - node = ConfNodeLookupChild(affinity->head.tqh_first, "threads"); - if (node != NULL) { - if (StringParseUint32(&taf->nb_threads, 10, 0, (const char *)node->val) < 0) { - FatalError("invalid value for threads " - "count: '%s'", - node->val); - } - if (! taf->nb_threads) { - FatalError("bad value for threads count"); - } - } + SetupCpuSets(taf, affinity, setname); + SetupAffinityPriority(taf, affinity, setname); + SetupAffinityMode(taf, affinity); + SetupAffinityThreads(taf, affinity); } #endif /* OS_WIN32 and __OpenBSD__ */ } @@ -283,37 +360,32 @@ uint16_t AffinityGetNextCPU(ThreadsAffinityType *taf) { uint16_t ncpu = 0; #if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun - int iter = 0; SCMutexLock(&taf->taf_mutex); - ncpu = taf->lcpu; - while (!CPU_ISSET(ncpu, &taf->cpu_set) && iter < 2) { - ncpu++; - if (ncpu >= UtilCpuGetNumProcessorsOnline()) { - ncpu = 0; - iter++; - } - } - if (iter == 2) { - SCLogError("cpu_set does not contain " - "available cpus, cpu affinity conf is invalid"); + ncpu = GetNextAvailableCPU(taf); + + if (AllCPUsUsed(taf)) { + ResetCPUs(taf); } - taf->lcpu = ncpu + 1; - if (taf->lcpu >= UtilCpuGetNumProcessorsOnline()) - taf->lcpu = 0; - SCMutexUnlock(&taf->taf_mutex); + SCLogDebug("Setting affinity on CPU %d", ncpu); + SCMutexUnlock(&taf->taf_mutex); #endif /* OS_WIN32 and __OpenBSD__ */ return ncpu; } +/** + * \brief Return the total number of CPUs in a given affinity + * \retval the number of affined CPUs + */ uint16_t UtilAffinityGetAffinedCPUNum(ThreadsAffinityType *taf) { uint16_t ncpu = 0; #if !defined __CYGWIN__ && !defined OS_WIN32 
&& !defined __OpenBSD__ && !defined sun SCMutexLock(&taf->taf_mutex); for (int i = UtilCpuGetNumProcessorsOnline(); i >= 0; i--) - if (CPU_ISSET(i, &taf->cpu_set)) + if (CPU_ISSET(i, &taf->cpu_set)) { ncpu++; + } SCMutexUnlock(&taf->taf_mutex); #endif return ncpu; @@ -339,8 +411,9 @@ uint16_t UtilAffinityCpusOverlap(ThreadsAffinityType *taf1, ThreadsAffinityType SCMutexUnlock(&taf1->taf_mutex); for (int i = UtilCpuGetNumProcessorsOnline(); i >= 0; i--) - if (CPU_ISSET(i, &tmpcset)) + if (CPU_ISSET(i, &tmpcset)) { return 1; + } return 0; } From 3ac112cdcf63bba21126d99e9cf66aab5e9bb0a1 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Fri, 6 Dec 2024 13:42:21 +0100 Subject: [PATCH 06/10] util-affinity: move properties of *-cpu-set node one layer up in YAML --- src/util-affinity.c | 11 ++++------- suricata.yaml.in | 30 +++++++++++++++--------------- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/src/util-affinity.c b/src/util-affinity.c index 5ec027e69ae0..dc139824aba4 100644 --- a/src/util-affinity.c +++ b/src/util-affinity.c @@ -176,8 +176,7 @@ static const char *GetAffinitySetName(const char *val) static void SetupCpuSets(ThreadsAffinityType *taf, ConfNode *affinity, const char *setname) { CPU_ZERO(&taf->cpu_set); - - ConfNode *cpu_node = ConfNodeLookupChild(affinity->head.tqh_first, "cpu"); + ConfNode *cpu_node = ConfNodeLookupChild(affinity, "cpu"); if (cpu_node != NULL) { BuildCpuset(setname, cpu_node, &taf->cpu_set); } else { @@ -230,8 +229,7 @@ static void SetupAffinityPriority(ThreadsAffinityType *taf, ConfNode *affinity, CPU_ZERO(&taf->lowprio_cpu); CPU_ZERO(&taf->medprio_cpu); CPU_ZERO(&taf->hiprio_cpu); - - ConfNode *prio_node = ConfNodeLookupChild(affinity->head.tqh_first, "prio"); + ConfNode *prio_node = ConfNodeLookupChild(affinity, "prio"); if (prio_node == NULL) { return; } @@ -239,7 +237,6 @@ static void SetupAffinityPriority(ThreadsAffinityType *taf, ConfNode *affinity, BuildPriorityCpuset(taf, prio_node, "low", &taf->lowprio_cpu, 
setname); BuildPriorityCpuset(taf, prio_node, "medium", &taf->medprio_cpu, setname); BuildPriorityCpuset(taf, prio_node, "high", &taf->hiprio_cpu, setname); - SetupDefaultPriority(taf, prio_node, setname); } @@ -248,7 +245,7 @@ static void SetupAffinityPriority(ThreadsAffinityType *taf, ConfNode *affinity, */ static void SetupAffinityMode(ThreadsAffinityType *taf, ConfNode *affinity) { - ConfNode *mode_node = ConfNodeLookupChild(affinity->head.tqh_first, "mode"); + ConfNode *mode_node = ConfNodeLookupChild(affinity, "mode"); if (mode_node == NULL) { return; } @@ -267,7 +264,7 @@ static void SetupAffinityMode(ThreadsAffinityType *taf, ConfNode *affinity) */ static void SetupAffinityThreads(ThreadsAffinityType *taf, ConfNode *affinity) { - ConfNode *threads_node = ConfNodeLookupChild(affinity->head.tqh_first, "threads"); + ConfNode *threads_node = ConfNodeLookupChild(affinity, "threads"); if (threads_node == NULL) { return; } diff --git a/suricata.yaml.in b/suricata.yaml.in index 4bc9e87aa2af..0eba8a376791 100644 --- a/suricata.yaml.in +++ b/suricata.yaml.in @@ -1790,24 +1790,24 @@ threading: # cpu-affinity: - management-cpu-set: - cpu: [ 0 ] # include only these CPUs in affinity settings + cpu: [ 0 ] # include only these CPUs in affinity settings - receive-cpu-set: - cpu: [ 0 ] # include only these CPUs in affinity settings + cpu: [ 0 ] # include only these CPUs in affinity settings - worker-cpu-set: - cpu: [ "all" ] - mode: "exclusive" - # Use explicitly 3 threads and don't compute number by using - # detect-thread-ratio variable: - # threads: 3 - prio: - low: [ 0 ] - medium: [ "1-2" ] - high: [ 3 ] - default: "medium" + cpu: [ "all" ] + mode: "exclusive" + # Use explicitly 3 threads and don't compute number by using + # detect-thread-ratio variable: + # threads: 3 + prio: + low: [ 0 ] + medium: [ "1-2" ] + high: [ 3 ] + default: "medium" #- verdict-cpu-set: - # cpu: [ 0 ] - # prio: - # default: "high" + # cpu: [ 0 ] + # prio: + # default: "high" # # By default 
Suricata creates one "detect" thread per available CPU/CPU core. # This setting allows controlling this behaviour. A ratio setting of 2 will From fb5ae7cbf867215bbcfa4c02a27b0f76d035aa42 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Fri, 6 Dec 2024 13:47:43 +0100 Subject: [PATCH 07/10] threading: transform *-cpu-set nodes from list items to nodes Part of Ticket 2321 work to remove unnecessary lists from the config file. Ticket: 2321 --- src/util-affinity.c | 2 +- suricata.yaml.in | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/util-affinity.c b/src/util-affinity.c index dc139824aba4..196a0e3b420d 100644 --- a/src/util-affinity.c +++ b/src/util-affinity.c @@ -329,7 +329,7 @@ void AffinitySetupLoadFromConfig(void) ConfNode *affinity; TAILQ_FOREACH(affinity, &root->head, next) { - const char *setname = GetAffinitySetName(affinity->val); + const char *setname = GetAffinitySetName(affinity->name); if (setname == NULL) { continue; } diff --git a/suricata.yaml.in b/suricata.yaml.in index 0eba8a376791..c56a0778dfa8 100644 --- a/suricata.yaml.in +++ b/suricata.yaml.in @@ -1789,11 +1789,11 @@ threading: # verdict-cpu-set is used for IPS verdict threads # cpu-affinity: - - management-cpu-set: + management-cpu-set: cpu: [ 0 ] # include only these CPUs in affinity settings - - receive-cpu-set: + receive-cpu-set: cpu: [ 0 ] # include only these CPUs in affinity settings - - worker-cpu-set: + worker-cpu-set: cpu: [ "all" ] mode: "exclusive" # Use explicitly 3 threads and don't compute number by using @@ -1804,7 +1804,7 @@ threading: medium: [ "1-2" ] high: [ 3 ] default: "medium" - #- verdict-cpu-set: + #verdict-cpu-set: # cpu: [ 0 ] # prio: # default: "high" From b810eec7c059c285a4f1a0ad4bfb4c95c86a24fa Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Fri, 3 Jan 2025 13:08:49 +0100 Subject: [PATCH 08/10] threading: support previous threading configuration format Provide backward compatibility with the previous configuration format to allow 
smooth transition to the new format. The commit adds docs about the new format and the introduced changes. --- doc/userguide/capture-hardware/dpdk.rst | 12 ++-- doc/userguide/configuration/suricata-yaml.rst | 14 +++-- .../performance/high-performance-config.rst | 44 +++++++------- .../setting-up-ipsinline-for-linux.rst | 8 +-- doc/userguide/upgrade.rst | 16 +++++ src/util-affinity.c | 58 +++++++++++++++++-- 6 files changed, 109 insertions(+), 43 deletions(-) diff --git a/doc/userguide/capture-hardware/dpdk.rst b/doc/userguide/capture-hardware/dpdk.rst index 6be7278b8cbb..480fd6f92152 100644 --- a/doc/userguide/capture-hardware/dpdk.rst +++ b/doc/userguide/capture-hardware/dpdk.rst @@ -139,12 +139,12 @@ management and worker CPU set. threading: set-cpu-affinity: yes cpu-affinity: - - management-cpu-set: - cpu: [ 0 ] # include only these CPUs in affinity settings - - receive-cpu-set: - cpu: [ 0 ] # include only these CPUs in affinity settings - - worker-cpu-set: - cpu: [ 2,4,6,8 ] + management-cpu-set: + cpu: [ 0 ] # include only these CPUs in affinity settings + receive-cpu-set: + cpu: [ 0 ] # include only these CPUs in affinity settings + worker-cpu-set: + cpu: [ 2,4,6,8 ] ... Interrupt (power-saving) mode diff --git a/doc/userguide/configuration/suricata-yaml.rst b/doc/userguide/configuration/suricata-yaml.rst index 8004ecce9167..165e1131e538 100644 --- a/doc/userguide/configuration/suricata-yaml.rst +++ b/doc/userguide/configuration/suricata-yaml.rst @@ -922,12 +922,14 @@ per available CPU/CPU core. :: - cpu-affinity: - - management-cpu-set: + threading: + set-cpu-affinity: yes + cpu-affinity: + management-cpu-set: cpu: [ 0 ] # include only these cpus in affinity settings - - receive-cpu-set: + receive-cpu-set: cpu: [ 0 ] # include only these cpus in affinity settings - - worker-cpu-set: + worker-cpu-set: cpu: [ "all" ] mode: "exclusive" # Use explicitly 3 threads and don't compute number by using @@ -938,7 +940,7 @@ per available CPU/CPU core. 
medium: [ "1-2" ] high: [ 3 ] default: "medium" - - verdict-cpu-set: + verdict-cpu-set: cpu: [ 0 ] prio: default: "high" @@ -2206,7 +2208,7 @@ missing, the corresponding value of the `default` interface is used. The worker threads must be assigned to specific cores. The configuration module `threading` must be used to set thread affinity. Worker threads can be pinned to cores in the array configured in -`threading.cpu-affinity["worker-cpu-set"]`. Performance-oriented setups have +`threading.cpu-affinity.worker-cpu-set`. Performance-oriented setups have everything (the NIC, memory, and CPU cores interacting with the NIC) based on one NUMA node. It is therefore required to know the layout of the server architecture to get the diff --git a/doc/userguide/performance/high-performance-config.rst b/doc/userguide/performance/high-performance-config.rst index 7d54f7b6d0e5..1070405140cd 100644 --- a/doc/userguide/performance/high-performance-config.rst +++ b/doc/userguide/performance/high-performance-config.rst @@ -202,18 +202,18 @@ In the cpu affinity section of suricata.yaml config: # Suricata is multi-threaded. Here the threading can be influenced. 
threading: cpu-affinity: - - management-cpu-set: - cpu: [ "1-10" ] # include only these CPUs in affinity settings - - receive-cpu-set: - cpu: [ "0-10" ] # include only these CPUs in affinity settings - - worker-cpu-set: - cpu: [ "18-35", "54-71" ] - mode: "exclusive" - prio: - low: [ 0 ] - medium: [ "1" ] - high: [ "18-35","54-71" ] - default: "high" + management-cpu-set: + cpu: [ "1-10" ] # include only these CPUs in affinity settings + receive-cpu-set: + cpu: [ "0-10" ] # include only these CPUs in affinity settings + worker-cpu-set: + cpu: [ "18-35", "54-71" ] + mode: "exclusive" + prio: + low: [ 0 ] + medium: [ "1" ] + high: [ "18-35","54-71" ] + default: "high" In the af-packet section of suricata.yaml config : @@ -326,16 +326,16 @@ In the cpu affinity section of suricata.yaml config : threading: set-cpu-affinity: yes cpu-affinity: - - management-cpu-set: - cpu: [ "120-127" ] # include only these cpus in affinity settings - - receive-cpu-set: - cpu: [ 0 ] # include only these cpus in affinity settings - - worker-cpu-set: - cpu: [ "8-55" ] - mode: "exclusive" - prio: - high: [ "8-55" ] - default: "high" + management-cpu-set: + cpu: [ "120-127" ] # include only these cpus in affinity settings + receive-cpu-set: + cpu: [ 0 ] # include only these cpus in affinity settings + worker-cpu-set: + cpu: [ "8-55" ] + mode: "exclusive" + prio: + high: [ "8-55" ] + default: "high" In the af-packet section of suricata.yaml config: diff --git a/doc/userguide/setting-up-ipsinline-for-linux.rst b/doc/userguide/setting-up-ipsinline-for-linux.rst index fd4fcb6b2be3..8952b414efac 100644 --- a/doc/userguide/setting-up-ipsinline-for-linux.rst +++ b/doc/userguide/setting-up-ipsinline-for-linux.rst @@ -342,10 +342,10 @@ The following snippet shows a possible :ref:`suricata-yaml-threading` configurat threading: set-cpu-affinity: yes cpu-affinity: - - management-cpu-set: - cpu: [ 0 ] - - worker-cpu-set: - cpu: [ 2,4,6,8,10,12,14,16 ] + management-cpu-set: + cpu: [ 0 ] + worker-cpu-set: 
+ cpu: [ 2,4,6,8,10,12,14,16 ] Netmap IPS mode ~~~~~~~~~~~~~~~ diff --git a/doc/userguide/upgrade.rst b/doc/userguide/upgrade.rst index 4bf74b65284d..18ecabf221f6 100644 --- a/doc/userguide/upgrade.rst +++ b/doc/userguide/upgrade.rst @@ -82,6 +82,22 @@ Major changes - Unknown requirements in the ``requires`` keyword will now be treated as unmet requirements, causing the rule to not be loaded. See :ref:`keyword_requires`. +- The configuration structure of ``threading.cpu-affinity`` has been changed + from a list format to a dictionary format. Additionally, member properties of + `*-cpu-set` nodes have been moved one level up. + The support for list items such as `- worker-cpu-set`, `- management-cpu-set`, + etc. is deprecated and will be removed in Suricata 9.0. + Convert to the new configuration format as shown in the example below or as + described in :ref:`suricata-yaml-threading`. + + .. code-block:: diff + + threading: + cpu-affinity: + - - worker-cpu-set: + - cpu: [0, 1] + + worker-cpu-set: + + cpu: [0, 1] Removals ~~~~~~~~ diff --git a/src/util-affinity.c b/src/util-affinity.c index 196a0e3b420d..ee365372702a 100644 --- a/src/util-affinity.c +++ b/src/util-affinity.c @@ -23,6 +23,7 @@ */ #include "suricata-common.h" +#include "suricata.h" #define _THREAD_AFFINITY #include "util-affinity.h" #include "conf.h" @@ -307,6 +308,43 @@ static uint16_t GetNextAvailableCPU(ThreadsAffinityType *taf) return cpu; } + +/** + * \brief Check if CPU affinity configuration node follows format used in Suricata 7 and below + * \retval true if CPU affinity uses Suricata <=7.0, false if it uses the new format (Suricata + * >=8.0) + */ +static bool AffinityConfigIsDeprecated(void) +{ + static bool threading_affinity_deprecated = false; + static bool initialized = false; + + if (initialized) { + return threading_affinity_deprecated; + } + + ConfNode *root = ConfGetNode("threading.cpu-affinity"); + if (root == NULL) { + threading_affinity_deprecated = false; + initialized = true; + 
return threading_affinity_deprecated; + } + + ConfNode *affinity; + TAILQ_FOREACH (affinity, &root->head, next) { + // If a child does not contain "-cpu-set", then the conf is deprecated + // Names in the deprecated format (list of *-cpu-sets) contain + // list item IDs - "0" : "management-cpu-set", "1" : "worker-cpu-set" + if (strstr(affinity->name, "-cpu-set") == NULL) { + threading_affinity_deprecated = true; + initialized = true; + return threading_affinity_deprecated; + } + } + + initialized = true; + return threading_affinity_deprecated; +} #endif /* OS_WIN32 and __OpenBSD__ */ /** @@ -318,6 +356,13 @@ void AffinitySetupLoadFromConfig(void) if (thread_affinity_init_done == 0) { AffinitySetupInit(); thread_affinity_init_done = 1; + if (AffinityConfigIsDeprecated()) { + SCLogWarning("CPU affinity configuration uses a deprecated structure and will become " + "obsolete in a future major release (Suricata 9.0). Please update your " + "threading.cpu-affinity to the new format. " + "See notes in %s/upgrade.html#upgrading-7-0-to-8-0", + GetDocURL()); + } } SCLogDebug("Loading threading.cpu-affinity from config"); @@ -329,7 +374,8 @@ void AffinitySetupLoadFromConfig(void) ConfNode *affinity; TAILQ_FOREACH(affinity, &root->head, next) { - const char *setname = GetAffinitySetName(affinity->name); + char *v = AffinityConfigIsDeprecated() ? affinity->val : affinity->name; + const char *setname = GetAffinitySetName(v); if (setname == NULL) { continue; } @@ -341,10 +387,12 @@ void AffinitySetupLoadFromConfig(void) SCLogConfig("Found CPU affinity definition for \"%s\"", setname); - SetupCpuSets(taf, affinity, setname); - SetupAffinityPriority(taf, affinity, setname); - SetupAffinityMode(taf, affinity); - SetupAffinityThreads(taf, affinity); + ConfNode *aff_query_node = + AffinityConfigIsDeprecated() ? 
affinity->head.tqh_first : affinity; + SetupCpuSets(taf, aff_query_node, setname); + SetupAffinityPriority(taf, aff_query_node, setname); + SetupAffinityMode(taf, aff_query_node); + SetupAffinityThreads(taf, aff_query_node); } #endif /* OS_WIN32 and __OpenBSD__ */ } From 59d64d1bc1161ab80cf90269444001e501638961 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Fri, 3 Jan 2025 13:09:49 +0100 Subject: [PATCH 09/10] doc: remove title in threading section with no content --- doc/userguide/configuration/suricata-yaml.rst | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/doc/userguide/configuration/suricata-yaml.rst b/doc/userguide/configuration/suricata-yaml.rst index 165e1131e538..c6d299659f92 100644 --- a/doc/userguide/configuration/suricata-yaml.rst +++ b/doc/userguide/configuration/suricata-yaml.rst @@ -945,11 +945,8 @@ per available CPU/CPU core. prio: default: "high" -Relevant cpu-affinity settings for IDS/IPS modes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -IDS mode -~~~~~~~~ +Relevant cpu-affinity settings for IDS mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Runmode AutoFp:: @@ -963,8 +960,8 @@ Rumode Workers:: worker-cpu-set - used for receive,streamtcp,decode,detect,output(logging),respond/reject -IPS mode -~~~~~~~~ +Relevant cpu-affinity settings for IPS mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Runmode AutoFp:: From 2f0289074e80c1cda110460d6b148e167bebe3f1 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Fri, 3 Jan 2025 16:08:36 +0100 Subject: [PATCH 10/10] threading: support thread autopinning and interface-specific affinity Using the new configuration format, it is now possible to set CPU affinity settings per interface. The threading.autopin option has been added to automatically use CPUs from the same NUMA node as the interface. The autopin option requires hwloc-devel / hwloc-dev to be installed and --enable-hwloc flag in configure script. 
Ticket: 7036 --- configure.ac | 28 + doc/userguide/configuration/suricata-yaml.rst | 82 +++ doc/userguide/upgrade.rst | 14 + src/runmode-dpdk.c | 62 +- src/suricata.c | 4 + src/threadvars.h | 3 + src/tm-threads.c | 22 +- src/util-affinity.c | 630 ++++++++++++++++-- src/util-affinity.h | 31 +- src/util-device.c | 15 + src/util-device.h | 1 + src/util-runmodes.c | 16 + suricata.yaml.in | 15 + 13 files changed, 857 insertions(+), 66 deletions(-) diff --git a/configure.ac b/configure.ac index ca964d9039a0..3151bff17a92 100644 --- a/configure.ac +++ b/configure.ac @@ -741,6 +741,33 @@ exit 1 fi + LIBHWLOC="" + AC_ARG_ENABLE(hwloc, + AS_HELP_STRING([--enable-hwloc], [Enable hwloc support [default=no]]), + [enable_hwloc=$enableval],[enable_hwloc=no]) + AS_IF([test "x$enable_hwloc" = "xyes"], [ + PKG_CHECK_MODULES([HWLOC], [hwloc >= 2.0.0], + [AC_DEFINE([HAVE_HWLOC], [1], [Define if hwloc library is present and meets version requirements])], + LIBHWLOC="no") + + if test "$LIBHWLOC" = "no"; then + echo + echo " ERROR! 
hwloc library version > 2.0.0 not found, go get it" + echo " from https://www.open-mpi.org/projects/hwloc/ " + echo " or your distribution:" + echo + echo " Ubuntu: apt-get install hwloc libhwloc-dev" + echo " Fedora: dnf install hwloc hwloc-devel" + echo " CentOS/RHEL: yum install hwloc hwloc-devel" + echo + exit 1 + else + CFLAGS="${CFLAGS} ${HWLOC_CFLAGS}" + LDFLAGS="${LDFLAGS} ${HWLOC_LIBS}" + enable_hwloc="yes" + fi + ]) + # libpthread AC_ARG_WITH(libpthread_includes, [ --with-libpthread-includes=DIR libpthread include directory], @@ -2561,6 +2588,7 @@ SURICATA_BUILD_CONF="Suricata Configuration: JA4 support: ${enable_ja4} Non-bundled htp: ${enable_non_bundled_htp} Hyperscan support: ${enable_hyperscan} + Hwloc support: ${enable_hwloc} Libnet support: ${enable_libnet} liblz4 support: ${enable_liblz4} Landlock support: ${enable_landlock} diff --git a/doc/userguide/configuration/suricata-yaml.rst b/doc/userguide/configuration/suricata-yaml.rst index c6d299659f92..8eccf94279a6 100644 --- a/doc/userguide/configuration/suricata-yaml.rst +++ b/doc/userguide/configuration/suricata-yaml.rst @@ -924,6 +924,7 @@ per available CPU/CPU core. threading: set-cpu-affinity: yes + autopin: no cpu-affinity: management-cpu-set: cpu: [ 0 ] # include only these cpus in affinity settings @@ -940,6 +941,13 @@ per available CPU/CPU core. medium: [ "1-2" ] high: [ 3 ] default: "medium" + interface-specific-cpu-set: + - interface: "enp4s0f0" # 0000:3b:00.0 # net_bonding0 # ens1f0 + cpu: [ 1,3,5,7,9 ] + mode: "exclusive" + prio: + high: [ "all" ] + default: "medium" verdict-cpu-set: cpu: [ 0 ] prio: @@ -976,6 +984,80 @@ Runmode Workers:: worker-cpu-set - used for receive,streamtcp,decode,detect,output(logging),respond/reject, verdict +Interface-specific CPU affinity settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using the new configuration format introduced in Suricata 8.0 it is possible +to set CPU affinity settings per interface. 
This can be useful +when you have multiple interfaces and you want to dedicate specific CPU cores +to specific interfaces, for example when Suricata runs on a system with +multiple NUMA nodes and reads from interfaces on each NUMA node. + +Interface-specific affinity settings can be configured for the worker-cpu-set +and the receive-cpu-set (only used in autofp mode). +This feature is available for capture modes which work with interfaces +(af-packet, dpdk, etc.). The value of the interface key can be the kernel +interface name (e.g. eth0 for af-packet), the PCI address of the interface +(e.g. 0000:3b:00.0 for DPDK capture mode), or the name of the virtual device +interface (e.g. net_bonding0 for DPDK capture mode). +Each interface name needs to be unique and must be defined under the capture mode +configuration. + +The interface-specific settings will override the global settings for the +worker-cpu-set and receive-cpu-set. The CPUs do not need to be contained in +the parent node settings. If the interface-specific settings are not defined, +the global settings will be used. + +:: + + threading: + set-cpu-affinity: yes + cpu-affinity: + worker-cpu-set: + interface-specific-cpu-set: + - interface: "eth0" # 0000:3b:00.0 # net_bonding0 + cpu: [ 1,3,5,7,9 ] + mode: "exclusive" + prio: + high: [ "all" ] + default: "medium" + +Automatic NUMA-aware CPU core pinning +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When Suricata is running on a system with multiple NUMA nodes, it is possible +to automatically use CPUs from the same NUMA node as the network capture +interface. +CPU cores on the same NUMA node as the network capture interface have +reduced memory access latency, which increases the performance of Suricata. +This is enabled by setting the `autopin` option to `yes` in the threading +section. This option is available for worker-cpu-set and receive-cpu-set. 
+ +:: + + threading: + set-cpu-affinity: yes + autopin: yes + cpu-affinity: + worker-cpu-set: + cpu: [ "all" ] + mode: "exclusive" + prio: + high: [ "all" ] + +Consider 2 interfaces defined in the capture mode configuration, one on each +NUMA node. The `autopin` option is enabled to automatically use CPUs from the +same NUMA node as the interface. The worker-cpu-set is set to use all CPUs. +When the interface on the first NUMA node is used, the worker threads will be +pinned to CPUs on the first NUMA node. When the interface on the second NUMA node +is used, the worker threads will be pinned to CPUs on the second NUMA node. +If the CPU cores on a given NUMA node are exhausted, the remaining worker +threads will be pinned to CPUs on the other NUMA node. + +The option `threading.autopin` can be combined with the interface-specific CPU +affinity settings. +To use the `autopin` option, the `hwloc` dependency must be installed and +Suricata must be built with `--enable-hwloc` passed to the configure script. IP Defrag --------- diff --git a/doc/userguide/upgrade.rst b/doc/userguide/upgrade.rst index 18ecabf221f6..0511b04c8910 100644 --- a/doc/userguide/upgrade.rst +++ b/doc/userguide/upgrade.rst @@ -99,6 +99,20 @@ Major changes + worker-cpu-set: + cpu: [0, 1] + - The `threading.cpu-affinity` configuration has been extended to support + interface-specific CPU affinity settings. This allows you to specify + CPU affinity settings for each interface separately. + The new configuration format is described in :ref:`suricata-yaml-threading`. + The old configuration format does not support this extension and will be + removed in Suricata 9.0. + - The `threading.cpu-affinity` configuration now supports autopinning + worker or receive threads to the NUMA node that the network capture + interface is located on. + This can be enabled by setting `threading.autopin` to `yes`. + See :ref:`suricata-yaml-threading` for more information. 
+ This requires hwloc dependency to be installed and `--enable-hwloc` + to be passed to configure script. + Removals ~~~~~~~~ - The ssh keywords ``ssh.protoversion`` and ``ssh.softwareversion`` have been removed. diff --git a/src/runmode-dpdk.c b/src/runmode-dpdk.c index 6bbe3c1f2ed6..32d8a1ff255b 100644 --- a/src/runmode-dpdk.c +++ b/src/runmode-dpdk.c @@ -368,12 +368,17 @@ static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str) SCReturnInt(-EINVAL); } - ThreadsAffinityType *wtaf = GetAffinityTypeFromName("worker-cpu-set"); + bool wtaf_periface = true; + ThreadsAffinityType *wtaf = GetAffinityTypeForNameAndIface("worker-cpu-set", iconf->iface); if (wtaf == NULL) { - SCLogError("Specify worker-cpu-set list in the threading section"); - SCReturnInt(-EINVAL); + wtaf_periface = false; + wtaf = GetAffinityTypeForNameAndIface("worker-cpu-set", NULL); // mandatory + if (wtaf == NULL) { + SCLogError("Specify worker-cpu-set list in the threading section"); + SCReturnInt(-EINVAL); + } } - ThreadsAffinityType *mtaf = GetAffinityTypeFromName("management-cpu-set"); + ThreadsAffinityType *mtaf = GetAffinityTypeForNameAndIface("management-cpu-set", NULL); if (mtaf == NULL) { SCLogError("Specify management-cpu-set list in the threading section"); SCReturnInt(-EINVAL); @@ -406,7 +411,12 @@ static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str) } if (strcmp(entry_str, "auto") == 0) { - iconf->threads = (uint16_t)sched_cpus / LiveGetDeviceCount(); + if (wtaf_periface) { + iconf->threads = (uint16_t)sched_cpus; + SCLogConfig("%s: auto-assigned %u threads", iconf->iface, iconf->threads); + SCReturnInt(0); + } + iconf->threads = (uint16_t)sched_cpus / LiveGetDeviceCountWithoutAssignedThreading(); if (iconf->threads == 0) { SCLogError("Not enough worker CPU cores with affinity were configured"); SCReturnInt(-ERANGE); @@ -416,7 +426,8 @@ static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str) iconf->threads++; 
remaining_auto_cpus--; } else if (remaining_auto_cpus == -1) { - remaining_auto_cpus = (int32_t)sched_cpus % LiveGetDeviceCount(); + remaining_auto_cpus = + (int32_t)sched_cpus % LiveGetDeviceCountWithoutAssignedThreading(); if (remaining_auto_cpus > 0) { iconf->threads++; remaining_auto_cpus--; @@ -844,23 +855,46 @@ static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface) SCReturnInt(0); } -static int32_t ConfigValidateThreads(uint16_t iface_threads) +static bool ConfigThreadsGenericIsValid(uint16_t iface_threads, ThreadsAffinityType *wtaf) { static uint32_t total_cpus = 0; total_cpus += iface_threads; - ThreadsAffinityType *wtaf = GetAffinityTypeFromName("worker-cpu-set"); if (wtaf == NULL) { SCLogError("Specify worker-cpu-set list in the threading section"); - return -1; + return false; } if (total_cpus > UtilAffinityGetAffinedCPUNum(wtaf)) { - SCLogError("Interfaces requested more cores than configured in the threading section " - "(requested %d configured %d", + SCLogError("Interfaces requested more cores than configured in the worker-cpu-set " + "threading section (requested %d configured %d", total_cpus, UtilAffinityGetAffinedCPUNum(wtaf)); - return -1; + return false; } - return 0; + return true; +} + +static bool ConfigThreadsInterfaceIsValid(uint16_t iface_threads, ThreadsAffinityType *itaf) +{ + if (iface_threads > UtilAffinityGetAffinedCPUNum(itaf)) { + SCLogError("Interface requested more cores than configured in the interface-specific " + "threading section (requested %d configured %d", + iface_threads, UtilAffinityGetAffinedCPUNum(itaf)); + return false; + } + + return true; +} + +static bool ConfigIsThreadingValid(uint16_t iface_threads, const char *iface) +{ + ThreadsAffinityType *itaf = GetAffinityTypeForNameAndIface("worker-cpu-set", iface); + ThreadsAffinityType *wtaf = GetAffinityTypeForNameAndIface("worker-cpu-set", NULL); + if (itaf && !ConfigThreadsInterfaceIsValid(iface_threads, itaf)) { + return false; + } else if (itaf == NULL && 
!ConfigThreadsGenericIsValid(iface_threads, wtaf)) { + return false; + } + return true; } static DPDKIfaceConfig *ConfigParse(const char *iface) @@ -873,7 +907,7 @@ static DPDKIfaceConfig *ConfigParse(const char *iface) ConfigInit(&iconf); retval = ConfigLoad(iconf, iface); - if (retval < 0 || ConfigValidateThreads(iconf->threads) != 0) { + if (retval < 0 || !ConfigIsThreadingValid(iconf->threads, iface)) { iconf->DerefFunc(iconf); SCReturnPtr(NULL, "void *"); } diff --git a/src/suricata.c b/src/suricata.c index 6a01b55dd3ca..a7e60147b569 100644 --- a/src/suricata.c +++ b/src/suricata.c @@ -111,6 +111,7 @@ #include "tmqh-packetpool.h" #include "tm-queuehandlers.h" +#include "util-affinity.h" #include "util-byte.h" #include "util-conf.h" #include "util-coredump-config.h" @@ -2297,6 +2298,9 @@ void PostRunDeinit(const int runmode, struct timeval *start_time) StreamTcpFreeConfig(STREAM_VERBOSE); DefragDestroy(); HttpRangeContainersDestroy(); +#ifdef HAVE_HWLOC + TopologyDestroy(); +#endif /* HAVE_HWLOC */ TmqResetQueues(); #ifdef PROFILING diff --git a/src/threadvars.h b/src/threadvars.h index 6f339e9839d5..471714a254c4 100644 --- a/src/threadvars.h +++ b/src/threadvars.h @@ -136,6 +136,9 @@ typedef struct ThreadVars_ { struct FlowQueue_ *flow_queue; bool break_loop; + /** Interface-specific thread affinity */ + char *iface_name; + Storage storage[]; } ThreadVars; diff --git a/src/tm-threads.c b/src/tm-threads.c index 07f9a9390df0..d5b504c16b7d 100644 --- a/src/tm-threads.c +++ b/src/tm-threads.c @@ -865,8 +865,24 @@ TmEcode TmThreadSetupOptions(ThreadVars *tv) TmThreadSetPrio(tv); if (tv->thread_setup_flags & THREAD_SET_AFFTYPE) { ThreadsAffinityType *taf = &thread_affinity[tv->cpu_affinity]; + bool use_iface_affinity = RunmodeIsAutofp() && tv->cpu_affinity == RECEIVE_CPU_SET && + FindAffinityByInterface(taf, tv->iface_name) != NULL; + use_iface_affinity |= RunmodeIsWorkers() && tv->cpu_affinity == WORKER_CPU_SET && + FindAffinityByInterface(taf, tv->iface_name) != 
NULL; + + if (use_iface_affinity) { + taf = FindAffinityByInterface(taf, tv->iface_name); + } + + if (UtilAffinityGetAffinedCPUNum(taf) == 0) { + if (!taf->nocpu_warned) { + SCLogWarning("No CPU affinity set for %s", AffinityGetYamlPath(taf)); + taf->nocpu_warned = true; + } + } + if (taf->mode_flag == EXCLUSIVE_AFFINITY) { - uint16_t cpu = AffinityGetNextCPU(taf); + uint16_t cpu = AffinityGetNextCPU(tv, taf); SetCPUAffinity(cpu); /* If CPU is in a set overwrite the default thread prio */ if (CPU_ISSET(cpu, &taf->lowprio_cpu)) { @@ -1600,6 +1616,10 @@ static void TmThreadFree(ThreadVars *tv) SCFree(tv->printable_name); } + if (tv->iface_name) { + SCFree(tv->iface_name); + } + if (tv->stream_pq_local) { BUG_ON(tv->stream_pq_local->len); SCMutexDestroy(&tv->stream_pq_local->mutex_q); diff --git a/src/util-affinity.c b/src/util-affinity.c index ee365372702a..e78fd7e8af61 100644 --- a/src/util-affinity.c +++ b/src/util-affinity.c @@ -31,50 +31,169 @@ #include "util-cpu.h" #include "util-byte.h" #include "util-debug.h" +#include "util-dpdk.h" ThreadsAffinityType thread_affinity[MAX_CPU_SET] = { { .name = "receive-cpu-set", .mode_flag = EXCLUSIVE_AFFINITY, .prio = PRIO_MEDIUM, - .lcpu = 0, + .lcpu = { 0 }, }, { .name = "worker-cpu-set", .mode_flag = EXCLUSIVE_AFFINITY, .prio = PRIO_MEDIUM, - .lcpu = 0, + .lcpu = { 0 }, }, { .name = "verdict-cpu-set", .mode_flag = BALANCED_AFFINITY, .prio = PRIO_MEDIUM, - .lcpu = 0, + .lcpu = { 0 }, }, { .name = "management-cpu-set", .mode_flag = BALANCED_AFFINITY, .prio = PRIO_MEDIUM, - .lcpu = 0, + .lcpu = { 0 }, }, }; int thread_affinity_init_done = 0; +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun +#ifdef HAVE_HWLOC +static hwloc_topology_t topology = NULL; +#endif /* HAVE_HWLOC */ +#endif /* OS_WIN32 and __OpenBSD__ */ + +static ThreadsAffinityType *AllocAndInitAffinityType( + const char *name, const char *interface_name, ThreadsAffinityType *parent) +{ + ThreadsAffinityType *new_affinity = 
SCCalloc(1, sizeof(ThreadsAffinityType)); + if (new_affinity == NULL) { + FatalError("Unable to allocate memory for new CPU affinity type"); + } + + new_affinity->name = SCStrdup(interface_name); + if (new_affinity->name == NULL) { + FatalError("Unable to allocate memory for new CPU affinity type name"); + } + new_affinity->parent = parent; + new_affinity->mode_flag = EXCLUSIVE_AFFINITY; + new_affinity->prio = PRIO_MEDIUM; + for (int i = 0; i < MAX_NUMA_NODES; i++) { + new_affinity->lcpu[i] = 0; + } + + if (parent != NULL) { + if (parent->nb_children == parent->nb_children_capacity) { + if (parent->nb_children_capacity == 0) { + parent->nb_children_capacity = 2; + } else { + parent->nb_children_capacity *= 2; + } + void *p = SCRealloc( + parent->children, parent->nb_children_capacity * sizeof(ThreadsAffinityType *)); + if (p == NULL) { + FatalError("Unable to reallocate memory for children CPU affinity types"); + } + parent->children = p; + } + parent->children[parent->nb_children++] = new_affinity; + } + + return new_affinity; +} + +ThreadsAffinityType *FindAffinityByInterface( + ThreadsAffinityType *parent, const char *interface_name) +{ + for (uint32_t i = 0; i < parent->nb_children; i++) { + if (interface_name && strcmp(parent->children[i]->name, interface_name) == 0) { + return parent->children[i]; + } + } + return NULL; +} + +/** + * \brief Find affinity by name (*-cpu-set name) and an interface name. + * \param name the name of the affinity (e.g. worker-cpu-set, receive-cpu-set). + * The name is required and cannot be NULL. + * \param interface_name the name of the interface. + * If NULL, the affinity is looked up by name only. 
+ * \retval a pointer to the affinity or NULL if not found + */ +ThreadsAffinityType *GetAffinityTypeForNameAndIface(const char *name, const char *interface_name) +{ + int i; + ThreadsAffinityType *parent_affinity = NULL; + + for (i = 0; i < MAX_CPU_SET; i++) { + if (strcmp(thread_affinity[i].name, name) == 0) { + parent_affinity = &thread_affinity[i]; + break; + } + } + + if (parent_affinity == NULL) { + SCLogError("CPU affinity with name \"%s\" not found", name); + return NULL; + } + + if (interface_name != NULL) { + ThreadsAffinityType *child_affinity = + FindAffinityByInterface(parent_affinity, interface_name); + // found or not found, it is returned + return child_affinity; + } + + return parent_affinity; +} + /** - * \brief find affinity by its name + * \brief Finds affinity by its name and interface name. + * Interfaces are children of cpu-set names. If the queried interface is not + * found, then it is allocated, initialized and assigned to the queried cpu-set. + * \param name the name of the affinity (e.g. worker-cpu-set, receive-cpu-set). + * The name is required and cannot be NULL. + * \param interface_name the name of the interface. + * If NULL, the affinity is looked up by name only. 
* \retval a pointer to the affinity or NULL if not found */ -ThreadsAffinityType * GetAffinityTypeFromName(const char *name) +ThreadsAffinityType *GetOrAllocAffinityTypeForIfaceOfName( + const char *name, const char *interface_name) { int i; + ThreadsAffinityType *parent_affinity = NULL; + for (i = 0; i < MAX_CPU_SET; i++) { - if (!strcmp(thread_affinity[i].name, name)) { - return &thread_affinity[i]; + if (strcmp(thread_affinity[i].name, name) == 0) { + parent_affinity = &thread_affinity[i]; + break; } } - return NULL; + + if (parent_affinity == NULL) { + SCLogError("CPU affinity with name \"%s\" not found", name); + return NULL; + } + + if (interface_name != NULL) { + ThreadsAffinityType *child_affinity = + FindAffinityByInterface(parent_affinity, interface_name); + if (child_affinity != NULL) { + return child_affinity; + } + + // If not found, allocate and initialize a new child affinity + return AllocAndInitAffinityType(name, interface_name, parent_affinity); + } + + return parent_affinity; } #if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun @@ -275,38 +394,114 @@ static void SetupAffinityThreads(ThreadsAffinityType *taf, ConfNode *affinity) } } -static bool AllCPUsUsed(ThreadsAffinityType *taf) +/** + * \brief Get the YAML path for the given affinity type. + * The path is built using the parent name (if available) and the affinity name. + * Do not free the returned string. 
+ * \param taf the affinity type - if NULL, the path is built for the root node + * \return a string containing the YAML path, or NULL if the path is too long + */ +char *AffinityGetYamlPath(ThreadsAffinityType *taf) { - if (taf->lcpu < UtilCpuGetNumProcessorsOnline()) { - return false; + static char rootpath[] = "threading.cpu-affinity"; + static char path[1024] = { 0 }; + char subpath[256] = { 0 }; + + if (taf == NULL) { + return rootpath; + } + + if (taf->parent != NULL) { + long r = snprintf( + subpath, sizeof(subpath), "%s.interface-specific-cpu-set.", taf->parent->name); + if (r < 0 || r >= (long)sizeof(subpath)) { + FatalError("Unable to build YAML path for CPU affinity %s.%s", taf->parent->name, + taf->name); + } + } else { + subpath[0] = '\0'; } - return true; + + long r = snprintf(path, sizeof(path), "%s.%s%s", rootpath, subpath, taf->name); + if (r < 0 || r >= (long)sizeof(path)) { + FatalError("Unable to build YAML path for CPU affinity %s", taf->name); + } + + return path; } static void ResetCPUs(ThreadsAffinityType *taf) { - taf->lcpu = 0; + for (int i = 0; i < MAX_NUMA_NODES; i++) { + taf->lcpu[i] = 0; + } } -static uint16_t GetNextAvailableCPU(ThreadsAffinityType *taf) +/** + * \brief Check if the set name corresponds to a worker CPU set. + */ +static bool IsWorkerCpuSet(const char *setname) { - uint16_t cpu = taf->lcpu; - int attempts = 0; + return (strcmp(setname, "worker-cpu-set") == 0); +} - while (!CPU_ISSET(cpu, &taf->cpu_set) && attempts < 2) { - cpu = (cpu + 1) % UtilCpuGetNumProcessorsOnline(); - if (cpu == 0) - attempts++; +/** + * \brief Check if the set name corresponds to a receive CPU set. + */ +static bool IsReceiveCpuSet(const char *setname) +{ + return (strcmp(setname, "receive-cpu-set") == 0); +} + +/** + * \brief Set up affinity configuration for a single interface. 
+ */ +static void SetupSingleIfaceAffinity(ThreadsAffinityType *taf, ConfNode *iface_node) +{ + // offload to Setup function + ConfNode *child_node; + const char *interface_name = NULL; + TAILQ_FOREACH (child_node, &iface_node->head, next) { + if (strcmp(child_node->name, "interface") == 0) { + interface_name = child_node->val; + break; + } + } + if (interface_name == NULL) { + return; + } + + ThreadsAffinityType *iface_taf = + GetOrAllocAffinityTypeForIfaceOfName(taf->name, interface_name); + if (iface_taf == NULL) { + FatalError("Unknown CPU affinity type for interface: %s", interface_name); } - taf->lcpu = cpu + 1; + SetupCpuSets(iface_taf, iface_node, interface_name); + SetupAffinityPriority(iface_taf, iface_node, interface_name); + SetupAffinityMode(iface_taf, iface_node); + SetupAffinityThreads(iface_taf, iface_node); +} - if (attempts == 2) { - SCLogError( - "cpu_set does not contain available CPUs, CPU affinity configuration is invalid"); +/** + * \brief Set up per-interface affinity configurations. 
+ */ +static void SetupPerIfaceAffinity(ThreadsAffinityType *taf, ConfNode *affinity) +{ + char if_af[] = "interface-specific-cpu-set"; + ConfNode *per_iface_node = ConfNodeLookupChild(affinity, if_af); + if (per_iface_node == NULL) { + return; } - return cpu; + ConfNode *iface_node; + TAILQ_FOREACH (iface_node, &per_iface_node->head, next) { + if (strcmp(iface_node->val, "interface") == 0) { + SetupSingleIfaceAffinity(taf, iface_node); + } else { + SCLogWarning("Unknown node in %s: %s", if_af, iface_node->name); + } + } } /** @@ -323,9 +518,8 @@ static bool AffinityConfigIsDeprecated(void) return threading_affinity_deprecated; } - ConfNode *root = ConfGetNode("threading.cpu-affinity"); + ConfNode *root = ConfGetNode(AffinityGetYamlPath(NULL)); if (root == NULL) { - threading_affinity_deprecated = false; initialized = true; return threading_affinity_deprecated; } @@ -357,18 +551,17 @@ void AffinitySetupLoadFromConfig(void) AffinitySetupInit(); thread_affinity_init_done = 1; if (AffinityConfigIsDeprecated()) { - SCLogWarning("CPU affinity configuration uses a deprecated structure and will become " - "obsolete in a future major release (Suricata 9.0). Please update your " - "threading.cpu-affinity to the new format. " - "See notes in %s/upgrade.html#upgrading-7-0-to-8-0", - GetDocURL()); + SCLogWarning("CPU affinity configuration uses a deprecated structure and will not be " + "supported in a future major release (Suricata 9.0). Please update your " + "%s to the new format. 
See notes in %s/upgrade.html#upgrading-7-0-to-8-0", + AffinityGetYamlPath(NULL), GetDocURL()); } } - SCLogDebug("Loading threading.cpu-affinity from config"); - ConfNode *root = ConfGetNode("threading.cpu-affinity"); + SCLogDebug("Loading %s from config", AffinityGetYamlPath(NULL)); + ConfNode *root = ConfGetNode(AffinityGetYamlPath(NULL)); if (root == NULL) { - SCLogInfo("Cannot find threading.cpu-affinity node in config"); + SCLogInfo("Cannot find %s node in config", AffinityGetYamlPath(NULL)); return; } @@ -380,7 +573,7 @@ void AffinitySetupLoadFromConfig(void) continue; } - ThreadsAffinityType *taf = GetAffinityTypeFromName(setname); + ThreadsAffinityType *taf = GetOrAllocAffinityTypeForIfaceOfName(setname, NULL); if (taf == NULL) { FatalError("Unknown CPU affinity type: %s", setname); } @@ -393,25 +586,372 @@ void AffinitySetupLoadFromConfig(void) SetupAffinityPriority(taf, aff_query_node, setname); SetupAffinityMode(taf, aff_query_node); SetupAffinityThreads(taf, aff_query_node); + + if (!AffinityConfigIsDeprecated() && + (IsWorkerCpuSet(setname) || IsReceiveCpuSet(setname))) { + SetupPerIfaceAffinity(taf, affinity); + } } #endif /* OS_WIN32 and __OpenBSD__ */ } +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun +#ifdef HAVE_HWLOC +static int HwLocDeviceNumaGet(hwloc_topology_t topo, hwloc_obj_t obj) +{ +#if HWLOC_VERSION_MAJOR >= 2 && HWLOC_VERSION_MINOR >= 5 + hwloc_obj_t nodes[MAX_NUMA_NODES]; + unsigned num_nodes = MAX_NUMA_NODES; + struct hwloc_location location; + + location.type = HWLOC_LOCATION_TYPE_OBJECT; + location.location.object = obj; + + int result = hwloc_get_local_numanode_objs(topo, &location, &num_nodes, nodes, 0); + if (result == 0 && num_nodes > 0 && num_nodes <= MAX_NUMA_NODES) { + return nodes[0]->logical_index; + } + return -1; +#endif /* HWLOC_VERSION_MAJOR >= 2 && HWLOC_VERSION_MINOR >= 5 */ + + hwloc_obj_t non_io_ancestor = hwloc_get_non_io_ancestor_obj(topo, obj); + if (non_io_ancestor == NULL) { 
+ return -1; + } + + // Iterate over NUMA nodes and check their nodeset + hwloc_obj_t numa_node = NULL; + while ((numa_node = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_NUMANODE, numa_node)) != NULL) { + if (hwloc_bitmap_isset(non_io_ancestor->nodeset, numa_node->os_index)) { + return numa_node->logical_index; + } + } + + return -1; +} + +static hwloc_obj_t HwLocDeviceGetByKernelName(hwloc_topology_t topo, const char *interface_name) +{ + hwloc_obj_t obj = NULL; + + while ((obj = hwloc_get_next_osdev(topo, obj)) != NULL) { + if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK && + strcmp(obj->name, interface_name) == 0) { + hwloc_obj_t parent = obj->parent; + while (parent) { + if (parent->type == HWLOC_OBJ_PCI_DEVICE) { + return parent; + } + parent = parent->parent; + } + } + } + return NULL; +} + +// Static function to deparse PCIe interface string name to individual components /** - * \brief Return next cpu to use for a given thread family - * \retval the cpu to used given by its id + * \brief Parse PCIe address string to individual components + * \param[in] pcie_address PCIe address string + * \param[out] domain Domain component + * \param[out] bus Bus component + * \param[out] device Device component + * \param[out] function Function component */ -uint16_t AffinityGetNextCPU(ThreadsAffinityType *taf) +static int PcieAddressToComponents(const char *pcie_address, unsigned int *domain, + unsigned int *bus, unsigned int *device, unsigned int *function) { - uint16_t ncpu = 0; -#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun + // Handle both full and short PCIe address formats + if (sscanf(pcie_address, "%x:%x:%x.%x", domain, bus, device, function) != 4) { + if (sscanf(pcie_address, "%x:%x.%x", bus, device, function) != 3) { + return -1; + } + *domain = 0; // Default domain to 0 if not provided + } + return 0; +} + +// Function to convert PCIe address to hwloc object +static hwloc_obj_t HwLocDeviceGetByPcie(hwloc_topology_t 
topo, const char *pcie_address)
+{
+    hwloc_obj_t obj = NULL;
+    unsigned int domain, bus, device, function;
+    int r = PcieAddressToComponents(pcie_address, &domain, &bus, &device, &function);
+    if (r == 0) {
+        while ((obj = hwloc_get_next_pcidev(topo, obj)) != NULL) {
+            if (obj->attr->pcidev.domain == domain && obj->attr->pcidev.bus == bus &&
+                    obj->attr->pcidev.dev == device && obj->attr->pcidev.func == function) {
+                return obj;
+            }
+        }
+    }
+    return NULL;
+}
+
+/**
+ * \brief Log the properties of an hwloc object at debug level
+ * \param obj hwloc object to describe; only a message is logged when NULL
+ * \param iface_name interface name printed next to the PCIe address
+ */
+static void HwlocObjectDump(hwloc_obj_t obj, const char *iface_name)
+{
+    if (!obj) {
+        SCLogDebug("No object found for the given PCIe address.\n");
+        return;
+    }
+
+    /* automatic storage: a function-static buffer would be shared between threads */
+    char pcie_address[32];
+    snprintf(pcie_address, sizeof(pcie_address), "%04x:%02x:%02x.%x", obj->attr->pcidev.domain,
+            obj->attr->pcidev.bus, obj->attr->pcidev.dev, obj->attr->pcidev.func);
+    SCLogDebug("Interface (%s / %s) has NUMA ID %d", iface_name, pcie_address,
+            HwLocDeviceNumaGet(topology, obj));
+
+    SCLogDebug("Object type: %s\n", hwloc_obj_type_string(obj->type));
+    SCLogDebug("Logical index: %u\n", obj->logical_index);
+    /* depth is a signed int in the hwloc 2.x object structure */
+    SCLogDebug("Depth: %d\n", obj->depth);
+    SCLogDebug("Attributes:\n");
+    if (obj->type == HWLOC_OBJ_PCI_DEVICE) {
+        SCLogDebug("  Domain: %04x\n", obj->attr->pcidev.domain);
+        SCLogDebug("  Bus: %02x\n", obj->attr->pcidev.bus);
+        SCLogDebug("  Device: %02x\n", obj->attr->pcidev.dev);
+        SCLogDebug("  Function: %01x\n", obj->attr->pcidev.func);
+        SCLogDebug("  Class ID: %04x\n", obj->attr->pcidev.class_id);
+        SCLogDebug("  Vendor ID: %04x\n", obj->attr->pcidev.vendor_id);
+        SCLogDebug("  Device ID: %04x\n", obj->attr->pcidev.device_id);
+        SCLogDebug("  Subvendor ID: %04x\n", obj->attr->pcidev.subvendor_id);
+        SCLogDebug("  Subdevice ID: %04x\n", obj->attr->pcidev.subdevice_id);
+        SCLogDebug("  Revision: %02x\n", obj->attr->pcidev.revision);
+        SCLogDebug("  Link speed: %f GB/s\n", obj->attr->pcidev.linkspeed);
+    } else {
+        SCLogDebug("  No PCI device attributes available.\n");
+    }
+}
+
+static bool
TopologyShouldAutopin(ThreadVars *tv, ThreadsAffinityType *taf)
+{
+    bool cond;
     SCMutexLock(&taf->taf_mutex);
-    ncpu = GetNextAvailableCPU(taf);
+    /* only packet processing threads bound to an interface qualify, and only for
+     * the affinity set of that interface or the generic set matching the runmode */
+    cond = tv->type == TVT_PPT && tv->iface_name &&
+           (strcmp(tv->iface_name, taf->name) == 0 ||
+                   (strcmp("worker-cpu-set", taf->name) == 0 && RunmodeIsWorkers()) ||
+                   (strcmp("receive-cpu-set", taf->name) == 0 && RunmodeIsAutofp()));
+    SCMutexUnlock(&taf->taf_mutex);
+    return cond;
+}
+
+/**
+ * \brief Load the hwloc topology into the shared handle on first use
+ *
+ * I/O objects are kept in the topology (KEEP_ALL filter). Any hwloc failure
+ * is reported through FatalError().
+ */
+static void TopologyInitialize(void)
+{
+    if (topology == NULL) {
+        if (hwloc_topology_init(&topology) == -1) {
+            FatalError("Failed to initialize topology");
+        }
+
+        if (hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) == -1 ||
+                hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL) == -1 ||
+                hwloc_topology_load(topology) == -1) {
+            FatalError("Failed to set/load topology");
+        }
+    }
+}
+
+/**
+ * \brief Release the hwloc topology, if one was loaded, and reset the handle
+ */
+void TopologyDestroy(void)
+{
+    if (topology != NULL) {
+        hwloc_topology_destroy(topology);
+        topology = NULL;
+    }
+}
+
+/**
+ * \brief Find the NUMA node of the interface a thread is bound to
+ * \param tv thread whose iface_name is looked up, first as a kernel interface
+ *        name and then as a PCIe address
+ * \retval NUMA node id, negative when it could not be determined
+ */
+static int InterfaceGetNumaNode(ThreadVars *tv)
+{
+    hwloc_obj_t if_obj = HwLocDeviceGetByKernelName(topology, tv->iface_name);
+    if (if_obj == NULL) {
+        if_obj = HwLocDeviceGetByPcie(topology, tv->iface_name);
+    }
+
+    if (if_obj != NULL && SCLogGetLogLevel() == SC_LOG_DEBUG) {
+        HwlocObjectDump(if_obj, tv->iface_name);
+    }
+
+    /* a device that could not be resolved has no hwloc object to query */
+    int32_t numa_id = (if_obj != NULL) ? HwLocDeviceNumaGet(topology, if_obj) : -1;
+    if (numa_id < 0 && SCRunmodeGet() == RUNMODE_DPDK) {
+        // DPDK fallback for e.g.
net_bonding (vdev) PMDs + int32_t r = DPDKDeviceNameSetSocketID(tv->iface_name, &numa_id); + if (r < 0) { + numa_id = -1; + } + } + + if (numa_id < 0) { + SCLogDebug("Unable to find NUMA node for interface %s", tv->iface_name); + } + + return numa_id; +} +#endif /* HAVE_HWLOC */ + +static bool CPUIsFromNuma(uint16_t ncpu, uint16_t numa) +{ +#ifdef HAVE_HWLOC + int core_id = ncpu; + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t numa_node = NULL; + + while ((numa_node = hwloc_get_next_obj_by_depth(topology, depth, numa_node)) != NULL) { + hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_copy(cpuset, numa_node->cpuset); + + if (hwloc_bitmap_isset(cpuset, core_id)) { + SCLogDebug("Core %d - NUMA %d", core_id, numa_node->logical_index); + hwloc_bitmap_free(cpuset); + break; + } + hwloc_bitmap_free(cpuset); + } + + if (numa == numa_node->logical_index) { + return true; + } + +#endif /* HAVE_HWLOC */ + + return false; +} + +static int16_t FindCPUInNumaNode(int numa_node, ThreadsAffinityType *taf) +{ + if (numa_node < 0) { + return -1; + } + + if (taf->lcpu[numa_node] >= UtilCpuGetNumProcessorsOnline()) { + return -1; + } + + uint16_t cpu = taf->lcpu[numa_node]; + while (cpu < UtilCpuGetNumProcessorsOnline() && + (!CPU_ISSET(cpu, &taf->cpu_set) || !CPUIsFromNuma(cpu, (uint16_t)numa_node))) { + cpu++; + } + + taf->lcpu[numa_node] = + (CPU_ISSET(cpu, &taf->cpu_set) && CPUIsFromNuma(cpu, (uint16_t)numa_node)) + ? cpu + 1 + : UtilCpuGetNumProcessorsOnline(); + return (CPU_ISSET(cpu, &taf->cpu_set) && CPUIsFromNuma(cpu, (uint16_t)numa_node)) ? 
(int16_t)cpu
+            : -1;
+}
+/**
+ * \brief Pick a CPU located on the NUMA node of the interface
+ * \param iface_numa NUMA node of the interface, -1 when unknown
+ * \param taf affinity settings the CPU is picked from
+ * \retval CPU id returned by FindCPUInNumaNode(), -1 when iface_numa is -1
+ *         or no CPU was found
+ */
+static int16_t CPUSelectFromNuma(int iface_numa, ThreadsAffinityType *taf)
+{
+    if (iface_numa != -1) {
+        return FindCPUInNumaNode(iface_numa, taf);
+    }
+    return -1;
+}
+/**
+ * \brief Pick a CPU from any NUMA node other than the one of the interface
+ *
+ * Node indexes below MAX_NUMA_NODES are tried in ascending order and the
+ * first CPU found is returned; the cross-node assignment is logged.
+ *
+ * \param iface_numa NUMA node of the interface, skipped during the search
+ * \param taf affinity settings the CPU is picked from
+ * \retval CPU id, -1 if no other node yields a CPU
+ */
+static int16_t CPUSelectAlternative(int iface_numa, ThreadsAffinityType *taf)
+{
+    for (int nid = 0; nid < MAX_NUMA_NODES; nid++) {
+        if (iface_numa == nid) {
+            continue;
+        }
+
+        int16_t cpu = FindCPUInNumaNode(nid, taf);
+        if (cpu != -1) {
+            SCLogPerf("CPU %d from NUMA %d assigned to a network interface located on NUMA %d", cpu,
+                    nid, iface_numa);
+            return cpu;
+        }
+    }
+    return -1;
+}
+
+/**
+ * \brief Select the next available CPU for the given affinity type.
+ * taf->cpu_set is a bit array where each bit represents a CPU core.
+ * The function iterates over the bit array and returns the first available CPU.
+ * If the last used CPU core index is higher than the indexes of available cores,
+ * we reach the end of the array, and we reset the CPU selection.
+ * On the second reset attempt, the function bails out with a default value.
+ * The second attempt should only happen with an empty CPU set.
+ */ +static uint16_t CPUSelectDefault(ThreadsAffinityType *taf) +{ + uint16_t cpu = taf->lcpu[0]; + int attempts = 0; + while (!CPU_ISSET(cpu, &taf->cpu_set) && attempts < 2) { + cpu = (cpu + 1) % UtilCpuGetNumProcessorsOnline(); + if (cpu == 0) { + attempts++; + } + } - if (AllCPUsUsed(taf)) { - ResetCPUs(taf); + taf->lcpu[0] = cpu + 1; + return cpu; +} + +static uint16_t CPUSelectFromNumaOrDefault(int iface_numa, ThreadsAffinityType *taf) +{ + uint16_t attempts = 0; + int16_t cpu = -1; + while (attempts < 2) { + cpu = CPUSelectFromNuma(iface_numa, taf); + if (cpu == -1) { + cpu = CPUSelectAlternative(iface_numa, taf); + if (cpu == -1) { + // All CPUs from all NUMAs are used at this point + ResetCPUs(taf); + attempts++; + } + } + + if (cpu >= 0) { + return (uint16_t)cpu; + } + } + return CPUSelectDefault(taf); +} + +static uint16_t GetNextAvailableCPU(int iface_numa, ThreadsAffinityType *taf) +{ + if (iface_numa < 0) { + return CPUSelectDefault(taf); } + return CPUSelectFromNumaOrDefault(iface_numa, taf); +} + +static bool AutopinEnabled(void) +{ + int autopin = 0; + if (ConfGetBool("threading.autopin", &autopin) != 1) { + return false; + } + return (bool)autopin; +} + +#endif /* OS_WIN32 and __OpenBSD__ */ + +uint16_t AffinityGetNextCPU(ThreadVars *tv, ThreadsAffinityType *taf) +{ + uint16_t ncpu = 0; +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun + int iface_numa = -1; + if (AutopinEnabled()) { +#ifdef HAVE_HWLOC + if (TopologyShouldAutopin(tv, taf)) { + TopologyInitialize(); + iface_numa = InterfaceGetNumaNode(tv); + } +#else + static bool printed = false; + if (!printed) { + printed = true; + SCLogWarning( + "threading.autopin option is enabled but hwloc support is not compiled in. 
" + "Make sure to pass --enable-nfqueue to configure when building Suricata."); + } +#endif /* HAVE_HWLOC */ + } + + SCMutexLock(&taf->taf_mutex); + ncpu = GetNextAvailableCPU(iface_numa, taf); SCLogDebug("Setting affinity on CPU %d", ncpu); SCMutexUnlock(&taf->taf_mutex); #endif /* OS_WIN32 and __OpenBSD__ */ diff --git a/src/util-affinity.h b/src/util-affinity.h index 2fa4509ffa2c..cabd16515204 100644 --- a/src/util-affinity.h +++ b/src/util-affinity.h @@ -26,6 +26,11 @@ #include "suricata-common.h" #include "conf.h" #include "threads.h" +#include "threadvars.h" + +#ifdef HAVE_HWLOC +#include +#endif /* HAVE_HWLOC */ #if defined OS_FREEBSD #include @@ -62,12 +67,12 @@ enum { MAX_AFFINITY }; +#define MAX_NUMA_NODES 16 + typedef struct ThreadsAffinityType_ { const char *name; - uint8_t mode_flag; - uint16_t lcpu; /* use by exclusive mode */ - int prio; - uint32_t nb_threads; + struct ThreadsAffinityType_ **children; + struct ThreadsAffinityType_ *parent; // e.g. worker-cpu-set for interfaces SCMutex taf_mutex; #if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun @@ -76,6 +81,14 @@ typedef struct ThreadsAffinityType_ { cpu_set_t medprio_cpu; cpu_set_t hiprio_cpu; #endif + int prio; + uint32_t nb_threads; + uint32_t nb_children; + uint32_t nb_children_capacity; + uint16_t lcpu[MAX_NUMA_NODES]; /* use by exclusive mode */ + uint8_t mode_flag; + // a flag to avoid multiple warnings when no CPU is set + bool nocpu_warned; } ThreadsAffinityType; /** store thread affinity mode for all type of threads */ @@ -83,10 +96,16 @@ typedef struct ThreadsAffinityType_ { extern ThreadsAffinityType thread_affinity[MAX_CPU_SET]; #endif +char *AffinityGetYamlPath(ThreadsAffinityType *taf); void AffinitySetupLoadFromConfig(void); -ThreadsAffinityType * GetAffinityTypeFromName(const char *name); +ThreadsAffinityType *GetOrAllocAffinityTypeForIfaceOfName( + const char *name, const char *interface_name); +ThreadsAffinityType 
*GetAffinityTypeForNameAndIface(const char *name, const char *interface_name); +ThreadsAffinityType *FindAffinityByInterface( + ThreadsAffinityType *parent, const char *interface_name); -uint16_t AffinityGetNextCPU(ThreadsAffinityType *taf); +void TopologyDestroy(void); +uint16_t AffinityGetNextCPU(ThreadVars *tv, ThreadsAffinityType *taf); uint16_t UtilAffinityGetAffinedCPUNum(ThreadsAffinityType *taf); #ifdef HAVE_DPDK uint16_t UtilAffinityCpusOverlap(ThreadsAffinityType *taf1, ThreadsAffinityType *taf2); diff --git a/src/util-device.c b/src/util-device.c index fd4cf5685f0b..ec1e91b41374 100644 --- a/src/util-device.c +++ b/src/util-device.c @@ -24,6 +24,7 @@ #include "device-storage.h" #include "util-debug.h" +#include "util-affinity.h" #define MAX_DEVNAME 10 @@ -173,6 +174,20 @@ int LiveGetDeviceCount(void) return i; } +int LiveGetDeviceCountWithoutAssignedThreading(void) +{ + int i = 0; + LiveDevice *pd; + + TAILQ_FOREACH (pd, &live_devices, next) { + if (GetAffinityTypeForNameAndIface("worker-cpu-set", pd->dev) == NULL) { + i++; + } + } + + return i; +} + /** * \brief Get a pointer to the device name at idx * diff --git a/src/util-device.h b/src/util-device.h index 0774825385a3..075c21567c81 100644 --- a/src/util-device.h +++ b/src/util-device.h @@ -85,6 +85,7 @@ void LiveDevAddBypassStats(LiveDevice *dev, uint64_t cnt, int family); void LiveDevSubBypassStats(LiveDevice *dev, uint64_t cnt, int family); void LiveDevAddBypassFail(LiveDevice *dev, uint64_t cnt, int family); void LiveDevAddBypassSuccess(LiveDevice *dev, uint64_t cnt, int family); +int LiveGetDeviceCountWithoutAssignedThreading(void); int LiveGetDeviceCount(void); const char *LiveGetDeviceName(int number); LiveDevice *LiveGetDevice(const char *dev); diff --git a/src/util-runmodes.c b/src/util-runmodes.c index f78e857abfc6..be4da6bd49ee 100644 --- a/src/util-runmodes.c +++ b/src/util-runmodes.c @@ -175,6 +175,14 @@ int RunModeSetLiveCaptureAutoFp(ConfigIfaceParserFunc ConfigParser, 
FatalError("TmThreadsCreate failed"); } tv_receive->printable_name = printable_threadname; + if (dev) { + tv_receive->iface_name = SCStrdup(dev); + if (tv_receive->iface_name == NULL) { + FatalError("Failed to allocate memory for iface name"); + } + } else { + tv_receive->iface_name = NULL; + } TmModule *tm_module = TmModuleGetByName(recv_mod_name); if (tm_module == NULL) { FatalError("TmModuleGetByName failed for %s", recv_mod_name); @@ -283,6 +291,14 @@ static int RunModeSetLiveCaptureWorkersForDevice(ConfigIfaceThreadsCountFunc Mod FatalError("TmThreadsCreate failed"); } tv->printable_name = printable_threadname; + if (live_dev) { + tv->iface_name = SCStrdup(live_dev); + if (tv->iface_name == NULL) { + FatalError("Failed to allocate memory for iface name"); + } + } else { + tv->iface_name = NULL; + } tm_module = TmModuleGetByName(recv_mod_name); if (tm_module == NULL) { diff --git a/suricata.yaml.in b/suricata.yaml.in index c56a0778dfa8..9a2a64a4f422 100644 --- a/suricata.yaml.in +++ b/suricata.yaml.in @@ -1777,6 +1777,7 @@ spm-algo: auto # Suricata is multi-threaded. Here the threading can be influenced. threading: set-cpu-affinity: no + autopin: no # Tune cpu affinity of threads. Each family of threads can be bound # to specific CPUs. # @@ -1793,6 +1794,13 @@ threading: cpu: [ 0 ] # include only these CPUs in affinity settings receive-cpu-set: cpu: [ 0 ] # include only these CPUs in affinity settings + # interface-specific-cpu-set: + # - interface: "enp4s0f0" + # cpu: [ 1,3,5,7,9 ] + # mode: "exclusive" + # prio: + # high: [ "all" ] + # default: "medium" worker-cpu-set: cpu: [ "all" ] mode: "exclusive" @@ -1804,6 +1812,13 @@ threading: medium: [ "1-2" ] high: [ 3 ] default: "medium" + interface-specific-cpu-set: + - interface: "enp4s0f0" # 0000:3b:00.0 # net_bonding0 # ens1f0 + cpu: [ 1,3,5,7,9 ] + mode: "exclusive" + prio: + high: [ "all" ] + default: "medium" #verdict-cpu-set: # cpu: [ 0 ] # prio: