diff --git a/doc/userguide/performance/hyperscan.rst b/doc/userguide/performance/hyperscan.rst index 055fa7f21b75..9b39bc22e95b 100644 --- a/doc/userguide/performance/hyperscan.rst +++ b/doc/userguide/performance/hyperscan.rst @@ -81,4 +81,28 @@ if it is present on the system in case of the "auto" setting. If the current suricata installation does not have hyperscan -support, refer to :ref:`installation` \ No newline at end of file +support, refer to :ref:`installation` + +Hyperscan caching +~~~~~~~~~~~~~~~~~ + +Upon startup, Hyperscan compiles and optimizes the ruleset into its own +internal structure. Suricata optimizes the startup process by saving +the Hyperscan internal structures to disk and loading them on the next start. +This prevents the recompilation of the ruleset and results in faster +initialization. If the ruleset changes, the required new cache files are +created automatically. + +To enable this feature, configure the following in ``suricata.yaml``: + +:: + + # Cache MPM contexts to the disk to avoid rule compilation at the startup. + # Cache files are created in the standard library directory. + sgh-mpm-caching: yes + sgh-mpm-caching-path: /var/lib/suricata/cache/hs + + +**Note**: +You might need to create the default caching folder and adjust its +permissions, especially if you are running Suricata as a non-root user. 
diff --git a/src/Makefile.am b/src/Makefile.am index f9410ac4f9f9..56e761a5c2e9 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -529,6 +529,7 @@ noinst_HEADERS = \ util-mpm-ac-ks.h \ util-mpm.h \ util-mpm-hs.h \ + util-mpm-hs-cache.h \ util-mpm-hs-core.h \ util-optimize.h \ util-pages.h \ @@ -1077,6 +1078,7 @@ libsuricata_c_a_SOURCES = \ util-mpm-ac-ks-small.c \ util-mpm.c \ util-mpm-hs.c \ + util-mpm-hs-cache.c \ util-mpm-hs-core.c \ util-pages.c \ util-path.c \ diff --git a/src/util-mpm-hs-cache.c b/src/util-mpm-hs-cache.c new file mode 100644 index 000000000000..83107385696b --- /dev/null +++ b/src/util-mpm-hs-cache.c @@ -0,0 +1,255 @@ +/* Copyright (C) 2007-2024 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/** + * \file + * + * \author Lukas Sismis + * + * Hyperscan database caching: stores compiled MPM databases on disk and loads them at startup. 
+ */
+
+#include "suricata-common.h"
+#include "suricata.h"
+#include "detect-engine.h"
+#include "util-debug.h"
+#include "util-hash-lookup3.h"
+#include "util-mpm-hs-core.h"
+#include "util-mpm-hs-cache.h"
+#include "util-path.h"
+
+#ifdef BUILD_HYPERSCAN
+
+#include <hs.h>
+
+/** \brief Build the cache file path for the given database hash.
+ *  \retval path pointer to a static buffer (not thread safe), NULL on error */
+static const char *HSCacheConstructFPath(uint64_t hs_db_hash)
+{
+    static char hash_file_path[PATH_MAX];
+
+    const char hash_file_path_suffix[] = "_v1.hs";
+    char filename[PATH_MAX];
+    /* PRIu64: uint64_t is not unsigned long on every platform */
+    int r = snprintf(
+            filename, sizeof(filename), "%020" PRIu64 "%s", hs_db_hash, hash_file_path_suffix);
+    if (r < 0 || (size_t)r != 20 + strlen(hash_file_path_suffix))
+        return NULL;
+
+    if (PathMerge(hash_file_path, sizeof(hash_file_path), DetectEngineMpmCachingGetPath(),
+                filename) != 0)
+        return NULL;
+
+    return hash_file_path;
+}
+
+/** \brief Read a whole file into a newly allocated buffer.
+ *  \retval buffer to be released with SCFree(), NULL on error or empty file */
+static char *HSReadStream(const char *file_path, size_t *buffer_sz)
+{
+    FILE *file = fopen(file_path, "rb");
+    if (!file) {
+        SCLogDebug("Failed to open file %s: %s", file_path, strerror(errno));
+        return NULL;
+    }
+
+    char *buffer = NULL;
+    /* seek to the end of the file to determine its size, then rewind */
+    long file_sz = -1;
+    if (fseek(file, 0, SEEK_END) == 0)
+        file_sz = ftell(file);
+    if (file_sz <= 0 || fseek(file, 0, SEEK_SET) != 0) {
+        SCLogDebug("Failed to determine file size of %s: %s", file_path, strerror(errno));
+        goto end;
+    }
+
+    buffer = (char *)SCCalloc(file_sz, sizeof(char));
+    if (!buffer) {
+        SCLogWarning("Failed to allocate memory");
+        goto end;
+    }
+
+    if (fread(buffer, 1, file_sz, file) != (size_t)file_sz) {
+        SCLogDebug("Failed to read the entire file %s: %s", file_path, strerror(errno));
+        SCFree(buffer);
+        buffer = NULL;
+        goto end;
+    }
+    *buffer_sz = (size_t)file_sz;
+end:
+    fclose(file);
+    return buffer;
+}
+
+/**
+ * Function to hash the searched pattern, only things relevant to Hyperscan
+ * compilation are hashed.
+ */
+static void SCHSCachePatternHash(const SCHSPattern *p, uint32_t *h1, uint32_t *h2)
+{
+    BUG_ON(p->original_pat == NULL);
+    hashlittle2_safe(&p->len, sizeof(p->len), h1, h2);
+    hashlittle2_safe(&p->flags, sizeof(p->flags), h1, h2);
+    hashlittle2_safe(p->original_pat, p->len, h1, h2);
+    hashlittle2_safe(&p->offset, sizeof(p->offset), h1, h2);
+    hashlittle2_safe(&p->depth, sizeof(p->depth), h1, h2);
+}
+
+/** \brief Load a serialized Hyperscan database from the disk cache.
+ *  \param hs_db set to the deserialized database on success
+ *  \retval 0 on success, -1 when no usable cache file exists for hs_db_hash */
+int HSLoadCache(hs_database_t **hs_db, uint64_t hs_db_hash)
+{
+    const char *hash_file_static = HSCacheConstructFPath(hs_db_hash);
+    if (hash_file_static == NULL)
+        return -1;
+
+    SCLogDebug("Loading the cached HS DB from %s", hash_file_static);
+    if (!SCPathExists(hash_file_static))
+        return -1;
+
+    size_t buffer_size = 0;
+    char *buffer = HSReadStream(hash_file_static, &buffer_size);
+    if (!buffer) {
+        SCLogWarning("Hyperscan cached DB file %s cannot be read", hash_file_static);
+        return -1;
+    }
+
+    int ret = 0;
+    hs_error_t error = hs_deserialize_database(buffer, buffer_size, hs_db);
+    if (error != HS_SUCCESS) {
+        SCLogWarning("Failed to deserialize Hyperscan database of %s: %s", hash_file_static,
+                HSErrorToStr(error));
+        ret = -1;
+    }
+
+    SCFree(buffer);
+    return ret;
+}
+
+/** \brief Serialize a compiled database and write it to its cache file.
+ *  \retval 0 on success, -1 on failure */
+static int HSSaveCache(hs_database_t *hs_db, uint64_t hs_db_hash)
+{
+    static bool notified = false;
+    char *db_stream = NULL;
+    size_t db_size;
+    int ret = -1;
+
+    hs_error_t err = hs_serialize_database(hs_db, &db_stream, &db_size);
+    if (err != HS_SUCCESS) {
+        SCLogWarning("Failed to serialize Hyperscan database: %s", HSErrorToStr(err));
+        goto cleanup;
+    }
+
+    const char *hash_file_static = HSCacheConstructFPath(hs_db_hash);
+    if (hash_file_static == NULL)
+        goto cleanup;
+
+    SCLogDebug("Caching the compiled HS at %s", hash_file_static);
+    if (SCPathExists(hash_file_static)) {
+        // potentially signs that it might not work as expected as we got into
+        // hash collision. If this happens with older and not used caches it is
+        // fine.
+        // It is problematic when one ruleset yields two colliding MPM groups.
+        SCLogWarning("Overwriting cache file %s. If the problem persists consider switching off "
+                     "the caching",
+                hash_file_static);
+    }
+
+    if (SCCreateDirectoryTree(DetectEngineMpmCachingGetPath(), true) != 0) {
+        if (!notified) {
+            SCLogWarning("Failed to create Hyperscan cache folder, make sure "
+                         "the parent folder is writeable "
+                         "or adjust sgh-mpm-caching-path setting (%s)",
+                    DetectEngineMpmCachingGetPath());
+            notified = true;
+        }
+        goto cleanup; /* db_stream must still be released */
+    }
+
+    FILE *db_cache_out = fopen(hash_file_static, "wb");
+    if (!db_cache_out) {
+        if (!notified) {
+            SCLogWarning("Failed to create Hyperscan cache file, make sure the folder exist and is "
+                         "writable or adjust sgh-mpm-caching-path setting (%s)",
+                    hash_file_static);
+            notified = true;
+        }
+        goto cleanup;
+    }
+
+    /* a short write (including zero bytes written) or a failed flush on close
+     * leaves a truncated file behind, so both count as a write failure */
+    bool written = fwrite(db_stream, sizeof(db_stream[0]), db_size, db_cache_out) == db_size;
+    if (fclose(db_cache_out) != 0)
+        written = false;
+    if (!written) {
+        SCLogWarning("Failed to write to file: %s", hash_file_static);
+        // possibly a corrupted DB cache was created
+        if (remove(hash_file_static) != 0) {
+            SCLogWarning("Failed to remove corrupted cache file: %s", hash_file_static);
+        }
+        goto cleanup;
+    }
+
+    ret = 0;
+cleanup:
+    if (db_stream)
+        SCFree(db_stream);
+    return ret;
+}
+
+/** \brief Hash the pattern count and every pattern of a pattern database. */
+uint64_t HSHashDb(const PatternDatabase *pd)
+{
+    uint64_t cached_hash = 0;
+    /* separate words: writing a uint64_t via uint32_t * breaks strict aliasing */
+    uint32_t hash[2] = { 0, 0 };
+    hashword2(&pd->pattern_cnt, 1, &hash[0], &hash[1]);
+    for (uint32_t i = 0; i < pd->pattern_cnt; i++) {
+        SCHSCachePatternHash(pd->parray[i], &hash[0], &hash[1]);
+    }
+
+    memcpy(&cached_hash, hash, sizeof(cached_hash));
+    return cached_hash;
+}
+
+/** \brief HashTableIterate() callback saving a not yet cached database to disk.
+ *  \param data PatternDatabase to save
+ *  \param aux PatternDatabaseCache counters to update */
+void HSSaveCacheIterator(void *data, void *aux)
+{
+    PatternDatabase *pd = (PatternDatabase *)data;
+    PatternDatabaseCache *pd_stats = (PatternDatabaseCache *)aux;
+    if (pd->no_cache)
+        return;
+
+    // count only cacheable DBs
+    pd_stats->hs_cacheable_dbs_cnt++;
+    if (pd->cached) {
+        pd_stats->hs_dbs_cache_loaded_cnt++;
+        return;
+    }
+
+    if (HSSaveCache(pd->hs_db, HSHashDb(pd)) == 0) {
+        pd->cached = true; // for rule reloads
+        pd_stats->hs_dbs_cache_saved_cnt++;
+    }
+}
+
+#endif /* BUILD_HYPERSCAN */
diff --git a/src/util-mpm-hs-cache.h b/src/util-mpm-hs-cache.h
new file mode 100644
index 000000000000..cc17d9846cc7
--- /dev/null
+++ b/src/util-mpm-hs-cache.h
@@ -0,0 +1,37 @@
+/* Copyright (C) 2024 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \file
+ *
+ * \author Lukas Sismis
+ *
+ * Hyperscan caching logic for faster database compilation.
+ */ + +#ifndef SURICATA_UTIL_MPM_HS_CACHE__H +#define SURICATA_UTIL_MPM_HS_CACHE__H + +#include "util-mpm-hs-core.h" + +#ifdef BUILD_HYPERSCAN +int HSLoadCache(hs_database_t **hs_db, uint64_t hs_db_hash); +uint64_t HSHashDb(const PatternDatabase *pd); +void HSSaveCacheIterator(void *data, void *aux); +#endif /* BUILD_HYPERSCAN */ + +#endif /* SURICATA_UTIL_MPM_HS_CACHE__H */ diff --git a/src/util-mpm-hs-core.h b/src/util-mpm-hs-core.h index fc7c2d302848..699dd69568a3 100644 --- a/src/util-mpm-hs-core.h +++ b/src/util-mpm-hs-core.h @@ -35,44 +35,44 @@ #include typedef struct SCHSPattern_ { - /* length of the pattern */ + /** length of the pattern */ uint16_t len; - /* flags describing the pattern */ + /** flags describing the pattern */ uint8_t flags; - /* holds the original pattern that was added */ + /** holds the original pattern that was added */ uint8_t *original_pat; - /* pattern id */ + /** pattern id */ uint32_t id; uint16_t offset; uint16_t depth; - /* sid(s) for this pattern */ + /** sid(s) for this pattern */ uint32_t sids_size; SigIntId *sids; - /* only used at ctx init time, when this structure is part of a hash + /** only used at ctx init time, when this structure is part of a hash * table. */ struct SCHSPattern_ *next; } SCHSPattern; typedef struct SCHSCtx_ { - /* hash used during ctx initialization */ + /** hash used during ctx initialization */ SCHSPattern **init_hash; - /* pattern database and pattern arrays. */ + /** pattern database and pattern arrays. */ void *pattern_db; - /* size of database, for accounting. */ + /** size of database, for accounting. */ size_t hs_db_size; } SCHSCtx; typedef struct SCHSThreadCtx_ { - /* Hyperscan scratch space region for this thread, capable of handling any + /** Hyperscan scratch space region for this thread, capable of handling any * database that has been compiled. */ void *scratch; - /* size of scratch space, for accounting. */ + /** size of scratch space, for accounting. 
*/ size_t scratch_size; } SCHSThreadCtx; @@ -81,12 +81,20 @@ typedef struct PatternDatabase_ { hs_database_t *hs_db; uint32_t pattern_cnt; - /* Reference count: number of MPM contexts using this pattern database. */ + /** Reference count: number of MPM contexts using this pattern database. */ uint32_t ref_cnt; - /* Signals if the matcher has loaded/saved the pattern database to disk */ + /** Signals if the matcher has loaded/saved the pattern database to disk */ bool cached; + /** Matcher will not cache this pattern DB */ + bool no_cache; } PatternDatabase; +typedef struct PatternDatabaseCache_ { + uint32_t hs_cacheable_dbs_cnt; + uint32_t hs_dbs_cache_loaded_cnt; + uint32_t hs_dbs_cache_saved_cnt; +} PatternDatabaseCache; + const char *HSErrorToStr(hs_error_t error_code); #endif /* BUILD_HYPERSCAN */ diff --git a/src/util-mpm-hs.c b/src/util-mpm-hs.c index 9246741cdf80..7d6dc44736f5 100644 --- a/src/util-mpm-hs.c +++ b/src/util-mpm-hs.c @@ -33,16 +33,19 @@ #include "detect-engine-build.h" #include "conf.h" +#include "util-conf.h" #include "util-debug.h" #include "util-unittest.h" #include "util-unittest-helper.h" #include "util-memcmp.h" #include "util-mpm-hs.h" +#include "util-mpm-hs-cache.h" #include "util-mpm-hs-core.h" #include "util-memcpy.h" #include "util-hash.h" #include "util-hash-lookup3.h" #include "util-hyperscan.h" +#include "util-path.h" #ifdef BUILD_HYPERSCAN @@ -551,6 +554,7 @@ static PatternDatabase *PatternDatabaseAlloc(uint32_t pattern_cnt) pd->pattern_cnt = pattern_cnt; pd->ref_cnt = 0; pd->hs_db = NULL; + pd->cached = false; /* alloc the pattern array */ pd->parray = (SCHSPattern **)SCCalloc(pd->pattern_cnt, sizeof(SCHSPattern *)); @@ -675,7 +679,7 @@ static int CompileDataExtensionsInit(hs_expr_ext_t **ext, const SCHSPattern *p) * \param SCHSCompileData* [in] Pointer to the compile data. * \retval 0 On success, negative value on failure. 
*/ -static int PatternDatabaseGetCached(PatternDatabase **pd, SCHSCompileData *cd) +static int PatternDatabaseGetCached(PatternDatabase **pd, SCHSCompileData *cd, bool disk_cache) { /* Check global hash table to see if we've seen this pattern database * before, and reuse the Hyperscan database if so. */ @@ -690,6 +694,26 @@ static int PatternDatabaseGetCached(PatternDatabase **pd, SCHSCompileData *cd) CompileDataFree(cd); *pd = pd_cached; return 0; + } else if (disk_cache) { + pd_cached = *pd; + uint64_t db_lookup_hash = HSHashDb(pd_cached); + if (HSLoadCache(&pd_cached->hs_db, db_lookup_hash) == 0) { + pd_cached->ref_cnt = 1; + pd_cached->cached = true; + if (HSScratchAlloc(pd_cached->hs_db) != 0) { + goto recover; + } + if (HashTableAdd(g_db_table, pd_cached, 1) < 0) { + goto recover; + } + CompileDataFree(cd); + return 0; + + recover: + pd_cached->ref_cnt = 0; + pd_cached->cached = false; + return -1; + } } return -1; // not cached @@ -750,6 +774,7 @@ int SCHSPreparePatterns(MpmCtx *mpm_ctx, bool cache_to_disk) } HSPatternArrayInit(ctx, pd); + pd->no_cache = cache_to_disk ? false : true; /* Serialise whole database compilation as a relatively easy way to ensure * dedupe is safe. 
*/ SCMutexLock(&g_db_table_mutex); @@ -758,7 +783,7 @@ int SCHSPreparePatterns(MpmCtx *mpm_ctx, bool cache_to_disk) goto error; } - if (PatternDatabaseGetCached(&pd, cd) == 0 && pd != NULL) { + if (PatternDatabaseGetCached(&pd, cd, cache_to_disk) == 0 && pd != NULL) { ctx->pattern_db = pd; if (PatternDatabaseGetSize(pd, &ctx->hs_db_size) != 0) { SCMutexUnlock(&g_db_table_mutex); @@ -800,6 +825,22 @@ int SCHSPreparePatterns(MpmCtx *mpm_ctx, bool cache_to_disk) return -1; } +/** + * \brief Cache the loaded ruleset + */ +static int SCHSCacheRuleset(void) +{ + SCLogDebug("Caching the loaded ruleset "); + PatternDatabaseCache pd_stats = { 0 }; + SCMutexLock(&g_db_table_mutex); + HashTableIterate(g_db_table, HSSaveCacheIterator, &pd_stats); + SCMutexUnlock(&g_db_table_mutex); + SCLogInfo("%u rule groups cached (%u newly cached) of total %u cacheable groups", + pd_stats.hs_dbs_cache_loaded_cnt + pd_stats.hs_dbs_cache_saved_cnt, + pd_stats.hs_dbs_cache_saved_cnt, pd_stats.hs_cacheable_dbs_cnt); + return 0; +} + /** * \brief Init the mpm thread context. * @@ -1110,7 +1151,7 @@ void MpmHSRegister(void) mpm_table[MPM_HS].AddPattern = SCHSAddPatternCS; mpm_table[MPM_HS].AddPatternNocase = SCHSAddPatternCI; mpm_table[MPM_HS].Prepare = SCHSPreparePatterns; - mpm_table[MPM_HS].CacheRuleset = NULL; + mpm_table[MPM_HS].CacheRuleset = SCHSCacheRuleset; mpm_table[MPM_HS].Search = SCHSSearch; mpm_table[MPM_HS].PrintCtx = SCHSPrintInfo; mpm_table[MPM_HS].PrintThreadCtx = SCHSPrintSearchStats;