From 30704a57b2ba6e8765ccd0b8ec99ffa51d552872 Mon Sep 17 00:00:00 2001 From: Wang Xiang W Date: Thu, 8 Apr 2021 09:21:19 +0000 Subject: [PATCH] Adds hyperscan support to pm operator --- src/Makefile.am | 1 + src/operators/pm.cc | 168 ++++++++++++++++++++++++++++--- src/operators/pm.h | 18 ++-- src/operators/pm_from_file.cc | 11 ++- src/utils/acmp.cc | 128 ------------------------ src/utils/acmp.h | 2 - src/utils/hyperscan.cc | 179 ++++++++++++++++++++++++++++++++++ src/utils/hyperscan.h | 56 +++++++++++ 8 files changed, 406 insertions(+), 157 deletions(-) create mode 100644 src/utils/hyperscan.cc create mode 100644 src/utils/hyperscan.h diff --git a/src/Makefile.am b/src/Makefile.am index 4553dda7af..5f7b2914f0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -245,6 +245,7 @@ UTILS = \ utils/decode.cc \ utils/geo_lookup.cc \ utils/https_client.cc \ + utils/hyperscan.cc \ utils/ip_tree.cc \ utils/md5.cc \ utils/msc_tree.cc \ diff --git a/src/operators/pm.cc b/src/operators/pm.cc index e2212c0d7a..5eff3c7ca2 100644 --- a/src/operators/pm.cc +++ b/src/operators/pm.cc @@ -25,11 +25,6 @@ #include #include -#ifdef WITH_HS -#include -#endif - - #include "src/operators/operator.h" #ifndef WITH_HS #include "src/utils/acmp.h" @@ -41,6 +36,7 @@ namespace operators { Pm::~Pm() { #ifdef WITH_HS + m_hs = NULL; #else acmp_node_t *root = m_p->root_node; @@ -48,10 +44,10 @@ Pm::~Pm() { free(m_p); m_p = NULL; +#endif #ifdef MODSEC_MUTEX_ON_PM pthread_mutex_destroy(&m_lock); #endif -#endif } #ifndef WITH_HS @@ -95,14 +91,20 @@ void Pm::postOrderTraversal(acmp_btree_node_t *node) { bool Pm::evaluate(Transaction *transaction, RuleWithActions *rule, const std::string &input, std::shared_ptr ruleMessage) { + int rc = 0; + const char *match = NULL; #ifdef WITH_HS - return 0; +#ifdef MODSEC_MUTEX_ON_PM + pthread_mutex_lock(&m_lock); +#endif + rc = m_hs->search(input.c_str(), input.length(), &match); +#ifdef MODSEC_MUTEX_ON_PM + pthread_mutex_unlock(&m_lock); +#endif #else - int rc; ACMPT pt; pt.parser = m_p; pt.ptr = NULL; - const char *match = NULL; #ifdef MODSEC_MUTEX_ON_PM pthread_mutex_lock(&m_lock); #endif @@ -110,7 +112,7 @@ bool Pm::evaluate(Transaction *transaction, RuleWithActions *rule, #ifdef MODSEC_MUTEX_ON_PM pthread_mutex_unlock(&m_lock); #endif - +#endif if (rc >= 0 && transaction) { std::string match_(match?match:""); logOffset(ruleMessage, rc - match_.size() + 1, match_.size()); @@ -125,16 +127,138 @@ bool Pm::evaluate(Transaction *transaction, RuleWithActions *rule, } return rc >= 0; +} + +static +char *parse_pm_content(const char *op_parm, unsigned short int op_len, const char **error_msg) { + char *parm = NULL; + char *content; + unsigned short int offset = 0; +// char converted = 0; + int i, x; + unsigned char bin = 0, esc = 0, bin_offset = 0; + unsigned char c; + unsigned char bin_parm[3] = { 0 }; + char *processed = NULL; + + content = strdup(op_parm); + + if (content == NULL) { + *error_msg = std::string("Error allocating memory for pattern matching content.").c_str(); + return NULL; + } + + while (offset < op_len && (content[offset] == ' ' || content[offset] == '\t')) { + offset++; + }; + + op_len = strlen(content); + + if (content[offset] == '\"' && content[op_len-1] == '\"') { + parm = strdup(content + offset + 1); + if (parm == NULL) { + *error_msg = std::string("Error allocating memory for pattern matching content.").c_str(); + free(content); + content = NULL; + return NULL; + } + parm[op_len - offset - 2] = '\0'; + } else { + parm = strdup(content + offset); + if (parm == NULL) { + free(content); + content = NULL; + *error_msg = std::string("Error allocating memory for pattern matching content.").c_str(); + return NULL; + } + } + + free(content); + content = NULL; + + op_len = strlen(parm); + + if (op_len == 0) { + *error_msg = "Content length is 0."; + free(parm); + return NULL; + } + + for (i = 0, x = 0; i < op_len; i++) { + if (parm[i] == '|') { + if (bin) { + bin = 0; + } else { + bin = 1; + } + } else if(!esc && parm[i] == '\\') { + esc = 1; + } else { + if (bin) { + if (parm[i] == 0 || parm[i] == 1 || parm[i] == 2 || + parm[i] == 3 || parm[i] == 4 || parm[i] == 5 || + parm[i] == 6 || parm[i] == 7 || parm[i] == 8 || + parm[i] == 9 || + parm[i] == 'A' || parm[i] == 'a' || + parm[i] == 'B' || parm[i] == 'b' || + parm[i] == 'C' || parm[i] == 'c' || + parm[i] == 'D' || parm[i] == 'd' || + parm[i] == 'E' || parm[i] == 'e' || + parm[i] == 'F' || parm[i] == 'f') + { + bin_parm[bin_offset] = (char)parm[i]; + bin_offset++; + if (bin_offset == 2) { + c = strtol((char *)bin_parm, (char **) NULL, 16) & 0xFF; + bin_offset = 0; + parm[x] = c; + x++; + //converted = 1; + } + } else if (parm[i] == ' ') { + } + } else if (esc) { + if (parm[i] == ':' || + parm[i] == ';' || + parm[i] == '\\' || + parm[i] == '\"') + { + parm[x] = parm[i]; + x++; + } else { + *error_msg = std::string("Unsupported escape sequence.").c_str(); + free(parm); + return NULL; + } + esc = 0; + //converted = 1; + } else { + parm[x] = parm[i]; + x++; + } + } + } + +#if 0 + if (converted) { + op_len = x; + } #endif - return 0; -} + //processed = memcpy(processed, parm, op_len); + processed = strdup(parm); + free(parm); + parm = NULL; + if (processed == NULL) { + *error_msg = std::string("Error allocating memory for pattern matching content.").c_str(); + return NULL; + } + + return processed; +} bool Pm::init(const std::string &file, std::string *error) { -#ifdef WITH_HS - fprintf(stdout, "Sopport for HS is on the way: %s\n", hs_version()); -#else std::vector vec; std::istringstream *iss; const char *err = NULL; @@ -154,12 +278,25 @@ bool Pm::init(const std::string &file, std::string *error) { back_inserter(vec)); for (auto &a : vec) { +#ifdef WITH_HS + m_hs->addPattern(a.c_str(), a.length()); + } + if (m_hs->compile(error) == false) { + if (content) { + free(content); + content = NULL; + } + delete iss; + return false; + } +#else acmp_add_pattern(m_p, a.c_str(), NULL, NULL, a.length()); } while (m_p->is_failtree_done == 0) { acmp_prepare(m_p); } +#endif if (content) { free(content); @@ -167,7 +304,6 @@ bool Pm::init(const std::string &file, std::string *error) { } delete iss; -#endif return true; } diff --git a/src/operators/pm.h b/src/operators/pm.h index f664378d45..94bd907fcd 100644 --- a/src/operators/pm.h +++ b/src/operators/pm.h @@ -22,8 +22,11 @@ #include #include "src/operators/operator.h" +#ifdef WITH_HS +#include "src/utils/hyperscan.h" +#else #include "src/utils/acmp.h" - +#endif namespace modsecurity { namespace operators { @@ -34,15 +37,13 @@ class Pm : public Operator { /** @ingroup ModSecurity_Operator */ explicit Pm(std::unique_ptr param) : Operator("Pm", std::move(param)) { -#ifdef WITH_HS -#else +#ifndef WITH_HS m_p = acmp_create(0); #endif } explicit Pm(const std::string &n, std::unique_ptr param) : Operator(n, std::move(param)) { -#ifdef WITH_HS -#else +#ifndef WITH_HS m_p = acmp_create(0); #endif } @@ -59,16 +60,17 @@ class Pm : public Operator { #endif protected: -#ifndef WITH_HS +#ifdef WITH_HS + std::shared_ptr m_hs = + std::make_shared(); +#else ACMP *m_p; #endif private: -#ifndef WITH_HS #ifdef MODSEC_MUTEX_ON_PM pthread_mutex_t m_lock; #endif -#endif }; diff --git a/src/operators/pm_from_file.cc b/src/operators/pm_from_file.cc index 309635ccde..2ff47319e6 100644 --- a/src/operators/pm_from_file.cc +++ b/src/operators/pm_from_file.cc @@ -69,13 +69,18 @@ bool PmFromFile::init(const std::string &config, std::string *error) { for (std::string line; std::getline(*iss, line); ) { if (isComment(line) == false) { #ifdef WITH_HS + m_hs->addPattern(line.c_str(), line.length()); + } + } + if (m_hs->compile(error) == false) { + delete iss; + return false; + } #else acmp_add_pattern(m_p, line.c_str(), NULL, NULL, line.length()); -#endif - } + } } -#ifndef WITH_HS while (m_p->is_failtree_done == 0) { acmp_prepare(m_p); } diff --git a/src/utils/acmp.cc b/src/utils/acmp.cc index 2bfe7acb90..526e74af09 100644 --- a/src/utils/acmp.cc +++ b/src/utils/acmp.cc @@ -35,134 +35,6 @@ */ extern "C" { -char *parse_pm_content(const char *op_parm, unsigned short int op_len, const char **error_msg) { - char *parm = NULL; - char *content; - unsigned short int offset = 0; -// char converted = 0; - int i, x; - unsigned char bin = 0, esc = 0, bin_offset = 0; - unsigned char c = 0; - unsigned char bin_parm[3] = { 0 }; - char *processed = NULL; - - content = strdup(op_parm); - - if (content == NULL) { - *error_msg = std::string("Error allocating memory for pattern matching content.").c_str(); - return NULL; - } - - while (offset < op_len && (content[offset] == ' ' || content[offset] == '\t')) { - offset++; - }; - - op_len = strlen(content); - - if (content[offset] == '\"' && content[op_len-1] == '\"') { - parm = strdup(content + offset + 1); - if (parm == NULL) { - *error_msg = std::string("Error allocating memory for pattern matching content.").c_str(); - free(content); - content = NULL; - return NULL; - } - parm[op_len - offset - 2] = '\0'; - } else { - parm = strdup(content + offset); - if (parm == NULL) { - free(content); - content = NULL; - *error_msg = std::string("Error allocating memory for pattern matching content.").c_str(); - return NULL; - } - } - - free(content); - content = NULL; - - op_len = strlen(parm); - - if (op_len == 0) { - *error_msg = "Content length is 0."; - free(parm); - return NULL; - } - - for (i = 0, x = 0; i < op_len; i++) { - if (parm[i] == '|') { - if (bin) { - bin = 0; - } else { - bin = 1; - } - } else if(!esc && parm[i] == '\\') { - esc = 1; - } else { - if (bin) { - if (parm[i] == 0 || parm[i] == 1 || parm[i] == 2 || - parm[i] == 3 || parm[i] == 4 || parm[i] == 5 || - parm[i] == 6 || parm[i] == 7 || parm[i] == 8 || - parm[i] == 9 || - parm[i] == 'A' || parm[i] == 'a' || - parm[i] == 'B' || parm[i] == 'b' || - parm[i] == 'C' || parm[i] == 'c' || - parm[i] == 'D' || parm[i] == 'd' || - parm[i] == 'E' || parm[i] == 'e' || - parm[i] == 'F' || parm[i] == 'f') - { - bin_parm[bin_offset] = (char)parm[i]; - bin_offset++; - if (bin_offset == 2) { - c = strtol((char *)bin_parm, (char **) NULL, 16) & 0xFF; - bin_offset = 0; - parm[x] = c; - x++; - //converted = 1; - } - } else if (parm[i] == ' ') { - } - } else if (esc) { - if (parm[i] == ':' || - parm[i] == ';' || - parm[i] == '\\' || - parm[i] == '\"') - { - parm[x] = parm[i]; - x++; - } else { - *error_msg = std::string("Unsupported escape sequence.").c_str(); - free(parm); - return NULL; - } - esc = 0; - //converted = 1; - } else { - parm[x] = parm[i]; - x++; - } - } - } - -#if 0 - if (converted) { - op_len = x; - } -#endif - - //processed = memcpy(processed, parm, op_len); - processed = strdup(parm); - free(parm); - parm = NULL; - - if (processed == NULL) { - *error_msg = std::string("Error allocating memory for pattern matching content.").c_str(); - return NULL; - } - - return processed; -} - /* ******************************************************************************* ******************************************************************************* diff --git a/src/utils/acmp.h b/src/utils/acmp.h index 1af454e1df..be311be4ed 100644 --- a/src/utils/acmp.h +++ b/src/utils/acmp.h @@ -189,8 +189,6 @@ int acmp_process_quick(ACMPT *acmpt, const char **match, const char *data, size_ */ int acmp_prepare(ACMP *parser); -char *parse_pm_content(const char *op_parm, unsigned short int op_len, const char **error_msg); - } #endif /*ACMP_H_*/ diff --git a/src/utils/hyperscan.cc b/src/utils/hyperscan.cc new file mode 100644 index 0000000000..7cfd6b1bcc --- /dev/null +++ b/src/utils/hyperscan.cc @@ -0,0 +1,179 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2021 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ + +#include +#include + +#ifdef WITH_HS +#include "hyperscan.h" + +namespace modsecurity { +namespace Utils { + +// Render the given literal as a hex-escaped pattern. +static +std::string makeHex(const char *pat, const size_t patLen) { + std::string hexPattern; + + char hex[5]; + for (size_t i = 0; i < patLen; i++) { + snprintf(hex, 5, "\\x%02x", (unsigned char)pat[i]); + hexPattern += hex; + } + return hexPattern; +} + +HyperscanPattern::HyperscanPattern(const char *pat, size_t patLen, + unsigned int patId) : + pattern(pat), len(patLen), id(patId) {} + +HyperscanPm::~HyperscanPm() { + if (db) { + hs_free_database(db); + } + if (scratch) { + hs_free_scratch(scratch); + } +} + +void HyperscanPm::addPattern(const char *pat, size_t patLen) { + if (patLen == 0) { + return; + } + + HyperscanPattern p(pat, patLen, num_patterns++); + patterns.emplace_back(p); +} + +bool HyperscanPm::compile(std::string *error) { + if (patterns.empty()) { + return false; + } + + if (hs_valid_platform() != HS_SUCCESS ) + { + error->assign("This host does not support Hyperscan."); + return false; + } + + // Convert literal to its hex-escaped format. + std::vector hexPats; + for (const auto &p : patterns) { + hexPats.emplace_back(makeHex(p.pattern.c_str(), p.len)); + } + + // The Hyperscan compiler takes its patterns in a group of arrays. + std::vector pats; + std::vector flags(num_patterns, HS_FLAG_CASELESS); + std::vector ids; + + int i = 0; + for (const auto &p : patterns) { + pats.emplace_back(hexPats[i++].c_str()); + ids.emplace_back(p.id); + } + + hs_compile_error_t *compile_error = NULL; + hs_error_t hs_error = hs_compile_multi(&pats[0], &flags[0], &ids[0], + num_patterns, HS_MODE_BLOCK, NULL, &db, &compile_error); + + if (compile_error != NULL) { + std::string message(compile_error->message); + std::string expression = std::to_string(compile_error->expression); + error->assign("hs_compile_multi() failed: " + message + + "(expression: " + expression + ")"); + hs_free_compile_error(compile_error); + return false; + } + + if (hs_error != HS_SUCCESS) { + error->assign("hs_compile_multi() failed: error " + + std::to_string(hs_error)); + return false; + } + + // Allocate Hyperscan scratch space for this database. + hs_error = hs_alloc_scratch(db, &scratch); + + if (hs_error != HS_SUCCESS) { + error->assign("hs_alloc_scratch() failed: error " + + std::to_string(hs_error)); + return false; + } + + size_t scratch_size = 0; + hs_error = hs_scratch_size(scratch, &scratch_size); + if (hs_error != HS_SUCCESS) { + error->assign("hs_scratch_size() failed: error " + + std::to_string(hs_error)); + return false; + } + + size_t db_size = 0; + hs_error = hs_database_size(db, &db_size); + if (hs_error != HS_SUCCESS) { + error->assign("hs_database_size() failed: error " + + std::to_string(hs_error)); + return false; + } + + return true; +} + +// Context data used by Hyperscan match callback. +struct HyperscanCallbackContext { + HyperscanPm *pm; + unsigned int num_matches; + unsigned int offset; + const char **match; +}; + +// Match callback, called by hs_scan for every match. +static +int onMatch(unsigned int id, unsigned long long from, unsigned long long to, + unsigned int flags, void *hs_ctx) { + HyperscanCallbackContext *ctx = static_cast(hs_ctx); + + ctx->num_matches++; + ctx->offset = (unsigned int)to - 1; + *ctx->match = ctx->pm->getPatternById(id); + return 1; // Terminate matching. +} + +int HyperscanPm::search(const char *t, unsigned int tlen, const char **match) { + HyperscanCallbackContext ctx; + ctx.pm = this; + ctx.num_matches = 0; + ctx.offset = 0; + ctx.match = match; + + hs_error_t error = hs_scan(db, t, tlen, 0, scratch, onMatch, &ctx); + + if (error != HS_SCAN_TERMINATED) { + // TODO add debug output + return -1; + } + + return ctx.num_matches > 0 ? ctx.offset : -1; +} + +const char *HyperscanPm::getPatternById(unsigned int patId) const { + return patterns[patId].pattern.c_str(); +} + +} // namespace Utils +} // namespace modsecurity + +#endif diff --git a/src/utils/hyperscan.h b/src/utils/hyperscan.h new file mode 100644 index 0000000000..663f239efb --- /dev/null +++ b/src/utils/hyperscan.h @@ -0,0 +1,56 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2021 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ + +#include +#include + +#ifdef WITH_HS +#include + +namespace modsecurity { +namespace Utils { + +struct HyperscanPattern { + HyperscanPattern(const char *pat, size_t patLen, unsigned int patId); + + std::string pattern; + size_t len; + unsigned int id; /* actual pattern id */ +}; + +class HyperscanPm { + public: + ~HyperscanPm(); + + void addPattern(const char *pat, size_t patLen); + + bool compile(std::string *error); + + int search(const char *t, unsigned int tlen, const char **match); + + const char *getPatternById(unsigned int patId) const; + + private: + hs_database_t *db = nullptr; + // Scratch space for Hyperscan. + hs_scratch_t *scratch = nullptr; + unsigned int num_patterns = 0; // number of elements + std::vector patterns; +}; + +} // namespace Utils +} // namespace modsecurity + +#endif // WITH_HS