Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds hyperscan support to pm operator #2551

Open
wants to merge 1 commit into
base: v3/dev/hyperscan
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Makefile.am
Original file line number Diff line number Diff line change
@@ -245,6 +245,7 @@ UTILS = \
utils/decode.cc \
utils/geo_lookup.cc \
utils/https_client.cc \
utils/hyperscan.cc \
utils/ip_tree.cc \
utils/md5.cc \
utils/msc_tree.cc \
168 changes: 152 additions & 16 deletions src/operators/pm.cc
Original file line number Diff line number Diff line change
@@ -25,11 +25,6 @@
#include <list>
#include <memory>

#ifdef WITH_HS
#include <hs.h>
#endif


#include "src/operators/operator.h"
#ifndef WITH_HS
#include "src/utils/acmp.h"
@@ -41,17 +36,18 @@ namespace operators {

Pm::~Pm() {
#ifdef WITH_HS
m_hs = NULL;
#else
acmp_node_t *root = m_p->root_node;

cleanup(root);

free(m_p);
m_p = NULL;
#endif
#ifdef MODSEC_MUTEX_ON_PM
pthread_mutex_destroy(&m_lock);
#endif
#endif
}

#ifndef WITH_HS
@@ -95,22 +91,28 @@ void Pm::postOrderTraversal(acmp_btree_node_t *node) {

bool Pm::evaluate(Transaction *transaction, RuleWithActions *rule,
const std::string &input, std::shared_ptr<RuleMessage> ruleMessage) {
int rc = 0;
const char *match = NULL;
#ifdef WITH_HS
return 0;
#ifdef MODSEC_MUTEX_ON_PM
pthread_mutex_lock(&m_lock);
#endif
rc = m_hs->search(input.c_str(), input.length(), &match);
#ifdef MODSEC_MUTEX_ON_PM
pthread_mutex_unlock(&m_lock);
#endif
#else
int rc;
ACMPT pt;
pt.parser = m_p;
pt.ptr = NULL;
const char *match = NULL;
#ifdef MODSEC_MUTEX_ON_PM
pthread_mutex_lock(&m_lock);
#endif
rc = acmp_process_quick(&pt, &match, input.c_str(), input.length());
#ifdef MODSEC_MUTEX_ON_PM
pthread_mutex_unlock(&m_lock);
#endif

#endif
if (rc >= 0 && transaction) {
std::string match_(match?match:"");
logOffset(ruleMessage, rc - match_.size() + 1, match_.size());
@@ -125,16 +127,138 @@ bool Pm::evaluate(Transaction *transaction, RuleWithActions *rule,
}

return rc >= 0;
}

static
char *parse_pm_content(const char *op_parm, unsigned short int op_len, const char **error_msg) {
char *parm = NULL;
char *content;
unsigned short int offset = 0;
// char converted = 0;
int i, x;
unsigned char bin = 0, esc = 0, bin_offset = 0;
unsigned char c;
unsigned char bin_parm[3] = { 0 };
char *processed = NULL;

content = strdup(op_parm);

if (content == NULL) {
*error_msg = std::string("Error allocating memory for pattern matching content.").c_str();
return NULL;
}

while (offset < op_len && (content[offset] == ' ' || content[offset] == '\t')) {
offset++;
};

op_len = strlen(content);

if (content[offset] == '\"' && content[op_len-1] == '\"') {
parm = strdup(content + offset + 1);
if (parm == NULL) {
*error_msg = std::string("Error allocating memory for pattern matching content.").c_str();
free(content);
content = NULL;
return NULL;
}
parm[op_len - offset - 2] = '\0';
} else {
parm = strdup(content + offset);
if (parm == NULL) {
free(content);
content = NULL;
*error_msg = std::string("Error allocating memory for pattern matching content.").c_str();
return NULL;
}
}

free(content);
content = NULL;

op_len = strlen(parm);

if (op_len == 0) {
*error_msg = "Content length is 0.";
free(parm);
return NULL;
}

for (i = 0, x = 0; i < op_len; i++) {
if (parm[i] == '|') {
if (bin) {
bin = 0;
} else {
bin = 1;
}
} else if(!esc && parm[i] == '\\') {
esc = 1;
} else {
if (bin) {
if (parm[i] == 0 || parm[i] == 1 || parm[i] == 2 ||
parm[i] == 3 || parm[i] == 4 || parm[i] == 5 ||
parm[i] == 6 || parm[i] == 7 || parm[i] == 8 ||
parm[i] == 9 ||
parm[i] == 'A' || parm[i] == 'a' ||
parm[i] == 'B' || parm[i] == 'b' ||
parm[i] == 'C' || parm[i] == 'c' ||
parm[i] == 'D' || parm[i] == 'd' ||
parm[i] == 'E' || parm[i] == 'e' ||
parm[i] == 'F' || parm[i] == 'f')
{
bin_parm[bin_offset] = (char)parm[i];
bin_offset++;
if (bin_offset == 2) {
c = strtol((char *)bin_parm, (char **) NULL, 16) & 0xFF;
bin_offset = 0;
parm[x] = c;
x++;
//converted = 1;
}
} else if (parm[i] == ' ') {
}
} else if (esc) {
if (parm[i] == ':' ||
parm[i] == ';' ||
parm[i] == '\\' ||
parm[i] == '\"')
{
parm[x] = parm[i];
x++;
} else {
*error_msg = std::string("Unsupported escape sequence.").c_str();
free(parm);
return NULL;
}
esc = 0;
//converted = 1;
} else {
parm[x] = parm[i];
x++;
}
}
}

#if 0
if (converted) {
op_len = x;
}
#endif

return 0;
}
//processed = memcpy(processed, parm, op_len);
processed = strdup(parm);
free(parm);
parm = NULL;

if (processed == NULL) {
*error_msg = std::string("Error allocating memory for pattern matching content.").c_str();
return NULL;
}

return processed;
}

bool Pm::init(const std::string &file, std::string *error) {
#ifdef WITH_HS
fprintf(stdout, "Sopport for HS is on the way: %s\n", hs_version());
#else
std::vector<std::string> vec;
std::istringstream *iss;
const char *err = NULL;
@@ -154,20 +278,32 @@ bool Pm::init(const std::string &file, std::string *error) {
back_inserter(vec));

for (auto &a : vec) {
#ifdef WITH_HS
m_hs->addPattern(a.c_str(), a.length());
}
if (m_hs->compile(error) == false) {
if (content) {
free(content);
content = NULL;
}
delete iss;
return false;
}
#else
acmp_add_pattern(m_p, a.c_str(), NULL, NULL, a.length());
}

while (m_p->is_failtree_done == 0) {
acmp_prepare(m_p);
}
#endif

if (content) {
free(content);
content = NULL;
}

delete iss;
#endif
return true;
}

18 changes: 10 additions & 8 deletions src/operators/pm.h
Original file line number Diff line number Diff line change
@@ -22,8 +22,11 @@
#include <utility>

#include "src/operators/operator.h"
#ifdef WITH_HS
#include "src/utils/hyperscan.h"
#else
#include "src/utils/acmp.h"

#endif

namespace modsecurity {
namespace operators {
@@ -34,15 +37,13 @@ class Pm : public Operator {
/** @ingroup ModSecurity_Operator */
explicit Pm(std::unique_ptr<RunTimeString> param)
: Operator("Pm", std::move(param)) {
#ifdef WITH_HS
#else
#ifndef WITH_HS
m_p = acmp_create(0);
#endif
}
explicit Pm(const std::string &n, std::unique_ptr<RunTimeString> param)
: Operator(n, std::move(param)) {
#ifdef WITH_HS
#else
#ifndef WITH_HS
m_p = acmp_create(0);
#endif
}
@@ -59,16 +60,17 @@ class Pm : public Operator {
#endif

protected:
#ifndef WITH_HS
#ifdef WITH_HS
std::shared_ptr<Utils::HyperscanPm> m_hs =
std::make_shared<Utils::HyperscanPm>();
#else
ACMP *m_p;
#endif

private:
#ifndef WITH_HS
#ifdef MODSEC_MUTEX_ON_PM
pthread_mutex_t m_lock;
#endif
#endif

};

11 changes: 8 additions & 3 deletions src/operators/pm_from_file.cc
Original file line number Diff line number Diff line change
@@ -69,13 +69,18 @@ bool PmFromFile::init(const std::string &config, std::string *error) {
for (std::string line; std::getline(*iss, line); ) {
if (isComment(line) == false) {
#ifdef WITH_HS
m_hs->addPattern(line.c_str(), line.length());
}
}
if (m_hs->compile(error) == false) {
delete iss;
return false;
}
#else
acmp_add_pattern(m_p, line.c_str(), NULL, NULL, line.length());
#endif
}
}
}

#ifndef WITH_HS
while (m_p->is_failtree_done == 0) {
acmp_prepare(m_p);
}
128 changes: 0 additions & 128 deletions src/utils/acmp.cc
Original file line number Diff line number Diff line change
@@ -35,134 +35,6 @@
*/
extern "C" {

/**
 * Normalizes the raw argument of a pattern-matching operator.
 *
 * Processing steps, in order:
 *  - leading spaces and tabs are skipped;
 *  - one pair of enclosing double quotes is removed, if present;
 *  - text between '|' characters is read as pairs of hexadecimal digits and
 *    each pair is replaced by the byte it encodes (any other character
 *    between the bars is ignored);
 *  - outside the bars a backslash may escape ':', ';', a backslash or a
 *    double quote; any other escaped character is rejected.
 *
 * @param op_parm   NUL-terminated operator argument. Must not be NULL.
 * @param op_len    Length hint. It only bounds the leading-whitespace scan;
 *                  the effective length is taken from the NUL terminator.
 * @param error_msg Receives a pointer to a static message on failure.
 *                  Must not be NULL.
 * @return A malloc()-allocated, NUL-terminated string that the caller
 *         releases with free(), or NULL on error.
 *
 * Note: the result is a C string, so a decoded 0x00 byte ends the pattern
 * as far as callers are concerned.
 */
char *parse_pm_content(const char *op_parm, unsigned short int op_len, const char **error_msg) {
    // Static storage: the pointer handed back through error_msg stays valid
    // after this function returns.
    static const char alloc_error[] =
        "Error allocating memory for pattern matching content.";

    size_t offset = 0;
    while (offset < op_len && (op_parm[offset] == ' ' || op_parm[offset] == '\t')) {
        offset++;
    }

    const char *start = op_parm + offset;
    size_t len = strlen(start);

    // Strip the quotes only when there really are two of them; a lone '"'
    // is kept as ordinary content.
    if (len >= 2 && start[0] == '\"' && start[len - 1] == '\"') {
        start++;
        len -= 2;
    }

    if (len == 0) {
        *error_msg = "Content length is 0.";
        return NULL;
    }

    char *parm = strdup(start);
    if (parm == NULL) {
        *error_msg = alloc_error;
        return NULL;
    }
    parm[len] = '\0';  // Drops the closing quote when one was stripped.

    size_t out = 0;                 // Write cursor; never runs ahead of i.
    unsigned char bin = 0;          // Inside a |...| hexadecimal section.
    unsigned char esc = 0;          // Previous character was a backslash.
    unsigned char bin_offset = 0;   // Hex digits collected for this byte.
    char bin_parm[3] = { 0 };

    for (size_t i = 0; i < len; i++) {
        const char ch = parm[i];

        if (ch == '|') {
            bin = !bin;
        } else if (!esc && ch == '\\') {
            esc = 1;
        } else if (bin) {
            const bool is_hex = (ch >= '0' && ch <= '9')
                || (ch >= 'a' && ch <= 'f')
                || (ch >= 'A' && ch <= 'F');
            if (is_hex) {
                bin_parm[bin_offset++] = ch;
                if (bin_offset == 2) {
                    parm[out++] = static_cast<char>(
                        strtol(bin_parm, (char **) NULL, 16) & 0xFF);
                    bin_offset = 0;
                }
            }
            // Anything else between the bars (e.g. spaces) is skipped.
        } else if (esc) {
            if (ch == ':' || ch == ';' || ch == '\\' || ch == '\"') {
                parm[out++] = ch;
            } else {
                *error_msg = "Unsupported escape sequence.";
                free(parm);
                return NULL;
            }
            esc = 0;
        } else {
            parm[out++] = ch;
        }
    }

    // Cut off whatever is left of the unprocessed input behind the cursor.
    parm[out] = '\0';

    return parm;
}

/*
*******************************************************************************
*******************************************************************************
2 changes: 0 additions & 2 deletions src/utils/acmp.h
Original file line number Diff line number Diff line change
@@ -189,8 +189,6 @@ int acmp_process_quick(ACMPT *acmpt, const char **match, const char *data, size_
*/
int acmp_prepare(ACMP *parser);

char *parse_pm_content(const char *op_parm, unsigned short int op_len, const char **error_msg);

}

#endif /*ACMP_H_*/
179 changes: 179 additions & 0 deletions src/utils/hyperscan.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
/*
* ModSecurity, http://www.modsecurity.org/
* Copyright (c) 2021 Trustwave Holdings, Inc. (http://www.trustwave.com/)
*
* You may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* If any of the files related to licensing are missing or if you have any
* other questions related to licensing please contact Trustwave Holdings, Inc.
* directly using the email address security@modsecurity.org.
*
*/

#include <string>
#include <vector>

#ifdef WITH_HS
#include "hyperscan.h"

namespace modsecurity {
namespace Utils {

// Builds the hex-escaped form of a literal: every input byte becomes the
// four characters "\xNN", with NN in lowercase hexadecimal.
static
std::string makeHex(const char *pat, const size_t patLen) {
    static const char kHexDigits[] = "0123456789abcdef";

    std::string escaped;
    escaped.reserve(patLen * 4);

    for (size_t pos = 0; pos != patLen; ++pos) {
        const unsigned char byte = static_cast<unsigned char>(pat[pos]);
        escaped += '\\';
        escaped += 'x';
        escaped += kHexDigits[byte >> 4];
        escaped += kHexDigits[byte & 0x0F];
    }

    return escaped;
}

// Stores a private copy of the pattern together with its length and id.
//
// The copy is taken with the explicit length so that `pattern` always holds
// exactly `len` bytes, even when the pattern contains an embedded NUL.
// Copying up to the first NUL only would leave `len` larger than the stored
// string and make later readers that trust `len` run past its end.
HyperscanPattern::HyperscanPattern(const char *pat, size_t patLen,
    unsigned int patId) :
    pattern(pat, patLen), len(patLen), id(patId) {}

// Releases the Hyperscan handles owned by this matcher: the compiled
// database first, then the scratch space. Handles that were never created
// are left alone.
HyperscanPm::~HyperscanPm() {
    if (db != nullptr) {
        hs_free_database(db);
    }

    if (scratch != nullptr) {
        hs_free_scratch(scratch);
    }
}

void HyperscanPm::addPattern(const char *pat, size_t patLen) {
if (patLen == 0) {
return;
}

HyperscanPattern p(pat, patLen, num_patterns++);
patterns.emplace_back(p);
}

bool HyperscanPm::compile(std::string *error) {
if (patterns.empty()) {
return false;
}

if (hs_valid_platform() != HS_SUCCESS )
{
error->assign("This host does not support Hyperscan.");
return false;
}

// Convert literal to its hex-escaped format.
std::vector<std::string> hexPats;
for (const auto &p : patterns) {
hexPats.emplace_back(makeHex(p.pattern.c_str(), p.len));
}

// The Hyperscan compiler takes its patterns in a group of arrays.
std::vector<const char *> pats;
std::vector<unsigned> flags(num_patterns, HS_FLAG_CASELESS);
std::vector<unsigned> ids;

int i = 0;
for (const auto &p : patterns) {
pats.emplace_back(hexPats[i++].c_str());
ids.emplace_back(p.id);
}

hs_compile_error_t *compile_error = NULL;
hs_error_t hs_error = hs_compile_multi(&pats[0], &flags[0], &ids[0],
num_patterns, HS_MODE_BLOCK, NULL, &db, &compile_error);

if (compile_error != NULL) {
std::string message(compile_error->message);
std::string expression = std::to_string(compile_error->expression);
error->assign("hs_compile_multi() failed: " + message +
"(expression: " + expression + ")");
hs_free_compile_error(compile_error);
return false;
}

if (hs_error != HS_SUCCESS) {
error->assign("hs_compile_multi() failed: error " +
std::to_string(hs_error));
return false;
}

// Allocate Hyperscan scratch space for this database.
hs_error = hs_alloc_scratch(db, &scratch);

if (hs_error != HS_SUCCESS) {
error->assign("hs_alloc_scratch() failed: error " +
std::to_string(hs_error));
return false;
}

size_t scratch_size = 0;
hs_error = hs_scratch_size(scratch, &scratch_size);
if (hs_error != HS_SUCCESS) {
error->assign("hs_scratch_size() failed: error " +
std::to_string(hs_error));
return false;
}

size_t db_size = 0;
hs_error = hs_database_size(db, &db_size);
if (hs_error != HS_SUCCESS) {
error->assign("hs_database_size() failed: error " +
std::to_string(hs_error));
return false;
}

return true;
}

// Context data used by Hyperscan match callback.
// One instance is filled in by HyperscanPm::search() and handed to hs_scan()
// as the opaque context pointer; onMatch() casts it back and updates it.
struct HyperscanCallbackContext {
// Matcher that owns the patterns; used to turn a match id into its text.
HyperscanPm *pm;
// Number of times the callback has fired during the current scan.
unsigned int num_matches;
// Reported end offset of the match minus one.
unsigned int offset;
// Where the callback stores the matched pattern's text for the caller.
const char **match;
};

// Match event handler passed to hs_scan(). It records the match in the
// HyperscanCallbackContext supplied through hs_ctx and then asks Hyperscan
// to stop, so only the first reported match is kept. `from` and `flags`
// are not used.
static
int onMatch(unsigned int id, unsigned long long from, unsigned long long to,
    unsigned int flags, void *hs_ctx) {
    auto *state = static_cast<HyperscanCallbackContext *>(hs_ctx);

    state->num_matches += 1;
    // Store the reported end offset minus one.
    state->offset = static_cast<unsigned int>(to) - 1;
    *state->match = state->pm->getPatternById(id);

    // A non-zero return value terminates matching.
    return 1;
}

int HyperscanPm::search(const char *t, unsigned int tlen, const char **match) {
HyperscanCallbackContext ctx;
ctx.pm = this;
ctx.num_matches = 0;
ctx.offset = 0;
ctx.match = match;

hs_error_t error = hs_scan(db, t, tlen, 0, scratch, onMatch, &ctx);

if (error != HS_SCAN_TERMINATED) {
// TODO add debug output
return -1;
}

return ctx.num_matches > 0 ? ctx.offset : -1;
}

// Returns the text of the pattern registered under `patId`, or nullptr when
// no such pattern exists. Ids are assigned sequentially by addPattern(), so
// an id is also the pattern's index in `patterns`. The returned pointer is
// owned by this object.
const char *HyperscanPm::getPatternById(unsigned int patId) const {
    if (patId >= patterns.size()) {
        // Unknown id: report "no pattern" instead of indexing out of range.
        return nullptr;
    }

    return patterns[patId].pattern.c_str();
}

} // namespace Utils
} // namespace modsecurity

#endif
56 changes: 56 additions & 0 deletions src/utils/hyperscan.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* ModSecurity, http://www.modsecurity.org/
* Copyright (c) 2021 Trustwave Holdings, Inc. (http://www.trustwave.com/)
*
* You may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* If any of the files related to licensing are missing or if you have any
* other questions related to licensing please contact Trustwave Holdings, Inc.
* directly using the email address security@modsecurity.org.
*
*/

#include <string>
#include <vector>

#ifdef WITH_HS
#include <hs.h>

namespace modsecurity {
namespace Utils {

// One literal pattern registered with HyperscanPm, kept until compile time.
struct HyperscanPattern {
// Copies `pat` into `pattern` and records `patLen` and `patId` as given.
HyperscanPattern(const char *pat, size_t patLen, unsigned int patId);

// Private copy of the pattern text.
std::string pattern;
// Length supplied by the caller when the pattern was added.
size_t len;
unsigned int id; /* actual pattern id */
};

// Multi-pattern literal matcher backed by Hyperscan.
//
// Usage: register patterns with addPattern(), call compile() once, then
// call search() for every input buffer.
//
// NOTE(review): every search() call uses the single `scratch` member.
// Hyperscan's documentation asks for one scratch space per concurrent scan,
// so callers presumably have to serialize search() - confirm against the
// locking done by the operator that owns this object.
class HyperscanPm {
 public:
    HyperscanPm() = default;
    ~HyperscanPm();

    // The object owns raw Hyperscan handles that the destructor frees; a
    // copy would free them a second time, so copying is disabled.
    HyperscanPm(const HyperscanPm &) = delete;
    HyperscanPm &operator=(const HyperscanPm &) = delete;

    // Queues a literal pattern of `patLen` bytes; empty patterns are
    // ignored.
    void addPattern(const char *pat, size_t patLen);

    // Builds the database and scratch space from the queued patterns.
    // Returns false and fills *error on failure.
    bool compile(std::string *error);

    // Scans `tlen` bytes at `t`. Returns the offset recorded for the first
    // reported match and stores the matched pattern in *match, or returns
    // -1 when nothing matched.
    int search(const char *t, unsigned int tlen, const char **match);

    // Maps a pattern id back to the pattern text.
    const char *getPatternById(unsigned int patId) const;

 private:
    // Compiled pattern database; null until compile() succeeds.
    hs_database_t *db = nullptr;
    // Scratch space for Hyperscan.
    hs_scratch_t *scratch = nullptr;
    unsigned int num_patterns = 0;  // number of elements
    std::vector<HyperscanPattern> patterns;
};

} // namespace Utils
} // namespace modsecurity

#endif // WITH_HS