Skip to content

Commit 590fb85

Browse files
authored
Adds new partial string matching to header_rewrite (#12213)
1 parent f8eaec3 commit 590fb85

File tree

5 files changed

+154
-43
lines changed

5 files changed

+154
-43
lines changed

doc/admin-guide/plugins/header_rewrite.en.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -752,8 +752,17 @@ OR Indicates that either the current condition or the next one must be
752752
true, as contrasted with the default behavior from ``[AND]``.
753753
NOCASE Indicates that the string comparison, or regular expression, should be
754754
case-insensitive. The default is to be case-sensitive.
755+
PRE Make a prefix match on a string comparison.
756+
SUF Make a suffix match on a string comparison.
757+
MID Make a substring match on a string comparison.
758+
EXT Match the string against the file extension only, i.e. the text following the last dot.
759+
This is mostly useful for the ``URL:PATH`` part.
755760
====== ========================================================================
756761

762+
**Note**: At most one of ``[PRE]``, ``[SUF]``, ``[MID]``, or ``[EXT]`` may be
763+
used at any time. They can, however, be used together with ``[NOCASE]`` and the
764+
other flags.
765+
757766
Operators
758767
---------
759768

@@ -1594,3 +1603,12 @@ limiting to the request.::
15941603
cond %{REMAP_PSEUDO_HOOK} [AND]
15951604
cond %{CLIENT-HEADER:Some-Special-Header} ="yes"
15961605
run-plugin rate_limit.so "--limit=300 --error=429"
1606+
1607+
Check the ``PATH`` file extension
1608+
---------------------------------
1609+
1610+
This rule will deny all requests for URIs with the ``.php`` file extension::
1611+
1612+
cond %{REMAP_PSEUDO_HOOK} [AND]
1613+
cond %{CLIENT-URL:PATH} ="php" [EXT,NOCASE]
1614+
set-status 403

plugins/header_rewrite/condition.cc

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,26 +75,50 @@ Condition::initialize(Parser &p)
7575
if (p.mod_exist("AND")) {
7676
TSError("[%s] Can't have both AND and OR in mods", PLUGIN_NAME);
7777
} else {
78-
_mods = static_cast<CondModifiers>(_mods | COND_OR);
78+
_mods |= CondModifiers::OR;
7979
}
8080
} else if (p.mod_exist("AND")) {
81-
_mods = static_cast<CondModifiers>(_mods | COND_AND);
81+
_mods |= CondModifiers::AND;
8282
}
8383

8484
if (p.mod_exist("NOT")) {
85-
_mods = static_cast<CondModifiers>(_mods | COND_NOT);
85+
_mods |= CondModifiers::NOT;
8686
}
8787

8888
// The NOCASE / CASE modifier is a bit special, since it ripples down into the Matchers for
8989
// strings and regexes.
90+
int _substr_seen = 0;
91+
9092
if (p.mod_exist("NOCASE")) {
91-
_mods = static_cast<CondModifiers>(_mods | COND_NOCASE);
93+
_mods |= CondModifiers::MOD_NOCASE;
9294
} else if (p.mod_exist("CASE")) {
93-
// Nothing to do, this is the default
95+
// Nothing to do — the default is case-sensitive, but still accept this modifier for clarity.
96+
}
97+
98+
if (p.mod_exist("EXT")) {
99+
_mods |= CondModifiers::MOD_EXT;
100+
_substr_seen++;
101+
}
102+
if (p.mod_exist("SUF")) {
103+
_mods |= CondModifiers::MOD_SUF;
104+
_substr_seen++;
105+
}
106+
if (p.mod_exist("PRE")) {
107+
_mods |= CondModifiers::MOD_PRE;
108+
_substr_seen++;
109+
}
110+
if (p.mod_exist("MID")) {
111+
_mods |= CondModifiers::MOD_MID;
112+
_substr_seen++;
113+
}
114+
115+
if (_substr_seen > 1) {
116+
throw std::runtime_error("Only one substring modifier (EXT, SUF, PRE, MID) may be used.");
94117
}
95118

119+
// Deal with the "last" modifier as well.
96120
if (p.mod_exist("L")) {
97-
_mods = static_cast<CondModifiers>(_mods | COND_LAST);
121+
_mods |= CondModifiers::MOD_L;
98122
}
99123

100124
_cond_op = parse_matcher_op(p.get_arg());

plugins/header_rewrite/condition.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,12 @@ class Condition : public Statement
5454
{
5555
bool rt = eval(res);
5656

57-
if (_mods & COND_NOT) {
57+
if (has_modifier(_mods, CondModifiers::NOT)) {
5858
rt = !rt;
5959
}
6060

6161
if (_next) {
62-
if (_mods & COND_OR) {
62+
if (has_modifier(_mods, CondModifiers::OR)) {
6363
return rt || (static_cast<Condition *>(_next)->do_eval(res));
6464
} else { // AND is the default
6565
// Short circuit if we're an AND and the first condition is FALSE.
@@ -79,7 +79,7 @@ class Condition : public Statement
7979
bool
8080
last() const
8181
{
82-
return _mods & COND_LAST;
82+
return has_modifier(_mods, CondModifiers::MOD_L);
8383
}
8484

8585
CondModifiers
@@ -129,5 +129,5 @@ class Condition : public Statement
129129
Matcher *_matcher = nullptr;
130130

131131
private:
132-
CondModifiers _mods = COND_NONE;
132+
CondModifiers _mods = CondModifiers::NONE;
133133
};

plugins/header_rewrite/matcher.cc

Lines changed: 51 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,10 @@ void
3232
Matchers<std::string>::set(const std::string &d, CondModifiers mods)
3333
{
3434
_data = d;
35-
if (mods & COND_NOCASE) {
36-
_nocase = true;
37-
}
35+
_mods = mods;
3836

3937
if (_op == MATCH_REGULAR_EXPRESSION) {
40-
if (!_reHelper.setRegexMatch(_data, _nocase)) {
38+
if (!_reHelper.setRegexMatch(_data, has_modifier(_mods, CondModifiers::MOD_NOCASE))) {
4139
std::stringstream ss;
4240

4341
ss << _data;
@@ -50,28 +48,63 @@ Matchers<std::string>::set(const std::string &d, CondModifiers mods)
5048
}
5149
}
5250

53-
// Special case for strings, to allow for insensitive case comparisons for std::string matchers.
5451
template <>
5552
bool
5653
Matchers<std::string>::test_eq(const std::string &t) const
5754
{
58-
bool r = false;
59-
60-
if (_data.length() == t.length()) {
61-
if (_nocase) {
62-
// ToDo: in C++20, this would be nicer with std::range, e.g.
63-
// r = std::ranges::equal(_data, t, [](char c1, char c2) { return std::tolower(c1) == std::tolower(c2); });
64-
r = std::equal(_data.begin(), _data.end(), t.begin(), [](char c1, char c2) {
65-
return std::tolower(static_cast<unsigned char>(c1)) == std::tolower(static_cast<unsigned char>(c2));
66-
});
67-
} else {
68-
r = (t == _data);
55+
std::string_view lhs = _data;
56+
std::string_view rhs = t;
57+
bool result = false;
58+
59+
// ToDo: in C++20, we should be able to use std::ranges::equal, but this breaks on Ubuntu CI
60+
// return std::ranges::equal(a, b, [](char c1, char c2) {
61+
// return std::tolower(static_cast<unsigned char>(c1)) == std::tolower(static_cast<unsigned char>(c2));
62+
// });
63+
// Case-aware comparison
64+
auto compare = [&](const std::string_view a, const std::string_view b) -> bool {
65+
if (has_modifier(_mods, CondModifiers::MOD_NOCASE)) {
66+
return a.size() == b.size() && std::equal(a.begin(), a.end(), b.begin(), [](char c1, char c2) {
67+
return std::tolower(static_cast<unsigned char>(c1)) == std::tolower(static_cast<unsigned char>(c2));
68+
});
69+
}
70+
return a == b;
71+
};
72+
73+
// Case-aware substring match
74+
auto contains = [&](const std::string_view haystack, const std::string_view &needle) -> bool {
75+
if (!has_modifier(_mods, CondModifiers::MOD_NOCASE)) {
76+
return haystack.find(needle) != std::string_view::npos;
77+
}
78+
auto it = std::search(haystack.begin(), haystack.end(), needle.begin(), needle.end(), [](char c1, char c2) {
79+
return std::tolower(static_cast<unsigned char>(c1)) == std::tolower(static_cast<unsigned char>(c2));
80+
});
81+
return it != haystack.end();
82+
};
83+
84+
if (has_modifier(_mods, CondModifiers::MOD_EXT)) {
85+
auto dot = rhs.rfind('.');
86+
if (dot != std::string_view::npos && dot + 1 < rhs.size()) {
87+
result = compare(rhs.substr(dot + 1), lhs);
88+
}
89+
} else if (has_modifier(_mods, CondModifiers::MOD_SUF)) {
90+
if (rhs.size() >= lhs.size()) {
91+
result = compare(rhs.substr(rhs.size() - lhs.size()), lhs);
92+
}
93+
} else if (has_modifier(_mods, CondModifiers::MOD_PRE)) {
94+
if (rhs.size() >= lhs.size()) {
95+
result = compare(rhs.substr(0, lhs.size()), lhs);
96+
}
97+
} else if (has_modifier(_mods, CondModifiers::MOD_MID)) {
98+
result = contains(rhs, lhs);
99+
} else {
100+
if (rhs.size() == lhs.size()) {
101+
result = compare(rhs, lhs);
69102
}
70103
}
71104

72105
if (pi_dbg_ctl.on()) {
73-
debug_helper(t, " == ", r);
106+
debug_helper(t, " == ", result);
74107
}
75108

76-
return r;
109+
return result;
77110
}

plugins/header_rewrite/matcher.h

Lines changed: 51 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <string>
2525
#include <sstream>
2626
#include <stdexcept>
27+
#include <type_traits>
2728

2829
#include "swoc/swoc_ip.h"
2930

@@ -44,16 +45,52 @@ enum MatcherOps {
4445
};
4546

4647
// Condition modifiers
47-
enum CondModifiers {
48-
COND_NONE = 0,
49-
COND_OR = 1,
50-
COND_AND = 2,
51-
COND_NOT = 4,
52-
COND_NOCASE = 8,
53-
COND_LAST = 16,
54-
COND_CHAIN = 32 // Not implemented
48+
enum class CondModifiers : int {
49+
NONE = 0,
50+
OR = 1 << 0,
51+
AND = 1 << 1,
52+
NOT = 1 << 2,
53+
MOD_NOCASE = 1 << 3,
54+
MOD_L = 1 << 4,
55+
MOD_EXT = 1 << 5,
56+
MOD_PRE = 1 << 6,
57+
MOD_SUF = 1 << 7,
58+
MOD_MID = 1 << 8, // Essentially a substring
5559
};
5660

61+
inline CondModifiers
62+
operator|(CondModifiers a, const CondModifiers b)
63+
{
64+
using U = std::underlying_type_t<CondModifiers>;
65+
return static_cast<CondModifiers>(static_cast<U>(a) | static_cast<U>(b));
66+
}
67+
68+
inline CondModifiers
69+
operator&(CondModifiers a, const CondModifiers b)
70+
{
71+
using U = std::underlying_type_t<CondModifiers>;
72+
return static_cast<CondModifiers>(static_cast<U>(a) & static_cast<U>(b));
73+
}
74+
75+
inline CondModifiers &
76+
operator|=(CondModifiers &a, const CondModifiers b)
77+
{
78+
return a = a | b;
79+
}
80+
81+
inline CondModifiers &
82+
operator&=(CondModifiers &a, const CondModifiers b)
83+
{
84+
return a = a & b;
85+
}
86+
87+
inline bool
88+
has_modifier(const CondModifiers flags, const CondModifiers bit)
89+
{
90+
using U = std::underlying_type_t<CondModifiers>;
91+
return static_cast<U>(flags) & static_cast<U>(bit);
92+
}
93+
5794
///////////////////////////////////////////////////////////////////////////////
5895
// Base class for all Matchers (this is also the interface)
5996
//
@@ -93,9 +130,7 @@ template <class T> class Matchers : public Matcher
93130
set(const T &d, CondModifiers mods)
94131
{
95132
_data = d;
96-
if (mods & COND_NOCASE) {
97-
_nocase = true;
98-
}
133+
_mods = mods;
99134
}
100135

101136
// Evaluate this matcher
@@ -191,7 +226,8 @@ template <class T> class Matchers : public Matcher
191226
bool
192227
test_reg(const std::string &t, const Resources &res) const
193228
{
194-
Dbg(pi_dbg_ctl, "Test regular expression %s : %s (NOCASE = %d)", _data.c_str(), t.c_str(), static_cast<int>(_nocase));
229+
Dbg(pi_dbg_ctl, "Test regular expression %s : %s (NOCASE = %s)", _data.c_str(), t.c_str(),
230+
has_modifier(_mods, CondModifiers::MOD_NOCASE) ? "true" : "false");
195231
int count = _reHelper.regexMatch(t.c_str(), t.length(), const_cast<Resources &>(res).ovector);
196232

197233
if (count > 0) {
@@ -205,9 +241,9 @@ template <class T> class Matchers : public Matcher
205241
return false;
206242
}
207243

208-
T _data;
209-
regexHelper _reHelper;
210-
bool _nocase = false;
244+
T _data;
245+
regexHelper _reHelper;
246+
CondModifiers _mods = CondModifiers::NONE;
211247
};
212248

213249
// Specializations for the strings, since they can be both strings and regexes

0 commit comments

Comments
 (0)