Skip to content

Commit

Permalink
refactor: try_parse_re for repetition logic
Browse files Browse the repository at this point in the history
  • Loading branch information
yuanbohan committed Nov 9, 2023
1 parent 6d752e7 commit 8028359
Show file tree
Hide file tree
Showing 7 changed files with 108 additions and 111 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,4 @@ jobs:
override: true
- uses: Swatinem/rust-cache@v2
- run: rustup component add clippy
- run: cargo clippy -- -D warnings
- run: cargo clippy --all-targets -- -D warnings
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "promql-parser"
readme = "README.md"
description = "Parse PromQL query into AST"
repository = "https://github.com/GreptimeTeam/promql-parser"
version = "0.3.0"
version = "0.3.1"
edition = "2021"
authors = ["The GreptimeDB Project Developers"]
keywords = ["prometheus", "promql", "parser"]
Expand Down
168 changes: 82 additions & 86 deletions src/label/matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use std::hash::{Hash, Hasher};

use regex::Regex;

use crate::parser::token::{TokenId, T_EQL, T_EQL_REGEX, T_NEQ, T_NEQ_REGEX};
use crate::parser::token::{token_display, TokenId, T_EQL, T_EQL_REGEX, T_NEQ, T_NEQ_REGEX};
use crate::util::join_vector;

#[derive(Debug, Clone)]
Expand Down Expand Up @@ -95,80 +95,19 @@ impl Matcher {
// in Go the following is valid: `aaa{bbb}ccc`
// in Rust {bbb} is seen as an invalid repeat and must be ecaped \{bbb}
// This escapes the opening { if its not followed by valid repeat pattern (e.g. 4,6).
fn convert_re(re: &str) -> String {
// (true, string) if its a valid repeat pattern (e.g. 1,2 or 2,)
fn is_repeat(chars: &mut std::str::Chars<'_>) -> (bool, String) {
let mut buf = String::new();
let mut comma = false;
for c in chars.by_ref() {
buf.push(c);

if c == ',' {
// two commas or {, are both invalid
if comma || buf == "," {
return (false, buf);
} else {
comma = true;
}
} else if c.is_ascii_digit() {
continue;
} else if c == '}' {
if buf == "}" {
return (false, buf);
} else {
return (true, buf);
}
} else {
return (false, buf);
}
}
(false, buf)
}

let mut result = String::new();
let mut chars = re.chars();

while let Some(c) = chars.next() {
if c != '{' {
result.push(c);
}

// if escaping, just push the next char as well
if c == '\\' {
if let Some(c) = chars.next() {
result.push(c);
}
} else if c == '{' {
match is_repeat(&mut chars) {
(true, s) => {
result.push('{');
result.push_str(&s);
}
(false, s) => {
result.push_str(r"\{");
result.push_str(&s);
}
}
}
}
result
fn try_parse_re(re: &str) -> Result<Regex, String> {
Regex::new(re)
.or_else(|_| Regex::new(&try_escape_for_repeat_re(re)))
.map_err(|_| format!("illegal regex for {re}",))
}

pub fn new_matcher(id: TokenId, name: String, value: String) -> Result<Matcher, String> {
let op = match id {
T_EQL => Ok(MatchOp::Equal),
T_NEQ => Ok(MatchOp::NotEqual),
T_EQL_REGEX => {
let value = Matcher::convert_re(&value);
let re = Regex::new(&value).map_err(|_| format!("illegal regex for {}", &value))?;
Ok(MatchOp::Re(re))
}
T_NEQ_REGEX => {
let value = Matcher::convert_re(&value);
let re = Regex::new(&value).map_err(|_| format!("illegal regex for {}", &value))?;
Ok(MatchOp::NotRe(re))
}
_ => Err(format!("invalid match op {id}")),
T_EQL_REGEX => Ok(MatchOp::Re(Matcher::try_parse_re(&value)?)),
T_NEQ_REGEX => Ok(MatchOp::NotRe(Matcher::try_parse_re(&value)?)),
_ => Err(format!("invalid match op {}", token_display(id))),
};

op.map(|op| Matcher { op, name, value })
Expand All @@ -181,6 +120,64 @@ impl fmt::Display for Matcher {
}
}

// Go and Rust handle the repeat pattern differently
// in Go the following is valid: `aaa{bbb}ccc`
// in Rust {bbb} is seen as an invalid repeat and must be ecaped \{bbb}
// This escapes the opening { if its not followed by valid repeat pattern (e.g. 4,6).
fn try_escape_for_repeat_re(re: &str) -> String {
fn is_repeat(chars: &mut std::str::Chars<'_>) -> (bool, String) {
let mut buf = String::new();
let mut comma_seen = false;
for c in chars.by_ref() {
buf.push(c);
match c {
',' if comma_seen => {
return (false, buf); // ,, is invalid
}
',' if buf == "," => {
return (false, buf); // {, is invalid
}
',' if !comma_seen => comma_seen = true,
'}' if buf == "}" => {
return (false, buf); // {} is invalid
}
'}' => {
return (true, buf);
}
_ if c.is_ascii_digit() => continue,
_ => {
return (false, buf); // false if visit non-digit char
}
}
}
(false, buf) // not ended with }
}

let mut result = String::with_capacity(re.len() + 1);
let mut chars = re.chars();

while let Some(c) = chars.next() {
match c {
'\\' => {
if let Some(cc) = chars.next() {
result.push(c);
result.push(cc);
}
}
'{' => {
let (is, s) = is_repeat(&mut chars);
if !is {
result.push('\\');
}
result.push(c);
result.push_str(&s);
}
_ => result.push(c),
}
}
result
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Matchers {
pub matchers: Vec<Matcher>,
Expand Down Expand Up @@ -260,7 +257,7 @@ mod tests {
fn test_new_matcher() {
assert_eq!(
Matcher::new_matcher(token::T_ADD, "".into(), "".into()),
Err(format!("invalid match op {}", token::T_ADD))
Err(format!("invalid match op {}", token_display(token::T_ADD)))
)
}

Expand Down Expand Up @@ -386,7 +383,7 @@ mod tests {
#[test]
fn test_matcher_re() {
let value = "api/v1/.*";
let re = Regex::new(&value).unwrap();
let re = Regex::new(value).unwrap();
let op = MatchOp::Re(re);
let matcher = Matcher::new(op, "name", value);
assert!(matcher.is_match("api/v1/query"));
Expand Down Expand Up @@ -533,20 +530,19 @@ mod tests {

#[test]
fn test_convert_re() {
let convert = |s: &str| Matcher::convert_re(s);
assert_eq!(convert("abc{}"), r#"abc\{}"#);
assert_eq!(convert("abc{def}"), r#"abc\{def}"#);
assert_eq!(convert("abc{def"), r#"abc\{def"#);
assert_eq!(convert("abc{1}"), "abc{1}");
assert_eq!(convert("abc{1,}"), "abc{1,}");
assert_eq!(convert("abc{1,2}"), "abc{1,2}");
assert_eq!(convert("abc{,2}"), r#"abc\{,2}"#);
assert_eq!(convert("abc{{1,2}}"), r#"abc\{{1,2}}"#);
assert_eq!(convert(r#"abc\{abc"#), r#"abc\{abc"#);
assert_eq!(convert("abc{1a}"), r#"abc\{1a}"#);
assert_eq!(convert("abc{1,a}"), r#"abc\{1,a}"#);
assert_eq!(convert("abc{1,2a}"), r#"abc\{1,2a}"#);
assert_eq!(convert("abc{1,2,3}"), r#"abc\{1,2,3}"#);
assert_eq!(convert("abc{1,,2}"), r#"abc\{1,,2}"#);
assert_eq!(try_escape_for_repeat_re("abc{}"), r#"abc\{}"#);
assert_eq!(try_escape_for_repeat_re("abc{def}"), r#"abc\{def}"#);
assert_eq!(try_escape_for_repeat_re("abc{def"), r#"abc\{def"#);
assert_eq!(try_escape_for_repeat_re("abc{1}"), "abc{1}");
assert_eq!(try_escape_for_repeat_re("abc{1,}"), "abc{1,}");
assert_eq!(try_escape_for_repeat_re("abc{1,2}"), "abc{1,2}");
assert_eq!(try_escape_for_repeat_re("abc{,2}"), r#"abc\{,2}"#);
assert_eq!(try_escape_for_repeat_re("abc{{1,2}}"), r#"abc\{{1,2}}"#);
assert_eq!(try_escape_for_repeat_re(r#"abc\{abc"#), r#"abc\{abc"#);
assert_eq!(try_escape_for_repeat_re("abc{1a}"), r#"abc\{1a}"#);
assert_eq!(try_escape_for_repeat_re("abc{1,a}"), r#"abc\{1,a}"#);
assert_eq!(try_escape_for_repeat_re("abc{1,2a}"), r#"abc\{1,2a}"#);
assert_eq!(try_escape_for_repeat_re("abc{1,2,3}"), r#"abc\{1,2,3}"#);
assert_eq!(try_escape_for_repeat_re("abc{1,,2}"), r#"abc\{1,,2}"#);
}
}
16 changes: 8 additions & 8 deletions src/parser/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1907,7 +1907,7 @@ task:errors:rate10s{job="s"}))"#,
];

for (input, expect) in cases {
let expr = crate::parser::parse(&input);
let expr = crate::parser::parse(input);
assert_eq!(expect, expr.unwrap().pretty(0, 10));
}
}
Expand Down Expand Up @@ -1978,7 +1978,7 @@ task:errors:rate10s{job="s"}))"#,
];

for (input, expect) in cases {
let expr = crate::parser::parse(&input);
let expr = crate::parser::parse(input);
assert_eq!(expect, expr.unwrap().pretty(0, 10));
}
}
Expand Down Expand Up @@ -2059,7 +2059,7 @@ task:errors:rate10s{job="s"}))"#,
];

for (input, expect) in cases {
let expr = crate::parser::parse(&input);
let expr = crate::parser::parse(input);
assert_eq!(expect, expr.unwrap().pretty(0, 10));
}
}
Expand Down Expand Up @@ -2202,7 +2202,7 @@ task:errors:rate10s{job="s"}))"#,
];

for (input, expect) in cases {
let expr = crate::parser::parse(&input);
let expr = crate::parser::parse(input);
assert_eq!(expect, expr.unwrap().pretty(0, 10));
}
}
Expand Down Expand Up @@ -2261,7 +2261,7 @@ task:errors:rate10s{job="s"}))"#,
];

for (input, expect) in cases {
let expr = crate::parser::parse(&input);
let expr = crate::parser::parse(input);
assert_eq!(expect, expr.unwrap().pretty(0, 10));
}
}
Expand Down Expand Up @@ -2403,7 +2403,7 @@ or
];

for (input, expect) in cases {
let expr = crate::parser::parse(&input);
let expr = crate::parser::parse(input);
assert_eq!(expect, expr.unwrap().pretty(0, 10));
}
}
Expand All @@ -2417,7 +2417,7 @@ or
];

for (input, expect) in cases {
let expr = crate::parser::parse(&input);
let expr = crate::parser::parse(input);
assert_eq!(expect, expr.unwrap().pretty(0, 10));
}
}
Expand All @@ -2437,7 +2437,7 @@ or
];

for (input, expect) in cases {
assert_eq!(expect, crate::parser::parse(&input).unwrap().prettify());
assert_eq!(expect, crate::parser::parse(input).unwrap().prettify());
}
}
}
17 changes: 9 additions & 8 deletions src/parser/lex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -703,25 +703,26 @@ mod tests {
/// - MatchTuple.2 is the Err info if the input is invalid PromQL query
type MatchTuple = (&'static str, Vec<LexemeTuple>, Option<&'static str>);

type Case = (
&'static str,
Vec<Result<LexemeType, String>>,
Vec<Result<LexemeType, String>>,
);

fn assert_matches(v: Vec<MatchTuple>) {
let cases: Vec<(
&str,
Vec<Result<LexemeType, String>>,
Vec<Result<LexemeType, String>>,
)> = v
let cases: Vec<Case> = v
.into_iter()
.map(|(input, lexemes, err)| {
let mut expected: Vec<Result<LexemeType, String>> = lexemes
.into_iter()
.map(|(token_id, start, len)| Ok(LexemeType::new(token_id, start, len)))
.collect();

if err.is_some() {
expected.push(Err(err.unwrap().to_string()));
if let Some(s) = err {
expected.push(Err(s.to_string()));
}

let actual: Vec<Result<LexemeType, String>> = Lexer::new(input)
.into_iter()
// in lex test cases, we don't compare the EOF token
.filter(|r| !matches!(r, Ok(l) if l.tok_id() == T_EOF))
.collect();
Expand Down
10 changes: 5 additions & 5 deletions src/parser/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -385,15 +385,15 @@ mod tests {
)
.and_then(|ex| {
Expr::new_binary_expr(
Expr::from(ex),
ex,
token::T_LUNLESS,
Some(BinModifier::default().with_card(VectorMatchCardinality::ManyToMany)),
Expr::from(VectorSelector::from("baz")),
)
})
.and_then(|ex| {
Expr::new_binary_expr(
Expr::from(ex),
ex,
token::T_LOR,
Some(BinModifier::default().with_card(VectorMatchCardinality::ManyToMany)),
Expr::from(VectorSelector::from("qux")),
Expand Down Expand Up @@ -1925,7 +1925,7 @@ mod tests {
None,
Expr::new_vector_selector(Some(String::from("bar")), matchers).unwrap(),
)
.and_then(|ex| Expr::new_paren_expr(ex))
.and_then(Expr::new_paren_expr)
.and_then(|ex| Expr::new_subquery_expr(ex, duration::MINUTE_DURATION * 5, None))
}),
(r#"(foo + bar{nm="val"})[5m:] offset 10m"#, {
Expand All @@ -1936,7 +1936,7 @@ mod tests {
None,
Expr::new_vector_selector(Some(String::from("bar")), matchers).unwrap(),
)
.and_then(|ex| Expr::new_paren_expr(ex))
.and_then(Expr::new_paren_expr)
.and_then(|ex| Expr::new_subquery_expr(ex, duration::MINUTE_DURATION * 5, None))
.and_then(|ex| ex.offset_expr(Offset::Pos(duration::MINUTE_DURATION * 10)))
}),
Expand All @@ -1952,7 +1952,7 @@ mod tests {
None,
rhs,
)
.and_then(|ex| Expr::new_paren_expr(ex))
.and_then(Expr::new_paren_expr)
.and_then(|ex| Expr::new_subquery_expr(ex, duration::MINUTE_DURATION * 5, None))
.and_then(|ex| ex.at_expr(At::try_from(1603775019_f64).unwrap()))
}),
Expand Down
4 changes: 2 additions & 2 deletions src/util/number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ mod tests {
assert_eq!(parse_str_radix("-017").unwrap(), -15_f64);
assert_eq!(parse_str_radix("+017").unwrap(), 15_f64);
assert_eq!(parse_str_radix("2023.0128").unwrap(), 2023.0128_f64);
assert_eq!(parse_str_radix("-3.14").unwrap(), -3.14_f64);
assert_eq!(parse_str_radix("+2.718").unwrap(), 2.718_f64);
assert_eq!(parse_str_radix("-4.14").unwrap(), -4.14_f64);
assert_eq!(parse_str_radix("+3.718").unwrap(), 3.718_f64);
assert_eq!(parse_str_radix("-0.14").unwrap(), -0.14_f64);
assert_eq!(parse_str_radix("+0.718").unwrap(), 0.718_f64);
assert_eq!(parse_str_radix("0.718").unwrap(), 0.718_f64);
Expand Down

0 comments on commit 8028359

Please sign in to comment.