Skip to content

Commit

Permalink
Make regex dependency optional
Browse files Browse the repository at this point in the history
`regex` was used only in four trivial cases that could be implemented
more simply, either naively or using memchr, without losing performance.
As such the dependency needlessly increases build time, size of binary
and attack surface.

This change makes `regex` optional and defaults to `naive`/`memchr`
implementations. This *improves* performance a bit. The dependency
could've been removed entirely but was kept in case a regression is
discovered on another platform and to make comparing the performance
easier. It can be removed in the future if the code is proven to be
reliable.
  • Loading branch information
Kixunil committed Jan 13, 2021
1 parent e1b197b commit 17d3c6f
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 23 deletions.
8 changes: 7 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,13 @@ lazy_static = "^1.4"
libc = { version = "^0.2", optional = true }
parking_lot = "^0.11"
protobuf = { version = "^2.0", optional = true }
regex = "^1.3"
# DO NOT RELY ON THIS FEATURE TO STAY AVAILABLE!
# It doesn't change the API.
# Intended for testing/debugging only.
# It can affect the performance.
# Report any interesting findings, especially if the performance IMPROVES with `regex` turned ON.
regex = { version = "^1.3", optional = true }
memchr = "^2.3"
reqwest = { version = "^0.11", features = ["blocking"], optional = true }
thiserror = "^1.0"

Expand Down
65 changes: 52 additions & 13 deletions src/desc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,71 @@ use std::collections::{BTreeSet, HashMap};
use std::hash::Hasher;

use fnv::FnvHasher;
use regex::Regex;

use crate::errors::{Error, Result};
use crate::metrics::SEPARATOR_BYTE;
use crate::proto::LabelPair;

// Details of required format are at
// https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
fn is_valid_metric_name(name: &str) -> bool {
lazy_static! {
static ref VALIDATOR: Regex =
Regex::new("^[a-zA-Z_:][a-zA-Z0-9_:]*$").expect("Regex to be valid.");
#[cfg(not(feature = "regex"))]
mod validation {
/// Returns `true` when `c` is an ASCII letter (`a-z`, `A-Z`) or an underscore.
fn matches_charset_without_colon(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}

VALIDATOR.is_match(name)
/// Returns `true` when `c` is a colon or is accepted by `matches_charset_without_colon`.
fn matches_charset_with_colon(c: char) -> bool {
    c == ':' || matches_charset_without_colon(c)
}

/// Checks that `input` is a non-empty identifier.
///
/// The first character must be accepted by `charset_validator`. Every following character
/// must either be accepted by `charset_validator` or be an ASCII decimal digit.
///
/// Equivalent to the regex `^[?][?0-9]*$`, where `?` stands for the set of characters
/// accepted by `charset_validator`.
fn is_valid_ident<F: FnMut(char) -> bool>(input: &str, mut charset_validator: F) -> bool {
    let mut rest = input.chars();
    match rest.next() {
        // A valid leading character: the remainder may additionally contain digits.
        Some(first) if charset_validator(first) => {
            rest.all(|c| charset_validator(c) || c.is_ascii_digit())
        }
        // Empty input or an invalid leading character.
        _ => false,
    }
}

/// Returns `true` if `name` is an acceptable metric name.
///
/// Delegates to `is_valid_ident` with the character set that also accepts `:`,
/// so the first character must be an ASCII letter, `_` or `:`, and the remaining
/// characters may additionally be ASCII digits.
///
/// Details of the required format are at
/// https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
pub(super) fn is_valid_metric_name(name: &str) -> bool {
is_valid_ident(name, matches_charset_with_colon)
}

/// Returns `true` if `name` is an acceptable label name.
///
/// Delegates to `is_valid_ident` with the character set that does not accept `:`,
/// so the first character must be an ASCII letter or `_`, and the remaining
/// characters may additionally be ASCII digits.
pub(super) fn is_valid_label_name(name: &str) -> bool {
is_valid_ident(name, matches_charset_without_colon)
}
}

fn is_valid_label_name(name: &str) -> bool {
lazy_static! {
static ref VALIDATOR: Regex =
Regex::new("^[a-zA-Z_][a-zA-Z0-9_]*$").expect("Regex to be valid.");
#[cfg(feature = "regex")]
mod validation {
use regex::Regex;

pub(super) fn is_valid_metric_name(name: &str) -> bool {
lazy_static! {
static ref VALIDATOR: Regex =
Regex::new("^[a-zA-Z_:][a-zA-Z0-9_:]*$").expect("Regex to be valid.");
}

VALIDATOR.is_match(name)
}

VALIDATOR.is_match(name)
pub(super) fn is_valid_label_name(name: &str) -> bool {
lazy_static! {
static ref VALIDATOR:
Regex = Regex::new("^[a-zA-Z_][a-zA-Z0-9_]*$").expect("Regex to be valid.");
}

VALIDATOR.is_match(name)

}
}

use validation::*;

/// The descriptor used by every Prometheus [`Metric`](crate::core::Metric). It is essentially
/// the immutable meta-data of a metric. The normal metric implementations
/// included in this package manage their [`Desc`] under the hood.
Expand Down
33 changes: 24 additions & 9 deletions src/encoder/text.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
// Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0.

use regex::{Match, Regex};
use std::borrow::Cow;
use std::io::Write;

Expand Down Expand Up @@ -216,26 +215,42 @@ fn label_pairs_to_text(
Ok(())
}

/// `escape_string` replaces `\` by `\\`, new line character by `\n`, and `"` by `\"` if
/// `include_double_quote` is true.
///
/// Implementation adapted from
/// https://lise-henry.github.io/articles/optimising_strings.html
fn escape_string(v: &str, include_double_quote: bool) -> Cow<'_, str> {
#[cfg(feature = "regex")]
fn find_first_occurence(v: &str, include_double_quote: bool) -> Option<usize> {
use regex::{Match, Regex};

// Regex compilation is expensive. Use `lazy_static` to compile the regexes
// once per process lifetime and not once per function invocation.
lazy_static! {
static ref ESCAPER: Regex = Regex::new("(\\\\|\n)").expect("Regex to be valid.");
static ref QUOTED_ESCAPER: Regex = Regex::new("(\\\\|\n|\")").expect("Regex to be valid.");
}

let first_occurence = if include_double_quote {
if include_double_quote {
QUOTED_ESCAPER.find(v)
} else {
ESCAPER.find(v)
}
.as_ref()
.map(Match::start);
.map(Match::start)
}

/// Returns the byte offset of the first `\` or new line character in `v`, also
/// considering `"` when `include_double_quote` is true. Returns `None` if `v`
/// contains none of them.
#[cfg(not(feature = "regex"))]
fn find_first_occurence(v: &str, include_double_quote: bool) -> Option<usize> {
    let haystack = v.as_bytes();

    if !include_double_quote {
        return memchr::memchr2(b'\\', b'\n', haystack);
    }

    memchr::memchr3(b'\\', b'\n', b'"', haystack)
}

/// `escape_string` replaces `\` by `\\`, new line character by `\n`, and `"` by `\"` if
/// `include_double_quote` is true.
///
/// Implementation adapted from
/// https://lise-henry.github.io/articles/optimising_strings.html
fn escape_string(v: &str, include_double_quote: bool) -> Cow<'_, str> {
let first_occurence = find_first_occurence(v, include_double_quote);

if let Some(first) = first_occurence {
let mut escaped = String::with_capacity(v.len() * 2);
Expand Down

0 comments on commit 17d3c6f

Please sign in to comment.