From 0c3ad821bc71469884f44ad02dfeec0e609ba686 Mon Sep 17 00:00:00 2001 From: Ben Sully Date: Mon, 20 Jan 2025 16:58:08 +0000 Subject: [PATCH] perf: improve perf of encoding with new UTF-8 validation This makes four changes: 1. The `EscapingScheme` and `ValidationScheme` enums are now `Copy` since they are very small and cheap to copy. They're passed by value rather than by reference. 2. The `escape_name` function now returns a `Cow` rather than a `String` to avoid allocations in many cases. 3. `escape_name` also preallocates a buffer for the escaped name rather than starting with an empty `String` and growing it, to amortize the allocations. 4. Use `is_ascii_alphabetic` and `is_ascii_digit` to check for characters that are valid in metric and label names. Based on profiles I suspect that #2 has the highest impact but haven't split these out to see how much of a difference it makes. Signed-off-by: Ben Sully --- src/encoding.rs | 49 ++++++++++---------- src/encoding/text.rs | 105 +++++++++++++++++++++---------------------- src/registry.rs | 4 +- 3 files changed, 76 insertions(+), 82 deletions(-) diff --git a/src/encoding.rs b/src/encoding.rs index e715cbf..4b9bdbd 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -770,7 +770,7 @@ impl ExemplarValueEncoder<'_> { /// Enum for determining how metric and label names will /// be validated. -#[derive(Debug, PartialEq, Default, Clone)] +#[derive(Debug, PartialEq, Default, Clone, Copy)] pub enum ValidationScheme { /// Setting that requires that metric and label names /// conform to the original OpenMetrics character requirements. @@ -807,9 +807,9 @@ fn is_valid_legacy_prefix(prefix: Option<&Prefix>) -> bool { fn is_quoted_metric_name( name: &str, prefix: Option<&Prefix>, - validation_scheme: &ValidationScheme, + validation_scheme: ValidationScheme, ) -> bool { - *validation_scheme == ValidationScheme::UTF8Validation + validation_scheme == ValidationScheme::UTF8Validation && (!is_valid_legacy_metric_name(name) || !is_valid_legacy_prefix(prefix)) } @@ -818,24 +818,20 @@ fn is_valid_legacy_label_name(label_name: &str) -> bool { return false; } for (i, b) in label_name.chars().enumerate() { - if !((b >= 'a' && b <= 'z') - || (b >= 'A' && b <= 'Z') - || b == '_' - || (b >= '0' && b <= '9' && i > 0)) - { + if !(b.is_ascii_alphabetic() || b == '_' || (b.is_ascii_digit() && i > 0)) { return false; } } true } -fn is_quoted_label_name(name: &str, validation_scheme: &ValidationScheme) -> bool { - *validation_scheme == ValidationScheme::UTF8Validation && !is_valid_legacy_label_name(name) +fn is_quoted_label_name(name: &str, validation_scheme: ValidationScheme) -> bool { + validation_scheme == ValidationScheme::UTF8Validation && !is_valid_legacy_label_name(name) } /// Enum for determining how metric and label names will /// be escaped. -#[derive(Debug, Default, Clone)] +#[derive(Debug, Default, Clone, Copy)] pub enum EscapingScheme { /// Replaces all legacy-invalid characters with underscores. #[default] @@ -863,17 +859,19 @@ impl EscapingScheme { } } -fn escape_name(name: &str, scheme: &EscapingScheme) -> String { +fn escape_name(name: &str, scheme: EscapingScheme) -> Cow<'_, str> { if name.is_empty() { - return name.to_string(); + return name.into(); } - let mut escaped = String::new(); match scheme { - EscapingScheme::NoEscaping => return name.to_string(), + EscapingScheme::NoEscaping => name.into(), + EscapingScheme::UnderscoreEscaping | EscapingScheme::ValueEncodingEscaping + if is_valid_legacy_metric_name(name) => + { + name.into() + } EscapingScheme::UnderscoreEscaping => { - if is_valid_legacy_metric_name(name) { - return name.to_string(); - } + let mut escaped = String::with_capacity(name.len()); for (i, b) in name.chars().enumerate() { if is_valid_legacy_char(b, i) { escaped.push(b); @@ -881,8 +879,10 @@ fn escape_name(name: &str, scheme: &EscapingScheme) -> String { escaped.push('_'); } } + escaped.into() } EscapingScheme::DotsEscaping => { + let mut escaped = String::with_capacity(name.len()); for (i, b) in name.chars().enumerate() { if b == '_' { escaped.push_str("__"); @@ -894,11 +894,10 @@ fn escape_name(name: &str, scheme: &EscapingScheme) -> String { escaped.push_str("__"); } } + escaped.into() } EscapingScheme::ValueEncodingEscaping => { - if is_valid_legacy_metric_name(name) { - return name.to_string(); - } + let mut escaped = String::with_capacity(name.len()); escaped.push_str("U__"); for (i, b) in name.chars().enumerate() { if b == '_' { @@ -909,9 +908,9 @@ fn escape_name(name: &str, scheme: &EscapingScheme) -> String { write!(escaped, "_{:x}_", b as i64).unwrap(); } } + escaped.into() } } - escaped } /// Returns the escaping scheme to use based on the given header. @@ -1120,13 +1119,13 @@ mod tests { ]; for scenario in scenarios { - let result = escape_name(scenario.input, &EscapingScheme::UnderscoreEscaping); + let result = escape_name(scenario.input, EscapingScheme::UnderscoreEscaping); assert_eq!(result, scenario.expected_underscores, "{}", scenario.name); - let result = escape_name(scenario.input, &EscapingScheme::DotsEscaping); + let result = escape_name(scenario.input, EscapingScheme::DotsEscaping); assert_eq!(result, scenario.expected_dots, "{}", scenario.name); - let result = escape_name(scenario.input, &EscapingScheme::ValueEncodingEscaping); + let result = escape_name(scenario.input, EscapingScheme::ValueEncodingEscaping); assert_eq!(result, scenario.expected_value, "{}", scenario.name); } } diff --git a/src/encoding/text.rs b/src/encoding/text.rs index ac6b2e5..63d139d 100644 --- a/src/encoding/text.rs +++ b/src/encoding/text.rs @@ -148,8 +148,8 @@ where registry.encode( &mut DescriptorEncoder::new( writer, - ®istry.name_validation_scheme(), - ®istry.escaping_scheme(), + registry.name_validation_scheme(), + registry.escaping_scheme(), ) .into(), ) @@ -196,8 +196,8 @@ pub(crate) struct DescriptorEncoder<'a> { writer: &'a mut dyn Write, prefix: Option<&'a Prefix>, labels: &'a [(Cow<'static, str>, Cow<'static, str>)], - name_validation_scheme: &'a ValidationScheme, - escaping_scheme: &'a EscapingScheme, + name_validation_scheme: ValidationScheme, + escaping_scheme: EscapingScheme, } impl std::fmt::Debug for DescriptorEncoder<'_> { @@ -207,11 +207,11 @@ impl std::fmt::Debug for DescriptorEncoder<'_> { } impl DescriptorEncoder<'_> { - pub(crate) fn new<'a>( - writer: &'a mut dyn Write, - name_validation_scheme: &'a ValidationScheme, - escaping_scheme: &'a EscapingScheme, - ) -> DescriptorEncoder<'a> { + pub(crate) fn new( + writer: &'_ mut dyn Write, + name_validation_scheme: ValidationScheme, + escaping_scheme: EscapingScheme, + ) -> DescriptorEncoder<'_> { DescriptorEncoder { writer, prefix: Default::default(), @@ -230,8 +230,8 @@ impl DescriptorEncoder<'_> { prefix, labels, writer: self.writer, - name_validation_scheme: &self.name_validation_scheme, - escaping_scheme: &self.escaping_scheme, + name_validation_scheme: self.name_validation_scheme, + escaping_scheme: self.escaping_scheme, } } @@ -246,14 +246,12 @@ impl DescriptorEncoder<'_> { let mut escaped_prefix: Option<&Prefix> = None; let escaped_prefix_value: Prefix; if let Some(prefix) = self.prefix { - escaped_prefix_value = Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme)); + escaped_prefix_value = + Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme).into_owned()); escaped_prefix = Some(&escaped_prefix_value); } - let is_quoted_metric_name = is_quoted_metric_name( - escaped_name.as_str(), - escaped_prefix, - self.name_validation_scheme, - ); + let is_quoted_metric_name = + is_quoted_metric_name(&escaped_name, escaped_prefix, self.name_validation_scheme); self.writer.write_str("# HELP ")?; if is_quoted_metric_name { self.writer.write_str("\"")?; @@ -262,7 +260,7 @@ impl DescriptorEncoder<'_> { self.writer.write_str(prefix.as_str())?; self.writer.write_str("_")?; } - self.writer.write_str(escaped_name.as_str())?; + self.writer.write_str(&escaped_name)?; if let Some(unit) = unit { self.writer.write_str("_")?; self.writer.write_str(unit.as_str())?; @@ -282,7 +280,7 @@ impl DescriptorEncoder<'_> { self.writer.write_str(prefix.as_str())?; self.writer.write_str("_")?; } - self.writer.write_str(escaped_name.as_str())?; + self.writer.write_str(&escaped_name)?; if let Some(unit) = unit { self.writer.write_str("_")?; self.writer.write_str(unit.as_str())?; @@ -303,7 +301,7 @@ impl DescriptorEncoder<'_> { self.writer.write_str(prefix.as_str())?; self.writer.write_str("_")?; } - self.writer.write_str(escaped_name.as_str())?; + self.writer.write_str(&escaped_name)?; self.writer.write_str("_")?; self.writer.write_str(unit.as_str())?; if is_quoted_metric_name { @@ -343,8 +341,8 @@ pub(crate) struct MetricEncoder<'a> { unit: Option<&'a Unit>, const_labels: &'a [(Cow<'static, str>, Cow<'static, str>)], family_labels: Option<&'a dyn super::EncodeLabelSet>, - name_validation_scheme: &'a ValidationScheme, - escaping_scheme: &'a EscapingScheme, + name_validation_scheme: ValidationScheme, + escaping_scheme: EscapingScheme, } impl std::fmt::Debug for MetricEncoder<'_> { @@ -534,14 +532,12 @@ impl MetricEncoder<'_> { let mut escaped_prefix: Option<&Prefix> = None; let escaped_prefix_value: Prefix; if let Some(prefix) = self.prefix { - escaped_prefix_value = Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme)); + escaped_prefix_value = + Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme).into_owned()); escaped_prefix = Some(&escaped_prefix_value); } - let is_quoted_metric_name = is_quoted_metric_name( - escaped_name.as_str(), - escaped_prefix, - self.name_validation_scheme, - ); + let is_quoted_metric_name = + is_quoted_metric_name(&escaped_name, escaped_prefix, self.name_validation_scheme); if is_quoted_metric_name { self.writer.write_str("{")?; self.writer.write_str("\"")?; @@ -550,7 +546,7 @@ impl MetricEncoder<'_> { self.writer.write_str(prefix.as_str())?; self.writer.write_str("_")?; } - self.writer.write_str(escaped_name.as_str())?; + self.writer.write_str(&escaped_name)?; if let Some(unit) = self.unit { self.writer.write_str("_")?; self.writer.write_str(unit.as_str())?; @@ -576,14 +572,12 @@ impl MetricEncoder<'_> { let mut escaped_prefix: Option<&Prefix> = None; let escaped_prefix_value: Prefix; if let Some(prefix) = self.prefix { - escaped_prefix_value = Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme)); + escaped_prefix_value = + Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme).into_owned()); escaped_prefix = Some(&escaped_prefix_value); } - let is_quoted_metric_name = is_quoted_metric_name( - escaped_name.as_str(), - escaped_prefix, - self.name_validation_scheme, - ); + let is_quoted_metric_name = + is_quoted_metric_name(&escaped_name, escaped_prefix, self.name_validation_scheme); if self.const_labels.is_empty() && additional_labels.is_none() && self.family_labels.is_none() @@ -750,8 +744,8 @@ impl ExemplarValueEncoder<'_> { pub(crate) struct LabelSetEncoder<'a> { writer: &'a mut dyn Write, first: bool, - name_validation_scheme: &'a ValidationScheme, - escaping_scheme: &'a EscapingScheme, + name_validation_scheme: ValidationScheme, + escaping_scheme: EscapingScheme, } impl std::fmt::Debug for LabelSetEncoder<'_> { @@ -765,8 +759,8 @@ impl std::fmt::Debug for LabelSetEncoder<'_> { impl<'a> LabelSetEncoder<'a> { fn new( writer: &'a mut dyn Write, - name_validation_scheme: &'a ValidationScheme, - escaping_scheme: &'a EscapingScheme, + name_validation_scheme: ValidationScheme, + escaping_scheme: EscapingScheme, ) -> Self { Self { writer, @@ -791,8 +785,8 @@ impl<'a> LabelSetEncoder<'a> { pub(crate) struct LabelEncoder<'a> { writer: &'a mut dyn Write, first: bool, - name_validation_scheme: &'a ValidationScheme, - escaping_scheme: &'a EscapingScheme, + name_validation_scheme: ValidationScheme, + escaping_scheme: EscapingScheme, } impl std::fmt::Debug for LabelEncoder<'_> { @@ -818,8 +812,8 @@ impl LabelEncoder<'_> { pub(crate) struct LabelKeyEncoder<'a> { writer: &'a mut dyn Write, - name_validation_scheme: &'a ValidationScheme, - escaping_scheme: &'a EscapingScheme, + name_validation_scheme: ValidationScheme, + escaping_scheme: EscapingScheme, } impl std::fmt::Debug for LabelKeyEncoder<'_> { @@ -840,12 +834,11 @@ impl<'a> LabelKeyEncoder<'a> { impl std::fmt::Write for LabelKeyEncoder<'_> { fn write_str(&mut self, s: &str) -> std::fmt::Result { let escaped_name = escape_name(s, self.escaping_scheme); - let is_quoted_label_name = - is_quoted_label_name(escaped_name.as_str(), self.name_validation_scheme); + let is_quoted_label_name = is_quoted_label_name(&escaped_name, self.name_validation_scheme); if is_quoted_label_name { self.writer.write_str("\"")?; } - self.writer.write_str(escaped_name.as_str())?; + self.writer.write_str(&escaped_name)?; if is_quoted_label_name { self.writer.write_str("\"")?; } @@ -1043,8 +1036,7 @@ mod tests { encode(&mut encoded, ®istry).unwrap(); - let expected = "# HELP \"my.gauge\"\" My\ngau\nge\".\n" - .to_owned() + let expected = "# HELP \"my.gauge\"\" My\ngau\nge\".\n".to_owned() + "# TYPE \"my.gauge\"\" gauge\n" + "{\"my.gauge\"\"} inf\n" + "# EOF\n"; @@ -1646,8 +1638,8 @@ mod tests { unit: None, const_labels: &[], family_labels: None, - name_validation_scheme: &UTF8Validation, - escaping_scheme: &NoEscaping, + name_validation_scheme: UTF8Validation, + escaping_scheme: NoEscaping, }; encoder.encode_labels::(None).unwrap(); @@ -1667,8 +1659,8 @@ mod tests { unit: None, const_labels: &const_labels, family_labels: Some(&family_labels), - name_validation_scheme: &UTF8Validation, - escaping_scheme: &NoEscaping, + name_validation_scheme: UTF8Validation, + escaping_scheme: NoEscaping, }; encoder.encode_labels(Some(&additional_labels)).unwrap(); @@ -1688,12 +1680,15 @@ mod tests { unit: None, const_labels: &const_labels, family_labels: Some(&family_labels), - name_validation_scheme: &UTF8Validation, - escaping_scheme: &NoEscaping, + name_validation_scheme: UTF8Validation, + escaping_scheme: NoEscaping, }; encoder.encode_labels(Some(&additional_labels)).unwrap(); - assert_eq!(buffer, "{\"service.name\"=\"t1\",\"whatever\\whatever\"=\"t2\",\"t*3\"=\"t3\"}"); + assert_eq!( + buffer, + "{\"service.name\"=\"t1\",\"whatever\\whatever\"=\"t2\",\"t*3\"=\"t3\"}" + ); } fn parse_with_python_client(input: String) { diff --git a/src/registry.rs b/src/registry.rs index 2d08298..f5899f2 100644 --- a/src/registry.rs +++ b/src/registry.rs @@ -101,12 +101,12 @@ impl Registry { /// Returns the given Registry's name validation scheme. pub fn name_validation_scheme(&self) -> ValidationScheme { - self.name_validation_scheme.clone() + self.name_validation_scheme } /// Returns the given Registry's escaping scheme. pub fn escaping_scheme(&self) -> EscapingScheme { - self.escaping_scheme.clone() + self.escaping_scheme } /// Sets the escaping scheme for the [`RegistryBuilder`].