From b87a827fdf0d2e4d53ee37579852d1d880de3d8f Mon Sep 17 00:00:00 2001 From: Zhongyang Wu Date: Sun, 23 Jul 2023 23:11:20 -0700 Subject: [PATCH] fix: unit conversion in prom exporter (#1157) * fix: unit conversion in prom exporter * re-generate && make fmt * add change log * fix: proto changes * use split_once --- .gitmodules | 2 +- opentelemetry-prometheus/CHANGELOG.md | 1 + opentelemetry-prometheus/src/lib.rs | 109 ++------------ opentelemetry-prometheus/src/utils.rs | 196 ++++++++++++++++++++++++++ 4 files changed, 210 insertions(+), 98 deletions(-) create mode 100644 opentelemetry-prometheus/src/utils.rs diff --git a/.gitmodules b/.gitmodules index f393014010..bd4ed5eb06 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "opentelemetry-proto/src/proto/opentelemetry-proto"] path = opentelemetry-proto/src/proto/opentelemetry-proto url = https://github.com/open-telemetry/opentelemetry-proto - branch = tags/v0.19.0 + branch = tags/v1.0.0 diff --git a/opentelemetry-prometheus/CHANGELOG.md b/opentelemetry-prometheus/CHANGELOG.md index a76ee7206b..1d00891d37 100644 --- a/opentelemetry-prometheus/CHANGELOG.md +++ b/opentelemetry-prometheus/CHANGELOG.md @@ -5,6 +5,7 @@ ### Added - Add `with_namespace` option to exporter config. +- Add more units conversions between OTEL metrics and prometheus metrics [#1157](https://github.com/open-telemetry/opentelemetry-rust/pull/1157). - Add `without_counter_suffixes` option to exporter config. ## v0.12.0 diff --git a/opentelemetry-prometheus/src/lib.rs b/opentelemetry-prometheus/src/lib.rs index 0a0c650096..4291c9dd6f 100644 --- a/opentelemetry-prometheus/src/lib.rs +++ b/opentelemetry-prometheus/src/lib.rs @@ -96,7 +96,7 @@ use once_cell::sync::{Lazy, OnceCell}; use opentelemetry_api::{ global, - metrics::{MetricsError, Result, Unit}, + metrics::{MetricsError, Result}, Context, Key, Value, }; use opentelemetry_sdk::{ @@ -132,6 +132,7 @@ const SCOPE_INFO_KEYS: [&str; 2] = ["otel_scope_name", "otel_scope_version"]; const COUNTER_SUFFIX: &str = "_total"; mod config; +mod utils; pub use config::ExporterBuilder; @@ -259,14 +260,16 @@ impl Collector { } fn get_name(&self, m: &data::Metric) -> Cow<'static, str> { - let name = sanitize_name(&m.name); - match ( - &self.namespace, - get_unit_suffixes(&m.unit).filter(|_| !self.without_units), - ) { - (Some(namespace), Some(suffix)) => Cow::Owned(format!("{namespace}{name}{suffix}")), + let name = utils::sanitize_name(&m.name); + let unit_suffixes = if self.without_units { + None + } else { + utils::get_unit_suffixes(&m.unit) + }; + match (&self.namespace, unit_suffixes) { + (Some(namespace), Some(suffix)) => Cow::Owned(format!("{namespace}{name}_{suffix}")), (Some(namespace), None) => Cow::Owned(format!("{namespace}{name}")), - (None, Some(suffix)) => Cow::Owned(format!("{name}{suffix}")), + (None, Some(suffix)) => Cow::Owned(format!("{name}_{suffix}")), (None, None) => name, } } @@ -378,7 +381,7 @@ impl prometheus::core::Collector for Collector { fn get_attrs(kvs: &mut dyn Iterator, extra: &[LabelPair]) -> Vec { let mut keys_map = BTreeMap::>::new(); for (key, value) in kvs { - let key = sanitize_prom_kv(key.as_str()); + let key = utils::sanitize_prom_kv(key.as_str()); keys_map .entry(key) .and_modify(|v| v.push(value.to_string())) @@ -587,63 +590,6 @@ fn create_scope_info_metric(scope: &Scope) -> MetricFamily { mf } -fn get_unit_suffixes(unit: &Unit) -> Option<&'static str> { - match unit.as_str() { - "1" => Some("_ratio"), - "By" => Some("_bytes"), - "ms" => Some("_milliseconds"), - _ => None, - } -} - -#[allow(clippy::ptr_arg)] -fn sanitize_name(s: &Cow<'static, str>) -> Cow<'static, str> { - // prefix chars to add in case name starts with number - let mut prefix = ""; - - // Find first invalid char - if let Some((replace_idx, _)) = s.char_indices().find(|(i, c)| { - if *i == 0 && c.is_ascii_digit() { - // first char is number, add prefix and replace reset of chars - prefix = "_"; - true - } else { - // keep checking - !c.is_alphanumeric() && *c != '_' && *c != ':' - } - }) { - // up to `replace_idx` have been validated, convert the rest - let (valid, rest) = s.split_at(replace_idx); - Cow::Owned( - prefix - .chars() - .chain(valid.chars()) - .chain(rest.chars().map(|c| { - if c.is_ascii_alphanumeric() || c == '_' || c == ':' { - c - } else { - '_' - } - })) - .collect(), - ) - } else { - s.clone() // no invalid chars found, return existing - } -} - -fn sanitize_prom_kv(s: &str) -> String { - s.chars() - .map(|c| { - if c.is_ascii_alphanumeric() || c == ':' { - c - } else { - '_' - } - }) - .collect() -} - trait Numeric: fmt::Debug { // lossy at large values for u64 and i64 but prometheus only handles floats fn as_f64(&self) -> f64; @@ -666,34 +612,3 @@ impl Numeric for f64 { *self } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn name_sanitization() { - let tests = vec![ - ("nam€_with_3_width_rune", "nam__with_3_width_rune"), - ("`", "_"), - ( - r##"! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWKYZ[]\^_abcdefghijklmnopqrstuvwkyz{|}~"##, - "________________0123456789:______ABCDEFGHIJKLMNOPQRSTUVWKYZ_____abcdefghijklmnopqrstuvwkyz____", - ), - - ("Avalid_23name", "Avalid_23name"), - ("_Avalid_23name", "_Avalid_23name"), - ("1valid_23name", "_1valid_23name"), - ("avalid_23name", "avalid_23name"), - ("Ava:lid_23name", "Ava:lid_23name"), - ("a lid_23name", "a_lid_23name"), - (":leading_colon", ":leading_colon"), - ("colon:in:the:middle", "colon:in:the:middle"), - ("", ""), - ]; - - for (input, want) in tests { - assert_eq!(want, sanitize_name(&input.into()), "input: {input}") - } - } -} diff --git a/opentelemetry-prometheus/src/utils.rs b/opentelemetry-prometheus/src/utils.rs new file mode 100644 index 0000000000..fd30cebd01 --- /dev/null +++ b/opentelemetry-prometheus/src/utils.rs @@ -0,0 +1,196 @@ +use opentelemetry_api::metrics::Unit; +use std::borrow::Cow; + +const NON_APPLICABLE_ON_PER_UNIT: [&str; 8] = ["1", "d", "h", "min", "s", "ms", "us", "ns"]; + +pub(crate) fn get_unit_suffixes(unit: &Unit) -> Option> { + // no unit return early + if unit.as_str().is_empty() { + return None; + } + + // direct match with known units + if let Some(matched) = get_prom_units(unit.as_str()) { + return Some(Cow::Borrowed(matched)); + } + + // converting foo/bar to foo_per_bar + // split the string by the first '/' + // if the first part is empty, we just return the second part if it's a match with known per unit + // e.g + // "test/y" => "per_year" + // "km/s" => "kilometers_per_second" + if let Some((first, second)) = unit.as_str().split_once('/') { + return match ( + NON_APPLICABLE_ON_PER_UNIT.contains(&first), + get_prom_units(first), + get_prom_per_unit(second), + ) { + (true, _, Some(second_part)) | (false, None, Some(second_part)) => { + Some(Cow::Owned(format!("per_{}", second_part))) + } + (false, Some(first_part), Some(second_part)) => { + Some(Cow::Owned(format!("{}_per_{}", first_part, second_part))) + } + _ => None, + }; + } + + None +} + +fn get_prom_units(unit: &str) -> Option<&'static str> { + match unit { + // Time + "d" => Some("days"), + "h" => Some("hours"), + "min" => Some("minutes"), + "s" => Some("seconds"), + "ms" => Some("milliseconds"), + "us" => Some("microseconds"), + "ns" => Some("nanoseconds"), + "By" => Some("bytes"), + + // Bytes + "KiBy" => Some("kibibytes"), + "MiBy" => Some("mebibytes"), + "GiBy" => Some("gibibytes"), + "TiBy" => Some("tibibytes"), + "KBy" => Some("kilobytes"), + "MBy" => Some("megabytes"), + "GBy" => Some("gigabytes"), + "TBy" => Some("terabytes"), + "B" => Some("bytes"), + "KB" => Some("kilobytes"), + "MB" => Some("megabytes"), + "GB" => Some("gigabytes"), + "TB" => Some("terabytes"), + + // SI + "m" => Some("meters"), + "V" => Some("volts"), + "A" => Some("amperes"), + "J" => Some("joules"), + "W" => Some("watts"), + "g" => Some("grams"), + + // Misc + "Cel" => Some("celsius"), + "Hz" => Some("hertz"), + "1" => Some("ratio"), + "%" => Some("percent"), + "$" => Some("dollars"), + _ => None, + } +} + +fn get_prom_per_unit(unit: &str) -> Option<&'static str> { + match unit { + "s" => Some("second"), + "m" => Some("minute"), + "h" => Some("hour"), + "d" => Some("day"), + "w" => Some("week"), + "mo" => Some("month"), + "y" => Some("year"), + _ => None, + } +} + +#[allow(clippy::ptr_arg)] +pub(crate) fn sanitize_name(s: &Cow<'static, str>) -> Cow<'static, str> { + // prefix chars to add in case name starts with number + let mut prefix = ""; + + // Find first invalid char + if let Some((replace_idx, _)) = s.char_indices().find(|(i, c)| { + if *i == 0 && c.is_ascii_digit() { + // first char is number, add prefix and replace reset of chars + prefix = "_"; + true + } else { + // keep checking + !c.is_alphanumeric() && *c != '_' && *c != ':' + } + }) { + // up to `replace_idx` have been validated, convert the rest + let (valid, rest) = s.split_at(replace_idx); + Cow::Owned( + prefix + .chars() + .chain(valid.chars()) + .chain(rest.chars().map(|c| { + if c.is_ascii_alphanumeric() || c == '_' || c == ':' { + c + } else { + '_' + } + })) + .collect(), + ) + } else { + s.clone() // no invalid chars found, return existing + } +} + +pub(crate) fn sanitize_prom_kv(s: &str) -> String { + s.chars() + .map(|c| { + if c.is_ascii_alphanumeric() || c == ':' { + c + } else { + '_' + } + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn name_sanitization() { + let tests = vec![ + ("nam€_with_3_width_rune", "nam__with_3_width_rune"), + ("`", "_"), + ( + r##"! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWKYZ[]\^_abcdefghijklmnopqrstuvwkyz{|}~"##, + "________________0123456789:______ABCDEFGHIJKLMNOPQRSTUVWKYZ_____abcdefghijklmnopqrstuvwkyz____", + ), + + ("Avalid_23name", "Avalid_23name"), + ("_Avalid_23name", "_Avalid_23name"), + ("1valid_23name", "_1valid_23name"), + ("avalid_23name", "avalid_23name"), + ("Ava:lid_23name", "Ava:lid_23name"), + ("a lid_23name", "a_lid_23name"), + (":leading_colon", ":leading_colon"), + ("colon:in:the:middle", "colon:in:the:middle"), + ("", ""), + ]; + + for (input, want) in tests { + assert_eq!(want, sanitize_name(&input.into()), "input: {input}") + } + } + + #[test] + fn test_get_unit_suffixes() { + let test_cases = vec![ + // Direct match + ("g", Some(Cow::Borrowed("grams"))), + // Per unit + ("test/y", Some(Cow::Owned("per_year".to_owned()))), + ("1/y", Some(Cow::Owned("per_year".to_owned()))), + ("m/s", Some(Cow::Owned("meters_per_second".to_owned()))), + // No match + ("invalid", None), + ("", None), + ]; + for (unit_str, expected_suffix) in test_cases { + let unit = Unit::new(unit_str); + assert_eq!(get_unit_suffixes(&unit), expected_suffix); + } + } +}