Skip to content

Commit

Permalink
Add support for sourced metrics. (paritytech#6895)
Browse files Browse the repository at this point in the history
* Add support for sourced metrics.

A sourced metric is a metric that obtains its values
from an existing source, rather than the values being
independently recorded. It thus allows collecting
metrics from existing counters or gauges without
having to duplicate them in a dedicated prometheus
counter or gauge (and hence another atomic value).

The first use-case is to feed the bandwidth counters
from libp2p directly into prometheus.

* Tabs, not spaces.

* Tweak bandwidth counter registration.

* Add debug assertion for variable labels and values.

* Document monotonicity requirement for sourced counters.

* CI

* Update client/network/src/service.rs

Co-authored-by: Max Inden <[email protected]>

Co-authored-by: Max Inden <[email protected]>
  • Loading branch information
romanb and mxinden authored Aug 18, 2020
1 parent 8e1ed7d commit f8c83bd
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 18 deletions.
59 changes: 41 additions & 18 deletions client/network/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ use parking_lot::Mutex;
use prometheus_endpoint::{
register, Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramOpts, HistogramVec, Opts,
PrometheusError, Registry, U64,
SourcedCounter, MetricSource
};
use sc_peerset::PeersetHandle;
use sp_consensus::import_queue::{BlockImportError, BlockImportResult, ImportQueue, Link};
Expand Down Expand Up @@ -240,12 +241,6 @@ impl<B: BlockT + 'static, H: ExHashT> NetworkWorker<B, H> {
local_peer_id_legacy
);

// Initialize the metrics.
let metrics = match &params.metrics_registry {
Some(registry) => Some(Metrics::register(&registry)?),
None => None
};

let checker = params.on_demand.as_ref()
.map(|od| od.checker().clone())
.unwrap_or_else(|| Arc::new(AlwaysBadChecker));
Expand Down Expand Up @@ -353,6 +348,17 @@ impl<B: BlockT + 'static, H: ExHashT> NetworkWorker<B, H> {
(builder.build(), bandwidth)
};

// Initialize the metrics.
let metrics = match &params.metrics_registry {
Some(registry) => {
// Sourced metrics.
BandwidthCounters::register(registry, bandwidth.clone())?;
// Other (i.e. new) metrics.
Some(Metrics::register(registry)?)
}
None => None
};

// Listen on multiaddresses.
for addr in &params.network_config.listen_addresses {
if let Err(err) = Swarm::<B, H>::listen_on(&mut swarm, addr.clone()) {
Expand Down Expand Up @@ -1152,9 +1158,6 @@ struct Metrics {
kbuckets_num_nodes: GaugeVec<U64>,
listeners_local_addresses: Gauge<U64>,
listeners_errors_total: Counter<U64>,
// Note: `network_bytes_total` is a monotonic gauge obtained by
// sampling an existing counter.
network_bytes_total: GaugeVec<U64>,
notifications_sizes: HistogramVec,
notifications_streams_closed_total: CounterVec<U64>,
notifications_streams_opened_total: CounterVec<U64>,
Expand All @@ -1168,6 +1171,35 @@ struct Metrics {
requests_out_started_total: CounterVec<U64>,
}

/// The source for bandwidth metrics.
///
/// Wraps the shared `transport::BandwidthSinks` whose inbound and outbound
/// totals are reported through the `MetricSource` implementation of this type.
#[derive(Clone)]
struct BandwidthCounters(Arc<transport::BandwidthSinks>);

impl BandwidthCounters {
	/// Registers the `sub_libp2p_network_bytes_total` sourced counter with
	/// `registry`, using `sinks` as the source of its values.
	///
	/// The counter has a single variable label, `direction`.
	///
	/// # Errors
	///
	/// Fails if the sourced counter cannot be created from its options or if
	/// registering it with `registry` fails.
	fn register(registry: &Registry, sinks: Arc<transport::BandwidthSinks>)
		-> Result<(), PrometheusError>
	{
		let opts = Opts::new(
			"sub_libp2p_network_bytes_total",
			"Total bandwidth usage"
		).variable_label("direction");

		let counter = SourcedCounter::new(&opts, BandwidthCounters(sinks))?;

		// The handle returned by `register` is not needed: the values are
		// pulled from the sinks whenever the metric is collected.
		register(counter, registry)?;

		Ok(())
	}
}

impl MetricSource for BandwidthCounters {
	type N = u64;

	/// Reports the current inbound and outbound totals of the wrapped
	/// bandwidth sinks.
	///
	/// `set` is invoked twice: once with the label value `"in"` and the
	/// inbound total, and once with `"out"` and the outbound total.
	fn collect(&self, mut set: impl FnMut(&[&str], Self::N)) {
		// String literals are already `&'static str`; no extra borrow is
		// needed to build the `&[&str]` label slice.
		set(&["in"], self.0.total_inbound());
		set(&["out"], self.0.total_outbound());
	}
}

impl Metrics {
fn register(registry: &Registry) -> Result<Self, PrometheusError> {
Ok(Self {
Expand Down Expand Up @@ -1271,13 +1303,6 @@ impl Metrics {
"sub_libp2p_listeners_errors_total",
"Total number of non-fatal errors reported by a listener"
)?, registry)?,
network_bytes_total: register(GaugeVec::new(
Opts::new(
"sub_libp2p_network_bytes_total",
"Total bandwidth usage"
),
&["direction"]
)?, registry)?,
notifications_sizes: register(HistogramVec::new(
HistogramOpts {
common_opts: Opts::new(
Expand Down Expand Up @@ -1725,8 +1750,6 @@ impl<B: BlockT + 'static, H: ExHashT> Future for NetworkWorker<B, H> {
this.is_major_syncing.store(is_major_syncing, Ordering::Relaxed);

if let Some(metrics) = this.metrics.as_ref() {
metrics.network_bytes_total.with_label_values(&["in"]).set(this.service.bandwidth.total_inbound());
metrics.network_bytes_total.with_label_values(&["out"]).set(this.service.bandwidth.total_outbound());
metrics.is_major_syncing.set(is_major_syncing as u64);
for (proto, num_entries) in this.network_service.num_kbuckets_entries() {
let proto = maybe_utf8_bytes_to_string(proto.as_bytes());
Expand Down
3 changes: 3 additions & 0 deletions utils/prometheus/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ use std::net::SocketAddr;

#[cfg(not(target_os = "unknown"))]
mod networking;
mod sourced;

pub use sourced::{SourcedCounter, SourcedGauge, MetricSource};

#[cfg(target_os = "unknown")]
pub use unknown_os::init_prometheus;
Expand Down
143 changes: 143 additions & 0 deletions utils/prometheus/src/sourced.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
// Copyright 2020 Parity Technologies (UK) Ltd.
// This file is part of Substrate.

// Substrate is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Substrate is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.

//! Metrics that are collected from existing sources.
use prometheus::core::{Collector, Desc, Describer, Number, Opts};
use prometheus::proto;
use std::{cmp::Ordering, marker::PhantomData};

/// A counter whose values are obtained from an existing source.
///
/// > **Note**: The counter values provided by the source `S`
/// > must be monotonically increasing. Otherwise use a
/// > [`SourcedGauge`] instead.
pub type SourcedCounter<S> = SourcedMetric<Counter, S>;

/// A gauge whose values are obtained from an existing source.
///
/// Unlike a [`SourcedCounter`], the values provided by the source `S`
/// are not required to be monotonically increasing.
pub type SourcedGauge<S> = SourcedMetric<Gauge, S>;

/// The type of a sourced counter.
///
/// This is an uninhabited marker type: it has no values and only selects
/// the counter flavour of [`SourcedMetric`] at the type level.
///
/// `Debug` is derived because the `Debug` implementation derived for
/// [`SourcedMetric`] requires its type parameters to be `Debug`; without
/// it a [`SourcedCounter`] could never be formatted with `{:?}`.
#[derive(Debug, Copy, Clone)]
pub enum Counter {}

/// The type of a sourced gauge.
///
/// This is an uninhabited marker type: it has no values and only selects
/// the gauge flavour of [`SourcedMetric`] at the type level.
///
/// `Debug` is derived because the `Debug` implementation derived for
/// [`SourcedMetric`] requires its type parameters to be `Debug`; without
/// it a [`SourcedGauge`] could never be formatted with `{:?}`.
#[derive(Debug, Copy, Clone)]
pub enum Gauge {}

/// A metric whose values are obtained from an existing source,
/// instead of being independently recorded.
///
/// The type parameter `T` selects the kind of metric ([`Counter`] or
/// [`Gauge`]) and `S` is the source the values are read from.
#[derive(Debug, Clone)]
pub struct SourcedMetric<T, S> {
/// The source that is queried for the current values on every collection.
source: S,
/// The descriptor (name, help text and labels) built from the `Opts`
/// passed to `new`.
desc: Desc,
/// Zero-sized marker recording the metric type `T`; no `T` value is stored.
_type: PhantomData<T>,
}

/// A source of values for a [`SourcedMetric`].
///
/// Implementations must be `Sync + Send + Clone`, as required by the
/// supertrait bounds.
pub trait MetricSource: Sync + Send + Clone {
/// The type of the collected values.
type N: Number;
/// Collects the current values of the metrics from the source.
///
/// The implementation calls `set` once for every value it reports,
/// passing the label values that identify the sample together with the
/// sample's value. A [`SourcedMetric`] pairs the label values positionally
/// with its variable labels, so the number of label values should equal
/// the number of variable labels of the metric.
fn collect(&self, set: impl FnMut(&[&str], Self::N));
}

impl<T: SourcedType, S: MetricSource> SourcedMetric<T, S> {
	/// Builds a metric described by `opts` whose values are read from `source`.
	///
	/// # Errors
	///
	/// Returns the error produced by `opts.describe()` when no descriptor
	/// can be built from `opts`.
	pub fn new(opts: &Opts, source: S) -> prometheus::Result<Self> {
		opts.describe().map(|desc| Self {
			source,
			desc,
			_type: PhantomData,
		})
	}
}

impl<T: SourcedType, S: MetricSource> Collector for SourcedMetric<T, S> {
fn desc(&self) -> Vec<&Desc> {
vec![&self.desc]
}

fn collect(&self) -> Vec<proto::MetricFamily> {
let mut counters = Vec::new();

self.source.collect(|label_values, value| {
let mut m = proto::Metric::default();

match T::proto() {
proto::MetricType::COUNTER => {
let mut c = proto::Counter::default();
c.set_value(value.into_f64());
m.set_counter(c);
}
proto::MetricType::GAUGE => {
let mut g = proto::Gauge::default();
g.set_value(value.into_f64());
m.set_gauge(g);
}
t => {
log::error!("Unsupported sourced metric type: {:?}", t);
}
}

debug_assert_eq!(self.desc.variable_labels.len(), label_values.len());
match self.desc.variable_labels.len().cmp(&label_values.len()) {
Ordering::Greater =>
log::warn!("Missing label values for sourced metric {}", self.desc.fq_name),
Ordering::Less =>
log::warn!("Too many label values for sourced metric {}", self.desc.fq_name),
Ordering::Equal => {}
}

m.set_label(self.desc.variable_labels.iter().zip(label_values)
.map(|(l_name, l_value)| {
let mut l = proto::LabelPair::default();
l.set_name(l_name.to_string());
l.set_value(l_value.to_string());
l
})
.chain(self.desc.const_label_pairs.iter().cloned())
.collect::<Vec<_>>());

counters.push(m);
});

let mut m = proto::MetricFamily::default();
m.set_name(self.desc.fq_name.clone());
m.set_help(self.desc.help.clone());
m.set_field_type(T::proto());
m.set_metric(counters);

vec![m]
}
}

/// Types of metrics that can obtain their values from an existing source.
///
/// The trait requires `private::Sealed`, which is declared in a private
/// module, so it cannot be implemented outside of this module. The
/// implementors here are [`Counter`] and [`Gauge`].
pub trait SourcedType: private::Sealed + Sync + Send {
// Returns the Prometheus metric type that this kind of metric is exported as.
#[doc(hidden)]
fn proto() -> proto::MetricType;
}

// Sourced counters are exported with the Prometheus `COUNTER` metric type.
impl SourcedType for Counter {
fn proto() -> proto::MetricType { proto::MetricType::COUNTER }
}

// Sourced gauges are exported with the Prometheus `GAUGE` metric type.
impl SourcedType for Gauge {
fn proto() -> proto::MetricType { proto::MetricType::GAUGE }
}

// Sealing module: `Sealed` is a supertrait of `SourcedType` but this module
// is private, so only the types listed here can implement `SourcedType`.
mod private {
pub trait Sealed {}
impl Sealed for super::Counter {}
impl Sealed for super::Gauge {}
}

0 comments on commit f8c83bd

Please sign in to comment.