Skip to content

Commit

Permalink
metrics: add cpu counters (#26796)
Browse files Browse the repository at this point in the history
This PR adds counter metrics for the CPU system and the Geth process.
Currently the only metrics available for these items are gauges. Gauges are
fine when the consumer scrapes metrics data at the same interval as Geth
produces new values (every 3 seconds), but it is likely that most consumers
will not scrape that often. Intervals of 10, 15, or maybe even 30 seconds
are probably more common.

So the problem is, how does the consumer estimate what the CPU was doing in
between scrapes. With a counter, it's easy ... you just subtract two
successive values and divide by the time to get a nice, accurate average.
But with a gauge, you can't do that. A gauge reading is an instantaneous
picture of what was happening at that moment, but it gives you no idea
about what was going on between scrapes. Taking an average of values is
meaningless.
  • Loading branch information
turboboost55 authored Mar 23, 2023
1 parent 8990c92 commit 7dc1007
Show file tree
Hide file tree
Showing 17 changed files with 312 additions and 13 deletions.
153 changes: 153 additions & 0 deletions metrics/counter_float64.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package metrics

import (
"sync"
)

// CounterFloat64 holds a float64 value that can be incremented and decremented.
type CounterFloat64 interface {
Clear()
Count() float64
Dec(float64)
Inc(float64)
Snapshot() CounterFloat64
}

// GetOrRegisterCounterFloat64 returns an existing CounterFloat64 or constructs and registers
// a new StandardCounterFloat64.
func GetOrRegisterCounterFloat64(name string, r Registry) CounterFloat64 {
if nil == r {
r = DefaultRegistry
}
return r.GetOrRegister(name, NewCounterFloat64).(CounterFloat64)
}

// GetOrRegisterCounterFloat64Forced returns an existing CounterFloat64 or constructs and registers a
// new CounterFloat64 no matter the global switch is enabled or not.
// Be sure to unregister the counter from the registry once it is of no use to
// allow for garbage collection.
func GetOrRegisterCounterFloat64Forced(name string, r Registry) CounterFloat64 {
if nil == r {
r = DefaultRegistry
}
return r.GetOrRegister(name, NewCounterFloat64Forced).(CounterFloat64)
}

// NewCounterFloat64 constructs a new StandardCounterFloat64.
func NewCounterFloat64() CounterFloat64 {
if !Enabled {
return NilCounterFloat64{}
}
return &StandardCounterFloat64{count: 0.0}
}

// NewCounterFloat64Forced constructs a new StandardCounterFloat64 and returns it no matter if
// the global switch is enabled or not.
func NewCounterFloat64Forced() CounterFloat64 {
return &StandardCounterFloat64{count: 0.0}
}

// NewRegisteredCounterFloat64 constructs and registers a new StandardCounterFloat64.
func NewRegisteredCounterFloat64(name string, r Registry) CounterFloat64 {
c := NewCounterFloat64()
if nil == r {
r = DefaultRegistry
}
r.Register(name, c)
return c
}

// NewRegisteredCounterFloat64Forced constructs and registers a new StandardCounterFloat64
// and launches a goroutine no matter the global switch is enabled or not.
// Be sure to unregister the counter from the registry once it is of no use to
// allow for garbage collection.
func NewRegisteredCounterFloat64Forced(name string, r Registry) CounterFloat64 {
c := NewCounterFloat64Forced()
if nil == r {
r = DefaultRegistry
}
r.Register(name, c)
return c
}

// CounterFloat64Snapshot is a read-only copy of another CounterFloat64.
type CounterFloat64Snapshot float64

// Clear panics.
func (CounterFloat64Snapshot) Clear() {
panic("Clear called on a CounterFloat64Snapshot")
}

// Count returns the value at the time the snapshot was taken.
func (c CounterFloat64Snapshot) Count() float64 { return float64(c) }

// Dec panics.
func (CounterFloat64Snapshot) Dec(float64) {
panic("Dec called on a CounterFloat64Snapshot")
}

// Inc panics.
func (CounterFloat64Snapshot) Inc(float64) {
panic("Inc called on a CounterFloat64Snapshot")
}

// Snapshot returns the snapshot.
func (c CounterFloat64Snapshot) Snapshot() CounterFloat64 { return c }

// NilCounterFloat64 is a no-op CounterFloat64.
type NilCounterFloat64 struct{}

// Clear is a no-op.
func (NilCounterFloat64) Clear() {}

// Count is a no-op.
func (NilCounterFloat64) Count() float64 { return 0.0 }

// Dec is a no-op.
func (NilCounterFloat64) Dec(i float64) {}

// Inc is a no-op.
func (NilCounterFloat64) Inc(i float64) {}

// Snapshot is a no-op.
func (NilCounterFloat64) Snapshot() CounterFloat64 { return NilCounterFloat64{} }

// StandardCounterFloat64 is the standard implementation of a CounterFloat64 and uses the
// sync.Mutex package to manage a single float64 value.
type StandardCounterFloat64 struct {
mutex sync.Mutex
count float64
}

// Clear sets the counter to zero.
func (c *StandardCounterFloat64) Clear() {
c.mutex.Lock()
defer c.mutex.Unlock()
c.count = 0.0
}

// Count returns the current value.
func (c *StandardCounterFloat64) Count() float64 {
c.mutex.Lock()
defer c.mutex.Unlock()
return c.count
}

// Dec decrements the counter by the given amount.
func (c *StandardCounterFloat64) Dec(v float64) {
c.mutex.Lock()
defer c.mutex.Unlock()
c.count -= v
}

// Inc increments the counter by the given amount.
func (c *StandardCounterFloat64) Inc(v float64) {
c.mutex.Lock()
defer c.mutex.Unlock()
c.count += v
}

// Snapshot returns a read-only copy of the counter.
func (c *StandardCounterFloat64) Snapshot() CounterFloat64 {
return CounterFloat64Snapshot(c.Count())
}
77 changes: 77 additions & 0 deletions metrics/counter_float_64_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package metrics

import "testing"

func BenchmarkCounterFloat64(b *testing.B) {
c := NewCounterFloat64()
b.ResetTimer()
for i := 0; i < b.N; i++ {
c.Inc(1.0)
}
}

func TestCounterFloat64Clear(t *testing.T) {
c := NewCounterFloat64()
c.Inc(1.0)
c.Clear()
if count := c.Count(); count != 0 {
t.Errorf("c.Count(): 0 != %v\n", count)
}
}

func TestCounterFloat64Dec1(t *testing.T) {
c := NewCounterFloat64()
c.Dec(1.0)
if count := c.Count(); count != -1.0 {
t.Errorf("c.Count(): -1.0 != %v\n", count)
}
}

func TestCounterFloat64Dec2(t *testing.T) {
c := NewCounterFloat64()
c.Dec(2.0)
if count := c.Count(); count != -2.0 {
t.Errorf("c.Count(): -2.0 != %v\n", count)
}
}

func TestCounterFloat64Inc1(t *testing.T) {
c := NewCounterFloat64()
c.Inc(1.0)
if count := c.Count(); count != 1.0 {
t.Errorf("c.Count(): 1.0 != %v\n", count)
}
}

func TestCounterFloat64Inc2(t *testing.T) {
c := NewCounterFloat64()
c.Inc(2.0)
if count := c.Count(); count != 2.0 {
t.Errorf("c.Count(): 2.0 != %v\n", count)
}
}

func TestCounterFloat64Snapshot(t *testing.T) {
c := NewCounterFloat64()
c.Inc(1.0)
snapshot := c.Snapshot()
c.Inc(1.0)
if count := snapshot.Count(); count != 1.0 {
t.Errorf("c.Count(): 1.0 != %v\n", count)
}
}

func TestCounterFloat64Zero(t *testing.T) {
c := NewCounterFloat64()
if count := c.Count(); count != 0 {
t.Errorf("c.Count(): 0 != %v\n", count)
}
}

func TestGetOrRegisterCounterFloat64(t *testing.T) {
r := NewRegistry()
NewRegisteredCounterFloat64("foo", r).Inc(47.0)
if c := GetOrRegisterCounterFloat64("foo", r); c.Count() != 47.0 {
t.Fatal(c)
}
}
7 changes: 7 additions & 0 deletions metrics/exp/exp.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ func (exp *exp) publishCounter(name string, metric metrics.Counter) {
v.Set(metric.Count())
}

func (exp *exp) publishCounterFloat64(name string, metric metrics.CounterFloat64) {
v := exp.getFloat(name)
v.Set(metric.Count())
}

func (exp *exp) publishGauge(name string, metric metrics.Gauge) {
v := exp.getInt(name)
v.Set(metric.Value())
Expand Down Expand Up @@ -167,6 +172,8 @@ func (exp *exp) syncToExpvar() {
switch i := i.(type) {
case metrics.Counter:
exp.publishCounter(name, i)
case metrics.CounterFloat64:
exp.publishCounterFloat64(name, i)
case metrics.Gauge:
exp.publishGauge(name, i)
case metrics.GaugeFloat64:
Expand Down
2 changes: 2 additions & 0 deletions metrics/graphite.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ func graphite(c *GraphiteConfig) error {
switch metric := i.(type) {
case Counter:
fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, metric.Count(), now)
case CounterFloat64:
fmt.Fprintf(w, "%s.%s.count %f %d\n", c.Prefix, name, metric.Count(), now)
case Gauge:
fmt.Fprintf(w, "%s.%s.value %d %d\n", c.Prefix, name, metric.Value(), now)
case GaugeFloat64:
Expand Down
10 changes: 10 additions & 0 deletions metrics/influxdb/influxdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,16 @@ func (r *reporter) send() error {
},
Time: now,
})
case metrics.CounterFloat64:
count := metric.Count()
pts = append(pts, client.Point{
Measurement: fmt.Sprintf("%s%s.count", namespace, name),
Tags: r.tags,
Fields: map[string]interface{}{
"value": count,
},
Time: now,
})
case metrics.Gauge:
ms := metric.Snapshot()
pts = append(pts, client.Point{
Expand Down
17 changes: 11 additions & 6 deletions metrics/influxdb/influxdbv2.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ type v2Reporter struct {

client influxdb2.Client
write api.WriteAPI

cache map[string]int64
}

// InfluxDBWithTags starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval with the specified tags
Expand All @@ -39,7 +37,6 @@ func InfluxDBV2WithTags(r metrics.Registry, d time.Duration, endpoint string, to
organization: organization,
namespace: namespace,
tags: tags,
cache: make(map[string]int64),
}

rep.client = influxdb2.NewClient(rep.endpoint, rep.token)
Expand Down Expand Up @@ -86,17 +83,25 @@ func (r *v2Reporter) send() {
switch metric := i.(type) {
case metrics.Counter:
v := metric.Count()
l := r.cache[name]

measurement := fmt.Sprintf("%s%s.count", namespace, name)
fields := map[string]interface{}{
"value": v - l,
"value": v,
}

pt := influxdb2.NewPoint(measurement, r.tags, fields, now)
r.write.WritePoint(pt)

r.cache[name] = v
case metrics.CounterFloat64:
v := metric.Count()

measurement := fmt.Sprintf("%s%s.count", namespace, name)
fields := map[string]interface{}{
"value": v,
}

pt := influxdb2.NewPoint(measurement, r.tags, fields, now)
r.write.WritePoint(pt)

case metrics.Gauge:
ms := metric.Snapshot()
Expand Down
11 changes: 11 additions & 0 deletions metrics/librato/librato.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,17 @@ func (rep *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot B
}
snapshot.Counters = append(snapshot.Counters, measurement)
}
case metrics.CounterFloat64:
if m.Count() > 0 {
measurement[Name] = fmt.Sprintf("%s.%s", name, "count")
measurement[Value] = m.Count()
measurement[Attributes] = map[string]interface{}{
DisplayUnitsLong: Operations,
DisplayUnitsShort: OperationsShort,
DisplayMin: "0",
}
snapshot.Counters = append(snapshot.Counters, measurement)
}
case metrics.Gauge:
measurement[Name] = name
measurement[Value] = float64(m.Value())
Expand Down
3 changes: 3 additions & 0 deletions metrics/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ func LogScaled(r Registry, freq time.Duration, scale time.Duration, l Logger) {
case Counter:
l.Printf("counter %s\n", name)
l.Printf(" count: %9d\n", metric.Count())
case CounterFloat64:
l.Printf("counter %s\n", name)
l.Printf(" count: %f\n", metric.Count())
case Gauge:
l.Printf("gauge %s\n", name)
l.Printf(" value: %9d\n", metric.Value())
Expand Down
19 changes: 13 additions & 6 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ func CollectProcessMetrics(refresh time.Duration) {
cpuSysLoad = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry)
cpuSysWait = GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry)
cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry)
cpuSysLoadTotal = GetOrRegisterCounterFloat64("system/cpu/sysload/total", DefaultRegistry)
cpuSysWaitTotal = GetOrRegisterCounterFloat64("system/cpu/syswait/total", DefaultRegistry)
cpuProcLoadTotal = GetOrRegisterCounterFloat64("system/cpu/procload/total", DefaultRegistry)
cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry)
cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry)
cpuSchedLatency = getOrRegisterRuntimeHistogram("system/cpu/schedlatency", secondsToNs, nil)
Expand Down Expand Up @@ -172,13 +175,17 @@ func CollectProcessMetrics(refresh time.Duration) {
secondsSinceLastCollect := collectTime.Sub(lastCollectTime).Seconds()
lastCollectTime = collectTime
if secondsSinceLastCollect > 0 {
sysLoad := (cpustats[now].GlobalTime - cpustats[prev].GlobalTime) / secondsSinceLastCollect
sysWait := (cpustats[now].GlobalWait - cpustats[prev].GlobalWait) / secondsSinceLastCollect
procLoad := (cpustats[now].LocalTime - cpustats[prev].LocalTime) / secondsSinceLastCollect
sysLoad := cpustats[now].GlobalTime - cpustats[prev].GlobalTime
sysWait := cpustats[now].GlobalWait - cpustats[prev].GlobalWait
procLoad := cpustats[now].LocalTime - cpustats[prev].LocalTime
// Convert to integer percentage.
cpuSysLoad.Update(int64(sysLoad * 100))
cpuSysWait.Update(int64(sysWait * 100))
cpuProcLoad.Update(int64(procLoad * 100))
cpuSysLoad.Update(int64(sysLoad / secondsSinceLastCollect * 100))
cpuSysWait.Update(int64(sysWait / secondsSinceLastCollect * 100))
cpuProcLoad.Update(int64(procLoad / secondsSinceLastCollect * 100))
// increment counters (ms)
cpuSysLoadTotal.Inc(sysLoad)
cpuSysWaitTotal.Inc(sysWait)
cpuProcLoadTotal.Inc(procLoad)
}

// Threads
Expand Down
Loading

0 comments on commit 7dc1007

Please sign in to comment.