Skip to content

Commit

Permalink
Add additional metrics to p2p and consensus
Browse files Browse the repository at this point in the history
Partially addresses cosmos/cosmos-sdk#2169.
  • Loading branch information
mslipper committed Sep 19, 2018
1 parent bdd0131 commit 4758e34
Show file tree
Hide file tree
Showing 13 changed files with 171 additions and 37 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ scripts/cutWALUntil/cutWALUntil
.vscode/

libs/pubsub/query/fuzz_test/output
libs/db/test*
shunit2

.tendermint-lite
Expand All @@ -42,4 +43,4 @@ terraform.tfstate
terraform.tfstate.backup
terraform.tfstate.d

.vscode
.vscode
2 changes: 2 additions & 0 deletions CHANGELOG_PENDING.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,7 @@ IMPROVEMENTS:
- [types] add Address to GenesisValidator [\#1714](https://github.com/tendermint/tendermint/issues/1714)
- [metrics] `consensus.block_interval_metrics` is now gauge, not histogram (you will be able to see spikes, if any)

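- [metrics] Added additional metrics to p2p (`peer_receive_bytes_total`, `peer_send_bytes_total`, `peer_pending_send_bytes`) and consensus (`latest_block_height`, `catching_up`); consensus, mempool and p2p metrics are now registered under the `tendermint` namespace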

BUG FIXES:
- [node] \#2294 Delay starting node until Genesis time
3 changes: 3 additions & 0 deletions config/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
package config

// MetricsNamespace is the namespace set on the Prometheus metric options
// created by the consensus, mempool and p2p packages.
const MetricsNamespace = "tendermint"
2 changes: 1 addition & 1 deletion consensus/byzantine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func TestByzantine(t *testing.T) {
err := eventBus.Subscribe(context.Background(), testSubscriber, types.EventQueryNewBlock, eventChans[i])
require.NoError(t, err)

conR := NewConsensusReactor(css[i], true) // so we dont start the consensus states
conR := NewConsensusReactor(css[i], true, NopMetrics()) // so we dont start the consensus states
conR.SetLogger(logger.With("validator", i))
conR.SetEventBus(eventBus)

Expand Down
68 changes: 50 additions & 18 deletions consensus/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@ import (
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/discard"

prometheus "github.com/go-kit/kit/metrics/prometheus"
"github.com/go-kit/kit/metrics/prometheus"
stdprometheus "github.com/prometheus/client_golang/prometheus"
tmcfg "github.com/tendermint/tendermint/config"
)

// MetricsSubsystem is the subsystem name set on every Prometheus metric
// created by this package.
const MetricsSubsystem = "consensus"

// Metrics contains metrics exposed by this package.
type Metrics struct {
// Height of the chain.
Expand Down Expand Up @@ -38,75 +41,102 @@ type Metrics struct {
BlockSizeBytes metrics.Gauge
// Total number of transactions.
TotalTxs metrics.Gauge
// The latest block height.
LatestBlockHeight metrics.Gauge
// Whether or not a node is catching up (fast syncing). 1 if yes, 0 if no.
CatchingUp metrics.Gauge
}

// PrometheusMetrics returns Metrics built using the Prometheus client library.
func PrometheusMetrics() *Metrics {
return &Metrics{
Height: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Namespace: tmcfg.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "height",
Help: "Height of the chain.",
}, []string{}),
Rounds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Namespace: tmcfg.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "rounds",
Help: "Number of rounds.",
}, []string{}),

Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Namespace: tmcfg.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "validators",
Help: "Number of validators.",
}, []string{}),
ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Namespace: tmcfg.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "validators_power",
Help: "Total power of all validators.",
}, []string{}),
MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Namespace: tmcfg.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "missing_validators",
Help: "Number of validators who did not sign.",
}, []string{}),
MissingValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Namespace: tmcfg.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "missing_validators_power",
Help: "Total power of the missing validators.",
}, []string{}),
ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Namespace: tmcfg.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "byzantine_validators",
Help: "Number of validators who tried to double sign.",
}, []string{}),
ByzantineValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Namespace: tmcfg.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "byzantine_validators_power",
Help: "Total power of the byzantine validators.",
}, []string{}),


BlockIntervalSeconds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
BlockIntervalSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
Namespace: tmcfg.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "block_interval_seconds",
Help: "Time between this and the last block.",
}, []string{}),

NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Namespace: tmcfg.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "num_txs",
Help: "Number of transactions.",
}, []string{}),
BlockSizeBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Namespace: tmcfg.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "block_size_bytes",
Help: "Size of the block.",
}, []string{}),
TotalTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Namespace: tmcfg.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "total_txs",
Help: "Total number of transactions.",
}, []string{}),
LatestBlockHeight: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: tmcfg.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "latest_block_height",
Help: "The latest block height.",
}, []string{}),
CatchingUp: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: tmcfg.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "catching_up",
// The consensus reactor sets this gauge to 1 while fastSync is true and
// to 0 when it switches to consensus, so the help text states it that way.
Help: "Whether or not a node is catching up (fast syncing). 1 if catching up, 0 if synced.",
}, []string{}),
}
}

Expand All @@ -126,8 +156,10 @@ func NopMetrics() *Metrics {

BlockIntervalSeconds: discard.NewGauge(),

NumTxs: discard.NewGauge(),
BlockSizeBytes: discard.NewGauge(),
TotalTxs: discard.NewGauge(),
NumTxs: discard.NewGauge(),
BlockSizeBytes: discard.NewGauge(),
TotalTxs: discard.NewGauge(),
LatestBlockHeight: discard.NewGauge(),
CatchingUp: discard.NewGauge(),
}
}
19 changes: 17 additions & 2 deletions consensus/reactor.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (

"github.com/pkg/errors"

amino "github.com/tendermint/go-amino"
"github.com/tendermint/go-amino"

cstypes "github.com/tendermint/tendermint/consensus/types"
cmn "github.com/tendermint/tendermint/libs/common"
Expand Down Expand Up @@ -42,15 +42,19 @@ type ConsensusReactor struct {
mtx sync.RWMutex
fastSync bool
eventBus *types.EventBus

metrics *Metrics
}

// NewConsensusReactor returns a new ConsensusReactor with the given
// consensusState.
func NewConsensusReactor(consensusState *ConsensusState, fastSync bool) *ConsensusReactor {
func NewConsensusReactor(consensusState *ConsensusState, fastSync bool, csMetrics *Metrics) *ConsensusReactor {
conR := &ConsensusReactor{
conS: consensusState,
fastSync: fastSync,
metrics: csMetrics,
}
conR.setCatchingUp()
conR.BaseReactor = *p2p.NewBaseReactor("ConsensusReactor", conR)
return conR
}
Expand Down Expand Up @@ -94,6 +98,7 @@ func (conR *ConsensusReactor) SwitchToConsensus(state sm.State, blocksSynced int
conR.mtx.Lock()
conR.fastSync = false
conR.mtx.Unlock()
conR.metrics.CatchingUp.Set(0)

if blocksSynced > 0 {
// dont bother with the WAL if we fast synced
Expand Down Expand Up @@ -814,6 +819,16 @@ func (conR *ConsensusReactor) StringIndented(indent string) string {
return s
}

// setCatchingUp mirrors the reactor's fastSync flag onto the CatchingUp
// gauge: 1 while fastSync is true, 0 otherwise.
func (conR *ConsensusReactor) setCatchingUp() {
	if conR.fastSync {
		conR.metrics.CatchingUp.Set(1)
		return
	}
	conR.metrics.CatchingUp.Set(0)
}

//-----------------------------------------------------------------------------

var (
Expand Down
6 changes: 3 additions & 3 deletions consensus/reactor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func startConsensusNet(t *testing.T, css []*ConsensusState, N int) ([]*Consensus
for i := 0; i < N; i++ {
/*logger, err := tmflags.ParseLogLevel("consensus:info,*:error", logger, "info")
if err != nil { t.Fatal(err)}*/
reactors[i] = NewConsensusReactor(css[i], true) // so we dont start the consensus states
reactors[i] = NewConsensusReactor(css[i], true, NopMetrics()) // so we dont start the consensus states
reactors[i].SetLogger(css[i].Logger)

// eventBus is already started with the cs
Expand Down Expand Up @@ -255,7 +255,7 @@ func TestReactorRecordsBlockParts(t *testing.T) {

// create reactor
css := randConsensusNet(1, "consensus_reactor_records_block_parts_test", newMockTickerFunc(true), newPersistentKVStore)
reactor := NewConsensusReactor(css[0], false) // so we dont start the consensus states
reactor := NewConsensusReactor(css[0], false, NopMetrics()) // so we dont start the consensus states
reactor.SetEventBus(css[0].eventBus)
reactor.SetLogger(log.TestingLogger())
sw := p2p.MakeSwitch(cfg.DefaultP2PConfig(), 1, "testing", "123.123.123", func(i int, sw *p2p.Switch) *p2p.Switch { return sw })
Expand Down Expand Up @@ -306,7 +306,7 @@ func TestReactorRecordsVotes(t *testing.T) {

// Create reactor.
css := randConsensusNet(1, "consensus_reactor_records_votes_test", newMockTickerFunc(true), newPersistentKVStore)
reactor := NewConsensusReactor(css[0], false) // so we dont start the consensus states
reactor := NewConsensusReactor(css[0], false, NopMetrics()) // so we dont start the consensus states
reactor.SetEventBus(css[0].eventBus)
reactor.SetLogger(log.TestingLogger())
sw := p2p.MakeSwitch(cfg.DefaultP2PConfig(), 1, "testing", "123.123.123", func(i int, sw *p2p.Switch) *p2p.Switch { return sw })
Expand Down
2 changes: 2 additions & 0 deletions consensus/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -1382,6 +1382,8 @@ func (cs *ConsensusState) recordMetrics(height int64, block *types.Block) {
cs.metrics.NumTxs.Set(float64(block.NumTxs))
cs.metrics.BlockSizeBytes.Set(float64(block.Size()))
cs.metrics.TotalTxs.Set(float64(block.TotalTxs))
cs.metrics.LatestBlockHeight.Set(float64(block.Height))

}

//-----------------------------------------------------------------------------
Expand Down
4 changes: 3 additions & 1 deletion mempool/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ import (
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/discard"

prometheus "github.com/go-kit/kit/metrics/prometheus"
"github.com/go-kit/kit/metrics/prometheus"
stdprometheus "github.com/prometheus/client_golang/prometheus"
"github.com/tendermint/tendermint/config"
)

// Metrics contains metrics exposed by this package.
Expand All @@ -19,6 +20,7 @@ type Metrics struct {
func PrometheusMetrics() *Metrics {
return &Metrics{
Size: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: config.MetricsNamespace,
Subsystem: "mempool",
Name: "size",
Help: "Size of the mempool (number of uncommitted transactions).",
Expand Down
6 changes: 4 additions & 2 deletions node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,8 @@ func NewNode(config *cfg.Config,
bcReactor := bc.NewBlockchainReactor(state.Copy(), blockExec, blockStore, fastSync)
bcReactor.SetLogger(logger.With("module", "blockchain"))

csm := cs.WithMetrics(csMetrics)

// Make ConsensusReactor
consensusState := cs.NewConsensusState(
config.Consensus,
Expand All @@ -295,13 +297,13 @@ func NewNode(config *cfg.Config,
blockStore,
mempool,
evidencePool,
cs.WithMetrics(csMetrics),
csm,
)
consensusState.SetLogger(consensusLogger)
if privValidator != nil {
consensusState.SetPrivValidator(privValidator)
}
consensusReactor := cs.NewConsensusReactor(consensusState, fastSync)
consensusReactor := cs.NewConsensusReactor(consensusState, fastSync, csMetrics)
consensusReactor.SetLogger(consensusLogger)

eventBus := types.NewEventBus()
Expand Down
35 changes: 33 additions & 2 deletions p2p/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,61 @@ import (
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/discard"

prometheus "github.com/go-kit/kit/metrics/prometheus"
"github.com/go-kit/kit/metrics/prometheus"
stdprometheus "github.com/prometheus/client_golang/prometheus"
"github.com/tendermint/tendermint/config"
)

// MetricsSubsystem is the subsystem name set on every Prometheus metric
// created by this package.
const MetricsSubsystem = "p2p"

// Metrics contains metrics exposed by this package.
//
// The three Peer* metrics are per-peer series: PrometheusMetrics declares
// each of them with a single "peer_id" label, while Peers has no labels.
type Metrics struct {
// Number of peers.
Peers metrics.Gauge
// Running total of bytes received from a given peer.
PeerReceiveBytesTotal metrics.Counter
// Running total of bytes sent to a given peer.
PeerSendBytesTotal metrics.Counter
// Bytes currently pending to be sent to a given peer.
PeerPendingSendBytes metrics.Gauge
}

// PrometheusMetrics returns Metrics built using the Prometheus client library.
func PrometheusMetrics() *Metrics {
return &Metrics{
Peers: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "p2p",
Namespace: config.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "peers",
Help: "Number of peers.",
}, []string{}),
PeerReceiveBytesTotal: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: config.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "peer_receive_bytes_total",
Help: "Number of bytes received from a given peer.",
}, []string{"peer_id"}),
PeerSendBytesTotal: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: config.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "peer_send_bytes_total",
Help: "Number of bytes sent to a given peer.",
}, []string{"peer_id"}),
PeerPendingSendBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: config.MetricsNamespace,
Subsystem: MetricsSubsystem,
Name: "peer_pending_send_bytes",
Help: "Number of pending bytes to be sent to a given peer.",
}, []string{"peer_id"}),
}
}

// NopMetrics returns no-op Metrics: every gauge and counter is taken from
// the go-kit discard package.
func NopMetrics() *Metrics {
	nop := new(Metrics)
	nop.Peers = discard.NewGauge()
	nop.PeerReceiveBytesTotal = discard.NewCounter()
	nop.PeerSendBytesTotal = discard.NewCounter()
	nop.PeerPendingSendBytes = discard.NewGauge()
	return nop
}
Loading

0 comments on commit 4758e34

Please sign in to comment.