From ff762eae90f5ae663c8b0be34ce1f0f28571cfeb Mon Sep 17 00:00:00 2001 From: "Dmitry K." Date: Mon, 23 Sep 2024 17:25:36 -0700 Subject: [PATCH 01/32] measure begin/end blockers --- x/application/module/abci.go | 4 +++ x/proof/keeper/proof_validation.go | 47 +++++++++++++++++++++++++++++- x/supplier/module/abci.go | 4 +++ x/tokenomics/module/abci.go | 4 +++ x/tokenomics/types/tx.pb.go | 1 - 5 files changed, 58 insertions(+), 2 deletions(-) diff --git a/x/application/module/abci.go b/x/application/module/abci.go index 3f0b16f4d..832dd7a55 100644 --- a/x/application/module/abci.go +++ b/x/application/module/abci.go @@ -1,13 +1,17 @@ package application import ( + cosmostelemetry "github.com/cosmos/cosmos-sdk/telemetry" sdk "github.com/cosmos/cosmos-sdk/types" "github.com/pokt-network/poktroll/x/application/keeper" + "github.com/pokt-network/poktroll/x/application/types" ) // EndBlocker is called every block and handles application related updates. func EndBlocker(ctx sdk.Context, k keeper.Keeper) error { + defer cosmostelemetry.ModuleMeasureSince(types.ModuleName, cosmostelemetry.Now(), cosmostelemetry.MetricKeyEndBlocker) + if err := k.EndBlockerAutoUndelegateFromUnstakedGateways(ctx); err != nil { return err } diff --git a/x/proof/keeper/proof_validation.go b/x/proof/keeper/proof_validation.go index 638e01d0a..a81e938c5 100644 --- a/x/proof/keeper/proof_validation.go +++ b/x/proof/keeper/proof_validation.go @@ -31,7 +31,10 @@ package keeper import ( "bytes" "context" + "fmt" + cosmostelemetry "github.com/cosmos/cosmos-sdk/telemetry" + "github.com/hashicorp/go-metrics" "github.com/pokt-network/smt" "github.com/pokt-network/poktroll/pkg/crypto/protocol" @@ -58,12 +61,15 @@ func (k Keeper) EnsureValidProof( ctx context.Context, proof *types.Proof, ) error { + defer cosmostelemetry.ModuleMeasureSince(types.ModuleName, cosmostelemetry.Now(), "validation") + logger := k.Logger().With("method", "ValidateProof") // Retrieve the supplier operator's public key. 
supplierOperatorAddr := proof.SupplierOperatorAddress supplierOperatorPubKey, err := k.accountQuerier.GetPubKeyFromAddress(ctx, supplierOperatorAddr) if err != nil { + telemetryFailProofValidationIncrement("unknown", "address_to_pubkey") return err } @@ -71,6 +77,7 @@ func (k Keeper) EnsureValidProof( var onChainSession *sessiontypes.Session onChainSession, err = k.queryAndValidateSessionHeader(ctx, proof.SessionHeader, supplierOperatorAddr) if err != nil { + telemetryFailProofValidationIncrement("unknown", "session_header") return err } logger.Info("queried and validated the session header") @@ -79,20 +86,24 @@ func (k Keeper) EnsureValidProof( // This corrects for discrepancies between unvalidated fields in the session // header which can be derived from known values (e.g. session end height). sessionHeader := onChainSession.GetHeader() + serviceId := sessionHeader.ServiceId // Validate proof message commit height is within the respective session's // proof submission window using the on-chain session header. if err = k.validateProofWindow(ctx, sessionHeader, supplierOperatorAddr); err != nil { + telemetryFailProofValidationIncrement(serviceId, "proof_window") return err } if len(proof.ClosestMerkleProof) == 0 { + telemetryFailProofValidationIncrement(serviceId, "empty_proof") return types.ErrProofInvalidProof.Wrap("proof cannot be empty") } // Unmarshal the closest merkle proof from the message. 
sparseMerkleClosestProof := &smt.SparseMerkleClosestProof{} if err = sparseMerkleClosestProof.Unmarshal(proof.ClosestMerkleProof); err != nil { + telemetryFailProofValidationIncrement(serviceId, "closest_proof_unmarshal") return types.ErrProofInvalidProof.Wrapf( "failed to unmarshal closest merkle proof: %s", err, @@ -105,6 +116,7 @@ func (k Keeper) EnsureValidProof( relayBz := sparseMerkleClosestProof.GetValueHash(&protocol.SmtSpec) relay := &servicetypes.Relay{} if err = k.cdc.Unmarshal(relayBz, relay); err != nil { + telemetryFailProofValidationIncrement(serviceId, "relay_unmarshal") return types.ErrProofInvalidRelay.Wrapf( "failed to unmarshal relay: %s", err, @@ -114,12 +126,14 @@ func (k Keeper) EnsureValidProof( // Basic validation of the relay request. relayReq := relay.GetReq() if err = relayReq.ValidateBasic(); err != nil { + telemetryFailProofValidationIncrement(serviceId, "req_validate_basic") return err } logger.Debug("successfully validated relay request") // Make sure that the supplier operator address in the proof matches the one in the relay request. if supplierOperatorAddr != relayReq.Meta.SupplierOperatorAddress { + telemetryFailProofValidationIncrement(serviceId, "supplier_mismatch") return types.ErrProofSupplierMismatch.Wrapf("supplier type mismatch") } logger.Debug("the proof supplier operator address matches the relay request supplier operator address") @@ -127,30 +141,35 @@ func (k Keeper) EnsureValidProof( // Basic validation of the relay response. relayRes := relay.GetRes() if err = relayRes.ValidateBasic(); err != nil { + telemetryFailProofValidationIncrement(serviceId, "res_validate_basic") return err } logger.Debug("successfully validated relay response") // Verify that the relay request session header matches the proof session header. 
if err = compareSessionHeaders(sessionHeader, relayReq.Meta.GetSessionHeader()); err != nil { + telemetryFailProofValidationIncrement(serviceId, "req_proof_session_header_mismatch") return err } logger.Debug("successfully compared relay request session header") // Verify that the relay response session header matches the proof session header. if err = compareSessionHeaders(sessionHeader, relayRes.Meta.GetSessionHeader()); err != nil { + telemetryFailProofValidationIncrement(serviceId, "res_proof_session_header_mismatch") return err } logger.Debug("successfully compared relay response session header") // Verify the relay request's signature. if err = k.ringClient.VerifyRelayRequestSignature(ctx, relayReq); err != nil { + telemetryFailProofValidationIncrement(serviceId, "relay_req_signature") return err } logger.Debug("successfully verified relay request signature") // Verify the relay response's signature. if err = relayRes.VerifySupplierOperatorSignature(supplierOperatorPubKey); err != nil { + telemetryFailProofValidationIncrement(serviceId, "res_signature") return err } logger.Debug("successfully verified relay response signature") @@ -169,7 +188,8 @@ func (k Keeper) EnsureValidProof( relayBz, serviceRelayDifficultyTargetHash, ); err != nil { - return types.ErrProofInvalidRelayDifficulty.Wrapf("failed to validate relay difficulty for service %s due to: %v", sessionHeader.ServiceId, err) + telemetryFailProofValidationIncrement(serviceId, fmt.Sprint(types.ErrProofInvalidRelayDifficulty.ABCICode())) + return types.ErrProofInvalidRelayDifficulty.Wrapf("failed to validate relay difficulty for service %s due to: %v", serviceId, err) } logger.Debug("successfully validated relay mining difficulty") @@ -181,6 +201,7 @@ func (k Keeper) EnsureValidProof( sessionHeader, supplierOperatorAddr, ); err != nil { + telemetryFailProofValidationIncrement(serviceId, "closest_path") return err } logger.Debug("successfully validated proof path") @@ -196,10 +217,13 @@ func (k Keeper) 
EnsureValidProof( // Verify the proof's closest merkle proof. if err = verifyClosestProof(sparseMerkleClosestProof, claim.GetRootHash()); err != nil { + telemetryFailProofValidationIncrement(serviceId, "closest_proof") return err } logger.Debug("successfully verified closest merkle proof") + telemetryValidProofIncrement(serviceId) + return nil } @@ -423,3 +447,24 @@ func validateRelayDifficulty(relayBz, serviceRelayDifficultyTargetHash []byte) e ) } + +// telemetryValidProofIncrement increases the `proof_valid` metric. +func telemetryValidProofIncrement(serviceId string) { + cosmostelemetry.IncrCounterWithLabels( + []string{types.ModuleName, "valid"}, + float32(1), + []metrics.Label{cosmostelemetry.NewLabel("service_id", serviceId)}, + ) +} + +// telemetryFailProofValidationIncrement increases the `proof_validation_fail` metric labeled with a reason and service id. +func telemetryFailProofValidationIncrement(serviceId string, reason string) { + cosmostelemetry.IncrCounterWithLabels( + []string{types.ModuleName, "validation_fail"}, + float32(1), + []metrics.Label{ + cosmostelemetry.NewLabel("service_id", serviceId), + cosmostelemetry.NewLabel("reason", reason), + }, + ) +} diff --git a/x/supplier/module/abci.go b/x/supplier/module/abci.go index 86e463ea5..421acad05 100644 --- a/x/supplier/module/abci.go +++ b/x/supplier/module/abci.go @@ -3,11 +3,15 @@ package supplier import ( sdk "github.com/cosmos/cosmos-sdk/types" + cosmostelemetry "github.com/cosmos/cosmos-sdk/telemetry" "github.com/pokt-network/poktroll/x/supplier/keeper" + "github.com/pokt-network/poktroll/x/supplier/types" ) // EndBlocker is called every block and handles supplier related updates. func EndBlocker(ctx sdk.Context, k keeper.Keeper) error { + defer cosmostelemetry.ModuleMeasureSince(types.ModuleName, cosmostelemetry.Now(), cosmostelemetry.MetricKeyEndBlocker) + // TODO_IMPROVE(@red-0ne): Add logs and/or telemetry on the number of unbonded suppliers. 
if err := k.EndBlockerUnbondSuppliers(ctx); err != nil { return err diff --git a/x/tokenomics/module/abci.go b/x/tokenomics/module/abci.go index c74e22737..db479da2f 100644 --- a/x/tokenomics/module/abci.go +++ b/x/tokenomics/module/abci.go @@ -3,16 +3,20 @@ package tokenomics import ( "fmt" + cosmostelemetry "github.com/cosmos/cosmos-sdk/telemetry" sdk "github.com/cosmos/cosmos-sdk/types" "github.com/pokt-network/poktroll/pkg/crypto/protocol" "github.com/pokt-network/poktroll/telemetry" prooftypes "github.com/pokt-network/poktroll/x/proof/types" "github.com/pokt-network/poktroll/x/tokenomics/keeper" + "github.com/pokt-network/poktroll/x/tokenomics/types" ) // EndBlocker called at every block and settles all pending claims. func EndBlocker(ctx sdk.Context, k keeper.Keeper) (err error) { + defer cosmostelemetry.ModuleMeasureSince(types.ModuleName, cosmostelemetry.Now(), cosmostelemetry.MetricKeyEndBlocker) + logger := k.Logger().With("method", "EndBlocker") // NB: There are two main reasons why we settle expiring claims in the end diff --git a/x/tokenomics/types/tx.pb.go b/x/tokenomics/types/tx.pb.go index e4fec264c..9f18a148c 100644 --- a/x/tokenomics/types/tx.pb.go +++ b/x/tokenomics/types/tx.pb.go @@ -125,7 +125,6 @@ type MsgUpdateParam struct { // specified in the `Params` message in `proof/params.proto.` Name string `protobuf:"bytes,2,opt,name=name,proto3" json:"name,omitempty"` // Types that are valid to be assigned to AsType: - // // *MsgUpdateParam_AsString // *MsgUpdateParam_AsInt64 // *MsgUpdateParam_AsBytes From 1aad6ca00634b68c74067d3062cc4f90dc9e25f1 Mon Sep 17 00:00:00 2001 From: "Dmitry K." 
Date: Mon, 23 Sep 2024 18:01:04 -0700 Subject: [PATCH 02/32] --wip-- [skip ci] --- telemetry/event_counters.go | 12 +++++++++--- x/proof/keeper/proof_validation.go | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/telemetry/event_counters.go b/telemetry/event_counters.go index 054e67ddb..8119a49e5 100644 --- a/telemetry/event_counters.go +++ b/telemetry/event_counters.go @@ -28,15 +28,21 @@ func EventSuccessCounter( getValue func() float32, isSuccessful func() bool, ) { - successResult := strconv.FormatBool(isSuccessful()) value := getValue() + var metricName []string + + if isSuccessful() { + metricName = []string{"event", "success"} + } else { + metricName = []string{"event", "failed"} + } + telemetry.IncrCounterWithLabels( - []string{eventTypeMetricKey}, + metricName, value, []metrics.Label{ {Name: "type", Value: eventType}, - {Name: "is_successful", Value: successResult}, }, ) } diff --git a/x/proof/keeper/proof_validation.go b/x/proof/keeper/proof_validation.go index a81e938c5..a943d600f 100644 --- a/x/proof/keeper/proof_validation.go +++ b/x/proof/keeper/proof_validation.go @@ -61,7 +61,7 @@ func (k Keeper) EnsureValidProof( ctx context.Context, proof *types.Proof, ) error { - defer cosmostelemetry.ModuleMeasureSince(types.ModuleName, cosmostelemetry.Now(), "validation") + defer cosmostelemetry.MeasureSince(cosmostelemetry.Now(), "proof", "validation") logger := k.Logger().With("method", "ValidateProof") From 025698a7c3158c940ec0d99cedb831d1257094f6 Mon Sep 17 00:00:00 2001 From: "Dmitry K." 
Date: Thu, 26 Sep 2024 12:20:30 -0500 Subject: [PATCH 03/32] metrixxx --- ...appgate_server_config_localnet_vscode.yaml | 2 +- .../relayminer_config_localnet_vscode.yaml | 12 +++-- pkg/appgateserver/cmd/cmd.go | 5 +- pkg/relayer/cmd/cmd.go | 5 +- pkg/relayer/proxy/synchronous.go | 2 +- pkg/relayer/session/sessiontree.go | 9 +++- telemetry/common.go | 10 ++++ telemetry/event_counters.go | 40 ++++++---------- x/proof/keeper/proof_validation.go | 46 +------------------ 9 files changed, 52 insertions(+), 79 deletions(-) create mode 100644 telemetry/common.go diff --git a/localnet/poktrolld/config/appgate_server_config_localnet_vscode.yaml b/localnet/poktrolld/config/appgate_server_config_localnet_vscode.yaml index 7c7e475da..2dab42371 100644 --- a/localnet/poktrolld/config/appgate_server_config_localnet_vscode.yaml +++ b/localnet/poktrolld/config/appgate_server_config_localnet_vscode.yaml @@ -1,5 +1,5 @@ query_node_rpc_url: tcp://localhost:26657 -query_node_grpc_url: tcp://localhost:9090 +query_node_grpc_url: tcp://localhost:36658 signing_key: app1 self_signing: true listening_endpoint: http://0.0.0.0:42069 diff --git a/localnet/poktrolld/config/relayminer_config_localnet_vscode.yaml b/localnet/poktrolld/config/relayminer_config_localnet_vscode.yaml index 74ba44dde..451914f9f 100644 --- a/localnet/poktrolld/config/relayminer_config_localnet_vscode.yaml +++ b/localnet/poktrolld/config/relayminer_config_localnet_vscode.yaml @@ -22,9 +22,9 @@ metrics: enabled: true addr: :9070 pocket_node: - query_node_rpc_url: tcp://127.0.0.1:36657 - query_node_grpc_url: tcp://127.0.0.1:36658 - tx_node_rpc_url: tcp://127.0.0.1:36657 + query_node_rpc_url: tcp://localhost:26657 + query_node_grpc_url: tcp://localhost:36658 + tx_node_rpc_url: tcp://localhost:26657 suppliers: - service_id: anvil listen_url: http://0.0.0.0:8545 @@ -38,6 +38,12 @@ suppliers: backend_url: http://localhost:11434/ publicly_exposed_endpoints: - relayminer1 + - service_id: rest + listen_url: http://0.0.0.0:8545 + 
service_config: + backend_url: http://rest:10000/ + publicly_exposed_endpoints: + - relayminer1 pprof: enabled: false addr: localhost:6070 diff --git a/pkg/appgateserver/cmd/cmd.go b/pkg/appgateserver/cmd/cmd.go index 740104182..124d8ceb5 100644 --- a/pkg/appgateserver/cmd/cmd.go +++ b/pkg/appgateserver/cmd/cmd.go @@ -31,6 +31,8 @@ var ( flagNodeRPCURL string // flagNodeGRPCURL is the variable containing the Cosmos node GRPC URL flag value. flagNodeGRPCURL string + // flagLogLevel is the variable to set a log level (used by cosmos and polylog). + flagLogLevel string ) // AppGateServerCmd returns the Cobra command for running the AppGate server. @@ -74,6 +76,7 @@ provided that: cmd.Flags().StringVar(&flagNodeRPCURL, cosmosflags.FlagNode, omittedDefaultFlagValue, "Register the default Cosmos node flag, which is needed to initialize the Cosmos query context correctly. It can be used to override the `QueryNodeUrl` field in the config file if specified.") cmd.Flags().StringVar(&flagNodeGRPCURL, cosmosflags.FlagGRPC, omittedDefaultFlagValue, "Register the default Cosmos node grpc flag, which is needed to initialize the Cosmos query context with grpc correctly. It can be used to override the `QueryNodeGRPCUrl` field in the config file if specified.") cmd.Flags().Bool(cosmosflags.FlagGRPCInsecure, true, "Used to initialize the Cosmos query context with grpc security options. It can be used to override the `QueryNodeGRPCInsecure` field in the config file if specified.") + cmd.Flags().StringVar(&flagLogLevel, cosmosflags.FlagLogLevel, "debug", "The logging level (debug|info|warn|error)") return cmd } @@ -99,7 +102,7 @@ func runAppGateServer(cmd *cobra.Command, _ []string) error { // TODO_TECHDEBT: populate logger from the config (ideally, from viper). 
loggerOpts := []polylog.LoggerOption{ - polyzero.WithLevel(polyzero.DebugLevel), + polyzero.WithLevel(polyzero.ParseLevel(flagLogLevel)), polyzero.WithOutput(os.Stderr), } diff --git a/pkg/relayer/cmd/cmd.go b/pkg/relayer/cmd/cmd.go index 66681b881..a1de86b4a 100644 --- a/pkg/relayer/cmd/cmd.go +++ b/pkg/relayer/cmd/cmd.go @@ -39,6 +39,8 @@ var ( flagNodeRPCURL string // flagNodeGRPCURL is the variable containing the Cosmos node GRPC URL flag value. flagNodeGRPCURL string + // flagLogLevel is the variable to set a log level (used by cosmos and polylog). + flagLogLevel string ) // RelayerCmd returns the Cobra command for running the relay miner. @@ -72,6 +74,7 @@ for such operations.`, cmd.Flags().StringVar(&flagNodeGRPCURL, cosmosflags.FlagGRPC, omittedDefaultFlagValue, "Register the default Cosmos node grpc flag, which is needed to initialize the Cosmos query context with grpc correctly. It can be used to override the `QueryNodeGRPCURL` field in the config file if specified.") cmd.Flags().Bool(cosmosflags.FlagGRPCInsecure, true, "Used to initialize the Cosmos query context with grpc security options. It can be used to override the `QueryNodeGRPCInsecure` field in the config file if specified.") cmd.Flags().String(cosmosflags.FlagChainID, "poktroll", "The network chain ID") + cmd.Flags().StringVar(&flagLogLevel, cosmosflags.FlagLogLevel, "debug", "The logging level (debug|info|warn|error)") return cmd } @@ -97,7 +100,7 @@ func runRelayer(cmd *cobra.Command, _ []string) error { // TODO_TECHDEBT: populate logger from the config (ideally, from viper). 
loggerOpts := []polylog.LoggerOption{ - polyzero.WithLevel(polyzero.DebugLevel), + polyzero.WithLevel(polyzero.ParseLevel(flagLogLevel)), polyzero.WithOutput(os.Stderr), } diff --git a/pkg/relayer/proxy/synchronous.go b/pkg/relayer/proxy/synchronous.go index e3767eef6..fe447aa0f 100644 --- a/pkg/relayer/proxy/synchronous.go +++ b/pkg/relayer/proxy/synchronous.go @@ -203,7 +203,7 @@ func (sync *synchronousRPCServer) ServeHTTP(writer http.ResponseWriter, request return } - sync.logger.Info().Fields(map[string]any{ + sync.logger.Debug().Fields(map[string]any{ "application_address": relay.Res.Meta.SessionHeader.ApplicationAddress, "service_id": relay.Res.Meta.SessionHeader.ServiceId, "session_start_height": relay.Res.Meta.SessionHeader.SessionStartBlockHeight, diff --git a/pkg/relayer/session/sessiontree.go b/pkg/relayer/session/sessiontree.go index b55340224..8969390c2 100644 --- a/pkg/relayer/session/sessiontree.go +++ b/pkg/relayer/session/sessiontree.go @@ -3,6 +3,7 @@ package session import ( "bytes" "crypto/sha256" + "fmt" "os" "path/filepath" "sync" @@ -273,8 +274,12 @@ func (st *sessionTree) Delete() error { // This was intentionally removed to lower the IO load. // When the database is closed, it is deleted it from disk right away. - if err := st.treeStore.Stop(); err != nil { - return err + if st.treeStore != nil { + if err := st.treeStore.Stop(); err != nil { + return err + } + } else { + fmt.Println("DIMA: the tree store has been deleted already. How come?") } // Delete the KVStore from disk diff --git a/telemetry/common.go b/telemetry/common.go new file mode 100644 index 000000000..3d0b4d085 --- /dev/null +++ b/telemetry/common.go @@ -0,0 +1,10 @@ +package telemetry + +// MetricNameKeys constructs the full metric name by prefixing with a defined +// prefix and appending any additional metrics provided as variadic arguments. 
+func MetricNameKeys(metrics ...string) []string { + result := make([]string, 0, len(metrics)+1) + result = append(result, metricNamePrefix) + result = append(result, metrics...) + return result +} diff --git a/telemetry/event_counters.go b/telemetry/event_counters.go index 8119a49e5..98f1895ff 100644 --- a/telemetry/event_counters.go +++ b/telemetry/event_counters.go @@ -6,8 +6,6 @@ package telemetry import ( - "strconv" - "github.com/cosmos/cosmos-sdk/telemetry" "github.com/hashicorp/go-metrics" @@ -15,11 +13,8 @@ import ( ) const ( - // TODO_DECIDE: Decide if we want to continue using these generic metrics keys - // or opt for specific keys for each event_type. - // See: https://github.com/pokt-network/poktroll/pull/631#discussion_r1653760820 - eventTypeMetricKey = "event_type" - eventTypeMetricKeyGauge = "event_type_gauge" + // Prefix all metric names with "poktroll" for easier search + metricNamePrefix = "poktroll" ) // EventSuccessCounter increments a counter with the given data type and success status. @@ -33,9 +28,9 @@ func EventSuccessCounter( var metricName []string if isSuccessful() { - metricName = []string{"event", "success"} + metricName = MetricNameKeys("successful", "events") } else { - metricName = []string{"event", "failed"} + metricName = MetricNameKeys("failed", "events") } telemetry.IncrCounterWithLabels( @@ -56,10 +51,8 @@ func ProofRequirementCounter( err error, ) { incrementAmount := 1 - isRequired := strconv.FormatBool(reason != prooftypes.ProofRequirementReason_NOT_REQUIRED) labels := []metrics.Label{ - {Name: "proof_required_reason", Value: reason.String()}, - {Name: "is_required", Value: isRequired}, + {Name: "reason", Value: reason.String()}, } // Ensure the counter is not incremented if there was an error. 
@@ -68,7 +61,7 @@ func ProofRequirementCounter( } telemetry.IncrCounterWithLabels( - []string{eventTypeMetricKey}, + MetricNameKeys("proof", "requirements"), float32(incrementAmount), labels, ) @@ -84,8 +77,7 @@ func ClaimComputeUnitsCounter( ) { incrementAmount := numComputeUnits labels := []metrics.Label{ - {Name: "unit", Value: "compute_units"}, - {Name: "claim_proof_stage", Value: claimProofStage.String()}, + {Name: "proof_stage", Value: claimProofStage.String()}, } // Ensure the counter is not incremented if there was an error. @@ -94,7 +86,7 @@ func ClaimComputeUnitsCounter( } telemetry.IncrCounterWithLabels( - []string{eventTypeMetricKey}, + MetricNameKeys("compute_units"), float32(incrementAmount), labels, ) @@ -111,8 +103,7 @@ func ClaimRelaysCounter( ) { incrementAmount := numRelays labels := []metrics.Label{ - {Name: "unit", Value: "relays"}, - {Name: "claim_proof_stage", Value: claimProofStage.String()}, + {Name: "proof_stage", Value: claimProofStage.String()}, } // Ensure the counter is not incremented if there was an error. @@ -121,7 +112,7 @@ func ClaimRelaysCounter( } telemetry.IncrCounterWithLabels( - []string{eventTypeMetricKey}, + MetricNameKeys("relays"), float32(incrementAmount), labels, ) @@ -137,8 +128,7 @@ func ClaimCounter( ) { incrementAmount := numClaims labels := []metrics.Label{ - {Name: "unit", Value: "claims"}, - {Name: "claim_proof_stage", Value: claimProofStage.String()}, + {Name: "proof_stage", Value: claimProofStage.String()}, } // Ensure the counter is not incremented if there was an error. @@ -147,7 +137,7 @@ func ClaimCounter( } telemetry.IncrCounterWithLabels( - []string{eventTypeMetricKey}, + MetricNameKeys("claims"), float32(incrementAmount), labels, ) @@ -158,12 +148,11 @@ func ClaimCounter( // track the difficulty for each service. 
func RelayMiningDifficultyGauge(difficulty float32, serviceId string) { labels := []metrics.Label{ - {Name: "type", Value: "relay_mining_difficulty"}, {Name: "service_id", Value: serviceId}, } telemetry.SetGaugeWithLabels( - []string{eventTypeMetricKeyGauge}, + MetricNameKeys("relay_mining", "difficulty"), difficulty, labels, ) @@ -173,12 +162,11 @@ func RelayMiningDifficultyGauge(difficulty float32, serviceId string) { // The serviceId is used as a label to be able to track the EMA for each service. func RelayEMAGauge(relayEMA uint64, serviceId string) { labels := []metrics.Label{ - {Name: "type", Value: "relay_ema"}, {Name: "service_id", Value: serviceId}, } telemetry.SetGaugeWithLabels( - []string{eventTypeMetricKeyGauge}, + MetricNameKeys("relay", "ema"), float32(relayEMA), labels, ) diff --git a/x/proof/keeper/proof_validation.go b/x/proof/keeper/proof_validation.go index e016f4f92..b5c749479 100644 --- a/x/proof/keeper/proof_validation.go +++ b/x/proof/keeper/proof_validation.go @@ -31,13 +31,12 @@ package keeper import ( "bytes" "context" - "fmt" cosmostelemetry "github.com/cosmos/cosmos-sdk/telemetry" - "github.com/hashicorp/go-metrics" "github.com/pokt-network/smt" "github.com/pokt-network/poktroll/pkg/crypto/protocol" + "github.com/pokt-network/poktroll/telemetry" "github.com/pokt-network/poktroll/x/proof/types" servicetypes "github.com/pokt-network/poktroll/x/service/types" sessiontypes "github.com/pokt-network/poktroll/x/session/types" @@ -61,7 +60,7 @@ func (k Keeper) EnsureValidProof( ctx context.Context, proof *types.Proof, ) error { - defer cosmostelemetry.MeasureSince(cosmostelemetry.Now(), "proof", "validation") + defer cosmostelemetry.MeasureSince(cosmostelemetry.Now(), telemetry.MetricNameKeys("proof", "validation")...) 
logger := k.Logger().With("method", "ValidateProof") @@ -69,7 +68,6 @@ func (k Keeper) EnsureValidProof( supplierOperatorAddr := proof.SupplierOperatorAddress supplierOperatorPubKey, err := k.accountQuerier.GetPubKeyFromAddress(ctx, supplierOperatorAddr) if err != nil { - telemetryFailProofValidationIncrement("unknown", "address_to_pubkey") return err } @@ -77,7 +75,6 @@ func (k Keeper) EnsureValidProof( var onChainSession *sessiontypes.Session onChainSession, err = k.queryAndValidateSessionHeader(ctx, proof.SessionHeader, supplierOperatorAddr) if err != nil { - telemetryFailProofValidationIncrement("unknown", "session_header") return err } logger.Info("queried and validated the session header") @@ -91,19 +88,16 @@ func (k Keeper) EnsureValidProof( // Validate proof message commit height is within the respective session's // proof submission window using the on-chain session header. if err = k.validateProofWindow(ctx, sessionHeader, supplierOperatorAddr); err != nil { - telemetryFailProofValidationIncrement(serviceId, "proof_window") return err } if len(proof.ClosestMerkleProof) == 0 { - telemetryFailProofValidationIncrement(serviceId, "empty_proof") return types.ErrProofInvalidProof.Wrap("proof cannot be empty") } // Unmarshal the closest merkle proof from the message. sparseCompactMerkleClosestProof := &smt.SparseCompactMerkleClosestProof{} if err = sparseCompactMerkleClosestProof.Unmarshal(proof.ClosestMerkleProof); err != nil { - telemetryFailProofValidationIncrement(serviceId, "closest_compact_proof_unmarshal") return types.ErrProofInvalidProof.Wrapf( "failed to unmarshal closest merkle proof: %s", err, @@ -113,7 +107,6 @@ func (k Keeper) EnsureValidProof( // SparseCompactMerkeClosestProof does not implement GetValueHash, so we need to decompact it. 
sparseMerkleClosestProof, err := smt.DecompactClosestProof(sparseCompactMerkleClosestProof, &protocol.SmtSpec) if err != nil { - telemetryFailProofValidationIncrement(serviceId, "decompact_closest_proof") return types.ErrProofInvalidProof.Wrapf( "failed to decompact closest merkle proof: %s", err, @@ -124,7 +117,6 @@ func (k Keeper) EnsureValidProof( relayBz := sparseMerkleClosestProof.GetValueHash(&protocol.SmtSpec) relay := &servicetypes.Relay{} if err = k.cdc.Unmarshal(relayBz, relay); err != nil { - telemetryFailProofValidationIncrement(serviceId, "relay_unmarshal") return types.ErrProofInvalidRelay.Wrapf( "failed to unmarshal relay: %s", err, @@ -134,14 +126,12 @@ func (k Keeper) EnsureValidProof( // Basic validation of the relay request. relayReq := relay.GetReq() if err = relayReq.ValidateBasic(); err != nil { - telemetryFailProofValidationIncrement(serviceId, "req_validate_basic") return err } logger.Debug("successfully validated relay request") // Make sure that the supplier operator address in the proof matches the one in the relay request. if supplierOperatorAddr != relayReq.Meta.SupplierOperatorAddress { - telemetryFailProofValidationIncrement(serviceId, "supplier_mismatch") return types.ErrProofSupplierMismatch.Wrapf("supplier type mismatch") } logger.Debug("the proof supplier operator address matches the relay request supplier operator address") @@ -149,35 +139,30 @@ func (k Keeper) EnsureValidProof( // Basic validation of the relay response. relayRes := relay.GetRes() if err = relayRes.ValidateBasic(); err != nil { - telemetryFailProofValidationIncrement(serviceId, "res_validate_basic") return err } logger.Debug("successfully validated relay response") // Verify that the relay request session header matches the proof session header. 
if err = compareSessionHeaders(sessionHeader, relayReq.Meta.GetSessionHeader()); err != nil { - telemetryFailProofValidationIncrement(serviceId, "req_proof_session_header_mismatch") return err } logger.Debug("successfully compared relay request session header") // Verify that the relay response session header matches the proof session header. if err = compareSessionHeaders(sessionHeader, relayRes.Meta.GetSessionHeader()); err != nil { - telemetryFailProofValidationIncrement(serviceId, "res_proof_session_header_mismatch") return err } logger.Debug("successfully compared relay response session header") // Verify the relay request's signature. if err = k.ringClient.VerifyRelayRequestSignature(ctx, relayReq); err != nil { - telemetryFailProofValidationIncrement(serviceId, "relay_req_signature") return err } logger.Debug("successfully verified relay request signature") // Verify the relay response's signature. if err = relayRes.VerifySupplierOperatorSignature(supplierOperatorPubKey); err != nil { - telemetryFailProofValidationIncrement(serviceId, "res_signature") return err } logger.Debug("successfully verified relay response signature") @@ -196,7 +181,6 @@ func (k Keeper) EnsureValidProof( relayBz, serviceRelayDifficultyTargetHash, ); err != nil { - telemetryFailProofValidationIncrement(serviceId, fmt.Sprint(types.ErrProofInvalidRelayDifficulty.ABCICode())) return types.ErrProofInvalidRelayDifficulty.Wrapf("failed to validate relay difficulty for service %s due to: %v", serviceId, err) } logger.Debug("successfully validated relay mining difficulty") @@ -209,7 +193,6 @@ func (k Keeper) EnsureValidProof( sessionHeader, supplierOperatorAddr, ); err != nil { - telemetryFailProofValidationIncrement(serviceId, "closest_path") return err } logger.Debug("successfully validated proof path") @@ -218,7 +201,6 @@ func (k Keeper) EnsureValidProof( // used in the proof validation below. 
claim, err := k.queryAndValidateClaimForProof(ctx, sessionHeader, supplierOperatorAddr) if err != nil { - telemetryFailProofValidationIncrement(serviceId, "retrieve_claim") return err } @@ -226,13 +208,10 @@ func (k Keeper) EnsureValidProof( // Verify the proof's closest merkle proof. if err = verifyClosestProof(sparseMerkleClosestProof, claim.GetRootHash()); err != nil { - telemetryFailProofValidationIncrement(serviceId, "closest_proof") return err } logger.Debug("successfully verified closest merkle proof") - telemetryValidProofIncrement(serviceId) - return nil } @@ -456,24 +435,3 @@ func validateRelayDifficulty(relayBz, serviceRelayDifficultyTargetHash []byte) e ) } - -// telemetryValidProofIncrement increases the `proof_valid` metric. -func telemetryValidProofIncrement(serviceId string) { - cosmostelemetry.IncrCounterWithLabels( - []string{types.ModuleName, "valid"}, - float32(1), - []metrics.Label{cosmostelemetry.NewLabel("service_id", serviceId)}, - ) -} - -// telemetryFailProofValidationIncrement increases the `proof_validation_fail` metric labeled with a reason and service id. -func telemetryFailProofValidationIncrement(serviceId string, reason string) { - cosmostelemetry.IncrCounterWithLabels( - []string{types.ModuleName, "validation_fail"}, - float32(1), - []metrics.Label{ - cosmostelemetry.NewLabel("service_id", serviceId), - cosmostelemetry.NewLabel("reason", reason), - }, - ) -} From b7c5bd350eb891c97c05affb430ac4f1bfc63d89 Mon Sep 17 00:00:00 2001 From: "Dmitry K." 
Date: Thu, 26 Sep 2024 15:12:56 -0500 Subject: [PATCH 04/32] --wip-- [skip ci] --- telemetry/tokens.go | 36 ++++++++++++++++++++++ x/proof/keeper/proof_validation.go | 3 +- x/tokenomics/keeper/token_logic_modules.go | 33 ++++++++++++++++---- 3 files changed, 64 insertions(+), 8 deletions(-) create mode 100644 telemetry/tokens.go diff --git a/telemetry/tokens.go b/telemetry/tokens.go new file mode 100644 index 000000000..99d136bcb --- /dev/null +++ b/telemetry/tokens.go @@ -0,0 +1,36 @@ +package telemetry + +import ( + cosmostelemetry "github.com/cosmos/cosmos-sdk/telemetry" + "github.com/hashicorp/go-metrics" +) + +// MintedTokensFromModule is a function to track token minting from a specific module. +// The metric used is an increment counter, and the label includes the module name for context. +func MintedTokensFromModule(module string, amount float32) { + // CosmosSDK has a metric called `minted_tokens` (as a part of `mint` module), however it is wrongfully marked a `gauge`. + // It should be an `increment` because it always goes up. `gauge` tracks data that can go up and down. + // More info: https://prometheus.io/docs/concepts/metric_types/ + // + // We can't keep the same metric name because different metric types can't collide under the same name. So we add + // `poktroll_` prefix instead. + cosmostelemetry.IncrCounterWithLabels( + MetricNameKeys("minted", "tokens"), + amount, + []metrics.Label{ + cosmostelemetry.NewLabel("module", module), + }, + ) +} + +// BurnedTokensFromModule is a function to track token burning from a specific module. +// The metric used is an increment counter, and the label includes the module name for context. 
+func BurnedTokensFromModule(module string, amount float32) { + cosmostelemetry.IncrCounterWithLabels( + MetricNameKeys("burned", "tokens"), + amount, + []metrics.Label{ + cosmostelemetry.NewLabel("module", module), + }, + ) +} diff --git a/x/proof/keeper/proof_validation.go b/x/proof/keeper/proof_validation.go index b5c749479..d66b3673e 100644 --- a/x/proof/keeper/proof_validation.go +++ b/x/proof/keeper/proof_validation.go @@ -83,7 +83,6 @@ func (k Keeper) EnsureValidProof( // This corrects for discrepancies between unvalidated fields in the session // header which can be derived from known values (e.g. session end height). sessionHeader := onChainSession.GetHeader() - serviceId := sessionHeader.ServiceId // Validate proof message commit height is within the respective session's // proof submission window using the on-chain session header. @@ -181,7 +180,7 @@ func (k Keeper) EnsureValidProof( relayBz, serviceRelayDifficultyTargetHash, ); err != nil { - return types.ErrProofInvalidRelayDifficulty.Wrapf("failed to validate relay difficulty for service %s due to: %v", serviceId, err) + return types.ErrProofInvalidRelayDifficulty.Wrapf("failed to validate relay difficulty for service %s due to: %v", sessionHeader.ServiceId, err) } logger.Debug("successfully validated relay mining difficulty") diff --git a/x/tokenomics/keeper/token_logic_modules.go b/x/tokenomics/keeper/token_logic_modules.go index 288c82f73..35c3ec8de 100644 --- a/x/tokenomics/keeper/token_logic_modules.go +++ b/x/tokenomics/keeper/token_logic_modules.go @@ -316,8 +316,9 @@ func (k Keeper) TokenLogicModuleRelayBurnEqualsMint( // Mint new uPOKT to the supplier module account. // These funds will be transferred to the supplier's shareholders below. // For reference, see operate/configs/supplier_staking_config.md. 
+ coinsToMint := sdk.NewCoins(settlementCoin) if err := k.bankKeeper.MintCoins( - ctx, suppliertypes.ModuleName, sdk.NewCoins(settlementCoin), + ctx, suppliertypes.ModuleName, coinsToMint, ); err != nil { return tokenomicstypes.ErrTokenomicsSupplierModuleSendFailed.Wrapf( "minting %s to the supplier module account: %v", @@ -325,7 +326,13 @@ func (k Keeper) TokenLogicModuleRelayBurnEqualsMint( err, ) } - logger.Info(fmt.Sprintf("minted (%v) coins in the supplier module", settlementCoin)) + + for _, coin := range coinsToMint { + if coin.Amount.IsInt64() { + defer telemetry.MintedTokensFromModule(suppliertypes.ModuleName, float32(coin.Amount.Int64())) + } + } + logger.Debug(fmt.Sprintf("minted (%v) coins in the supplier module", settlementCoin)) // Distribute the rewards to the supplier's shareholders based on the rev share percentage. if err := k.distributeSupplierRewardsToShareHolders(ctx, supplier, service.Id, settlementCoin.Amount.Uint64()); err != nil { @@ -335,16 +342,23 @@ func (k Keeper) TokenLogicModuleRelayBurnEqualsMint( err, ) } - logger.Info(fmt.Sprintf("sent (%v) from the supplier module to the supplier account with address %q", settlementCoin, supplier.OperatorAddress)) + logger.Debug(fmt.Sprintf("sent (%v) from the supplier module to the supplier account with address %q", settlementCoin, supplier.OperatorAddress)) // Burn uPOKT from the application module account which was held in escrow // on behalf of the application account. 
+ coinsToBurn := sdk.NewCoins(settlementCoin) if err := k.bankKeeper.BurnCoins( - ctx, apptypes.ModuleName, sdk.NewCoins(settlementCoin), + ctx, apptypes.ModuleName, coinsToBurn, ); err != nil { return tokenomicstypes.ErrTokenomicsApplicationModuleBurn.Wrapf("burning %s from the application module account: %v", settlementCoin, err) } - logger.Info(fmt.Sprintf("burned (%v) from the application module account", settlementCoin)) + + for _, coin := range coinsToBurn { + if coin.Amount.IsInt64() { + defer telemetry.BurnedTokensFromModule(apptypes.ModuleName, float32(coin.Amount.Int64())) + } + } + logger.Debug(fmt.Sprintf("burned (%v) from the application module account", settlementCoin)) // Update the application's on-chain stake newAppStake, err := application.Stake.SafeSub(settlementCoin) @@ -352,7 +366,7 @@ func (k Keeper) TokenLogicModuleRelayBurnEqualsMint( return tokenomicstypes.ErrTokenomicsApplicationNewStakeInvalid.Wrapf("application %q stake cannot be reduced to a negative amount %v", application.Address, newAppStake) } application.Stake = &newAppStake - logger.Info(fmt.Sprintf("updated application %q stake to %v", application.Address, newAppStake)) + logger.Debug(fmt.Sprintf("updated application %q stake to %v", application.Address, newAppStake)) return nil } @@ -382,10 +396,17 @@ func (k Keeper) TokenLogicModuleGlobalMint( } // Mint new uPOKT to the tokenomics module account + coinsToMint := sdk.NewCoins(newMintCoin) if err := k.bankKeeper.MintCoins(ctx, tokenomictypes.ModuleName, sdk.NewCoins(newMintCoin)); err != nil { return tokenomicstypes.ErrTokenomicsModuleMintFailed.Wrapf( "minting (%s) to the tokenomics module account: %v", newMintCoin, err) } + + for _, coin := range coinsToMint { + if coin.Amount.IsInt64() { + defer telemetry.MintedTokensFromModule(tokenomictypes.ModuleName, float32(coin.Amount.Int64())) + } + } logger.Info(fmt.Sprintf("minted (%s) to the tokenomics module account", newMintCoin)) // Send a portion of the rewards to the application 
From 549710eb0508efb8b494372511c7b72a5db91d97 Mon Sep 17 00:00:00 2001 From: "Dmitry K." Date: Thu, 26 Sep 2024 17:26:11 -0500 Subject: [PATCH 05/32] bring back original --- x/tokenomics/keeper/token_logic_modules.go | 28 +++++++++------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/x/tokenomics/keeper/token_logic_modules.go b/x/tokenomics/keeper/token_logic_modules.go index 35c3ec8de..ae8c0e69b 100644 --- a/x/tokenomics/keeper/token_logic_modules.go +++ b/x/tokenomics/keeper/token_logic_modules.go @@ -316,9 +316,8 @@ func (k Keeper) TokenLogicModuleRelayBurnEqualsMint( // Mint new uPOKT to the supplier module account. // These funds will be transferred to the supplier's shareholders below. // For reference, see operate/configs/supplier_staking_config.md. - coinsToMint := sdk.NewCoins(settlementCoin) if err := k.bankKeeper.MintCoins( - ctx, suppliertypes.ModuleName, coinsToMint, + ctx, suppliertypes.ModuleName, sdk.NewCoins(settlementCoin), ); err != nil { return tokenomicstypes.ErrTokenomicsSupplierModuleSendFailed.Wrapf( "minting %s to the supplier module account: %v", @@ -327,11 +326,10 @@ func (k Keeper) TokenLogicModuleRelayBurnEqualsMint( ) } - for _, coin := range coinsToMint { - if coin.Amount.IsInt64() { - defer telemetry.MintedTokensFromModule(suppliertypes.ModuleName, float32(coin.Amount.Int64())) - } + if settlementCoin.Amount.IsInt64() { + defer telemetry.MintedTokensFromModule(suppliertypes.ModuleName, float32(settlementCoin.Amount.Int64())) } + logger.Debug(fmt.Sprintf("minted (%v) coins in the supplier module", settlementCoin)) // Distribute the rewards to the supplier's shareholders based on the rev share percentage. @@ -346,18 +344,16 @@ func (k Keeper) TokenLogicModuleRelayBurnEqualsMint( // Burn uPOKT from the application module account which was held in escrow // on behalf of the application account. 
- coinsToBurn := sdk.NewCoins(settlementCoin) if err := k.bankKeeper.BurnCoins( - ctx, apptypes.ModuleName, coinsToBurn, + ctx, apptypes.ModuleName, sdk.NewCoins(settlementCoin), ); err != nil { return tokenomicstypes.ErrTokenomicsApplicationModuleBurn.Wrapf("burning %s from the application module account: %v", settlementCoin, err) } - for _, coin := range coinsToBurn { - if coin.Amount.IsInt64() { - defer telemetry.BurnedTokensFromModule(apptypes.ModuleName, float32(coin.Amount.Int64())) - } + if settlementCoin.Amount.IsInt64() { + defer telemetry.BurnedTokensFromModule(apptypes.ModuleName, float32(settlementCoin.Amount.Int64())) } + logger.Debug(fmt.Sprintf("burned (%v) from the application module account", settlementCoin)) // Update the application's on-chain stake @@ -396,17 +392,15 @@ func (k Keeper) TokenLogicModuleGlobalMint( } // Mint new uPOKT to the tokenomics module account - coinsToMint := sdk.NewCoins(newMintCoin) if err := k.bankKeeper.MintCoins(ctx, tokenomictypes.ModuleName, sdk.NewCoins(newMintCoin)); err != nil { return tokenomicstypes.ErrTokenomicsModuleMintFailed.Wrapf( "minting (%s) to the tokenomics module account: %v", newMintCoin, err) } - for _, coin := range coinsToMint { - if coin.Amount.IsInt64() { - defer telemetry.MintedTokensFromModule(tokenomictypes.ModuleName, float32(coin.Amount.Int64())) - } + if newMintCoin.Amount.IsInt64() { + defer telemetry.MintedTokensFromModule(tokenomictypes.ModuleName, float32(newMintCoin.Amount.Int64())) } + logger.Info(fmt.Sprintf("minted (%s) to the tokenomics module account", newMintCoin)) // Send a portion of the rewards to the application From 208f824c7b13b6fefbf8c4071b02e20c6c372dfa Mon Sep 17 00:00:00 2001 From: "Dmitry K." 
Date: Thu, 26 Sep 2024 19:38:11 -0500 Subject: [PATCH 06/32] TODO: figure out why prometheus doesn't scrape anymore --- .../kubernetes/observability-prometheus-stack.yaml | 11 +++++++++++ pkg/relayer/session/sessiontree.go | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/localnet/kubernetes/observability-prometheus-stack.yaml b/localnet/kubernetes/observability-prometheus-stack.yaml index c6a29b67b..7313bafdd 100644 --- a/localnet/kubernetes/observability-prometheus-stack.yaml +++ b/localnet/kubernetes/observability-prometheus-stack.yaml @@ -20,4 +20,15 @@ grafana: prometheus: prometheusSpec: + # podMonitorNamespaceSelector: + # any: true + # podMonitorSelector: {} + # podMonitorSelectorNilUsesHelmValues: false + # ruleNamespaceSelector: + # any: true + # ruleSelector: {} + # ruleSelectorNilUsesHelmValues: false + # serviceMonitorNamespaceSelector: + # any: true + # serviceMonitorSelector: {} serviceMonitorSelectorNilUsesHelmValues: false diff --git a/pkg/relayer/session/sessiontree.go b/pkg/relayer/session/sessiontree.go index 8969390c2..2c6abe07f 100644 --- a/pkg/relayer/session/sessiontree.go +++ b/pkg/relayer/session/sessiontree.go @@ -279,7 +279,7 @@ func (st *sessionTree) Delete() error { return err } } else { - fmt.Println("DIMA: the tree store has been deleted already. How come?") + fmt.Println("Unable to delete as tree store is nil") } // Delete the KVStore from disk From 90624df9b4bbf7be644814e78e5a66013c5db606 Mon Sep 17 00:00:00 2001 From: "Dmitry K." 
Date: Fri, 27 Sep 2024 13:45:47 -0500 Subject: [PATCH 07/32] log level and reduce verbosity of some logs --- Tiltfile | 22 +++++++++++++++++++--- telemetry/event_counters.go | 19 +++++++++++++++++++ x/session/keeper/query_get_session.go | 2 +- x/session/keeper/session_hydrator.go | 13 ++++++++----- 4 files changed, 47 insertions(+), 9 deletions(-) diff --git a/Tiltfile b/Tiltfile index 8c339de7f..9bf118570 100644 --- a/Tiltfile +++ b/Tiltfile @@ -38,14 +38,26 @@ localnet_config_defaults = { "enabled": True, "grafana": {"defaultDashboardsEnabled": False}, }, - "relayminers": {"count": 1, "delve": {"enabled": False}}, + "relayminers": { + "count": 1, + "delve": {"enabled": False}, + "logs": { + "level": "debug", + }, + }, "gateways": { "count": 1, "delve": {"enabled": False}, + "logs": { + "level": "debug", + }, }, "appgateservers": { "count": 1, "delve": {"enabled": False}, + "logs": { + "level": "debug", + }, }, # TODO_BLOCKER(@red-0ne, #511): Add support for `REST` and enabled this. "ollama": { @@ -89,8 +101,9 @@ if localnet_config["observability"]["enabled"]: helm_repo("prometheus-community", "https://prometheus-community.github.io/helm-charts") helm_repo("grafana-helm-repo", "https://grafana.github.io/helm-charts") - # Increase timeout for building the image - update_settings(k8s_upsert_timeout_secs=60) + # Increase timeout for building the image; the default is 30, which can be too low for slow internet connections to pull + # container images. 
+ update_settings(k8s_upsert_timeout_secs=120) # helm_resource( "observability", @@ -233,6 +246,7 @@ for x in range(localnet_config["relayminers"]["count"]): "--values=./localnet/kubernetes/values-relayminer-" + str(actor_number) + ".yaml", "--set=metrics.serviceMonitor.enabled=" + str(localnet_config["observability"]["enabled"]), "--set=development.delve.enabled=" + str(localnet_config["relayminers"]["delve"]["enabled"]), + "--set=logLevel=" + str(localnet_config["relayminers"]["logs"]["level"]), ], image_deps=["poktrolld"], image_keys=[("image.repository", "image.tag")], @@ -273,6 +287,7 @@ for x in range(localnet_config["appgateservers"]["count"]): "--set=config.signing_key=app" + str(actor_number), "--set=metrics.serviceMonitor.enabled=" + str(localnet_config["observability"]["enabled"]), "--set=development.delve.enabled=" + str(localnet_config["appgateservers"]["delve"]["enabled"]), + "--set=logLevel=" + str(localnet_config["appgateservers"]["logs"]["level"]), ], image_deps=["poktrolld"], image_keys=[("image.repository", "image.tag")], @@ -314,6 +329,7 @@ for x in range(localnet_config["gateways"]["count"]): "--set=config.signing_key=gateway" + str(actor_number), "--set=metrics.serviceMonitor.enabled=" + str(localnet_config["observability"]["enabled"]), "--set=development.delve.enabled=" + str(localnet_config["gateways"]["delve"]["enabled"]), + "--set=logLevel=" + str(localnet_config["gateways"]["logs"]["level"]), ], image_deps=["poktrolld"], image_keys=[("image.repository", "image.tag")], diff --git a/telemetry/event_counters.go b/telemetry/event_counters.go index 98f1895ff..bd02b21ba 100644 --- a/telemetry/event_counters.go +++ b/telemetry/event_counters.go @@ -6,6 +6,8 @@ package telemetry import ( + "strconv" + "github.com/cosmos/cosmos-sdk/telemetry" "github.com/hashicorp/go-metrics" @@ -171,3 +173,20 @@ func RelayEMAGauge(relayEMA uint64, serviceId string) { labels, ) } + +// SessionSuppliersGauge sets a gauge which tracks the number of candidates 
available +// for session suppliers at the given maxPerSession value. +// The serviceId is used as a label to be able to track this information for each service. +func SessionSuppliersGauge(candidates int, maxPerSession int, serviceId string) { + maxPerSessionStr := strconv.Itoa(maxPerSession) + labels := []metrics.Label{ + {Name: "service_id", Value: serviceId}, + {Name: "max_per_session", Value: maxPerSessionStr}, + } + + telemetry.SetGaugeWithLabels( + MetricNameKeys("session", "suppliers"), + float32(candidates), + labels, + ) +} diff --git a/x/session/keeper/query_get_session.go b/x/session/keeper/query_get_session.go index 166c77d37..bc25fff0e 100644 --- a/x/session/keeper/query_get_session.go +++ b/x/session/keeper/query_get_session.go @@ -36,7 +36,7 @@ func (k Keeper) GetSession(ctx context.Context, req *types.QueryGetSessionReques blockHeight = req.BlockHeight } - k.Logger().Info(fmt.Sprintf("Getting session for height: %d", blockHeight)) + k.Logger().Debug(fmt.Sprintf("Getting session for height: %d", blockHeight)) sessionHydrator := NewSessionHydrator(req.ApplicationAddress, req.ServiceId, blockHeight) session, err := k.HydrateSession(ctx, sessionHydrator) diff --git a/x/session/keeper/session_hydrator.go b/x/session/keeper/session_hydrator.go index 2d66b5844..b47b077d2 100644 --- a/x/session/keeper/session_hydrator.go +++ b/x/session/keeper/session_hydrator.go @@ -12,6 +12,7 @@ import ( sdk "github.com/cosmos/cosmos-sdk/types" _ "golang.org/x/crypto/sha3" + "github.com/pokt-network/poktroll/telemetry" "github.com/pokt-network/poktroll/x/session/types" "github.com/pokt-network/poktroll/x/shared" sharedtypes "github.com/pokt-network/poktroll/x/shared/types" @@ -68,22 +69,22 @@ func (k Keeper) HydrateSession(ctx context.Context, sh *sessionHydrator) (*types if err := k.hydrateSessionMetadata(ctx, sh); err != nil { return nil, err } - logger.Info("Finished hydrating session metadata") + logger.Debug("Finished hydrating session metadata") if err := 
k.hydrateSessionID(ctx, sh); err != nil { return nil, err } - logger.Info(fmt.Sprintf("Finished hydrating session ID: %s", sh.sessionHeader.SessionId)) + logger.Debug(fmt.Sprintf("Finished hydrating session ID: %s", sh.sessionHeader.SessionId)) if err := k.hydrateSessionApplication(ctx, sh); err != nil { return nil, err } - logger.Info(fmt.Sprintf("Finished hydrating session application: %+v", sh.session.Application)) + logger.Debug(fmt.Sprintf("Finished hydrating session application: %+v", sh.session.Application)) if err := k.hydrateSessionSuppliers(ctx, sh); err != nil { return nil, err } - logger.Info("Finished hydrating session suppliers") + logger.Debug("Finished hydrating session suppliers") sh.session.Header = sh.sessionHeader sh.session.SessionId = sh.sessionHeader.SessionId @@ -198,6 +199,8 @@ func (k Keeper) hydrateSessionSuppliers(ctx context.Context, sh *sessionHydrator } } + telemetry.SessionSuppliersGauge(len(candidateSuppliers), NumSupplierPerSession, sh.sessionHeader.ServiceId) + if len(candidateSuppliers) == 0 { logger.Error("[ERROR] no suppliers found for session") return types.ErrSessionSuppliersNotFound.Wrapf( @@ -208,7 +211,7 @@ func (k Keeper) hydrateSessionSuppliers(ctx context.Context, sh *sessionHydrator } if len(candidateSuppliers) < NumSupplierPerSession { - logger.Info(fmt.Sprintf( + logger.Debug(fmt.Sprintf( "Number of available suppliers (%d) is less than the maximum number of possible suppliers per session (%d)", len(candidateSuppliers), NumSupplierPerSession, From 8fa8e0468721bb38d441955828aff8c31d5d0567 Mon Sep 17 00:00:00 2001 From: "Dmitry K." 
Date: Fri, 27 Sep 2024 15:41:17 -0500 Subject: [PATCH 08/32] increase the stakes to run the load-test --- config.yml | 14 +++++++------- .../config/application1_stake_config.yaml | 2 +- .../poktrolld/config/gateway1_stake_config.yaml | 2 +- .../poktrolld/config/supplier1_stake_config.yaml | 2 +- x/session/keeper/session_hydrator.go | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/config.yml b/config.yml index 4c5215779..0eef70f67 100644 --- a/config.yml +++ b/config.yml @@ -18,7 +18,7 @@ accounts: - name: app1 mnemonic: "mention spy involve verb exercise fiction catalog order agent envelope mystery text defy sing royal fringe return face alpha knife wonder vocal virus drum" coins: - - 100000000upokt + - 10000000000000upokt - name: app2 mnemonic: "material little labor strong search device trick amateur action crouch invite glide provide elite mango now paper sense found hamster neglect work install bulk" coins: @@ -34,7 +34,7 @@ accounts: - name: supplier1 mnemonic: "cool industry busy tumble funny relax error state height like board wing goat emerge visual idle never unveil announce hill primary okay spatial frog" coins: - - 110000000upokt + - 11000000000000upokt - name: supplier2 mnemonic: "peanut hen enroll meat legal have error input bulk later correct denial onion fossil wing excuse elephant object apology switch claim rare decide surface" coins: @@ -46,7 +46,7 @@ accounts: - name: gateway1 mnemonic: "salt iron goat also absorb depend involve agent apology between lift shy door left bulb arrange industry father jelly olive rifle return predict into" coins: - - 100000000upokt + - 10000000000000upokt - name: gateway2 mnemonic: "suffer wet jelly furnace cousin flip layer render finish frequent pledge feature economy wink like water disease final erase goat include apple state furnace" coins: @@ -175,7 +175,7 @@ genesis: stake: # NB: This value should be exactly 1upokt smaller than the value in # `supplier1_stake_config.yaml` so that the 
stake command causes a state change. - amount: "1000068" + amount: "100000000068" denom: upokt - address: pokt1ad28jdap2zfanjd7hpkh984yveney6k9a42man delegatee_gateway_addresses: [] @@ -186,7 +186,7 @@ genesis: stake: # NB: This value should be exactly 1upokt smaller than the value in # `supplier1_stake_config.yaml` so that the stake command causes a state change. - amount: "1000068" + amount: "100000000068" denom: upokt supplier: supplierList: @@ -220,7 +220,7 @@ genesis: stake: # NB: This value should be exactly 1upokt smaller than the value in # `application1_stake_config.yaml` so that the stake command causes a state change. - amount: "1000068" + amount: "100000000068" denom: upokt gateway: gatewayList: @@ -228,7 +228,7 @@ genesis: stake: # NB: This value should be exactly 1upokt smaller than the value in # `gateway1_stake_config.yaml` so that the stake command causes a state change. - amount: "1000068" + amount: "100000000068" denom: upokt service: params: diff --git a/localnet/poktrolld/config/application1_stake_config.yaml b/localnet/poktrolld/config/application1_stake_config.yaml index b4e8aa96c..97a874e20 100644 --- a/localnet/poktrolld/config/application1_stake_config.yaml +++ b/localnet/poktrolld/config/application1_stake_config.yaml @@ -1,4 +1,4 @@ -stake_amount: 1000069upokt +stake_amount: 100000000069upokt service_ids: - anvil - svc1 diff --git a/localnet/poktrolld/config/gateway1_stake_config.yaml b/localnet/poktrolld/config/gateway1_stake_config.yaml index dc6a9bad7..83678608d 100644 --- a/localnet/poktrolld/config/gateway1_stake_config.yaml +++ b/localnet/poktrolld/config/gateway1_stake_config.yaml @@ -1 +1 @@ -stake_amount: 1000069upokt +stake_amount: 100000000069upokt diff --git a/localnet/poktrolld/config/supplier1_stake_config.yaml b/localnet/poktrolld/config/supplier1_stake_config.yaml index 228e6ade2..5e9896556 100644 --- a/localnet/poktrolld/config/supplier1_stake_config.yaml +++ b/localnet/poktrolld/config/supplier1_stake_config.yaml @@ 
-1,6 +1,6 @@ owner_address: pokt1mrqt5f7qh8uxs27cjm9t7v9e74a9vvdnq5jva4 operator_address: pokt1mrqt5f7qh8uxs27cjm9t7v9e74a9vvdnq5jva4 -stake_amount: 1000069upokt +stake_amount: 100000000069upokt default_rev_share_percent: - pokt1mrqt5f7qh8uxs27cjm9t7v9e74a9vvdnq5jva4: 80.5 - pokt1eeeksh2tvkh7wzmfrljnhw4wrhs55lcuvmekkw: 19.5 diff --git a/x/session/keeper/session_hydrator.go b/x/session/keeper/session_hydrator.go index b47b077d2..f92b80520 100644 --- a/x/session/keeper/session_hydrator.go +++ b/x/session/keeper/session_hydrator.go @@ -199,7 +199,7 @@ func (k Keeper) hydrateSessionSuppliers(ctx context.Context, sh *sessionHydrator } } - telemetry.SessionSuppliersGauge(len(candidateSuppliers), NumSupplierPerSession, sh.sessionHeader.ServiceId) + defer telemetry.SessionSuppliersGauge(len(candidateSuppliers), NumSupplierPerSession, sh.sessionHeader.ServiceId) if len(candidateSuppliers) == 0 { logger.Error("[ERROR] no suppliers found for session") From 02620623eb7d865394ca66209cd0a2256add00e1 Mon Sep 17 00:00:00 2001 From: "Dmitry K." 
Date: Mon, 30 Sep 2024 17:30:40 -0700 Subject: [PATCH 09/32] --wip-- [skip ci] --- .../cosmos_sdk_insights.json | 262 ++++++++++++++++++ .../observability-prometheus-stack.yaml | 20 +- 2 files changed, 270 insertions(+), 12 deletions(-) create mode 100644 localnet/grafana-dashboards/cosmos_sdk_insights.json diff --git a/localnet/grafana-dashboards/cosmos_sdk_insights.json b/localnet/grafana-dashboards/cosmos_sdk_insights.json new file mode 100644 index 000000000..eff3440ef --- /dev/null +++ b/localnet/grafana-dashboards/cosmos_sdk_insights.json @@ -0,0 +1,262 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 11, + "links": [], + "panels": [ + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + 
"y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(begin_blocker) by (module, quantile)", + "instant": false, + "legendFormat": "{{module}}: q{{quantile}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(begin_blocker_sum) by (module) / sum(begin_blocker_count) by (module)", + "hide": false, + "instant": false, + "legendFormat": "{{module}}: avg", + "range": true, + "refId": "B" + } + ], + "title": "Begin Blocker Time", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": 
"bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(end_blocker) by (module, quantile)", + "instant": false, + "legendFormat": "{{module}}: q{{quantile}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(end_blocker_sum) by (module) / sum(end_blocker_count) by (module)", + "hide": false, + "instant": false, + "legendFormat": "{{module}}: avg", + "range": true, + "refId": "B" + } + ], + "title": "End Blocker Time", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Protocol / Cosmos SDK Insights", + "uid": "adzickiu028lcb", + "version": 1, + "weekStart": "" + } \ No newline at end of file diff --git a/localnet/kubernetes/observability-prometheus-stack.yaml b/localnet/kubernetes/observability-prometheus-stack.yaml index 7313bafdd..99894b497 100644 --- a/localnet/kubernetes/observability-prometheus-stack.yaml +++ b/localnet/kubernetes/observability-prometheus-stack.yaml @@ -20,15 +20,11 @@ grafana: prometheus: prometheusSpec: - # podMonitorNamespaceSelector: - # any: true - # podMonitorSelector: {} - # podMonitorSelectorNilUsesHelmValues: false - # ruleNamespaceSelector: - # any: true - # ruleSelector: {} - # ruleSelectorNilUsesHelmValues: false - # serviceMonitorNamespaceSelector: - # any: true - # serviceMonitorSelector: {} - serviceMonitorSelectorNilUsesHelmValues: false + scrapeConfigSelector: + matchLabels: null + serviceMonitorSelector: + matchLabels: null + ruleSelector: + matchLabels: null + podMonitorSelector: + matchLabels: null From d39fb8f75666f3a1ff28ce2e63d54cf4c993163f Mon Sep 17 00:00:00 2001 From: "Dmitry K." 
Date: Wed, 2 Oct 2024 16:52:29 -0700 Subject: [PATCH 10/32] --wip-- [skip ci] --- .../cosmos_sdk_insights.json | 267 +++++++++++++++++- telemetry/event_counters.go | 5 + x/proof/keeper/msg_server_create_claim.go | 20 +- x/proof/keeper/msg_server_submit_proof.go | 2 +- 4 files changed, 279 insertions(+), 15 deletions(-) diff --git a/localnet/grafana-dashboards/cosmos_sdk_insights.json b/localnet/grafana-dashboards/cosmos_sdk_insights.json index eff3440ef..667dea570 100644 --- a/localnet/grafana-dashboards/cosmos_sdk_insights.json +++ b/localnet/grafana-dashboards/cosmos_sdk_insights.json @@ -18,7 +18,6 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 11, "links": [], "panels": [ { @@ -81,7 +80,20 @@ }, "unit": "ms" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*: avg$" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 3 + } + ] + } + ] }, "gridPos": { "h": 8, @@ -192,7 +204,20 @@ }, "unit": "ms" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*: avg$" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 3 + } + ] + } + ] }, "gridPos": { "h": 8, @@ -242,6 +267,240 @@ ], "title": "End Blocker Time", "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(poktroll_minted_tokens[1m])) by (module)", + "instant": false, + "legendFormat": "minted by {{module}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(poktroll_burned_tokens[1m])) by (module)", + "hide": false, + "instant": false, + "legendFormat": "burned by {{module}}", + "range": true, + "refId": "B" + } + ], + "title": "Minted / Burned Tokens", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*: avg$" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 3 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(poktroll_proof_validation) by (quantile)", + "instant": false, + "legendFormat": "{{module}}: q{{quantile}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(poktroll_proof_validation_sum) / sum(poktroll_proof_validation_count)", + "hide": false, + "instant": false, + "legendFormat": "{{module}}: avg", + "range": true, + "refId": "B" + } + ], + "title": "Proof Validation Time", + "type": "timeseries" } ], "schemaVersion": 39, @@ -250,7 +509,7 @@ "list": [] }, "time": { - "from": "now-3h", + "from": "now-1h", "to": "now" }, "timepicker": {}, diff --git a/telemetry/event_counters.go b/telemetry/event_counters.go index bd02b21ba..9e279e4cf 100644 --- a/telemetry/event_counters.go +++ b/telemetry/event_counters.go @@ -50,11 +50,13 @@ func EventSuccessCounter( // If err is not nil, the counter is not incremented but Prometheus will ingest this event. func ProofRequirementCounter( reason prooftypes.ProofRequirementReason, + serviceId string, err error, ) { incrementAmount := 1 labels := []metrics.Label{ {Name: "reason", Value: reason.String()}, + {Name: "service_id", Value: serviceId}, } // Ensure the counter is not incremented if there was an error. 
@@ -75,6 +77,7 @@ func ProofRequirementCounter( func ClaimComputeUnitsCounter( claimProofStage prooftypes.ClaimProofStage, numComputeUnits uint64, + serviceId string, err error, ) { incrementAmount := numComputeUnits @@ -101,6 +104,7 @@ func ClaimComputeUnitsCounter( func ClaimRelaysCounter( claimProofStage prooftypes.ClaimProofStage, numRelays uint64, + serviceId string, err error, ) { incrementAmount := numRelays @@ -126,6 +130,7 @@ func ClaimRelaysCounter( func ClaimCounter( claimProofStage prooftypes.ClaimProofStage, numClaims uint64, + serviceId string, err error, ) { incrementAmount := numClaims diff --git a/x/proof/keeper/msg_server_create_claim.go b/x/proof/keeper/msg_server_create_claim.go index dd53f3d78..77627731a 100644 --- a/x/proof/keeper/msg_server_create_claim.go +++ b/x/proof/keeper/msg_server_create_claim.go @@ -26,16 +26,6 @@ func (k msgServer) CreateClaim( numClaimComputeUnits uint64 ) - // Defer telemetry calls so that they reference the final values the relevant variables. - defer func() { - // Only increment these metrics counters if handling a new claim. - if !isExistingClaim { - telemetry.ClaimCounter(types.ClaimProofStage_CLAIMED, 1, err) - telemetry.ClaimRelaysCounter(types.ClaimProofStage_CLAIMED, numRelays, err) - telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_CLAIMED, numClaimComputeUnits, err) - } - }() - logger := k.Logger().With("method", "CreateClaim") sdkCtx := cosmostypes.UnwrapSDKContext(ctx) logger.Info("creating claim") @@ -52,6 +42,16 @@ func (k msgServer) CreateClaim( return nil, status.Error(codes.InvalidArgument, err.Error()) } + // Defer telemetry calls so that they reference the final values the relevant variables. + defer func() { + // Only increment these metrics counters if handling a new claim. 
+ if !isExistingClaim { + telemetry.ClaimCounter(types.ClaimProofStage_CLAIMED, 1, err) + telemetry.ClaimRelaysCounter(types.ClaimProofStage_CLAIMED, numRelays, err) + telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_CLAIMED, numClaimComputeUnits, err) + } + }() + // Construct and insert claim claim = types.Claim{ SupplierOperatorAddress: msg.GetSupplierOperatorAddress(), diff --git a/x/proof/keeper/msg_server_submit_proof.go b/x/proof/keeper/msg_server_submit_proof.go index ed819784a..6cc4f46f7 100644 --- a/x/proof/keeper/msg_server_submit_proof.go +++ b/x/proof/keeper/msg_server_submit_proof.go @@ -218,7 +218,7 @@ func (k Keeper) ProofRequirementForClaim(ctx context.Context, claim *types.Claim // Defer telemetry calls so that they reference the final values the relevant variables. defer func() { - telemetry.ProofRequirementCounter(requirementReason, err) + telemetry.ProofRequirementCounter(requirementReason, claim.SessionHeader.ServiceId, err) }() // NB: Assumption that claim is non-nil and has a valid root sum because it From 69f3df562b4335be02e7301c230f656f510673cd Mon Sep 17 00:00:00 2001 From: "Dmitry K." 
Date: Thu, 3 Oct 2024 15:46:55 -0700 Subject: [PATCH 11/32] --wip-- [skip ci] --- Tiltfile | 2 +- .../cosmos_sdk_insights.json | 970 +++++++++--------- telemetry/event_counters.go | 3 + telemetry/tokens.go | 12 + x/proof/keeper/msg_server_create_claim.go | 7 +- x/proof/keeper/msg_server_submit_proof.go | 21 +- x/tokenomics/keeper/settle_pending_claims.go | 53 +- x/tokenomics/keeper/token_logic_modules.go | 3 + x/tokenomics/module/abci.go | 37 - 9 files changed, 584 insertions(+), 524 deletions(-) diff --git a/Tiltfile b/Tiltfile index 9bf118570..60a9e2feb 100644 --- a/Tiltfile +++ b/Tiltfile @@ -6,7 +6,7 @@ load("ext://deployment", "deployment_create") load("ext://execute_in_pod", "execute_in_pod") # A list of directories where changes trigger a hot-reload of the validator -hot_reload_dirs = ["app", "cmd", "tools", "x", "pkg"] +hot_reload_dirs = ["app", "cmd", "tools", "x", "pkg", "telemetry"] def merge_dicts(base, updates): diff --git a/localnet/grafana-dashboards/cosmos_sdk_insights.json b/localnet/grafana-dashboards/cosmos_sdk_insights.json index 667dea570..3ecf3dbd4 100644 --- a/localnet/grafana-dashboards/cosmos_sdk_insights.json +++ b/localnet/grafana-dashboards/cosmos_sdk_insights.json @@ -1,521 +1,547 @@ { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "links": [], - "panels": [ + "annotations": { + "list": [ { + "builtIn": 1, "datasource": { - "default": true, - "type": "prometheus", - "uid": "prometheus" + "type": "grafana", + "uid": "-- Grafana --" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + 
"editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "ms" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": ".*: avg$" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "properties": [ - { - "id": "custom.lineWidth", - "value": 3 - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 
0, - "y": 0 - }, - "id": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + { + "color": "red", + "value": 80 + } + ] }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "unit": "ms" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(begin_blocker) by (module, quantile)", - "instant": false, - "legendFormat": "{{module}}: q{{quantile}}", - "range": true, - "refId": "A" - }, + "overrides": [ { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "matcher": { + "id": "byRegexp", + "options": ".*: avg$" }, - "editorMode": "code", - "expr": "sum(begin_blocker_sum) by (module) / sum(begin_blocker_count) by (module)", - "hide": false, - "instant": false, - "legendFormat": "{{module}}: avg", - "range": true, - "refId": "B" + "properties": [ + { + "id": "custom.lineWidth", + "value": 3 + } + ] } - ], - "title": "Begin Blocker Time", - "type": "timeseries" + ] }, - { - "datasource": { - "default": true, - "type": "prometheus", - "uid": "prometheus" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(begin_blocker{job=\"$job\"}) by (module, quantile)", + "instant": false, + "legendFormat": "{{module}}: q{{quantile}}", + "range": true, + "refId": "A" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(begin_blocker_sum{job=\"$job\"}) by (module) / sum(begin_blocker_count{job=\"$job\"}) by (module)", + "hide": false, + "instant": false, + 
"legendFormat": "{{module}}: avg", + "range": true, + "refId": "B" + } + ], + "title": "Begin Blocker Time", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "ms" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": ".*: avg$" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "properties": [ - { - "id": "custom.lineWidth", - "value": 3 - } - ] - } - ] - }, - 
"gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + { + "color": "red", + "value": 80 + } + ] }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "unit": "ms" }, - "targets": [ + "overrides": [ { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "matcher": { + "id": "byRegexp", + "options": ".*: avg$" }, - "editorMode": "code", - "expr": "sum(end_blocker) by (module, quantile)", - "instant": false, - "legendFormat": "{{module}}: q{{quantile}}", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(end_blocker_sum) by (module) / sum(end_blocker_count) by (module)", - "hide": false, - "instant": false, - "legendFormat": "{{module}}: avg", - "range": true, - "refId": "B" + "properties": [ + { + "id": "custom.lineWidth", + "value": 3 + } + ] } - ], - "title": "End Blocker Time", - "type": "timeseries" + ] }, - { - "datasource": { - "default": true, - "type": "prometheus", - "uid": "prometheus" + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(end_blocker{job=\"$job\"}) by (module, quantile)", + "instant": false, + "legendFormat": "{{module}}: q{{quantile}}", + "range": true, + "refId": "A" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(end_blocker_sum{job=\"$job\"}) by (module) / sum(end_blocker_count{job=\"$job\"}) by (module)", + "hide": false, + 
"instant": false, + "legendFormat": "{{module}}: avg", + "range": true, + "refId": "B" + } + ], + "title": "End Blocker Time", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 + "overrides": [] + }, + "gridPos": { 
+ "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "id": 3, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "editorMode": "code", + "expr": "sum(increase(poktroll_minted_tokens{job=\"$job\"}[1m])) by (module)", + "instant": false, + "legendFormat": "minted by {{module}}", + "range": true, + "refId": "A" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(increase(poktroll_minted_tokens[1m])) by (module)", - "instant": false, - "legendFormat": "minted by {{module}}", - "range": true, - "refId": "A" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(increase(poktroll_burned_tokens[1m])) by (module)", - "hide": false, - "instant": false, - "legendFormat": "burned by {{module}}", - "range": true, - "refId": "B" - } - ], - "title": "Minted / Burned Tokens", - "type": "timeseries" + "editorMode": "code", + "expr": "sum(increase(poktroll_burned_tokens{job=\"$job\"}[1m])) by (module)", + "hide": false, + "instant": false, + "legendFormat": "burned by {{module}}", + "range": true, + "refId": "B" + } + ], + "title": "Minted / Burned Tokens", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "default": true, - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "ms" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": ".*: avg$" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "properties": [ - { - "id": "custom.lineWidth", - "value": 3 - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "id": 4, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + { + "color": "red", + "value": 80 + } + ] }, - "tooltip": { - "mode": "single", - "sort": "none" - } 
+ "unit": "ms" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(poktroll_proof_validation) by (quantile)", - "instant": false, - "legendFormat": "{{module}}: q{{quantile}}", - "range": true, - "refId": "A" - }, + "overrides": [ { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "matcher": { + "id": "byRegexp", + "options": ".*: avg$" }, - "editorMode": "code", - "expr": "sum(poktroll_proof_validation_sum) / sum(poktroll_proof_validation_count)", - "hide": false, - "instant": false, - "legendFormat": "{{module}}: avg", - "range": true, - "refId": "B" + "properties": [ + { + "id": "custom.lineWidth", + "value": 3 + } + ] } - ], - "title": "Proof Validation Time", - "type": "timeseries" + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(poktroll_proof_validation{job=\"$job\"}) by (quantile)", + "instant": false, + "legendFormat": "{{module}}: q{{quantile}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(poktroll_proof_validation_sum{job=\"$job\"}) / sum(poktroll_proof_validation_count{job=\"$job\"})", + "hide": false, + "instant": false, + "legendFormat": "{{module}}: avg", + "range": true, + "refId": "B" + } + ], + "title": "Proof Validation Time", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "validator-poktroll-validator", + "value": "validator-poktroll-validator" + }, + "definition": 
"label_values(cometbft_consensus_height,job)", + "hide": 0, + "includeAll": false, + "label": "Full Node", + "multi": false, + "name": "job", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(cometbft_consensus_height,job)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" } - ], - "schemaVersion": 39, - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": {}, - "timezone": "browser", - "title": "Protocol / Cosmos SDK Insights", - "uid": "adzickiu028lcb", - "version": 1, - "weekStart": "" - } \ No newline at end of file + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Protocol / Cosmos SDK Insights", + "uid": "adzickiu028lcb", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/telemetry/event_counters.go b/telemetry/event_counters.go index 9e279e4cf..3dde498e5 100644 --- a/telemetry/event_counters.go +++ b/telemetry/event_counters.go @@ -83,6 +83,7 @@ func ClaimComputeUnitsCounter( incrementAmount := numComputeUnits labels := []metrics.Label{ {Name: "proof_stage", Value: claimProofStage.String()}, + {Name: "service_id", Value: serviceId}, } // Ensure the counter is not incremented if there was an error. @@ -110,6 +111,7 @@ func ClaimRelaysCounter( incrementAmount := numRelays labels := []metrics.Label{ {Name: "proof_stage", Value: claimProofStage.String()}, + {Name: "service_id", Value: serviceId}, } // Ensure the counter is not incremented if there was an error. @@ -136,6 +138,7 @@ func ClaimCounter( incrementAmount := numClaims labels := []metrics.Label{ {Name: "proof_stage", Value: claimProofStage.String()}, + {Name: "service_id", Value: serviceId}, } // Ensure the counter is not incremented if there was an error. 
diff --git a/telemetry/tokens.go b/telemetry/tokens.go index 99d136bcb..447933c86 100644 --- a/telemetry/tokens.go +++ b/telemetry/tokens.go @@ -34,3 +34,15 @@ func BurnedTokensFromModule(module string, amount float32) { }, ) } + +// SlashedTokensFromModule is a function to track token slashing from a specific module. +// The metric used is an increment counter, and the label includes the module name for context. +func SlashedTokensFromModule(module string, amount float32) { + cosmostelemetry.IncrCounterWithLabels( + MetricNameKeys("slashed", "tokens"), + amount, + []metrics.Label{ + cosmostelemetry.NewLabel("module", module), + }, + ) +} diff --git a/x/proof/keeper/msg_server_create_claim.go b/x/proof/keeper/msg_server_create_claim.go index 77627731a..fca82eb11 100644 --- a/x/proof/keeper/msg_server_create_claim.go +++ b/x/proof/keeper/msg_server_create_claim.go @@ -44,11 +44,12 @@ func (k msgServer) CreateClaim( // Defer telemetry calls so that they reference the final values the relevant variables. defer func() { + serviceId := session.Header.ServiceId // Only increment these metrics counters if handling a new claim. 
if !isExistingClaim { - telemetry.ClaimCounter(types.ClaimProofStage_CLAIMED, 1, err) - telemetry.ClaimRelaysCounter(types.ClaimProofStage_CLAIMED, numRelays, err) - telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_CLAIMED, numClaimComputeUnits, err) + telemetry.ClaimCounter(types.ClaimProofStage_CLAIMED, 1, serviceId, err) + telemetry.ClaimRelaysCounter(types.ClaimProofStage_CLAIMED, numRelays, serviceId, err) + telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_CLAIMED, numClaimComputeUnits, serviceId, err) } }() diff --git a/x/proof/keeper/msg_server_submit_proof.go b/x/proof/keeper/msg_server_submit_proof.go index 6cc4f46f7..71584d437 100644 --- a/x/proof/keeper/msg_server_submit_proof.go +++ b/x/proof/keeper/msg_server_submit_proof.go @@ -48,16 +48,6 @@ func (k msgServer) SubmitProof( numComputeUnits uint64 ) - // Defer telemetry calls so that they reference the final values the relevant variables. - defer func() { - // Only increment these metrics counters if handling a new claim. - if !isExistingProof { - telemetry.ClaimCounter(types.ClaimProofStage_PROVEN, 1, err) - telemetry.ClaimRelaysCounter(types.ClaimProofStage_PROVEN, numRelays, err) - telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_PROVEN, numComputeUnits, err) - } - }() - logger := k.Logger().With("method", "SubmitProof") sdkCtx := cosmostypes.UnwrapSDKContext(ctx) logger.Info("About to start submitting proof") @@ -74,6 +64,17 @@ func (k msgServer) SubmitProof( return nil, status.Error(codes.InvalidArgument, err.Error()) } + // Defer telemetry calls so that they reference the final values the relevant variables. + defer func() { + serviceId := session.Header.ServiceId + // Only increment these metrics counters if handling a new claim. 
+ if !isExistingProof { + telemetry.ClaimCounter(types.ClaimProofStage_PROVEN, 1, serviceId, err) + telemetry.ClaimRelaysCounter(types.ClaimProofStage_PROVEN, numRelays, serviceId, err) + telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_PROVEN, numComputeUnits, serviceId, err) + } + }() + if err = k.deductProofSubmissionFee(ctx, msg.GetSupplierOperatorAddress()); err != nil { logger.Error(fmt.Sprintf("failed to deduct proof submission fee: %v", err)) return nil, status.Error(codes.FailedPrecondition, err.Error()) diff --git a/x/tokenomics/keeper/settle_pending_claims.go b/x/tokenomics/keeper/settle_pending_claims.go index 321196f1d..08978c6b5 100644 --- a/x/tokenomics/keeper/settle_pending_claims.go +++ b/x/tokenomics/keeper/settle_pending_claims.go @@ -8,6 +8,7 @@ import ( "github.com/cosmos/cosmos-sdk/types/query" "github.com/pokt-network/poktroll/app/volatile" + "github.com/pokt-network/poktroll/telemetry" prooftypes "github.com/pokt-network/poktroll/x/proof/types" "github.com/pokt-network/poktroll/x/shared" sharedtypes "github.com/pokt-network/poktroll/x/shared/types" @@ -144,6 +145,29 @@ func (k Keeper) SettlePendingClaims(ctx sdk.Context) ( expiredResult.NumClaims++ expiredResult.NumRelays += numClaimRelays expiredResult.NumComputeUnits += numClaimComputeUnits + + // Telemetry - defer telemetry calls so that they reference the final values the relevant variables. 
+ defer func() { + telemetry.ClaimCounter( + prooftypes.ClaimProofStage_EXPIRED, + 1, + claim.SessionHeader.ServiceId, + err, + ) + telemetry.ClaimRelaysCounter( + prooftypes.ClaimProofStage_EXPIRED, + numClaimRelays, + claim.SessionHeader.ServiceId, + err, + ) + telemetry.ClaimComputeUnitsCounter( + prooftypes.ClaimProofStage_EXPIRED, + numClaimComputeUnits, + claim.SessionHeader.ServiceId, + err, + ) + }() + continue } } @@ -196,7 +220,29 @@ func (k Keeper) SettlePendingClaims(ctx sdk.Context) ( settledResult.NumComputeUnits += numClaimComputeUnits settledResult.RelaysPerServiceMap[claim.SessionHeader.ServiceId] += numClaimRelays - logger.Info(fmt.Sprintf("Successfully settled claim for session ID %q at block height %d", claim.SessionHeader.SessionId, blockHeight)) + logger.Debug(fmt.Sprintf("Successfully settled claim for session ID %q at block height %d", claim.SessionHeader.SessionId, blockHeight)) + + // Telemetry - defer telemetry calls so that they reference the final values the relevant variables. + defer func() { + telemetry.ClaimCounter( + prooftypes.ClaimProofStage_SETTLED, + 1, + claim.SessionHeader.ServiceId, + err, + ) + telemetry.ClaimRelaysCounter( + prooftypes.ClaimProofStage_SETTLED, + numClaimRelays, + claim.SessionHeader.ServiceId, + err, + ) + telemetry.ClaimComputeUnitsCounter( + prooftypes.ClaimProofStage_SETTLED, + numClaimComputeUnits, + claim.SessionHeader.ServiceId, + err, + ) + }() } // Slash all the suppliers that have been marked for slashing slashingCount times. 
@@ -311,6 +357,11 @@ func (k Keeper) slashSupplierStake( return err } + // Update telemetry information + if totalSlashingCoin.Amount.IsInt64() { + defer telemetry.SlashedTokensFromModule(suppliertypes.ModuleName, float32(totalSlashingCoin.Amount.Int64())) + } + supplierToSlash.Stake = &remainingStakeCoin logger.Info(fmt.Sprintf( diff --git a/x/tokenomics/keeper/token_logic_modules.go b/x/tokenomics/keeper/token_logic_modules.go index ae8c0e69b..a817880e9 100644 --- a/x/tokenomics/keeper/token_logic_modules.go +++ b/x/tokenomics/keeper/token_logic_modules.go @@ -326,6 +326,7 @@ func (k Keeper) TokenLogicModuleRelayBurnEqualsMint( ) } + // Update telemetry information if settlementCoin.Amount.IsInt64() { defer telemetry.MintedTokensFromModule(suppliertypes.ModuleName, float32(settlementCoin.Amount.Int64())) } @@ -350,6 +351,7 @@ func (k Keeper) TokenLogicModuleRelayBurnEqualsMint( return tokenomicstypes.ErrTokenomicsApplicationModuleBurn.Wrapf("burning %s from the application module account: %v", settlementCoin, err) } + // Update telemetry information if settlementCoin.Amount.IsInt64() { defer telemetry.BurnedTokensFromModule(apptypes.ModuleName, float32(settlementCoin.Amount.Int64())) } @@ -397,6 +399,7 @@ func (k Keeper) TokenLogicModuleGlobalMint( "minting (%s) to the tokenomics module account: %v", newMintCoin, err) } + // Update telemetry information if newMintCoin.Amount.IsInt64() { defer telemetry.MintedTokensFromModule(tokenomictypes.ModuleName, float32(newMintCoin.Amount.Int64())) } diff --git a/x/tokenomics/module/abci.go b/x/tokenomics/module/abci.go index db479da2f..815395cfd 100644 --- a/x/tokenomics/module/abci.go +++ b/x/tokenomics/module/abci.go @@ -8,7 +8,6 @@ import ( "github.com/pokt-network/poktroll/pkg/crypto/protocol" "github.com/pokt-network/poktroll/telemetry" - prooftypes "github.com/pokt-network/poktroll/x/proof/types" "github.com/pokt-network/poktroll/x/tokenomics/keeper" "github.com/pokt-network/poktroll/x/tokenomics/types" ) @@ -37,42 
+36,6 @@ func EndBlocker(ctx sdk.Context, k keeper.Keeper) (err error) { expiredResult.NumClaims, )) - // Telemetry - defer telemetry calls so that they reference the final values the relevant variables. - defer func() { - telemetry.ClaimCounter( - prooftypes.ClaimProofStage_SETTLED, - settledResult.NumClaims, - err, - ) - telemetry.ClaimRelaysCounter( - prooftypes.ClaimProofStage_SETTLED, - settledResult.NumRelays, - err, - ) - telemetry.ClaimComputeUnitsCounter( - prooftypes.ClaimProofStage_SETTLED, - settledResult.NumComputeUnits, - err, - ) - - telemetry.ClaimCounter( - prooftypes.ClaimProofStage_EXPIRED, - expiredResult.NumClaims, - err, - ) - telemetry.ClaimRelaysCounter( - prooftypes.ClaimProofStage_EXPIRED, - expiredResult.NumRelays, - err, - ) - telemetry.ClaimComputeUnitsCounter( - prooftypes.ClaimProofStage_EXPIRED, - expiredResult.NumComputeUnits, - err, - ) - // TODO_IMPROVE(#observability): Add a counter for expired compute units. - }() - // Update the relay mining difficulty for every service that settled pending // claims based on how many estimated relays were serviced for it. difficultyPerServiceMap, err := k.UpdateRelayMiningDifficulty(ctx, settledResult.RelaysPerServiceMap) From fbfe6d1ee8686fdd766c8a021996dc04d28fd7b5 Mon Sep 17 00:00:00 2001 From: "Dmitry K." 
Date: Thu, 3 Oct 2024 16:29:40 -0700 Subject: [PATCH 12/32] --wip-- [skip ci] --- .../cosmos_sdk_insights.json | 112 +++++++++++++++++- 1 file changed, 111 insertions(+), 1 deletion(-) diff --git a/localnet/grafana-dashboards/cosmos_sdk_insights.json b/localnet/grafana-dashboards/cosmos_sdk_insights.json index 3ecf3dbd4..0b018c171 100644 --- a/localnet/grafana-dashboards/cosmos_sdk_insights.json +++ b/localnet/grafana-dashboards/cosmos_sdk_insights.json @@ -373,9 +373,22 @@ "legendFormat": "burned by {{module}}", "range": true, "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(poktroll_slashed_tokens{job=\"$job\"}[1m])) by (module)", + "hide": false, + "instant": false, + "legendFormat": "slashed by {{module}}", + "range": true, + "refId": "C" } ], - "title": "Minted / Burned Tokens", + "title": "Minted / Burned / Slashed Tokens", "type": "timeseries" }, { @@ -501,6 +514,103 @@ ], "title": "Proof Validation Time", "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] 
+ } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 7, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(poktroll_claims{job=\"$job\"}[1m])) by (service_id, proof_stage)", + "instant": false, + "legendFormat": "{{service_id}}-{{proof_stage}}", + "range": true, + "refId": "A" + } + ], + "title": "Claims", + "type": "timeseries" } ], "refresh": "5s", From cfa7dc7ffea180214b39d760d7ef55be3eaa68ad Mon Sep 17 00:00:00 2001 From: "Dmitry K." Date: Thu, 3 Oct 2024 17:26:46 -0700 Subject: [PATCH 13/32] --wip-- [skip ci] --- .../cosmos_sdk_insights.json | 667 ++++++++++++++++-- 1 file changed, 606 insertions(+), 61 deletions(-) diff --git a/localnet/grafana-dashboards/cosmos_sdk_insights.json b/localnet/grafana-dashboards/cosmos_sdk_insights.json index 0b018c171..03737e25f 100644 --- a/localnet/grafana-dashboards/cosmos_sdk_insights.json +++ b/localnet/grafana-dashboards/cosmos_sdk_insights.json @@ -20,6 +20,17 @@ "graphTooltip": 0, "links": [], "panels": [ + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 8, + "title": "Block processing times", + "type": "row" + }, { "datasource": { "default": true, @@ -99,7 +110,7 @@ "h": 8, "w": 12, "x": 0, - "y": 0 + "y": 1 }, "id": 1, "options": { @@ -223,7 +234,7 @@ "h": 8, "w": 12, "x": 12, - "y": 0 + "y": 1 }, "id": 2, "options": { @@ -325,17 +336,31 @@ "value": 80 } ] - } + }, + "unit": "ms" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*: avg$" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 3 + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 8 + "x": 12, + "y": 9 }, - "id": 3, + "id": 4, "options": { "legend": { "calcs": 
[], @@ -355,9 +380,9 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(poktroll_minted_tokens{job=\"$job\"}[1m])) by (module)", + "expr": "sum(poktroll_proof_validation{job=\"$job\"}) by (quantile)", "instant": false, - "legendFormat": "minted by {{module}}", + "legendFormat": "{{module}}: q{{quantile}}", "range": true, "refId": "A" }, @@ -367,28 +392,125 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(poktroll_burned_tokens{job=\"$job\"}[1m])) by (module)", + "expr": "sum(poktroll_proof_validation_sum{job=\"$job\"}) / sum(poktroll_proof_validation_count{job=\"$job\"})", "hide": false, "instant": false, - "legendFormat": "burned by {{module}}", + "legendFormat": "{{module}}: avg", "range": true, "refId": "B" + } + ], + "title": "Proof Validation Time", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 9, + "panels": [], + "title": "Tokenomics", + "type": "row" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { 
+ "h": 6, + "w": 6, + "x": 0, + "y": 18 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(poktroll_slashed_tokens{job=\"$job\"}[1m])) by (module)", - "hide": false, + "expr": "sum(poktroll_relay_ema{job=\"$job\",service_id=~\"$service_id\"}) by (service_id)", "instant": false, - "legendFormat": "slashed by {{module}}", + "legendFormat": "__auto", "range": true, - "refId": "C" + "refId": "A" } ], - "title": "Minted / Burned / Slashed Tokens", + "title": "EMA by ServiceID", "type": "timeseries" }, { @@ -448,31 +570,17 @@ "value": 80 } ] - }, - "unit": "ms" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": ".*: avg$" - }, - "properties": [ - { - "id": "custom.lineWidth", - "value": 3 - } - ] } - ] + }, + "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 + "h": 6, + "w": 6, + "x": 6, + "y": 18 }, - "id": 4, + "id": 12, "options": { "legend": { "calcs": [], @@ -492,27 +600,111 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(poktroll_proof_validation{job=\"$job\"}) by (quantile)", + "expr": "sum(poktroll_relay_mining_difficulty{job=\"$job\",service_id=~\"$service_id\"}) by (service_id)", "instant": false, - "legendFormat": "{{module}}: q{{quantile}}", + "legendFormat": "__auto", "range": true, "refId": "A" + } + ], + "title": "RelayMining difficulty by ServiceID", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 
0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 18 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(poktroll_proof_validation_sum{job=\"$job\"}) / sum(poktroll_proof_validation_count{job=\"$job\"})", - "hide": false, + "expr": "sum(increase(poktroll_relays{job=\"$job\",service_id=~\"$service_id\"}[1m])) by (service_id, proof_stage)", "instant": false, - "legendFormat": "{{module}}: avg", + "legendFormat": "{{service_id}}-{{proof_stage}}", "range": true, - "refId": "B" + "refId": "A" } ], - "title": "Proof Validation Time", + "title": "On-Chain Relays", "type": "timeseries" }, { @@ -578,11 +770,11 @@ }, "gridPos": { "h": 6, - "w": 7, - "x": 0, - "y": 16 + "w": 6, + "x": 18, + "y": 18 }, - "id": 5, + "id": 6, "options": { "legend": { "calcs": [], @@ -602,28 +794,349 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(poktroll_claims{job=\"$job\"}[1m])) by (service_id, proof_stage)", + "expr": "sum(increase(poktroll_compute_units{job=\"$job\",service_id=~\"$service_id\"}[1m])) by (service_id, proof_stage)", "instant": false, "legendFormat": 
"{{service_id}}-{{proof_stage}}", "range": true, "refId": "A" } ], - "title": "Claims", + "title": "Compute Units", "type": "timeseries" - } - ], - "refresh": "5s", - "schemaVersion": 39, - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": false, + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 24 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(poktroll_proof_requirements{job=\"$job\",service_id=~\"$service_id\"}[1m])) by (service_id, reason)", + "instant": false, + "legendFormat": "{{service_id}}-{{reason}}", + "range": true, + "refId": "A" + } + ], + "title": "Proof requirements", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": 
"prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 24 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(poktroll_minted_tokens{job=\"$job\"}[1m])) by (module)", + "instant": false, + "legendFormat": "minted by {{module}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(poktroll_burned_tokens{job=\"$job\"}[1m])) by (module)", + "hide": false, + "instant": false, + "legendFormat": "burned by {{module}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(poktroll_slashed_tokens{job=\"$job\"}[1m])) by (module)", + "hide": false, + "instant": false, + "legendFormat": "slashed by 
{{module}}", + "range": true, + "refId": "C" + } + ], + "title": "Minted / Burned / Slashed Tokens", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 24 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(poktroll_claims{job=\"$job\",service_id=~\"$service_id\"}[1m])) by (service_id, proof_stage)", + "instant": false, + "legendFormat": "{{service_id}}-{{proof_stage}}", + "range": true, + "refId": "A" + } + ], + "title": "Compute Units", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, "text": "validator-poktroll-validator", "value": "validator-poktroll-validator" }, + "datasource": 
{ + "type": "prometheus", + "uid": "prometheus" + }, "definition": "label_values(cometbft_consensus_height,job)", "hide": 0, "includeAll": false, @@ -641,6 +1154,38 @@ "skipUrlSync": false, "sort": 0, "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(poktroll_proof_requirements,service_id)", + "hide": 0, + "includeAll": true, + "label": "Service ID", + "multi": true, + "name": "service_id", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(poktroll_proof_requirements,service_id)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" } ] }, @@ -650,7 +1195,7 @@ }, "timepicker": {}, "timezone": "browser", - "title": "Protocol / Cosmos SDK Insights", + "title": "Protocol / Insights and On-Chain data", "uid": "adzickiu028lcb", "version": 1, "weekStart": "" From 050687a15949ced80a064b19b1091221fb669dd3 Mon Sep 17 00:00:00 2001 From: "Dmitry K." 
Date: Thu, 3 Oct 2024 17:29:41 -0700 Subject: [PATCH 14/32] add histogram --- .../cosmos_sdk_insights.json | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/localnet/grafana-dashboards/cosmos_sdk_insights.json b/localnet/grafana-dashboards/cosmos_sdk_insights.json index 03737e25f..6081a0c1b 100644 --- a/localnet/grafana-dashboards/cosmos_sdk_insights.json +++ b/localnet/grafana-dashboards/cosmos_sdk_insights.json @@ -279,6 +279,96 @@ "title": "End Blocker Time", "type": "timeseries" }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 13, + "options": { + "calculate": true, + "calculation": { + "yBuckets": { + "scale": { + "type": "linear" + } + } + }, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-green", + "mode": "opacity", + "reverse": false, + "scale": "exponential", + "scheme": "Greens", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by(le) (increase(cometbft_consensus_block_interval_seconds_bucket[$__rate_interval]))", + "format": "time_series", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "CometBFT Consensus Block Interval", + "type": "heatmap" + }, { "datasource": { "default": 
true, From 478609779a26e2aa2a2a2f4a28e3c5c0053e81c8 Mon Sep 17 00:00:00 2001 From: "Dmitry K." Date: Fri, 4 Oct 2024 16:24:28 -0700 Subject: [PATCH 15/32] add custom metrics config --- app/app.go | 5 ++ cmd/poktrolld/cmd/config.go | 63 ++++++++++++++---- config.yml | 16 +++-- go.mod | 2 +- localnet/poktrolld/config/app.toml | 5 ++ telemetry/block.go | 8 +++ telemetry/common.go | 36 +++++++++++ telemetry/event_counters.go | 68 ++++++++++++++++---- telemetry/telemetry.go | 30 +++++++++ telemetry/tokens.go | 12 ++++ x/proof/keeper/msg_server_create_claim.go | 11 ++-- x/proof/keeper/msg_server_submit_proof.go | 11 ++-- x/tokenomics/keeper/settle_pending_claims.go | 16 +++++ 13 files changed, 241 insertions(+), 42 deletions(-) create mode 100644 telemetry/telemetry.go diff --git a/app/app.go b/app/app.go index 8384a82f4..4e92bc0aa 100644 --- a/app/app.go +++ b/app/app.go @@ -2,6 +2,7 @@ package app import ( // this line is used by starport scaffolding # stargate/app/moduleImport + "io" "os" "path/filepath" @@ -302,6 +303,10 @@ func New( return nil, err } + if err := telemetry.New(appOpts); err != nil { + return nil, err + } + return app, nil } diff --git a/cmd/poktrolld/cmd/config.go b/cmd/poktrolld/cmd/config.go index 003c6a690..673433fb7 100644 --- a/cmd/poktrolld/cmd/config.go +++ b/cmd/poktrolld/cmd/config.go @@ -9,10 +9,26 @@ import ( sdk "github.com/cosmos/cosmos-sdk/types" "github.com/pokt-network/poktroll/app" + "github.com/pokt-network/poktroll/telemetry" ) var once sync.Once +// PoktrollAdditionalConfig represents a poktroll-specific part of `app.toml` file. +// See the `customAppConfigTemplate()` for additional information about each setting. +type PoktrollAdditionalConfig struct { + Telemetry telemetry.PoktrollTelemetryConfig `mapstructure:"telemetry"` +} + +// poktrollAdditionalConfigDefaults sets default values to render in `app.toml`. 
+func poktrollAdditionalConfigDefaults() PoktrollAdditionalConfig { + return PoktrollAdditionalConfig{ + Telemetry: telemetry.PoktrollTelemetryConfig{ + CardinalityLevel: "medium", + }, + } +} + func InitSDKConfig() { once.Do(func() { checkOrInitSDKConfig() @@ -90,6 +106,7 @@ func initAppConfig() (string, interface{}) { // The following code snippet is just for reference. type CustomAppConfig struct { serverconfig.Config `mapstructure:",squash"` + Poktroll PoktrollAdditionalConfig `mapstructure:"poktroll"` } // Optionally allow the chain developer to overwrite the SDK's default @@ -113,7 +130,7 @@ func initAppConfig() (string, interface{}) { srvCfg.MinGasPrices = "0.000000001upokt" // Also adjust ignite's `config.yml`. srvCfg.Mempool.MaxTxs = 10000 srvCfg.Telemetry.Enabled = true - srvCfg.Telemetry.PrometheusRetentionTime = 60 // in seconds. This turns on Prometheus support. + srvCfg.Telemetry.PrometheusRetentionTime = 60 * 10 // in seconds. This turns on Prometheus support. srvCfg.Telemetry.MetricsSink = "mem" srvCfg.Pruning = "nothing" // archiving node by default srvCfg.API.Enable = true @@ -121,19 +138,37 @@ func initAppConfig() (string, interface{}) { srvCfg.GRPCWeb.Enable = true customAppConfig := CustomAppConfig{ - Config: *srvCfg, + Config: *srvCfg, + Poktroll: poktrollAdditionalConfigDefaults(), } - customAppTemplate := serverconfig.DefaultConfigTemplate - // Edit the default template file - // - // customAppTemplate := serverconfig.DefaultConfigTemplate + ` - // [wasm] - // # This is the maximum sdk gas (wasm and storage) that we allow for any x/wasm "smart" queries - // query_gas_limit = 300000 - // # This is the number of wasm vm instances we keep cached in memory for speed-up - // # Warning: this is currently unstable and may lead to crashes, best to keep for 0 unless testing locally - // lru_size = 0` - - return customAppTemplate, customAppConfig + return customAppConfigTemplate(), customAppConfig +} + +// customAppConfigTemplate extends the default 
configuration `app.toml` file with our own configs. They are going to be +// used on validators and full-nodes, and they render using default values from `initAppConfig`. +func customAppConfigTemplate() string { + return serverconfig.DefaultConfigTemplate + ` + ############################################################################### + ### Poktroll ### + ############################################################################### + + # Poktroll-specific configuration for Full Nodes and Validators. + [poktroll] + + # Telemetry configuration in addition to the [telemetry] settings. + [poktroll.telemetry] + + # Cardinality level for telemetry metrics collection + # This controls the level of detail (number of unique labels) in metrics. + # Options: + # - "low": Collects basic metrics with low cardinality. + # Suitable for production environments with tight performance constraints. + # - "medium": Collects a moderate number of labels, balancing detail and performance. + # Suitable for moderate workloads or staging environments. + # - "high": WARNING: WILL CAUSE STRESS TO YOUR MONITORING ENVIRONMENT! Collects detailed metrics with high + # cardinality, including labels with many unique values (e.g., application_id, session_id). + # Recommended for debugging or testing environments. 
+ cardinality-level = "{{ .Poktroll.Telemetry.CardinalityLevel }}" + ` } diff --git a/config.yml b/config.yml index 0eef70f67..1c1959e83 100644 --- a/config.yml +++ b/config.yml @@ -18,7 +18,7 @@ accounts: - name: app1 mnemonic: "mention spy involve verb exercise fiction catalog order agent envelope mystery text defy sing royal fringe return face alpha knife wonder vocal virus drum" coins: - - 10000000000000upokt + - 100000000upokt - name: app2 mnemonic: "material little labor strong search device trick amateur action crouch invite glide provide elite mango now paper sense found hamster neglect work install bulk" coins: @@ -46,7 +46,7 @@ accounts: - name: gateway1 mnemonic: "salt iron goat also absorb depend involve agent apology between lift shy door left bulb arrange industry father jelly olive rifle return predict into" coins: - - 10000000000000upokt + - 100000000upokt - name: gateway2 mnemonic: "suffer wet jelly furnace cousin flip layer render finish frequent pledge feature economy wink like water disease final erase goat include apple state furnace" coins: @@ -92,6 +92,10 @@ validators: telemetry: enabled: true prometheus-retention-time: "600" # seconds + poktroll: + telemetry: + # "high" produces a lot of timeseries. Only suitable for small networks such as LocalNet. + cardinality-level: high config: moniker: "validator1" consensus: @@ -175,7 +179,7 @@ genesis: stake: # NB: This value should be exactly 1upokt smaller than the value in # `supplier1_stake_config.yaml` so that the stake command causes a state change. - amount: "100000000068" + amount: "1000068" denom: upokt - address: pokt1ad28jdap2zfanjd7hpkh984yveney6k9a42man delegatee_gateway_addresses: [] @@ -186,7 +190,7 @@ genesis: stake: # NB: This value should be exactly 1upokt smaller than the value in # `supplier1_stake_config.yaml` so that the stake command causes a state change. 
- amount: "100000000068" + amount: "1000068" denom: upokt supplier: supplierList: @@ -220,7 +224,7 @@ genesis: stake: # NB: This value should be exactly 1upokt smaller than the value in # `application1_stake_config.yaml` so that the stake command causes a state change. - amount: "100000000068" + amount: "1000068" denom: upokt gateway: gatewayList: @@ -228,7 +232,7 @@ genesis: stake: # NB: This value should be exactly 1upokt smaller than the value in # `gateway1_stake_config.yaml` so that the stake command causes a state change. - amount: "100000000068" + amount: "1000068" denom: upokt service: params: diff --git a/go.mod b/go.mod index 91de15f1d..024364e1b 100644 --- a/go.mod +++ b/go.mod @@ -82,6 +82,7 @@ require ( require ( cosmossdk.io/x/tx v0.13.4 github.com/jhump/protoreflect v1.16.0 + github.com/mitchellh/mapstructure v1.5.0 ) require ( @@ -224,7 +225,6 @@ require ( github.com/minio/highwayhash v1.0.2 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/mitchellh/go-testing-interface v1.14.1 // indirect - github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect github.com/moby/term v0.5.0 // indirect github.com/morikuni/aec v1.0.0 // indirect diff --git a/localnet/poktrolld/config/app.toml b/localnet/poktrolld/config/app.toml index c80f545ae..34b6ef929 100644 --- a/localnet/poktrolld/config/app.toml +++ b/localnet/poktrolld/config/app.toml @@ -34,6 +34,11 @@ query-gas-limit = "0" [mempool] max-txs = 10000 +[poktroll] + + [poktroll.telemetry] + cardinality-level = "high" + [rpc] cors_allowed_origins = ["*"] diff --git a/telemetry/block.go b/telemetry/block.go index b012a7d4a..59074f783 100644 --- a/telemetry/block.go +++ b/telemetry/block.go @@ -77,6 +77,10 @@ func (mal metricsABCIListener) ListenFinalizeBlock( req abci.RequestFinalizeBlock, res abci.ResponseFinalizeBlock, ) error { + if !isTelemetyEnabled() { + return nil + } + telemetry.SetGauge( float32(res.Size()), 
blockResultSizeBytesMetric, @@ -94,5 +98,9 @@ func (mal metricsABCIListener) ListenCommit( res abci.ResponseCommit, changeSet []*storetypes.StoreKVPair, ) error { + if !isTelemetyEnabled() { + return nil + } + return nil } diff --git a/telemetry/common.go b/telemetry/common.go index 3d0b4d085..c3df432fd 100644 --- a/telemetry/common.go +++ b/telemetry/common.go @@ -1,5 +1,10 @@ package telemetry +import ( + cosmostelemetry "github.com/cosmos/cosmos-sdk/telemetry" + "github.com/hashicorp/go-metrics" +) + // MetricNameKeys constructs the full metric name by prefixing with a defined // prefix and appending any additional metrics provided as variadic arguments. func MetricNameKeys(metrics ...string) []string { @@ -8,3 +13,34 @@ func MetricNameKeys(metrics ...string) []string { result = append(result, metrics...) return result } + +// isTelemetyEnabled returns whether is telemetry turned on in the config file. +func isTelemetyEnabled() bool { + return cosmostelemetry.IsTelemetryEnabled() +} + +// addMediumCardinalityLabel only creates the label if cardinality if set to "medium". +// Good example of a medium cardinality label is `service_id` — we do not control the number of services +// on the network, and as permissionless services grow the metrics can get easily out of hand. We're keeping +// an option to turn off such labels. +// Such labels are included when the cardinality is set to "high". +func addMediumCardinalityLabel(labels []metrics.Label, name string, value string) []metrics.Label { + if globalTelemetryConfig.CardinalityLevel == "medium" || globalTelemetryConfig.CardinalityLevel == "high" { + return append(labels, metrics.Label{Name: name, Value: value}) + } + + return labels +} + +// addHighCardinalityLabel only creates the label if cardinality if set to "high". +// Good examples of high cardinality labels are `application_address` or `supplier_address`. +// This setting, on a large network, will slow down both the full node and the metric scraping system. 
+// We want to have such labels exposed for local development, debugging and performance troubleshooring. +// More background on why this is important: https://www.robustperception.io/cardinality-is-key/ +func addHighCardinalityLabel(labels []metrics.Label, name string, value string) []metrics.Label { + if globalTelemetryConfig.CardinalityLevel == "high" { + return append(labels, metrics.Label{Name: name, Value: value}) + } + + return labels +} diff --git a/telemetry/event_counters.go b/telemetry/event_counters.go index 3dde498e5..5e35f1787 100644 --- a/telemetry/event_counters.go +++ b/telemetry/event_counters.go @@ -25,6 +25,10 @@ func EventSuccessCounter( getValue func() float32, isSuccessful func() bool, ) { + if !isTelemetyEnabled() { + return + } + value := getValue() var metricName []string @@ -53,15 +57,19 @@ func ProofRequirementCounter( serviceId string, err error, ) { + if !isTelemetyEnabled() { + return + } + incrementAmount := 1 labels := []metrics.Label{ {Name: "reason", Value: reason.String()}, - {Name: "service_id", Value: serviceId}, } + labels = addMediumCardinalityLabel(labels, "service_id", serviceId) // Ensure the counter is not incremented if there was an error. 
if err != nil { - incrementAmount = 0 + return } telemetry.IncrCounterWithLabels( @@ -78,13 +86,21 @@ func ClaimComputeUnitsCounter( claimProofStage prooftypes.ClaimProofStage, numComputeUnits uint64, serviceId string, + applicationAddress string, + supplierOperatorAddress string, err error, ) { + if !isTelemetyEnabled() { + return + } + incrementAmount := numComputeUnits labels := []metrics.Label{ {Name: "proof_stage", Value: claimProofStage.String()}, - {Name: "service_id", Value: serviceId}, } + labels = addMediumCardinalityLabel(labels, "service_id", serviceId) + labels = addHighCardinalityLabel(labels, "application_address", applicationAddress) + labels = addHighCardinalityLabel(labels, "supplier_operator_address", supplierOperatorAddress) // Ensure the counter is not incremented if there was an error. if err != nil { @@ -106,13 +122,21 @@ func ClaimRelaysCounter( claimProofStage prooftypes.ClaimProofStage, numRelays uint64, serviceId string, + applicationAddress string, + supplierOperatorAddress string, err error, ) { + if !isTelemetyEnabled() { + return + } + incrementAmount := numRelays labels := []metrics.Label{ {Name: "proof_stage", Value: claimProofStage.String()}, - {Name: "service_id", Value: serviceId}, } + labels = addMediumCardinalityLabel(labels, "service_id", serviceId) + labels = addHighCardinalityLabel(labels, "application_address", applicationAddress) + labels = addHighCardinalityLabel(labels, "supplier_operator_address", supplierOperatorAddress) // Ensure the counter is not incremented if there was an error. 
if err != nil { @@ -133,14 +157,23 @@ func ClaimCounter( claimProofStage prooftypes.ClaimProofStage, numClaims uint64, serviceId string, + applicationAddress string, + supplierOperatorAddress string, err error, ) { + if !isTelemetyEnabled() { + return + } + incrementAmount := numClaims labels := []metrics.Label{ {Name: "proof_stage", Value: claimProofStage.String()}, - {Name: "service_id", Value: serviceId}, } + labels = addMediumCardinalityLabel(labels, "service_id", serviceId) + labels = addHighCardinalityLabel(labels, "application_address", applicationAddress) + labels = addHighCardinalityLabel(labels, "supplier_operator_address", supplierOperatorAddress) + // Ensure the counter is not incremented if there was an error. if err != nil { incrementAmount = 0 @@ -157,10 +190,13 @@ func ClaimCounter( // of the relay mining difficulty. The serviceId is used as a label to be able to // track the difficulty for each service. func RelayMiningDifficultyGauge(difficulty float32, serviceId string) { - labels := []metrics.Label{ - {Name: "service_id", Value: serviceId}, + if !isTelemetyEnabled() { + return } + labels := []metrics.Label{} + labels = addMediumCardinalityLabel(labels, "service_id", serviceId) + telemetry.SetGaugeWithLabels( MetricNameKeys("relay_mining", "difficulty"), difficulty, @@ -171,10 +207,13 @@ func RelayMiningDifficultyGauge(difficulty float32, serviceId string) { // RelayEMAGauge sets a gauge which tracks the relay EMA for a service. // The serviceId is used as a label to be able to track the EMA for each service. 
func RelayEMAGauge(relayEMA uint64, serviceId string) { - labels := []metrics.Label{ - {Name: "service_id", Value: serviceId}, + if !isTelemetyEnabled() { + return } + labels := []metrics.Label{} + labels = addMediumCardinalityLabel(labels, "service_id", serviceId) + telemetry.SetGaugeWithLabels( MetricNameKeys("relay", "ema"), float32(relayEMA), @@ -186,12 +225,15 @@ func RelayEMAGauge(relayEMA uint64, serviceId string) { // for session suppliers at the given maxPerSession value. // The serviceId is used as a label to be able to track this information for each service. func SessionSuppliersGauge(candidates int, maxPerSession int, serviceId string) { - maxPerSessionStr := strconv.Itoa(maxPerSession) - labels := []metrics.Label{ - {Name: "service_id", Value: serviceId}, - {Name: "max_per_session", Value: maxPerSessionStr}, + if !isTelemetyEnabled() { + return } + maxPerSessionStr := strconv.Itoa(maxPerSession) + labels := []metrics.Label{} + labels = addMediumCardinalityLabel(labels, "service_id", serviceId) + labels = addMediumCardinalityLabel(labels, "max_per_session", maxPerSessionStr) + telemetry.SetGaugeWithLabels( MetricNameKeys("session", "suppliers"), float32(candidates), diff --git a/telemetry/telemetry.go b/telemetry/telemetry.go new file mode 100644 index 000000000..94b22660b --- /dev/null +++ b/telemetry/telemetry.go @@ -0,0 +1,30 @@ +package telemetry + +import ( + "fmt" + + servertypes "github.com/cosmos/cosmos-sdk/server/types" + "github.com/mitchellh/mapstructure" +) + +// globalTelemetryConfig is a private variable that stores cardinality level. +// It is set on initialization and does not change for the lifetime of the program. +var globalTelemetryConfig PoktrollTelemetryConfig + +// PoktrollTelemetryConfig represents the relemetry protion of the custom poktroll config. +type PoktrollTelemetryConfig struct { + CardinalityLevel string `mapstructure:"cardinality-level"` +} + +// New sets the globalTelemetryConfig for telemetry package. 
+func New(appOpts servertypes.AppOptions) error { + // Extract the map from appOpts. `poktroll.telemetry` comes from `app.toml` which is parsed into a map. + telemetryMap := appOpts.Get("poktroll.telemetry").(map[string]interface{}) + + // Use mapstructure to decode the map into the struct + if err := mapstructure.Decode(telemetryMap, &globalTelemetryConfig); err != nil { + return fmt.Errorf("error decoding telemetry config: %v", err) + } + + return nil +} diff --git a/telemetry/tokens.go b/telemetry/tokens.go index 447933c86..a84b65510 100644 --- a/telemetry/tokens.go +++ b/telemetry/tokens.go @@ -8,6 +8,10 @@ import ( // MintedTokensFromModule is a function to track token minting from a specific module. // The metric used is an increment counter, and the label includes the module name for context. func MintedTokensFromModule(module string, amount float32) { + if !isTelemetyEnabled() { + return + } + // CosmosSDK has a metric called `minted_tokens` (as a part of `mint` module), however it is wrongfully marked a `gauge`. // It should be an `increment` because it always goes up. `gauge` tracks data that can go up and down. // More info: https://prometheus.io/docs/concepts/metric_types/ @@ -26,6 +30,10 @@ func MintedTokensFromModule(module string, amount float32) { // BurnedTokensFromModule is a function to track token burning from a specific module. // The metric used is an increment counter, and the label includes the module name for context. func BurnedTokensFromModule(module string, amount float32) { + if !isTelemetyEnabled() { + return + } + cosmostelemetry.IncrCounterWithLabels( MetricNameKeys("burned", "tokens"), amount, @@ -38,6 +46,10 @@ func BurnedTokensFromModule(module string, amount float32) { // SlashedTokensFromModule is a function to track token slashing from a specific module. // The metric used is an increment counter, and the label includes the module name for context. 
func SlashedTokensFromModule(module string, amount float32) { + if !isTelemetyEnabled() { + return + } + cosmostelemetry.IncrCounterWithLabels( MetricNameKeys("slashed", "tokens"), amount, diff --git a/x/proof/keeper/msg_server_create_claim.go b/x/proof/keeper/msg_server_create_claim.go index fca82eb11..ca7c5b9c8 100644 --- a/x/proof/keeper/msg_server_create_claim.go +++ b/x/proof/keeper/msg_server_create_claim.go @@ -44,12 +44,15 @@ func (k msgServer) CreateClaim( // Defer telemetry calls so that they reference the final values the relevant variables. defer func() { - serviceId := session.Header.ServiceId // Only increment these metrics counters if handling a new claim. if !isExistingClaim { - telemetry.ClaimCounter(types.ClaimProofStage_CLAIMED, 1, serviceId, err) - telemetry.ClaimRelaysCounter(types.ClaimProofStage_CLAIMED, numRelays, serviceId, err) - telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_CLAIMED, numClaimComputeUnits, serviceId, err) + serviceId := session.Header.ServiceId + applicationAddress := session.Header.ApplicationAddress + supplierOperatorAddress := msg.GetSupplierOperatorAddress() + + telemetry.ClaimCounter(types.ClaimProofStage_CLAIMED, 1, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimRelaysCounter(types.ClaimProofStage_CLAIMED, numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_CLAIMED, numClaimComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) } }() diff --git a/x/proof/keeper/msg_server_submit_proof.go b/x/proof/keeper/msg_server_submit_proof.go index 71584d437..848e83141 100644 --- a/x/proof/keeper/msg_server_submit_proof.go +++ b/x/proof/keeper/msg_server_submit_proof.go @@ -66,12 +66,15 @@ func (k msgServer) SubmitProof( // Defer telemetry calls so that they reference the final values the relevant variables. 
defer func() { - serviceId := session.Header.ServiceId // Only increment these metrics counters if handling a new claim. if !isExistingProof { - telemetry.ClaimCounter(types.ClaimProofStage_PROVEN, 1, serviceId, err) - telemetry.ClaimRelaysCounter(types.ClaimProofStage_PROVEN, numRelays, serviceId, err) - telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_PROVEN, numComputeUnits, serviceId, err) + serviceId := session.Header.ServiceId + applicationAddress := session.Header.ApplicationAddress + supplierOperatorAddress := msg.GetSupplierOperatorAddress() + + telemetry.ClaimCounter(types.ClaimProofStage_PROVEN, 1, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimRelaysCounter(types.ClaimProofStage_PROVEN, numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_PROVEN, numComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) } }() diff --git a/x/tokenomics/keeper/settle_pending_claims.go b/x/tokenomics/keeper/settle_pending_claims.go index 08978c6b5..7e6e09885 100644 --- a/x/tokenomics/keeper/settle_pending_claims.go +++ b/x/tokenomics/keeper/settle_pending_claims.go @@ -148,22 +148,30 @@ func (k Keeper) SettlePendingClaims(ctx sdk.Context) ( // Telemetry - defer telemetry calls so that they reference the final values the relevant variables. 
defer func() { + applicationAddress := claim.SessionHeader.ApplicationAddress + telemetry.ClaimCounter( prooftypes.ClaimProofStage_EXPIRED, 1, claim.SessionHeader.ServiceId, + applicationAddress, + claim.SupplierOperatorAddress, err, ) telemetry.ClaimRelaysCounter( prooftypes.ClaimProofStage_EXPIRED, numClaimRelays, claim.SessionHeader.ServiceId, + applicationAddress, + claim.SupplierOperatorAddress, err, ) telemetry.ClaimComputeUnitsCounter( prooftypes.ClaimProofStage_EXPIRED, numClaimComputeUnits, claim.SessionHeader.ServiceId, + applicationAddress, + claim.SupplierOperatorAddress, err, ) }() @@ -224,22 +232,30 @@ func (k Keeper) SettlePendingClaims(ctx sdk.Context) ( // Telemetry - defer telemetry calls so that they reference the final values the relevant variables. defer func() { + applicationAddress := claim.SessionHeader.ApplicationAddress + telemetry.ClaimCounter( prooftypes.ClaimProofStage_SETTLED, 1, claim.SessionHeader.ServiceId, + applicationAddress, + claim.SupplierOperatorAddress, err, ) telemetry.ClaimRelaysCounter( prooftypes.ClaimProofStage_SETTLED, numClaimRelays, claim.SessionHeader.ServiceId, + applicationAddress, + claim.SupplierOperatorAddress, err, ) telemetry.ClaimComputeUnitsCounter( prooftypes.ClaimProofStage_SETTLED, numClaimComputeUnits, claim.SessionHeader.ServiceId, + applicationAddress, + claim.SupplierOperatorAddress, err, ) }() From 83f1b7dd2cf9b2bc95d998ba2102fecfd3089e97 Mon Sep 17 00:00:00 2001 From: "Dmitry K." 
Date: Fri, 4 Oct 2024 16:43:28 -0700 Subject: [PATCH 16/32] break proofs by app and supplier --- app/app.go | 1 - cmd/poktrolld/cmd/config.go | 2 +- config.yml | 2 +- .../config/application1_stake_config.yaml | 2 +- .../config/gateway1_stake_config.yaml | 2 +- .../config/supplier1_stake_config.yaml | 2 +- pkg/relayer/session/sessiontree.go | 2 +- telemetry/event_counters.go | 19 +++++++++++++------ x/proof/keeper/msg_server_submit_proof.go | 8 +++++++- 9 files changed, 26 insertions(+), 14 deletions(-) diff --git a/app/app.go b/app/app.go index 4e92bc0aa..e65ed502f 100644 --- a/app/app.go +++ b/app/app.go @@ -2,7 +2,6 @@ package app import ( // this line is used by starport scaffolding # stargate/app/moduleImport - "io" "os" "path/filepath" diff --git a/cmd/poktrolld/cmd/config.go b/cmd/poktrolld/cmd/config.go index 673433fb7..ac46dc0c9 100644 --- a/cmd/poktrolld/cmd/config.go +++ b/cmd/poktrolld/cmd/config.go @@ -146,7 +146,7 @@ func initAppConfig() (string, interface{}) { } // customAppConfigTemplate extends the default configuration `app.toml` file with our own configs. They are going to be -// used on validators and full-nodes, and they render using default values from `initAppConfig`. +// used on validators and full-nodes, and they render using default values from `poktrollAdditionalConfigDefaults()`. 
func customAppConfigTemplate() string { return serverconfig.DefaultConfigTemplate + ` ############################################################################### diff --git a/config.yml b/config.yml index 1c1959e83..e7c011064 100644 --- a/config.yml +++ b/config.yml @@ -34,7 +34,7 @@ accounts: - name: supplier1 mnemonic: "cool industry busy tumble funny relax error state height like board wing goat emerge visual idle never unveil announce hill primary okay spatial frog" coins: - - 11000000000000upokt + - 110000000upokt - name: supplier2 mnemonic: "peanut hen enroll meat legal have error input bulk later correct denial onion fossil wing excuse elephant object apology switch claim rare decide surface" coins: diff --git a/localnet/poktrolld/config/application1_stake_config.yaml b/localnet/poktrolld/config/application1_stake_config.yaml index 97a874e20..b4e8aa96c 100644 --- a/localnet/poktrolld/config/application1_stake_config.yaml +++ b/localnet/poktrolld/config/application1_stake_config.yaml @@ -1,4 +1,4 @@ -stake_amount: 100000000069upokt +stake_amount: 1000069upokt service_ids: - anvil - svc1 diff --git a/localnet/poktrolld/config/gateway1_stake_config.yaml b/localnet/poktrolld/config/gateway1_stake_config.yaml index 83678608d..dc6a9bad7 100644 --- a/localnet/poktrolld/config/gateway1_stake_config.yaml +++ b/localnet/poktrolld/config/gateway1_stake_config.yaml @@ -1 +1 @@ -stake_amount: 100000000069upokt +stake_amount: 1000069upokt diff --git a/localnet/poktrolld/config/supplier1_stake_config.yaml b/localnet/poktrolld/config/supplier1_stake_config.yaml index 5e9896556..228e6ade2 100644 --- a/localnet/poktrolld/config/supplier1_stake_config.yaml +++ b/localnet/poktrolld/config/supplier1_stake_config.yaml @@ -1,6 +1,6 @@ owner_address: pokt1mrqt5f7qh8uxs27cjm9t7v9e74a9vvdnq5jva4 operator_address: pokt1mrqt5f7qh8uxs27cjm9t7v9e74a9vvdnq5jva4 -stake_amount: 100000000069upokt +stake_amount: 1000069upokt default_rev_share_percent: - 
pokt1mrqt5f7qh8uxs27cjm9t7v9e74a9vvdnq5jva4: 80.5 - pokt1eeeksh2tvkh7wzmfrljnhw4wrhs55lcuvmekkw: 19.5 diff --git a/pkg/relayer/session/sessiontree.go b/pkg/relayer/session/sessiontree.go index 2c6abe07f..9c8eac2b2 100644 --- a/pkg/relayer/session/sessiontree.go +++ b/pkg/relayer/session/sessiontree.go @@ -279,7 +279,7 @@ func (st *sessionTree) Delete() error { return err } } else { - fmt.Println("Unable to delete as tree store is nil") + fmt.Println("Unable to delete as tree store is nil. This shouldn't happen.") } // Delete the KVStore from disk diff --git a/telemetry/event_counters.go b/telemetry/event_counters.go index 5e35f1787..ed5bbe8ce 100644 --- a/telemetry/event_counters.go +++ b/telemetry/event_counters.go @@ -17,6 +17,9 @@ import ( const ( // Prefix all metric names with "poktroll" for easier search metricNamePrefix = "poktroll" + + ApplicationAddressLabelName = "app_addr" + SupplierOperatorAddressLabelName = "supop_addr" ) // EventSuccessCounter increments a counter with the given data type and success status. @@ -55,6 +58,8 @@ func EventSuccessCounter( func ProofRequirementCounter( reason prooftypes.ProofRequirementReason, serviceId string, + applicationAddress string, + supplierOperatorAddress string, err error, ) { if !isTelemetyEnabled() { @@ -66,6 +71,8 @@ func ProofRequirementCounter( {Name: "reason", Value: reason.String()}, } labels = addMediumCardinalityLabel(labels, "service_id", serviceId) + labels = addHighCardinalityLabel(labels, ApplicationAddressLabelName, applicationAddress) + labels = addHighCardinalityLabel(labels, SupplierOperatorAddressLabelName, supplierOperatorAddress) // Ensure the counter is not incremented if there was an error. 
if err != nil { @@ -99,8 +106,8 @@ func ClaimComputeUnitsCounter( {Name: "proof_stage", Value: claimProofStage.String()}, } labels = addMediumCardinalityLabel(labels, "service_id", serviceId) - labels = addHighCardinalityLabel(labels, "application_address", applicationAddress) - labels = addHighCardinalityLabel(labels, "supplier_operator_address", supplierOperatorAddress) + labels = addHighCardinalityLabel(labels, ApplicationAddressLabelName, applicationAddress) + labels = addHighCardinalityLabel(labels, SupplierOperatorAddressLabelName, supplierOperatorAddress) // Ensure the counter is not incremented if there was an error. if err != nil { @@ -135,8 +142,8 @@ func ClaimRelaysCounter( {Name: "proof_stage", Value: claimProofStage.String()}, } labels = addMediumCardinalityLabel(labels, "service_id", serviceId) - labels = addHighCardinalityLabel(labels, "application_address", applicationAddress) - labels = addHighCardinalityLabel(labels, "supplier_operator_address", supplierOperatorAddress) + labels = addHighCardinalityLabel(labels, ApplicationAddressLabelName, applicationAddress) + labels = addHighCardinalityLabel(labels, SupplierOperatorAddressLabelName, supplierOperatorAddress) // Ensure the counter is not incremented if there was an error. if err != nil { @@ -171,8 +178,8 @@ func ClaimCounter( } labels = addMediumCardinalityLabel(labels, "service_id", serviceId) - labels = addHighCardinalityLabel(labels, "application_address", applicationAddress) - labels = addHighCardinalityLabel(labels, "supplier_operator_address", supplierOperatorAddress) + labels = addHighCardinalityLabel(labels, ApplicationAddressLabelName, applicationAddress) + labels = addHighCardinalityLabel(labels, SupplierOperatorAddressLabelName, supplierOperatorAddress) // Ensure the counter is not incremented if there was an error. 
if err != nil { diff --git a/x/proof/keeper/msg_server_submit_proof.go b/x/proof/keeper/msg_server_submit_proof.go index 848e83141..208f3494d 100644 --- a/x/proof/keeper/msg_server_submit_proof.go +++ b/x/proof/keeper/msg_server_submit_proof.go @@ -222,7 +222,13 @@ func (k Keeper) ProofRequirementForClaim(ctx context.Context, claim *types.Claim // Defer telemetry calls so that they reference the final values the relevant variables. defer func() { - telemetry.ProofRequirementCounter(requirementReason, claim.SessionHeader.ServiceId, err) + telemetry.ProofRequirementCounter( + requirementReason, + claim.SessionHeader.ServiceId, + claim.SessionHeader.ApplicationAddress, + claim.SupplierOperatorAddress, + err, + ) }() // NB: Assumption that claim is non-nil and has a valid root sum because it From 10eb2bea237fb32fbf6a22569f0f76f76fa18994 Mon Sep 17 00:00:00 2001 From: "Dmitry K." Date: Fri, 4 Oct 2024 18:25:13 -0700 Subject: [PATCH 17/32] --wip-- [skip ci] --- pkg/relayer/proxy/metrics.go | 2 +- telemetry/event_counters.go | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pkg/relayer/proxy/metrics.go b/pkg/relayer/proxy/metrics.go index d42f09c4b..76c005583 100644 --- a/pkg/relayer/proxy/metrics.go +++ b/pkg/relayer/proxy/metrics.go @@ -45,7 +45,7 @@ var ( }, []string{"service_id"}) // relaysSuccessTotal is a Counter metric for successful requests in the relay miner. - // It increments with each successful request, labeled by ''service_id'. + // It increments with each successful request, labeled by 'service_id'. 
relaysSuccessTotal = prometheus.NewCounterFrom(stdprometheus.CounterOpts{ Subsystem: relayMinerProcess, Name: requestsSuccessTotal, diff --git a/telemetry/event_counters.go b/telemetry/event_counters.go index ed5bbe8ce..ba42e3cb7 100644 --- a/telemetry/event_counters.go +++ b/telemetry/event_counters.go @@ -18,8 +18,9 @@ const ( // Prefix all metric names with "poktroll" for easier search metricNamePrefix = "poktroll" - ApplicationAddressLabelName = "app_addr" - SupplierOperatorAddressLabelName = "supop_addr" + // Label Names + applicationAddressLabelName = "app_addr" + supplierOperatorAddressLabelName = "supop_addr" ) // EventSuccessCounter increments a counter with the given data type and success status. @@ -71,8 +72,8 @@ func ProofRequirementCounter( {Name: "reason", Value: reason.String()}, } labels = addMediumCardinalityLabel(labels, "service_id", serviceId) - labels = addHighCardinalityLabel(labels, ApplicationAddressLabelName, applicationAddress) - labels = addHighCardinalityLabel(labels, SupplierOperatorAddressLabelName, supplierOperatorAddress) + labels = addHighCardinalityLabel(labels, applicationAddressLabelName, applicationAddress) + labels = addHighCardinalityLabel(labels, supplierOperatorAddressLabelName, supplierOperatorAddress) // Ensure the counter is not incremented if there was an error. if err != nil { @@ -106,8 +107,8 @@ func ClaimComputeUnitsCounter( {Name: "proof_stage", Value: claimProofStage.String()}, } labels = addMediumCardinalityLabel(labels, "service_id", serviceId) - labels = addHighCardinalityLabel(labels, ApplicationAddressLabelName, applicationAddress) - labels = addHighCardinalityLabel(labels, SupplierOperatorAddressLabelName, supplierOperatorAddress) + labels = addHighCardinalityLabel(labels, applicationAddressLabelName, applicationAddress) + labels = addHighCardinalityLabel(labels, supplierOperatorAddressLabelName, supplierOperatorAddress) // Ensure the counter is not incremented if there was an error. 
if err != nil { @@ -142,8 +143,8 @@ func ClaimRelaysCounter( {Name: "proof_stage", Value: claimProofStage.String()}, } labels = addMediumCardinalityLabel(labels, "service_id", serviceId) - labels = addHighCardinalityLabel(labels, ApplicationAddressLabelName, applicationAddress) - labels = addHighCardinalityLabel(labels, SupplierOperatorAddressLabelName, supplierOperatorAddress) + labels = addHighCardinalityLabel(labels, applicationAddressLabelName, applicationAddress) + labels = addHighCardinalityLabel(labels, supplierOperatorAddressLabelName, supplierOperatorAddress) // Ensure the counter is not incremented if there was an error. if err != nil { @@ -178,8 +179,8 @@ func ClaimCounter( } labels = addMediumCardinalityLabel(labels, "service_id", serviceId) - labels = addHighCardinalityLabel(labels, ApplicationAddressLabelName, applicationAddress) - labels = addHighCardinalityLabel(labels, SupplierOperatorAddressLabelName, supplierOperatorAddress) + labels = addHighCardinalityLabel(labels, applicationAddressLabelName, applicationAddress) + labels = addHighCardinalityLabel(labels, supplierOperatorAddressLabelName, supplierOperatorAddress) // Ensure the counter is not incremented if there was an error. if err != nil { From cdf62fc09be070c1b28a0e6decc779b25f57f166 Mon Sep 17 00:00:00 2001 From: "Dmitry K." 
Date: Mon, 7 Oct 2024 15:56:58 -0700 Subject: [PATCH 18/32] self-review --- pkg/relayer/session/sessiontree.go | 9 ++---- telemetry/common.go | 19 +++++++----- telemetry/event_counters.go | 47 ++++++++++++++++++++---------- 3 files changed, 44 insertions(+), 31 deletions(-) diff --git a/pkg/relayer/session/sessiontree.go b/pkg/relayer/session/sessiontree.go index 9c8eac2b2..b55340224 100644 --- a/pkg/relayer/session/sessiontree.go +++ b/pkg/relayer/session/sessiontree.go @@ -3,7 +3,6 @@ package session import ( "bytes" "crypto/sha256" - "fmt" "os" "path/filepath" "sync" @@ -274,12 +273,8 @@ func (st *sessionTree) Delete() error { // This was intentionally removed to lower the IO load. // When the database is closed, it is deleted it from disk right away. - if st.treeStore != nil { - if err := st.treeStore.Stop(); err != nil { - return err - } - } else { - fmt.Println("Unable to delete as tree store is nil. This shouldn't happen.") + if err := st.treeStore.Stop(); err != nil { + return err } // Delete the KVStore from disk diff --git a/telemetry/common.go b/telemetry/common.go index c3df432fd..de14386b9 100644 --- a/telemetry/common.go +++ b/telemetry/common.go @@ -19,28 +19,31 @@ func isTelemetyEnabled() bool { return cosmostelemetry.IsTelemetryEnabled() } -// addMediumCardinalityLabel only creates the label if cardinality if set to "medium". +// appendMediumCardinalityLabels only creates the label if cardinality if set to "medium". // Good example of a medium cardinality label is `service_id` — we do not control the number of services // on the network, and as permissionless services grow the metrics can get easily out of hand. We're keeping // an option to turn off such labels. // Such labels are included when the cardinality is set to "high". 
-func addMediumCardinalityLabel(labels []metrics.Label, name string, value string) []metrics.Label { +func appendMediumCardinalityLabels(labels []metrics.Label, labelPairs ...metrics.Label) []metrics.Label { if globalTelemetryConfig.CardinalityLevel == "medium" || globalTelemetryConfig.CardinalityLevel == "high" { - return append(labels, metrics.Label{Name: name, Value: value}) + return append(labels, labelPairs...) } - return labels } -// addHighCardinalityLabel only creates the label if cardinality if set to "high". +// appendHighCardinalityLabels only creates the label if cardinality if set to "high". // Good examples of high cardinality labels are `application_address` or `supplier_address`. // This setting, on a large network, will slow down both the full node and the metric scraping system. // We want to have such labels exposed for local development, debugging and performance troubleshooring. // More background on why this is important: https://www.robustperception.io/cardinality-is-key/ -func addHighCardinalityLabel(labels []metrics.Label, name string, value string) []metrics.Label { +func appendHighCardinalityLabels(labels []metrics.Label, labelPairs ...metrics.Label) []metrics.Label { if globalTelemetryConfig.CardinalityLevel == "high" { - return append(labels, metrics.Label{Name: name, Value: value}) + return append(labels, labelPairs...) } - return labels } + +// toMetricLabel takes simple key and value of the label to return metrics.Label. 
+func toMetricLabel(key, value string) metrics.Label { + return metrics.Label{Name: key, Value: value} +} diff --git a/telemetry/event_counters.go b/telemetry/event_counters.go index ba42e3cb7..3b5d124b5 100644 --- a/telemetry/event_counters.go +++ b/telemetry/event_counters.go @@ -71,9 +71,12 @@ func ProofRequirementCounter( labels := []metrics.Label{ {Name: "reason", Value: reason.String()}, } - labels = addMediumCardinalityLabel(labels, "service_id", serviceId) - labels = addHighCardinalityLabel(labels, applicationAddressLabelName, applicationAddress) - labels = addHighCardinalityLabel(labels, supplierOperatorAddressLabelName, supplierOperatorAddress) + labels = appendMediumCardinalityLabels(labels, toMetricLabel("service_id", serviceId)) + labels = appendHighCardinalityLabels( + labels, + toMetricLabel(applicationAddressLabelName, applicationAddress), + toMetricLabel(supplierOperatorAddressLabelName, supplierOperatorAddress), + ) // Ensure the counter is not incremented if there was an error. if err != nil { @@ -106,9 +109,12 @@ func ClaimComputeUnitsCounter( labels := []metrics.Label{ {Name: "proof_stage", Value: claimProofStage.String()}, } - labels = addMediumCardinalityLabel(labels, "service_id", serviceId) - labels = addHighCardinalityLabel(labels, applicationAddressLabelName, applicationAddress) - labels = addHighCardinalityLabel(labels, supplierOperatorAddressLabelName, supplierOperatorAddress) + labels = appendMediumCardinalityLabels(labels, toMetricLabel("service_id", serviceId)) + labels = appendHighCardinalityLabels( + labels, + toMetricLabel(applicationAddressLabelName, applicationAddress), + toMetricLabel(supplierOperatorAddressLabelName, supplierOperatorAddress), + ) // Ensure the counter is not incremented if there was an error. 
if err != nil { @@ -142,9 +148,12 @@ func ClaimRelaysCounter( labels := []metrics.Label{ {Name: "proof_stage", Value: claimProofStage.String()}, } - labels = addMediumCardinalityLabel(labels, "service_id", serviceId) - labels = addHighCardinalityLabel(labels, applicationAddressLabelName, applicationAddress) - labels = addHighCardinalityLabel(labels, supplierOperatorAddressLabelName, supplierOperatorAddress) + labels = appendMediumCardinalityLabels(labels, toMetricLabel("service_id", serviceId)) + labels = appendHighCardinalityLabels( + labels, + toMetricLabel(applicationAddressLabelName, applicationAddress), + toMetricLabel(supplierOperatorAddressLabelName, supplierOperatorAddress), + ) // Ensure the counter is not incremented if there was an error. if err != nil { @@ -178,9 +187,12 @@ func ClaimCounter( {Name: "proof_stage", Value: claimProofStage.String()}, } - labels = addMediumCardinalityLabel(labels, "service_id", serviceId) - labels = addHighCardinalityLabel(labels, applicationAddressLabelName, applicationAddress) - labels = addHighCardinalityLabel(labels, supplierOperatorAddressLabelName, supplierOperatorAddress) + labels = appendMediumCardinalityLabels(labels, toMetricLabel("service_id", serviceId)) + labels = appendHighCardinalityLabels( + labels, + toMetricLabel(applicationAddressLabelName, applicationAddress), + toMetricLabel(supplierOperatorAddressLabelName, supplierOperatorAddress), + ) // Ensure the counter is not incremented if there was an error. 
if err != nil { @@ -203,7 +215,7 @@ func RelayMiningDifficultyGauge(difficulty float32, serviceId string) { } labels := []metrics.Label{} - labels = addMediumCardinalityLabel(labels, "service_id", serviceId) + labels = appendMediumCardinalityLabels(labels, toMetricLabel("service_id", serviceId)) telemetry.SetGaugeWithLabels( MetricNameKeys("relay_mining", "difficulty"), @@ -220,7 +232,7 @@ func RelayEMAGauge(relayEMA uint64, serviceId string) { } labels := []metrics.Label{} - labels = addMediumCardinalityLabel(labels, "service_id", serviceId) + labels = appendMediumCardinalityLabels(labels, toMetricLabel("service_id", serviceId)) telemetry.SetGaugeWithLabels( MetricNameKeys("relay", "ema"), @@ -239,8 +251,11 @@ func SessionSuppliersGauge(candidates int, maxPerSession int, serviceId string) maxPerSessionStr := strconv.Itoa(maxPerSession) labels := []metrics.Label{} - labels = addMediumCardinalityLabel(labels, "service_id", serviceId) - labels = addMediumCardinalityLabel(labels, "max_per_session", maxPerSessionStr) + labels = appendMediumCardinalityLabels( + labels, + toMetricLabel("service_id", serviceId), + toMetricLabel("max_per_session", maxPerSessionStr), + ) telemetry.SetGaugeWithLabels( MetricNameKeys("session", "suppliers"), From 6628c07edb2075edf4fae24a150b76e3af6f96fe Mon Sep 17 00:00:00 2001 From: "Dmitry K." Date: Mon, 7 Oct 2024 16:03:02 -0700 Subject: [PATCH 19/32] self-review --- app/app.go | 1 + 1 file changed, 1 insertion(+) diff --git a/app/app.go b/app/app.go index e65ed502f..b98b86b42 100644 --- a/app/app.go +++ b/app/app.go @@ -302,6 +302,7 @@ func New( return nil, err } + // Set up poktroll telemetry using `app.toml` configuration options (in addition to cosmos-sdk telemetry config). 
if err := telemetry.New(appOpts); err != nil { return nil, err } From 72b3086883a0102786bc9200a4fa30a4bd4d4616 Mon Sep 17 00:00:00 2001 From: Dmitry K Date: Mon, 21 Oct 2024 17:17:55 -0700 Subject: [PATCH 20/32] fix after merge --- api/poktroll/application/event.pulsar.go | 2 +- go.mod | 1 - go.sum | 2 -- x/proof/keeper/msg_server_submit_proof.go | 12 +----------- x/tokenomics/keeper/token_logic_modules.go | 2 +- 5 files changed, 3 insertions(+), 16 deletions(-) diff --git a/api/poktroll/application/event.pulsar.go b/api/poktroll/application/event.pulsar.go index 043aa2264..290bbde55 100644 --- a/api/poktroll/application/event.pulsar.go +++ b/api/poktroll/application/event.pulsar.go @@ -3,11 +3,11 @@ package application import ( _ "cosmossdk.io/api/cosmos/base/v1beta1" + _ "github.com/pokt-network/poktroll/api/poktroll/shared" fmt "fmt" _ "github.com/cosmos/cosmos-proto" runtime "github.com/cosmos/cosmos-proto/runtime" _ "github.com/cosmos/gogoproto/gogoproto" - _ "github.com/pokt-network/poktroll/api/poktroll/shared" protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoiface "google.golang.org/protobuf/runtime/protoiface" protoimpl "google.golang.org/protobuf/runtime/protoimpl" diff --git a/go.mod b/go.mod index 5e7aecdec..024364e1b 100644 --- a/go.mod +++ b/go.mod @@ -83,7 +83,6 @@ require ( cosmossdk.io/x/tx v0.13.4 github.com/jhump/protoreflect v1.16.0 github.com/mitchellh/mapstructure v1.5.0 - go.uber.org/mock v0.4.0 ) require ( diff --git a/go.sum b/go.sum index b35917516..62dc61bb0 100644 --- a/go.sum +++ b/go.sum @@ -1213,8 +1213,6 @@ go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0 go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= -go.uber.org/mock v0.4.0 h1:VcM4ZOtdbR4f6VXfiOpwpVJDL6lCReaZ6mw31wqh7KU= -go.uber.org/mock 
v0.4.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= diff --git a/x/proof/keeper/msg_server_submit_proof.go b/x/proof/keeper/msg_server_submit_proof.go index 96a82e9d9..e6a050cb6 100644 --- a/x/proof/keeper/msg_server_submit_proof.go +++ b/x/proof/keeper/msg_server_submit_proof.go @@ -47,16 +47,6 @@ func (k msgServer) SubmitProof( numClaimComputeUnits uint64 ) - // Defer telemetry calls so that they reference the final values the relevant variables. - defer func() { - // Only increment these metrics counters if handling a new claim. - if !isExistingProof { - telemetry.ClaimCounter(types.ClaimProofStage_PROVEN, 1, err) - telemetry.ClaimRelaysCounter(types.ClaimProofStage_PROVEN, numRelays, err) - telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_PROVEN, numClaimComputeUnits, err) - } - }() - logger := k.Logger().With("method", "SubmitProof") sdkCtx := cosmostypes.UnwrapSDKContext(ctx) logger.Info("About to start submitting proof") @@ -83,7 +73,7 @@ func (k msgServer) SubmitProof( telemetry.ClaimCounter(types.ClaimProofStage_PROVEN, 1, serviceId, applicationAddress, supplierOperatorAddress, err) telemetry.ClaimRelaysCounter(types.ClaimProofStage_PROVEN, numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) - telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_PROVEN, numComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_PROVEN, numClaimComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) } }() diff --git a/x/tokenomics/keeper/token_logic_modules.go b/x/tokenomics/keeper/token_logic_modules.go index d83ed34b9..f78c9d82a 100644 --- a/x/tokenomics/keeper/token_logic_modules.go +++ 
b/x/tokenomics/keeper/token_logic_modules.go @@ -437,7 +437,7 @@ func (k Keeper) TokenLogicModuleGlobalMint( // Update telemetry information if newMintCoin.Amount.IsInt64() { - defer telemetry.MintedTokensFromModule(tokenomictypes.ModuleName, float32(newMintCoin.Amount.Int64())) + defer telemetry.MintedTokensFromModule(tokenomicstypes.ModuleName, float32(newMintCoin.Amount.Int64())) } logger.Info(fmt.Sprintf("minted (%s) to the tokenomics module account", newMintCoin)) From aa61aced6433dbdf5f5cd17cc18f353cb398cd7c Mon Sep 17 00:00:00 2001 From: Dmitry K Date: Mon, 21 Oct 2024 17:55:29 -0700 Subject: [PATCH 21/32] self-review pass --- Tiltfile | 2 +- telemetry/common.go | 8 +++++--- telemetry/telemetry.go | 4 ++-- telemetry/tokens.go | 10 ++++------ x/application/module/abci.go | 1 + x/proof/keeper/proof_validation.go | 1 + x/supplier/module/abci.go | 1 + x/tokenomics/module/abci.go | 1 + 8 files changed, 16 insertions(+), 12 deletions(-) diff --git a/Tiltfile b/Tiltfile index 60a9e2feb..6ad068719 100644 --- a/Tiltfile +++ b/Tiltfile @@ -103,7 +103,7 @@ if localnet_config["observability"]["enabled"]: # Increase timeout for building the imagedefault is 30, which can be too low for slow internet connections to pull # container images. - update_settings(k8s_upsert_timeout_secs=120) # + update_settings(k8s_upsert_timeout_secs=120) helm_resource( "observability", diff --git a/telemetry/common.go b/telemetry/common.go index de14386b9..b77010e25 100644 --- a/telemetry/common.go +++ b/telemetry/common.go @@ -14,7 +14,7 @@ func MetricNameKeys(metrics ...string) []string { return result } -// isTelemetyEnabled returns whether is telemetry turned on in the config file. +// isTelemetyEnabled returns whether is telemetry turned on in the config file `app.toml` - cosmos-sdk's telemetry section. 
func isTelemetyEnabled() bool { return cosmostelemetry.IsTelemetryEnabled() } @@ -23,7 +23,8 @@ func isTelemetyEnabled() bool { // Good example of a medium cardinality label is `service_id` — we do not control the number of services // on the network, and as permissionless services grow the metrics can get easily out of hand. We're keeping // an option to turn off such labels. -// Such labels are included when the cardinality is set to "high". +// Medium cardinality labels are included when the cardinality is set to "high". +// Configuration option is exposed in app.toml, our own `poktroll.telemetry` section. func appendMediumCardinalityLabels(labels []metrics.Label, labelPairs ...metrics.Label) []metrics.Label { if globalTelemetryConfig.CardinalityLevel == "medium" || globalTelemetryConfig.CardinalityLevel == "high" { return append(labels, labelPairs...) @@ -36,6 +37,7 @@ func appendMediumCardinalityLabels(labels []metrics.Label, labelPairs ...metrics // This setting, on a large network, will slow down both the full node and the metric scraping system. // We want to have such labels exposed for local development, debugging and performance troubleshooring. // More background on why this is important: https://www.robustperception.io/cardinality-is-key/ +// Configuration option is exposed in app.toml, our own `poktroll.telemetry` section. func appendHighCardinalityLabels(labels []metrics.Label, labelPairs ...metrics.Label) []metrics.Label { if globalTelemetryConfig.CardinalityLevel == "high" { return append(labels, labelPairs...) @@ -45,5 +47,5 @@ func appendHighCardinalityLabels(labels []metrics.Label, labelPairs ...metrics.L // toMetricLabel takes simple key and value of the label to return metrics.Label. 
func toMetricLabel(key, value string) metrics.Label { - return metrics.Label{Name: key, Value: value} + return cosmostelemetry.NewLabel(key, value) } diff --git a/telemetry/telemetry.go b/telemetry/telemetry.go index 94b22660b..bd09b76da 100644 --- a/telemetry/telemetry.go +++ b/telemetry/telemetry.go @@ -11,7 +11,7 @@ import ( // It is set on initialization and does not change for the lifetime of the program. var globalTelemetryConfig PoktrollTelemetryConfig -// PoktrollTelemetryConfig represents the relemetry protion of the custom poktroll config. +// PoktrollTelemetryConfig represents the telemetry protion of the custom poktroll config. type PoktrollTelemetryConfig struct { CardinalityLevel string `mapstructure:"cardinality-level"` } @@ -23,7 +23,7 @@ func New(appOpts servertypes.AppOptions) error { // Use mapstructure to decode the map into the struct if err := mapstructure.Decode(telemetryMap, &globalTelemetryConfig); err != nil { - return fmt.Errorf("error decoding telemetry config: %v", err) + return fmt.Errorf("error decoding poktroll.telemetry config: %v", err) } return nil diff --git a/telemetry/tokens.go b/telemetry/tokens.go index a84b65510..4e32f9292 100644 --- a/telemetry/tokens.go +++ b/telemetry/tokens.go @@ -5,6 +5,10 @@ import ( "github.com/hashicorp/go-metrics" ) +// TODO_TECHDEBT: Minted, burned and shashed tokens values might not be accurate or be inflated. +// While we're keeping this metric for now consider removing in favor of utilizing the `cosmos-exporter` which uses on-chain data. +// Context: https://github.com/cosmos/cosmos-sdk/issues/21614, https://github.com/pokt-network/poktroll/pull/832 + // MintedTokensFromModule is a function to track token minting from a specific module. // The metric used is an increment counter, and the label includes the module name for context. 
func MintedTokensFromModule(module string, amount float32) { @@ -12,12 +16,6 @@ func MintedTokensFromModule(module string, amount float32) { return } - // CosmosSDK has a metric called `minted_tokens` (as a part of `mint` module), however it is wrongfully marked a `gauge`. - // It should be an `increment` because it always goes up. `gauge` tracks data that can go up and down. - // More info: https://prometheus.io/docs/concepts/metric_types/ - // - // We can't keep the same metric name because different metric types can't collide under the same name. So we add - // `poktroll_` prefix instead. cosmostelemetry.IncrCounterWithLabels( MetricNameKeys("minted", "tokens"), amount, diff --git a/x/application/module/abci.go b/x/application/module/abci.go index 4d5538af9..c5882d863 100644 --- a/x/application/module/abci.go +++ b/x/application/module/abci.go @@ -10,6 +10,7 @@ import ( // EndBlocker is called every block and handles application related updates. func EndBlocker(ctx sdk.Context, k keeper.Keeper) error { + // Telemetry: measure execution time like standard cosmos-sdk modules do that. defer cosmostelemetry.ModuleMeasureSince(types.ModuleName, cosmostelemetry.Now(), cosmostelemetry.MetricKeyEndBlocker) if err := k.EndBlockerAutoUndelegateFromUnstakedGateways(ctx); err != nil { diff --git a/x/proof/keeper/proof_validation.go b/x/proof/keeper/proof_validation.go index 085cc81c4..a228e9ffe 100644 --- a/x/proof/keeper/proof_validation.go +++ b/x/proof/keeper/proof_validation.go @@ -61,6 +61,7 @@ func (k Keeper) EnsureValidProof( ctx context.Context, proof *types.Proof, ) error { + // Telemetry: measure execution time. defer cosmostelemetry.MeasureSince(cosmostelemetry.Now(), telemetry.MetricNameKeys("proof", "validation")...) 
logger := k.Logger().With("method", "ValidateProof") diff --git a/x/supplier/module/abci.go b/x/supplier/module/abci.go index 421acad05..b0bf59405 100644 --- a/x/supplier/module/abci.go +++ b/x/supplier/module/abci.go @@ -10,6 +10,7 @@ import ( // EndBlocker is called every block and handles supplier related updates. func EndBlocker(ctx sdk.Context, k keeper.Keeper) error { + // Telemetry: measure execution time like standard cosmos-sdk modules do that. defer cosmostelemetry.ModuleMeasureSince(types.ModuleName, cosmostelemetry.Now(), cosmostelemetry.MetricKeyEndBlocker) // TODO_IMPROVE(@red-0ne): Add logs and/or telemetry on the number of unbonded suppliers. diff --git a/x/tokenomics/module/abci.go b/x/tokenomics/module/abci.go index 815395cfd..f614548ab 100644 --- a/x/tokenomics/module/abci.go +++ b/x/tokenomics/module/abci.go @@ -14,6 +14,7 @@ import ( // EndBlocker called at every block and settles all pending claims. func EndBlocker(ctx sdk.Context, k keeper.Keeper) (err error) { + // Telemetry: measure execution time like standard cosmos-sdk modules do that. defer cosmostelemetry.ModuleMeasureSince(types.ModuleName, cosmostelemetry.Now(), cosmostelemetry.MetricKeyEndBlocker) logger := k.Logger().With("method", "EndBlocker") From 2cd91de7d3187a47ef496d538b8e386dd15b08b4 Mon Sep 17 00:00:00 2001 From: Dmitry K Date: Thu, 24 Oct 2024 10:03:14 -0700 Subject: [PATCH 22/32] change retention time on localnet --- config.yml | 6 +++--- localnet/poktrolld/config/app.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config.yml b/config.yml index 128b3b5b4..2415211d3 100644 --- a/config.yml +++ b/config.yml @@ -91,7 +91,7 @@ validators: # minimum-gas-prices: 0.000000001upokt telemetry: enabled: true - prometheus-retention-time: "600" # seconds + prometheus-retention-time: "10" # seconds. Let's see if block time * 5 gives a better data. poktroll: telemetry: # "high" produces a lot of timeseries. Only suitable for small networks such as LocalNet. 
@@ -239,8 +239,8 @@ genesis: params: # TODO_MAINNET: Determine realistic amount for minimum gateway stake amount. min_stake: - amount: "1000000" # 1 POKT - denom: upokt + amount: "1000000" # 1 POKT + denom: upokt gatewayList: - address: pokt15vzxjqklzjtlz7lahe8z2dfe9nm5vxwwmscne4 stake: diff --git a/localnet/poktrolld/config/app.toml b/localnet/poktrolld/config/app.toml index 34b6ef929..73e52cb05 100644 --- a/localnet/poktrolld/config/app.toml +++ b/localnet/poktrolld/config/app.toml @@ -61,6 +61,6 @@ query-gas-limit = "0" enabled = true global-labels = [] metrics-sink = "mem" - prometheus-retention-time = "600" + prometheus-retention-time = "10" service-name = "" statsd-addr = "" From ce77cc7869d8868d7cb4287d5dc53e6e463d0934 Mon Sep 17 00:00:00 2001 From: Dmitry K Date: Thu, 24 Oct 2024 17:40:46 -0700 Subject: [PATCH 23/32] add psql datasource to grafana --- .../kubernetes/observability-prometheus-stack.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/localnet/kubernetes/observability-prometheus-stack.yaml b/localnet/kubernetes/observability-prometheus-stack.yaml index 99894b497..59e36ba41 100644 --- a/localnet/kubernetes/observability-prometheus-stack.yaml +++ b/localnet/kubernetes/observability-prometheus-stack.yaml @@ -17,6 +17,20 @@ grafana: viewers_can_edit: true security: allow_embedding: true + additionalDataSources: + - name: PostgreSQL + type: postgres + access: proxy + url: postgres-service:5432 + database: postgres + user: postgres + secureJsonData: + password: postgres + jsonData: + sslmode: disable + postgresVersion: 1604 # Adjust to match your PostgreSQL version + # timescaledb: false # Set to true if you are using TimescaleDB + schema: localnet # Specify your schema here prometheus: prometheusSpec: From 4a0b12be7d61b2eda43239eb5c82aeccd16c8a79 Mon Sep 17 00:00:00 2001 From: Dmitry K Date: Mon, 28 Oct 2024 16:14:58 -0700 Subject: [PATCH 24/32] localnet_up after merge --- api/poktroll/application/types.pulsar.go | 14 
++++++-------- x/application/types/types.pb.go | 14 ++++++-------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/api/poktroll/application/types.pulsar.go b/api/poktroll/application/types.pulsar.go index 4a7751438..d1f8f492f 100644 --- a/api/poktroll/application/types.pulsar.go +++ b/api/poktroll/application/types.pulsar.go @@ -2190,14 +2190,12 @@ type Application struct { Address string `protobuf:"bytes,1,opt,name=address,proto3" json:"address,omitempty"` // The Bech32 address of the application. Stake *v1beta1.Coin `protobuf:"bytes,2,opt,name=stake,proto3" json:"stake,omitempty"` // The total amount of uPOKT the application has staked - // As per this discussion: - // https://github.com/pokt-network/poktroll/pull/750#discussion_r1735025033 - // The number of service_configs is limited to 1 per service. - // This is to ensure that an application could not over service by making multiple - // claims settelments compete to burn the same stake. - // A slice of service_configs is still maintained to allow for future multi-service - // capabilities to be added, such as off-chain application stake tracking by suppliers: - // https://www.notion.so/buildwithgrove/Off-chain-Application-Stake-Tracking-6a8bebb107db4f7f9dc62cbe7ba555f7 + // CRITICAL_DEV_NOTE: The number of service_configs must be EXACTLY ONE. + // This prevents applications from over-servicing. + // The field is kept repeated (a list) for both legacy and future logic reaosns. 
+ // References: + // - https://github.com/pokt-network/poktroll/pull/750#discussion_r1735025033 + // - https://www.notion.so/buildwithgrove/Off-chain-Application-Stake-Tracking-6a8bebb107db4f7f9dc62cbe7ba555f7 ServiceConfigs []*shared.ApplicationServiceConfig `protobuf:"bytes,3,rep,name=service_configs,json=serviceConfigs,proto3" json:"service_configs,omitempty"` // The list of services this appliccation is configured to request service for // TODO_BETA: Rename `delegatee_gateway_addresses` to `gateway_addresses_delegated_to`. // Ensure to rename all relevant configs, comments, variables, function names, etc as well. diff --git a/x/application/types/types.pb.go b/x/application/types/types.pb.go index 9e5b8ff6c..32bc0cc13 100644 --- a/x/application/types/types.pb.go +++ b/x/application/types/types.pb.go @@ -31,14 +31,12 @@ const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package type Application struct { Address string `protobuf:"bytes,1,opt,name=address,proto3" json:"address,omitempty"` Stake *types.Coin `protobuf:"bytes,2,opt,name=stake,proto3" json:"stake,omitempty"` - // As per this discussion: - // https://github.com/pokt-network/poktroll/pull/750#discussion_r1735025033 - // The number of service_configs is limited to 1 per service. - // This is to ensure that an application could not over service by making multiple - // claims settelments compete to burn the same stake. - // A slice of service_configs is still maintained to allow for future multi-service - // capabilities to be added, such as off-chain application stake tracking by suppliers: - // https://www.notion.so/buildwithgrove/Off-chain-Application-Stake-Tracking-6a8bebb107db4f7f9dc62cbe7ba555f7 + // CRITICAL_DEV_NOTE: The number of service_configs must be EXACTLY ONE. + // This prevents applications from over-servicing. + // The field is kept repeated (a list) for both legacy and future logic reaosns. 
+ // References: + // - https://github.com/pokt-network/poktroll/pull/750#discussion_r1735025033 + // - https://www.notion.so/buildwithgrove/Off-chain-Application-Stake-Tracking-6a8bebb107db4f7f9dc62cbe7ba555f7 ServiceConfigs []*types1.ApplicationServiceConfig `protobuf:"bytes,3,rep,name=service_configs,json=serviceConfigs,proto3" json:"service_configs,omitempty"` // TODO_BETA: Rename `delegatee_gateway_addresses` to `gateway_addresses_delegated_to`. // Ensure to rename all relevant configs, comments, variables, function names, etc as well. From 814d0a2eaa1f94e9dfd502a041d893570832b071 Mon Sep 17 00:00:00 2001 From: Dmitry K Date: Tue, 29 Oct 2024 18:19:28 -0700 Subject: [PATCH 25/32] more dashboards --- Tiltfile | 4 + cmd/poktrolld/cmd/config.go | 3 +- config.yml | 15 +- .../cosmos_sdk_insights.json | 38 +- .../permissionless_demand_observability.json | 1314 +++++++++++++++++ .../grafana-dashboards/tokenomics_relays.json | 523 +++++++ localnet/poktrolld/config/app.toml | 2 +- pkg/relayer/proxy/metrics.go | 2 +- pkg/relayer/proxy/synchronous.go | 2 +- 9 files changed, 1884 insertions(+), 19 deletions(-) create mode 100644 localnet/grafana-dashboards/permissionless_demand_observability.json create mode 100644 localnet/grafana-dashboards/tokenomics_relays.json diff --git a/Tiltfile b/Tiltfile index 9cb6ea837..12ef5a70a 100644 --- a/Tiltfile +++ b/Tiltfile @@ -240,6 +240,7 @@ helm_resource( "--set=logs.format=" + str(localnet_config["validator"]["logs"]["format"]), "--set=serviceMonitor.enabled=" + str(localnet_config["observability"]["enabled"]), "--set=development.delve.enabled=" + str(localnet_config["validator"]["delve"]["enabled"]), + "--set=image.repository=poktrolld", ], image_deps=["poktrolld"], image_keys=[("image.repository", "image.tag")], @@ -259,6 +260,7 @@ for x in range(localnet_config["relayminers"]["count"]): "--set=metrics.serviceMonitor.enabled=" + str(localnet_config["observability"]["enabled"]), "--set=development.delve.enabled=" + 
str(localnet_config["relayminers"]["delve"]["enabled"]), "--set=logLevel=" + str(localnet_config["relayminers"]["logs"]["level"]), + "--set=image.repository=poktrolld", ], image_deps=["poktrolld"], image_keys=[("image.repository", "image.tag")], @@ -300,6 +302,7 @@ for x in range(localnet_config["appgateservers"]["count"]): "--set=metrics.serviceMonitor.enabled=" + str(localnet_config["observability"]["enabled"]), "--set=development.delve.enabled=" + str(localnet_config["appgateservers"]["delve"]["enabled"]), "--set=logLevel=" + str(localnet_config["appgateservers"]["logs"]["level"]), + "--set=image.repository=poktrolld", ], image_deps=["poktrolld"], image_keys=[("image.repository", "image.tag")], @@ -342,6 +345,7 @@ for x in range(localnet_config["gateways"]["count"]): "--set=metrics.serviceMonitor.enabled=" + str(localnet_config["observability"]["enabled"]), "--set=development.delve.enabled=" + str(localnet_config["gateways"]["delve"]["enabled"]), "--set=logLevel=" + str(localnet_config["gateways"]["logs"]["level"]), + "--set=image.repository=poktrolld", ], image_deps=["poktrolld"], image_keys=[("image.repository", "image.tag")], diff --git a/cmd/poktrolld/cmd/config.go b/cmd/poktrolld/cmd/config.go index ac46dc0c9..844bef61a 100644 --- a/cmd/poktrolld/cmd/config.go +++ b/cmd/poktrolld/cmd/config.go @@ -130,7 +130,8 @@ func initAppConfig() (string, interface{}) { srvCfg.MinGasPrices = "0.000000001upokt" // Also adjust ignite's `config.yml`. srvCfg.Mempool.MaxTxs = 10000 srvCfg.Telemetry.Enabled = true - srvCfg.Telemetry.PrometheusRetentionTime = 60 * 10 // in seconds. This turns on Prometheus support. + // Positive value turns on prometheus support. Prometheus metrics are removed from the exporter when retention time is reached. + srvCfg.Telemetry.PrometheusRetentionTime = 60 * 60 * 24 // in seconds. 
srvCfg.Telemetry.MetricsSink = "mem" srvCfg.Pruning = "nothing" // archiving node by default srvCfg.API.Enable = true diff --git a/config.yml b/config.yml index 2f9bf2700..18fd0e6be 100644 --- a/config.yml +++ b/config.yml @@ -91,7 +91,6 @@ validators: # minimum-gas-prices: 0.000000001upokt telemetry: enabled: true - prometheus-retention-time: "10" # seconds. Let's see if block time * 5 gives a better data. poktroll: telemetry: # "high" produces a lot of timeseries. Only suitable for small networks such as LocalNet. @@ -143,13 +142,13 @@ genesis: denom: upokt bank: supply: - - amount: "1003000204" + - amount: "1102000204" denom: upokt balances: # Application module - address: pokt1rl3gjgzexmplmds3tq3r3yk84zlwdl6djzgsvm coins: - - amount: "1000068" # Equals to the total of all app stakes below + - amount: "100000068" # Equals to the total of all app stakes below denom: upokt # Supplier module - address: pokt1j40dzzmn6cn9kxku7a5tjnud6hv37vesr5ccaa @@ -175,9 +174,8 @@ genesis: denom: upokt applicationList: - address: pokt1mrqt5f7qh8uxs27cjm9t7v9e74a9vvdnq5jva4 - delegatee_gateway_addresses: [ - pokt15vzxjqklzjtlz7lahe8z2dfe9nm5vxwwmscne4 - ] + delegatee_gateway_addresses: + [pokt15vzxjqklzjtlz7lahe8z2dfe9nm5vxwwmscne4] service_configs: - service_id: anvil stake: @@ -186,9 +184,8 @@ genesis: amount: "100000068" # ~100 POKT denom: upokt - address: pokt184zvylazwu4queyzpl0gyz9yf5yxm2kdhh9hpm - delegatee_gateway_addresses: [ - pokt15vzxjqklzjtlz7lahe8z2dfe9nm5vxwwmscne4 - ] + delegatee_gateway_addresses: + [pokt15vzxjqklzjtlz7lahe8z2dfe9nm5vxwwmscne4] service_configs: - service_id: rest stake: diff --git a/localnet/grafana-dashboards/cosmos_sdk_insights.json b/localnet/grafana-dashboards/cosmos_sdk_insights.json index 6081a0c1b..8eb10af1b 100644 --- a/localnet/grafana-dashboards/cosmos_sdk_insights.json +++ b/localnet/grafana-dashboards/cosmos_sdk_insights.json @@ -17,7 +17,7 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, + "graphTooltip": 1, 
"links": [], "panels": [ { @@ -349,7 +349,7 @@ "unit": "s" } }, - "pluginVersion": "11.2.1", + "pluginVersion": "11.2.2+security-01", "targets": [ { "datasource": { @@ -472,7 +472,7 @@ "editorMode": "code", "expr": "sum(poktroll_proof_validation{job=\"$job\"}) by (quantile)", "instant": false, - "legendFormat": "{{module}}: q{{quantile}}", + "legendFormat": " q{{quantile}}", "range": true, "refId": "A" }, @@ -485,7 +485,7 @@ "expr": "sum(poktroll_proof_validation_sum{job=\"$job\"}) / sum(poktroll_proof_validation_count{job=\"$job\"})", "hide": false, "instant": false, - "legendFormat": "{{module}}: avg", + "legendFormat": "avg", "range": true, "refId": "B" } @@ -1050,7 +1050,20 @@ ] } }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "D" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 3 + } + ] + } + ] }, "gridPos": { "h": 6, @@ -1109,6 +1122,19 @@ "legendFormat": "slashed by {{module}}", "range": true, "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(poktroll_minted_tokens{job=\"$job\"}[1m])) - sum(increase(poktroll_burned_tokens{job=\"$job\"}[1m]))", + "hide": false, + "instant": false, + "legendFormat": "Minted total (minus burned)", + "range": true, + "refId": "D" } ], "title": "Minted / Burned / Slashed Tokens", @@ -1208,7 +1234,7 @@ "refId": "A" } ], - "title": "Compute Units", + "title": "Claims", "type": "timeseries" } ], diff --git a/localnet/grafana-dashboards/permissionless_demand_observability.json b/localnet/grafana-dashboards/permissionless_demand_observability.json new file mode 100644 index 000000000..9fffee6e9 --- /dev/null +++ b/localnet/grafana-dashboards/permissionless_demand_observability.json @@ -0,0 +1,1314 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 
255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "links": [], + "panels": [ + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(cometbft_consensus_block_size_bytes{})", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Block size", + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "editorMode": "code", + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT\n b.timestamp AS time,\n COUNT(m.id) AS num_proofs\nFROM\n localnet.messages m\nJOIN\n localnet.transactions t ON t.id = m.transaction_id\nJOIN\n localnet.blocks b ON b.id = t.block_id\nWHERE\n m.type_url = '/poktroll.proof.MsgSubmitProof'\n AND b.timestamp BETWEEN $__timeFrom() AND $__timeTo()\nGROUP BY\n b.timestamp\nORDER BY\n b.timestamp;\n", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + } + ], + "title": "Number of proofs", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "increase(appgateserver_relay_requests_total[1m])", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Actual relays", + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", 
+ "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "SELECT\n $__timeGroup(b.\"timestamp\", '1m') AS time,\n SUM(trim(both '\"' from ea_num_relays.value)::bigint) AS total_num_relays,\n (ea_claim.value::json ->> 'supplier_operator_address') AS supplier_operator_address,\n (ea_claim.value::json -> 'session_header' ->> 'service_id') AS service_id\nFROM\n localnet.events e\nJOIN\n localnet.event_attributes ea_num_relays ON ea_num_relays.event_id = e.id\n AND ea_num_relays.key = 'num_relays'\nJOIN\n localnet.event_attributes ea_claim ON ea_claim.event_id = e.id\n AND ea_claim.key = 'claim'\nJOIN\n localnet.blocks b ON b.id = e.block_id\nWHERE\n e.type = 'poktroll.proof.EventClaimCreated'\n AND b.\"timestamp\" BETWEEN $__timeFrom() AND $__timeTo()\nGROUP BY\n time, supplier_operator_address, service_id\nORDER BY\n time;\n", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + } + ], + "title": "On chain relays from claims", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + 
"uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(poktroll_relay_ema{service_id=~\"$service_id\"}) by (service_id)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "EMA by service", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": 
false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(poktroll_relay_mining_difficulty{service_id=~\"$service_id\"}) by (service_id)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "RM difficulty by service", + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 8, + "options": { + 
"barRadius": 0, + "barWidth": 0.97, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "-- Average number of proofs per application\n\nWITH proofs_per_block AS (\n SELECT\n b.height AS block_height,\n m.json::jsonb -> 'sessionHeader' ->> 'applicationAddress' AS application_address,\n COUNT(m.id) AS num_proofs\n FROM\n localnet.messages m\n JOIN\n localnet.transactions t ON t.id = m.transaction_id\n JOIN\n localnet.blocks b ON b.id = t.block_id\n WHERE\n m.type_url = '/poktroll.proof.MsgSubmitProof'\n AND b.timestamp BETWEEN $__timeFrom() AND $__timeTo()\n GROUP BY\n b.height, application_address\n)\n\nSELECT\n application_address AS application,\n AVG(num_proofs) AS avg_num_proofs\nFROM\n proofs_per_block\nGROUP BY\n application\nORDER BY\n avg_num_proofs DESC;\n", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + } + ], + "title": "Average Number of Proofs per Application", + "type": "barchart" + }, + { + "datasource": { + "default": false, + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": 
false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 9, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "-- Average number of proofs per service\n\nWITH proofs_per_block AS (\n SELECT\n b.height AS block_height,\n m.json::jsonb -> 'sessionHeader' ->> 'serviceId' AS service_id,\n COUNT(m.id) AS num_proofs\n FROM\n localnet.messages m\n JOIN\n localnet.transactions t ON t.id = m.transaction_id\n JOIN\n localnet.blocks b ON b.id = t.block_id\n WHERE\n m.type_url = '/poktroll.proof.MsgSubmitProof'\n AND b.timestamp BETWEEN $__timeFrom() AND $__timeTo()\n GROUP BY\n b.height, service_id\n)\n\nSELECT\n service_id AS service,\n AVG(num_proofs) AS avg_num_proofs\nFROM\n proofs_per_block\nGROUP BY\n service\nORDER BY\n avg_num_proofs DESC;\n", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + } + ], + "title": "Average Number of Proofs per Service", + "type": "barchart" + }, + { + "datasource": { + "default": false, + "type": "grafana-postgresql-datasource", + "uid": 
"PCC52D03280B7034C" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 32 + }, + "id": 5, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "editorMode": "code", + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT\n b.timestamp AS time,\n COUNT(m.id) AS num_proofs,\n m.json::jsonb -> 'sessionHeader' ->> 'applicationAddress' AS application_address\nFROM\n localnet.messages m\nJOIN\n localnet.transactions t ON t.id = m.transaction_id\nJOIN\n localnet.blocks b ON b.id = t.block_id\nWHERE\n m.type_url = '/poktroll.proof.MsgSubmitProof'\n AND b.timestamp BETWEEN $__timeFrom() AND $__timeTo()\nGROUP BY\n b.timestamp, application_address\nORDER BY\n b.timestamp;\n", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } 
+ } + ], + "title": "Proof per application", + "type": "barchart" + }, + { + "datasource": { + "default": false, + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 32 + }, + "id": 6, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "editorMode": "code", + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT\n b.timestamp AS time,\n COUNT(m.id) AS num_proofs,\n m.json::jsonb -> 'sessionHeader' ->> 'serviceId' AS service_id\nFROM\n localnet.messages m\nJOIN\n localnet.transactions t ON t.id = m.transaction_id\nJOIN\n localnet.blocks b ON b.id = t.block_id\nWHERE\n m.type_url = '/poktroll.proof.MsgSubmitProof'\n AND b.timestamp BETWEEN $__timeFrom() AND $__timeTo()\nGROUP BY\n b.timestamp, service_id\nORDER BY\n b.timestamp;\n", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": 
[], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + } + ], + "title": "Proof per service", + "type": "barchart" + }, + { + "datasource": { + "default": false, + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 32 + }, + "id": 7, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "editorMode": "code", + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT\n b.timestamp AS time,\n COUNT(m.id) AS num_proofs,\n m.json::jsonb ->> 'supplierOperatorAddress' AS supplier_operator_address\nFROM\n localnet.messages m\nJOIN\n localnet.transactions t ON t.id = m.transaction_id\nJOIN\n localnet.blocks b ON b.id = t.block_id\nWHERE\n m.type_url = '/poktroll.proof.MsgSubmitProof'\n AND b.timestamp BETWEEN 
$__timeFrom() AND $__timeTo()\nGROUP BY\n b.timestamp, supplier_operator_address\nORDER BY\n b.timestamp;\n", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + } + ], + "title": "Proof per supplier operator", + "type": "barchart" + }, + { + "datasource": { + "default": false, + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 10, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "-- Serviced relays (from claims)\nWITH serviced_relays AS (\n SELECT\n (ea_claim.value::json ->> 'supplier_operator_address') AS supplier_operator_address,\n SUM(trim(both '\"' from 
ea_num_relays.value)::bigint) AS total_serviced_relays\n FROM\n localnet.events e\n JOIN\n localnet.event_attributes ea_num_relays ON ea_num_relays.event_id = e.id\n AND ea_num_relays.key = 'num_relays'\n JOIN\n localnet.event_attributes ea_claim ON ea_claim.event_id = e.id\n AND ea_claim.key = 'claim'\n JOIN\n localnet.blocks b ON b.id = e.block_id\n WHERE\n e.type = 'poktroll.proof.EventClaimCreated'\n AND b.\"timestamp\" BETWEEN $__timeFrom() AND $__timeTo()\n GROUP BY\n supplier_operator_address\n),\n\n-- Estimated relays (from proofs)\nestimated_relays AS (\n SELECT\n (ea_proof.value::json ->> 'supplier_operator_address') AS supplier_operator_address,\n SUM(trim(both '\"' from ea_num_relays.value)::bigint) AS total_estimated_relays\n FROM\n localnet.events e\n JOIN\n localnet.event_attributes ea_num_relays ON ea_num_relays.event_id = e.id\n AND ea_num_relays.key = 'num_relays'\n JOIN\n localnet.event_attributes ea_proof ON ea_proof.event_id = e.id\n AND ea_proof.key = 'proof'\n JOIN\n localnet.blocks b ON b.id = e.block_id\n WHERE\n e.type = 'poktroll.proof.EventProofSubmitted'\n AND b.\"timestamp\" BETWEEN $__timeFrom() AND $__timeTo()\n GROUP BY\n supplier_operator_address\n)\n\n-- Combine the totals per supplier\nSELECT\n COALESCE(s.supplier_operator_address, e.supplier_operator_address) AS supplier_operator_address,\n COALESCE(s.total_serviced_relays, 0) AS total_serviced_relays,\n COALESCE(e.total_estimated_relays, 0) AS total_estimated_relays\nFROM\n serviced_relays s\nFULL OUTER JOIN\n estimated_relays e ON s.supplier_operator_address = e.supplier_operator_address\nORDER BY\n supplier_operator_address;\n", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + } + ], + "title": "cumulative number of relays claims vs proofs per supplier", + "type": "barchart" + } + ], + "refresh": "", + "schemaVersion": 39, 
+ "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Permissionless demand observability", + "version": 0, + "weekStart": "" +} \ No newline at end of file diff --git a/localnet/grafana-dashboards/tokenomics_relays.json b/localnet/grafana-dashboards/tokenomics_relays.json new file mode 100644 index 000000000..f6228f499 --- /dev/null +++ b/localnet/grafana-dashboards/tokenomics_relays.json @@ -0,0 +1,523 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 5, + "y": 0 + }, + "id": 4, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "## What does this dashboard show?\n\nAs relays flow through the network\nAppGate -> RelayMiner and RelayMiner creates proofs and claims, we can capture the whole relay cycle.\n\n1. shows the actual amount of relays on AppGate;\n2. shows the actual amount of relays processed by RelayMiner;\n3. the amount of relays from the on-chain information using `EventClaimCreated`;\n4. 
relays from `EventProofSubmitted`;", + "mode": "markdown" + }, + "pluginVersion": "11.2.2+security-01", + "title": "What is this dashboard?", + "type": "text" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "increase(appgateserver_relay_requests_total[$group_by])", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Actual AppGate Requests", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, +
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "increase(relayminer_requests_total[$group_by])", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Actual RelayMiner Requests", + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" +
}, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "editorMode": "code", + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT\n $__timeGroup(b.\"timestamp\", '$group_by') AS time,\n SUM(trim(both '\"' from ea_num_relays.value)::bigint) AS total_num_relays,\n (ea_claim.value::json ->> 'supplier_operator_address') AS supplier_operator_address,\n (ea_claim.value::json -> 'session_header' ->> 'service_id') AS service_id\nFROM\n localnet.events e\nJOIN\n localnet.event_attributes ea_num_relays ON ea_num_relays.event_id = e.id\n AND ea_num_relays.key = 'num_relays'\nJOIN\n localnet.event_attributes ea_claim ON ea_claim.event_id = e.id\n AND ea_claim.key = 'claim'\nJOIN\n localnet.blocks b ON b.id = e.block_id\nWHERE\n e.type = 'poktroll.proof.EventClaimCreated'\n AND b.\"timestamp\" BETWEEN $__timeFrom() AND $__timeTo()\nGROUP BY\n time, supplier_operator_address, service_id\nORDER BY\n time;\n", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + } + ], + "title": "Relays (from on-chain claims)", + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "fieldConfig": { 
+ "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PCC52D03280B7034C" + }, + "editorMode": "code", + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT\n $__timeGroup(b.\"timestamp\", '$group_by') AS time,\n SUM(trim(both '\"' from ea_num_relays.value)::bigint) AS total_num_relays,\n (ea_proof.value::json ->> 'supplier_operator_address') AS supplier_operator_address,\n (ea_proof.value::json -> 'session_header' ->> 'service_id') AS service_id\nFROM\n localnet.events e\nJOIN\n localnet.event_attributes ea_num_relays ON ea_num_relays.event_id = e.id\n AND ea_num_relays.key = 'num_relays'\nJOIN\n localnet.event_attributes ea_proof ON ea_proof.event_id = e.id\n AND ea_proof.key = 'proof'\nJOIN\n localnet.blocks b ON b.id = e.block_id\nWHERE\n e.type = 'poktroll.proof.EventProofSubmitted'\n AND b.\"timestamp\" 
BETWEEN $__timeFrom() AND $__timeTo()\nGROUP BY\n time, supplier_operator_address, service_id\nORDER BY\n time;\n", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + } + ], + "title": "Relays (from on-chain proofs)", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "1m", + "value": "1m" + }, + "hide": 0, + "includeAll": false, + "label": "Group by time / Window", + "multi": false, + "name": "group_by", + "options": [ + { + "selected": true, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "15m", + "value": "15m" + } + ], + "query": "1m,5m,10m,15m", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Protocol / Tokenomics / Relays life-cycle", + "version": 0, + "weekStart": "" +} \ No newline at end of file diff --git a/localnet/poktrolld/config/app.toml b/localnet/poktrolld/config/app.toml index 73e52cb05..d5135735e 100644 --- a/localnet/poktrolld/config/app.toml +++ b/localnet/poktrolld/config/app.toml @@ -61,6 +61,6 @@ query-gas-limit = "0" enabled = true global-labels = [] metrics-sink = "mem" - prometheus-retention-time = "10" + prometheus-retention-time = 86400 service-name = "" statsd-addr = "" diff --git a/pkg/relayer/proxy/metrics.go b/pkg/relayer/proxy/metrics.go index 76c005583..4fdea226e 100644 --- a/pkg/relayer/proxy/metrics.go +++ b/pkg/relayer/proxy/metrics.go @@ -29,7 +29,7 @@ var ( Subsystem: relayMinerProcess, Name: requestsTotal, Help: "Total number of requests processed, labeled by service ID.", - }, 
[]string{"service_id"}) + }, []string{"service_id", "supplier_operator_address"}) // relaysErrorsTotal is a Counter for total error events in the relay miner. // It increments with each error, labeled by 'service_id', diff --git a/pkg/relayer/proxy/synchronous.go b/pkg/relayer/proxy/synchronous.go index fe447aa0f..a159686df 100644 --- a/pkg/relayer/proxy/synchronous.go +++ b/pkg/relayer/proxy/synchronous.go @@ -174,7 +174,7 @@ func (sync *synchronousRPCServer) ServeHTTP(writer http.ResponseWriter, request } // Increment the relays counter. - relaysTotal.With("service_id", supplierServiceId).Add(1) + relaysTotal.With("service_id", supplierServiceId, "supplier_operator_address", relayRequest.Meta.SupplierOperatorAddress).Add(1) defer func() { duration := time.Since(startTime).Seconds() From 54d6c0f5abe27e65ce367d13ade828c8877481a7 Mon Sep 17 00:00:00 2001 From: "Dima K." Date: Wed, 30 Oct 2024 10:55:34 -0700 Subject: [PATCH 26/32] Update Tiltfile Co-authored-by: Daniel Olshansky --- Tiltfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Tiltfile b/Tiltfile index b09dbfae5..bc0d78b92 100644 --- a/Tiltfile +++ b/Tiltfile @@ -112,7 +112,8 @@ if localnet_config["observability"]["enabled"]: helm_repo("prometheus-community", "https://prometheus-community.github.io/helm-charts") helm_repo("grafana-helm-repo", "https://grafana.github.io/helm-charts") - # Increase timeout for building the imagedefault is 30, which can be too low for slow internet connections to pull + # Timeout is increased to 120 seconds (default is 30) because a slow internet connection + # could time out while pulling the # container images.
update_settings(k8s_upsert_timeout_secs=120) From dd5276d40d671eb94c01158b9a7759483955c0b5 Mon Sep 17 00:00:00 2001 From: Dmitry K Date: Wed, 30 Oct 2024 13:29:04 -0700 Subject: [PATCH 27/32] address the feedback --- cmd/poktrolld/cmd/config.go | 31 ++++--- config.yml | 15 ++-- .../cosmos_sdk_insights.json | 12 +-- .../observability-prometheus-stack.yaml | 4 +- localnet/poktrolld/config/config.toml | 4 +- pkg/relayer/proxy/synchronous.go | 5 +- telemetry/common.go | 31 ++++--- telemetry/event_counters.go | 12 +-- telemetry/telemetry.go | 7 +- telemetry/tokens.go | 5 +- x/application/module/abci.go | 2 +- x/proof/keeper/msg_server_create_claim.go | 32 ++++--- x/proof/keeper/msg_server_submit_proof.go | 48 +++++----- x/supplier/module/abci.go | 2 +- x/tokenomics/keeper/settle_pending_claims.go | 89 +++++++------------ x/tokenomics/module/abci.go | 2 +- 16 files changed, 152 insertions(+), 149 deletions(-) diff --git a/cmd/poktrolld/cmd/config.go b/cmd/poktrolld/cmd/config.go index 844bef61a..61a441fa7 100644 --- a/cmd/poktrolld/cmd/config.go +++ b/cmd/poktrolld/cmd/config.go @@ -14,15 +14,16 @@ import ( var once sync.Once -// PoktrollAdditionalConfig represents a poktroll-specific part of `app.toml` file. -// See the `customAppConfigTemplate()` for additional information about each setting. -type PoktrollAdditionalConfig struct { +// PoktrollAppConfig represents a poktroll-specific part of `app.toml` file. +// Checkout `customAppConfigTemplate()` for additional information about each setting. +type PoktrollAppConfig struct { Telemetry telemetry.PoktrollTelemetryConfig `mapstructure:"telemetry"` } -// poktrollAdditionalConfigDefaults sets default values to render in `app.toml`. -func poktrollAdditionalConfigDefaults() PoktrollAdditionalConfig { - return PoktrollAdditionalConfig{ +// poktrollAppConfigDefaults sets default values to render in `app.toml`. +// Checkout `customAppConfigTemplate()` for additional information about each setting. 
+func poktrollAppConfigDefaults() PoktrollAppConfig { + return PoktrollAppConfig{ Telemetry: telemetry.PoktrollTelemetryConfig{ CardinalityLevel: "medium", }, @@ -106,7 +107,7 @@ func initAppConfig() (string, interface{}) { // The following code snippet is just for reference. type CustomAppConfig struct { serverconfig.Config `mapstructure:",squash"` - Poktroll PoktrollAdditionalConfig `mapstructure:"poktroll"` + Poktroll PoktrollAppConfig `mapstructure:"poktroll"` } // Optionally allow the chain developer to overwrite the SDK's default @@ -130,7 +131,8 @@ func initAppConfig() (string, interface{}) { srvCfg.MinGasPrices = "0.000000001upokt" // Also adjust ignite's `config.yml`. srvCfg.Mempool.MaxTxs = 10000 srvCfg.Telemetry.Enabled = true - // Positive value turns on prometheus support. Prometheus metrics are removed from the exporter when retention time is reached. + // Positive non-zero value turns on Prometheus support. + // Prometheus metrics are removed from the exporter when retention time is reached. srvCfg.Telemetry.PrometheusRetentionTime = 60 * 60 * 24 // in seconds. srvCfg.Telemetry.MetricsSink = "mem" srvCfg.Pruning = "nothing" // archiving node by default @@ -140,21 +142,22 @@ func initAppConfig() (string, interface{}) { customAppConfig := CustomAppConfig{ Config: *srvCfg, - Poktroll: poktrollAdditionalConfigDefaults(), + Poktroll: poktrollAppConfigDefaults(), } - return customAppConfigTemplate(), customAppConfig + return customPoktrollAppConfigTemplate(), customAppConfig } -// customAppConfigTemplate extends the default configuration `app.toml` file with our own configs. They are going to be -// used on validators and full-nodes, and they render using default values from `poktrollAdditionalConfigDefaults()`. -func customAppConfigTemplate() string { +// customPoktrollAppConfigTemplate extends the default configuration `app.toml` file with our own configs. +// They are going to be used by validators and full-nodes. 
+// These configs are rendered using default values from `poktrollAppConfigDefaults()`. +func customPoktrollAppConfigTemplate() string { return serverconfig.DefaultConfigTemplate + ` ############################################################################### ### Poktroll ### ############################################################################### - # Poktroll-specific configuration for Full Nodes and Validators. + # Poktroll-specific app configuration for Full Nodes and Validators. [poktroll] # Telemetry configuration in addition to the [telemetry] settings. diff --git a/config.yml b/config.yml index b25664cc9..91623b55d 100644 --- a/config.yml +++ b/config.yml @@ -93,13 +93,14 @@ validators: enabled: true poktroll: telemetry: - # "high" produces a lot of timeseries. Only suitable for small networks such as LocalNet. + # "high" produces a lot of timeseries. + # ONLY suitable for small networks such as LocalNet. cardinality-level: high config: moniker: "validator1" consensus: - timeout_commit: "2s" - timeout_propose: "2s" + timeout_commit: "5s" + timeout_propose: "5s" instrumentation: prometheus: true log_level: "info" @@ -148,22 +149,22 @@ genesis: # Application module - address: pokt1rl3gjgzexmplmds3tq3r3yk84zlwdl6djzgsvm coins: - - amount: "100000068" # Equals to the total of all app stakes below + - amount: "100000068" # MUST BE equal to the total of all app stakes below denom: upokt # Supplier module - address: pokt1j40dzzmn6cn9kxku7a5tjnud6hv37vesr5ccaa coins: - - amount: "1000068" # Equals to the total of all supplier stakes below + - amount: "1000068" # MUST BE equal to the total of all supplier stakes below denom: upokt # Gateway module - address: pokt1f6j7u6875p2cvyrgjr0d2uecyzah0kget9vlpl coins: - - amount: "1000068" # Equals to the total of all gateway stakes below + - amount: "1000068" # MUST BE equal to the total of all gateway stakes below denom: upokt # Service module - address: pokt1nhmtqf4gcmpxu0p6e53hpgtwj0llmsqpxtumcf coins: - - 
amount: "1000000000" # Equals to one add_service_fee below + - amount: "1000000000" # MUST BE equal to one add_service_fee below denom: upokt application: params: diff --git a/localnet/grafana-dashboards/cosmos_sdk_insights.json b/localnet/grafana-dashboards/cosmos_sdk_insights.json index 8eb10af1b..157314cdc 100644 --- a/localnet/grafana-dashboards/cosmos_sdk_insights.json +++ b/localnet/grafana-dashboards/cosmos_sdk_insights.json @@ -787,9 +787,9 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(poktroll_relays{job=\"$job\",service_id=~\"$service_id\"}[1m])) by (service_id, proof_stage)", + "expr": "sum(increase(poktroll_relays{job=\"$job\",service_id=~\"$service_id\"}[1m])) by (service_id, claim_proof_stage)", "instant": false, - "legendFormat": "{{service_id}}-{{proof_stage}}", + "legendFormat": "{{service_id}}-{{claim_proof_stage}}", "range": true, "refId": "A" } @@ -884,9 +884,9 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(poktroll_compute_units{job=\"$job\",service_id=~\"$service_id\"}[1m])) by (service_id, proof_stage)", + "expr": "sum(increase(poktroll_compute_units{job=\"$job\",service_id=~\"$service_id\"}[1m])) by (service_id, claim_proof_stage)", "instant": false, - "legendFormat": "{{service_id}}-{{proof_stage}}", + "legendFormat": "{{service_id}}-{{claim_proof_stage}}", "range": true, "refId": "A" } @@ -1227,9 +1227,9 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(poktroll_claims{job=\"$job\",service_id=~\"$service_id\"}[1m])) by (service_id, proof_stage)", + "expr": "sum(increase(poktroll_claims{job=\"$job\",service_id=~\"$service_id\"}[1m])) by (service_id, claim_proof_stage)", "instant": false, - "legendFormat": "{{service_id}}-{{proof_stage}}", + "legendFormat": "{{service_id}}-{{claim_proof_stage}}", "range": true, "refId": "A" } diff --git a/localnet/kubernetes/observability-prometheus-stack.yaml b/localnet/kubernetes/observability-prometheus-stack.yaml index 
59e36ba41..566ac33fd 100644 --- a/localnet/kubernetes/observability-prometheus-stack.yaml +++ b/localnet/kubernetes/observability-prometheus-stack.yaml @@ -29,8 +29,8 @@ grafana: jsonData: sslmode: disable postgresVersion: 1604 # Adjust to match your PostgreSQL version - # timescaledb: false # Set to true if you are using TimescaleDB - schema: localnet # Specify your schema here + # timescaledb: false # Set to true if you are using TimescaleDB + schema: localnet # Specify your postgres schema here prometheus: prometheusSpec: diff --git a/localnet/poktrolld/config/config.toml b/localnet/poktrolld/config/config.toml index bbfa0366f..34b8edb5a 100644 --- a/localnet/poktrolld/config/config.toml +++ b/localnet/poktrolld/config/config.toml @@ -409,7 +409,7 @@ version = "v0" wal_file = "data/cs.wal/wal" # How long we wait for a proposal block before prevoting nil -timeout_propose = "2s" +timeout_propose = "5s" # How much timeout_propose increases with each round timeout_propose_delta = "5s" # How long we wait after receiving +2/3 prevotes for “anything” (ie. not a single block or nil) @@ -423,7 +423,7 @@ timeout_precommit_delta = "5s" # How long we wait after committing a block, before starting on the new # height (this gives us a chance to receive some more precommits, even # though we already have +2/3). -timeout_commit = "2s" +timeout_commit = "5s" # How many blocks to look back to check existence of the node's consensus votes before joining consensus # When non-zero, the node will panic upon restart diff --git a/pkg/relayer/proxy/synchronous.go b/pkg/relayer/proxy/synchronous.go index a159686df..df3bb8e35 100644 --- a/pkg/relayer/proxy/synchronous.go +++ b/pkg/relayer/proxy/synchronous.go @@ -174,7 +174,10 @@ func (sync *synchronousRPCServer) ServeHTTP(writer http.ResponseWriter, request } // Increment the relays counter. 
- relaysTotal.With("service_id", supplierServiceId, "supplier_operator_address", relayRequest.Meta.SupplierOperatorAddress).Add(1) + relaysTotal.With( + "service_id", supplierServiceId, + "supplier_operator_address", relayRequest.Meta.SupplierOperatorAddress, + ).Add(1) defer func() { duration := time.Since(startTime).Seconds() diff --git a/telemetry/common.go b/telemetry/common.go index b77010e25..28f5dc59b 100644 --- a/telemetry/common.go +++ b/telemetry/common.go @@ -5,8 +5,9 @@ import ( "github.com/hashicorp/go-metrics" ) -// MetricNameKeys constructs the full metric name by prefixing with a defined -// prefix and appending any additional metrics provided as variadic arguments. +// MetricNameKeys prefixes metrics with `poktroll` for easy identification. +// E.g., `("hodlers", "regret_level")` yields `poktroll_hodlers_regret_level` — great for tracking FOMO as hodlers rethink choices. +// Returns a slice of strings as `go-metrics`, the underlying metrics library, expects. func MetricNameKeys(metrics ...string) []string { result := make([]string, 0, len(metrics)+1) result = append(result, metricNamePrefix) @@ -19,12 +20,13 @@ func isTelemetyEnabled() bool { return cosmostelemetry.IsTelemetryEnabled() } -// appendMediumCardinalityLabels only creates the label if cardinality if set to "medium". -// Good example of a medium cardinality label is `service_id` — we do not control the number of services -// on the network, and as permissionless services grow the metrics can get easily out of hand. We're keeping -// an option to turn off such labels. -// Medium cardinality labels are included when the cardinality is set to "high". -// Configuration option is exposed in app.toml, our own `poktroll.telemetry` section. +// appendMediumCardinalityLabels only creates the label if cardinality is set to "medium" or higher.
+// A good example for a "medium" cardinality use-case is `service_id`: +// - This is a network wide parameter +// - It is dependent on the permissionless nature of the network and can grow unbounded +// - We're keeping an option to turn off such labels to avoid metric bloat +// +// Configuration option is exposed in app.toml under the `poktroll.telemetry` section. func appendMediumCardinalityLabels(labels []metrics.Label, labelPairs ...metrics.Label) []metrics.Label { if globalTelemetryConfig.CardinalityLevel == "medium" || globalTelemetryConfig.CardinalityLevel == "high" { return append(labels, labelPairs...) @@ -32,12 +34,13 @@ func appendMediumCardinalityLabels(labels []metrics.Label, labelPairs ...metrics return labels } -// appendHighCardinalityLabels only creates the label if cardinality if set to "high". -// Good examples of high cardinality labels are `application_address` or `supplier_address`. -// This setting, on a large network, will slow down both the full node and the metric scraping system. -// We want to have such labels exposed for local development, debugging and performance troubleshooring. -// More background on why this is important: https://www.robustperception.io/cardinality-is-key/ -// Configuration option is exposed in app.toml, our own `poktroll.telemetry` section. +// appendHighCardinalityLabels only creates the label if cardinality is set to "high". +// A good example of high cardinality labels is `application_address` or `supplier_address`: +// - This setting, on a large network, will slow down both the full node and the metric scraping system. +// - These labels need to be exposed for local development, debugging and performance troubleshooting. +// +// Additional references on cardinality: https://www.robustperception.io/cardinality-is-key/ +// Configuration option is exposed in app.toml under the `poktroll.telemetry` section.
func appendHighCardinalityLabels(labels []metrics.Label, labelPairs ...metrics.Label) []metrics.Label { if globalTelemetryConfig.CardinalityLevel == "high" { return append(labels, labelPairs...) diff --git a/telemetry/event_counters.go b/telemetry/event_counters.go index 3b5d124b5..c526d44f9 100644 --- a/telemetry/event_counters.go +++ b/telemetry/event_counters.go @@ -20,7 +20,7 @@ const ( // Label Names applicationAddressLabelName = "app_addr" - supplierOperatorAddressLabelName = "supop_addr" + supplierOperatorAddressLabelName = "sup_op_addr" ) // EventSuccessCounter increments a counter with the given data type and success status. @@ -107,7 +107,7 @@ func ClaimComputeUnitsCounter( incrementAmount := numComputeUnits labels := []metrics.Label{ - {Name: "proof_stage", Value: claimProofStage.String()}, + {Name: "claim_proof_stage", Value: claimProofStage.String()}, } labels = appendMediumCardinalityLabels(labels, toMetricLabel("service_id", serviceId)) labels = appendHighCardinalityLabels( @@ -146,7 +146,7 @@ func ClaimRelaysCounter( incrementAmount := numRelays labels := []metrics.Label{ - {Name: "proof_stage", Value: claimProofStage.String()}, + {Name: "claim_proof_stage", Value: claimProofStage.String()}, } labels = appendMediumCardinalityLabels(labels, toMetricLabel("service_id", serviceId)) labels = appendHighCardinalityLabels( @@ -184,7 +184,7 @@ func ClaimCounter( incrementAmount := numClaims labels := []metrics.Label{ - {Name: "proof_stage", Value: claimProofStage.String()}, + {Name: "claim_proof_stage", Value: claimProofStage.String()}, } labels = appendMediumCardinalityLabels(labels, toMetricLabel("service_id", serviceId)) @@ -244,7 +244,7 @@ func RelayEMAGauge(relayEMA uint64, serviceId string) { // SessionSuppliersGauge sets a gauge which tracks the number of candidates available // for session suppliers at the given maxPerSession value. // The serviceId is used as a label to be able to track this information for each service. 
-func SessionSuppliersGauge(candidates int, maxPerSession int, serviceId string) { +func SessionSuppliersGauge(numCandidates int, maxPerSession int, serviceId string) { if !isTelemetyEnabled() { return } @@ -259,7 +259,7 @@ func SessionSuppliersGauge(candidates int, maxPerSession int, serviceId string) telemetry.SetGaugeWithLabels( MetricNameKeys("session", "suppliers"), - float32(candidates), + float32(numCandidates), labels, ) } diff --git a/telemetry/telemetry.go b/telemetry/telemetry.go index bd09b76da..5185196f7 100644 --- a/telemetry/telemetry.go +++ b/telemetry/telemetry.go @@ -7,18 +7,19 @@ import ( "github.com/mitchellh/mapstructure" ) -// globalTelemetryConfig is a private variable that stores cardinality level. +// globalTelemetryConfig stores poktroll specific telemetry configurations. // It is set on initialization and does not change for the lifetime of the program. var globalTelemetryConfig PoktrollTelemetryConfig -// PoktrollTelemetryConfig represents the telemetry protion of the custom poktroll config. +// PoktrollTelemetryConfig represents the telemetry protion of the custom poktroll config section in `app.toml`. type PoktrollTelemetryConfig struct { CardinalityLevel string `mapstructure:"cardinality-level"` } // New sets the globalTelemetryConfig for telemetry package. func New(appOpts servertypes.AppOptions) error { - // Extract the map from appOpts. `poktroll.telemetry` comes from `app.toml` which is parsed into a map. + // Extract the map from appOpts. + // `poktroll.telemetry` comes from `app.toml` which is parsed into a map. 
telemetryMap := appOpts.Get("poktroll.telemetry").(map[string]interface{}) // Use mapstructure to decode the map into the struct diff --git a/telemetry/tokens.go b/telemetry/tokens.go index 4e32f9292..a62b3355f 100644 --- a/telemetry/tokens.go +++ b/telemetry/tokens.go @@ -5,7 +5,10 @@ import ( "github.com/hashicorp/go-metrics" ) -// TODO_TECHDEBT: Minted, burned and shashed tokens values might not be accurate or be inflated. +// TODO_MAINNET(@bryanchriswhite): Revisit how telemetry is managed under `x/tokenomics` to ensure that it +// complies with the new hardened settlement approach. + +// TODO_MAINNET(@red-0ne, #897): Minted, burnt and slashed tokens values might not be completely accurate. // While we're keeping this metric for now consider removing in favor of utilizing the `cosmos-exporter` which uses on-chain data. // Context: https://github.com/cosmos/cosmos-sdk/issues/21614, https://github.com/pokt-network/poktroll/pull/832 diff --git a/x/application/module/abci.go b/x/application/module/abci.go index c5882d863..3a1b7b337 100644 --- a/x/application/module/abci.go +++ b/x/application/module/abci.go @@ -10,7 +10,7 @@ import ( // EndBlocker is called every block and handles application related updates. func EndBlocker(ctx sdk.Context, k keeper.Keeper) error { - // Telemetry: measure execution time like standard cosmos-sdk modules do that. + // Telemetry: measure the end-block execution time following standard cosmos-sdk practices.
defer cosmostelemetry.ModuleMeasureSince(types.ModuleName, cosmostelemetry.Now(), cosmostelemetry.MetricKeyEndBlocker) if err := k.EndBlockerAutoUndelegateFromUnstakedGateways(ctx); err != nil { diff --git a/x/proof/keeper/msg_server_create_claim.go b/x/proof/keeper/msg_server_create_claim.go index bdbeeeaf7..79fcc8c2c 100644 --- a/x/proof/keeper/msg_server_create_claim.go +++ b/x/proof/keeper/msg_server_create_claim.go @@ -11,6 +11,7 @@ import ( "github.com/pokt-network/poktroll/telemetry" "github.com/pokt-network/poktroll/x/proof/types" + sessiontypes "github.com/pokt-network/poktroll/x/session/types" sharedtypes "github.com/pokt-network/poktroll/x/shared/types" ) @@ -42,19 +43,8 @@ func (k msgServer) CreateClaim( return nil, status.Error(codes.InvalidArgument, err.Error()) } - // Defer telemetry calls so that they reference the final values the relevant variables. - defer func() { - // Only increment these metrics counters if handling a new claim. - if !isExistingClaim { - serviceId := session.Header.ServiceId - applicationAddress := session.Header.ApplicationAddress - supplierOperatorAddress := msg.GetSupplierOperatorAddress() - - telemetry.ClaimCounter(types.ClaimProofStage_CLAIMED, 1, serviceId, applicationAddress, supplierOperatorAddress, err) - telemetry.ClaimRelaysCounter(types.ClaimProofStage_CLAIMED, numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) - telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_CLAIMED, numClaimComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) - } - }() + // Defer telemetry calls to a helper function to keep business logic clean. 
+ defer k.finalizeCreateClaimTelemetry(session, msg, isExistingClaim, numRelays, numClaimComputeUnits, err) // Construct and insert claim claim = types.Claim{ @@ -158,3 +148,19 @@ func (k msgServer) CreateClaim( Claim: &claim, }, nil } + +// finalizeCreateClaimTelemetry defers telemetry calls to be executed after business logic, +// incrementing counters based on whether a new claim was handled successfully. +// Meant to run deferred. +func (k msgServer) finalizeCreateClaimTelemetry(session *sessiontypes.Session, msg *types.MsgCreateClaim, isExistingClaim bool, numRelays, numClaimComputeUnits uint64, err error) { + // Only increment these metrics counters if handling a new claim. + if !isExistingClaim { + serviceId := session.Header.ServiceId + applicationAddress := session.Header.ApplicationAddress + supplierOperatorAddress := msg.GetSupplierOperatorAddress() + + telemetry.ClaimCounter(types.ClaimProofStage_CLAIMED, 1, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimRelaysCounter(types.ClaimProofStage_CLAIMED, numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_CLAIMED, numClaimComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) + } +} diff --git a/x/proof/keeper/msg_server_submit_proof.go b/x/proof/keeper/msg_server_submit_proof.go index c3d7960cd..5f48a2cc2 100644 --- a/x/proof/keeper/msg_server_submit_proof.go +++ b/x/proof/keeper/msg_server_submit_proof.go @@ -16,6 +16,7 @@ import ( "github.com/pokt-network/poktroll/telemetry" "github.com/pokt-network/poktroll/x/proof/types" servicekeeper "github.com/pokt-network/poktroll/x/service/keeper" + sessiontypes "github.com/pokt-network/poktroll/x/session/types" sharedtypes "github.com/pokt-network/poktroll/x/shared/types" ) @@ -64,18 +65,7 @@ func (k msgServer) SubmitProof( } // Defer telemetry calls so that they reference the final values the relevant variables. 
- defer func() { - // Only increment these metrics counters if handling a new claim. - if !isExistingProof { - serviceId := session.Header.ServiceId - applicationAddress := session.Header.ApplicationAddress - supplierOperatorAddress := msg.GetSupplierOperatorAddress() - - telemetry.ClaimCounter(types.ClaimProofStage_PROVEN, 1, serviceId, applicationAddress, supplierOperatorAddress, err) - telemetry.ClaimRelaysCounter(types.ClaimProofStage_PROVEN, numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) - telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_PROVEN, numClaimComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) - } - }() + defer k.finalizeSubmitProofTelemetry(session, msg, isExistingProof, numRelays, numClaimComputeUnits, err) if err = k.deductProofSubmissionFee(ctx, msg.GetSupplierOperatorAddress()); err != nil { logger.Error(fmt.Sprintf("failed to deduct proof submission fee: %v", err)) @@ -223,15 +213,7 @@ func (k Keeper) ProofRequirementForClaim(ctx context.Context, claim *types.Claim var requirementReason = types.ProofRequirementReason_NOT_REQUIRED // Defer telemetry calls so that they reference the final values the relevant variables. - defer func() { - telemetry.ProofRequirementCounter( - requirementReason, - claim.SessionHeader.ServiceId, - claim.SessionHeader.ApplicationAddress, - claim.SupplierOperatorAddress, - err, - ) - }() + defer k.finalizeProofRequirementTelemetry(requirementReason, claim, err) proofParams := k.GetParams(ctx) sharedParams := k.sharedKeeper.GetParams(ctx) @@ -332,3 +314,27 @@ func (k Keeper) getProofRequirementSeedBlockHash( // the block that is before the earliest block at which a proof can be committed. return k.sessionKeeper.GetBlockHash(ctx, earliestSupplierProofCommitHeight-1), nil } + +// finalizeSubmitProofTelemetry finalizes telemetry updates for SubmitProof, incrementing counters as needed. 
+func (k msgServer) finalizeSubmitProofTelemetry(session *sessiontypes.Session, msg *types.MsgSubmitProof, isExistingProof bool, numRelays, numClaimComputeUnits uint64, err error) { + if !isExistingProof { + serviceId := session.Header.ServiceId + applicationAddress := session.Header.ApplicationAddress + supplierOperatorAddress := msg.GetSupplierOperatorAddress() + + telemetry.ClaimCounter(types.ClaimProofStage_PROVEN, 1, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimRelaysCounter(types.ClaimProofStage_PROVEN, numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_PROVEN, numClaimComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) + } +} + +// finalizeProofRequirementTelemetry finalizes telemetry updates for proof requirements. +func (k Keeper) finalizeProofRequirementTelemetry(requirementReason types.ProofRequirementReason, claim *types.Claim, err error) { + telemetry.ProofRequirementCounter( + requirementReason, + claim.SessionHeader.ServiceId, + claim.SessionHeader.ApplicationAddress, + claim.SupplierOperatorAddress, + err, + ) +} diff --git a/x/supplier/module/abci.go b/x/supplier/module/abci.go index 3ed9de506..f448fd5f0 100644 --- a/x/supplier/module/abci.go +++ b/x/supplier/module/abci.go @@ -10,7 +10,7 @@ import ( // EndBlocker is called every block and handles supplier related updates. func EndBlocker(ctx sdk.Context, k keeper.Keeper) error { - // Telemetry: measure execution time like standard cosmos-sdk modules do that. + // Telemetry: measure the end-block execution time following standard cosmos-sdk practices. defer cosmostelemetry.ModuleMeasureSince(types.ModuleName, cosmostelemetry.Now(), cosmostelemetry.MetricKeyEndBlocker) // TODO_IMPROVE: Add logs and/or telemetry on the number of unbonded suppliers. 
diff --git a/x/tokenomics/keeper/settle_pending_claims.go b/x/tokenomics/keeper/settle_pending_claims.go index 9402eb876..9f8a070be 100644 --- a/x/tokenomics/keeper/settle_pending_claims.go +++ b/x/tokenomics/keeper/settle_pending_claims.go @@ -171,34 +171,15 @@ func (k Keeper) SettlePendingClaims(ctx sdk.Context) ( expiredResult.NumComputeUnits += numClaimComputeUnits // Telemetry - defer telemetry calls so that they reference the final values the relevant variables. - defer func() { - applicationAddress := claim.SessionHeader.ApplicationAddress - - telemetry.ClaimCounter( - prooftypes.ClaimProofStage_EXPIRED, - 1, - claim.SessionHeader.ServiceId, - applicationAddress, - claim.SupplierOperatorAddress, - err, - ) - telemetry.ClaimRelaysCounter( - prooftypes.ClaimProofStage_EXPIRED, - numClaimRelays, - claim.SessionHeader.ServiceId, - applicationAddress, - claim.SupplierOperatorAddress, - err, - ) - telemetry.ClaimComputeUnitsCounter( - prooftypes.ClaimProofStage_EXPIRED, - numClaimComputeUnits, - claim.SessionHeader.ServiceId, - applicationAddress, - claim.SupplierOperatorAddress, - err, - ) - }() + defer k.finalizeTelemetry( + prooftypes.ClaimProofStage_EXPIRED, + claim.SessionHeader.ServiceId, + claim.SessionHeader.ApplicationAddress, + claim.SupplierOperatorAddress, + numClaimRelays, + numClaimComputeUnits, + err, + ) continue } @@ -259,34 +240,15 @@ func (k Keeper) SettlePendingClaims(ctx sdk.Context) ( logger.Debug(fmt.Sprintf("Successfully settled claim for session ID %q at block height %d", claim.SessionHeader.SessionId, blockHeight)) // Telemetry - defer telemetry calls so that they reference the final values the relevant variables. 
- defer func() { - applicationAddress := claim.SessionHeader.ApplicationAddress - - telemetry.ClaimCounter( - prooftypes.ClaimProofStage_SETTLED, - 1, - claim.SessionHeader.ServiceId, - applicationAddress, - claim.SupplierOperatorAddress, - err, - ) - telemetry.ClaimRelaysCounter( - prooftypes.ClaimProofStage_SETTLED, - numClaimRelays, - claim.SessionHeader.ServiceId, - applicationAddress, - claim.SupplierOperatorAddress, - err, - ) - telemetry.ClaimComputeUnitsCounter( - prooftypes.ClaimProofStage_SETTLED, - numClaimComputeUnits, - claim.SessionHeader.ServiceId, - applicationAddress, - claim.SupplierOperatorAddress, - err, - ) - }() + defer k.finalizeTelemetry( + prooftypes.ClaimProofStage_SETTLED, + claim.SessionHeader.ServiceId, + claim.SessionHeader.ApplicationAddress, + claim.SupplierOperatorAddress, + numClaimRelays, + numClaimComputeUnits, + err, + ) } // Slash all the suppliers that have been marked for slashing slashingCount times. @@ -469,3 +431,18 @@ func (k Keeper) slashSupplierStake( return nil } + +// finalizeTelemetry logs telemetry metrics for a claim based on its stage (e.g., EXPIRED, SETTLED). +func (k Keeper) finalizeTelemetry( + proofStage prooftypes.ClaimProofStage, + serviceId string, + applicationAddress string, + supplierOperatorAddress string, + numRelays uint64, + numClaimComputeUnits uint64, + err error, +) { + telemetry.ClaimCounter(proofStage, 1, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimRelaysCounter(proofStage, numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimComputeUnitsCounter(proofStage, numClaimComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) +} diff --git a/x/tokenomics/module/abci.go b/x/tokenomics/module/abci.go index f614548ab..fc0776c68 100644 --- a/x/tokenomics/module/abci.go +++ b/x/tokenomics/module/abci.go @@ -14,7 +14,7 @@ import ( // EndBlocker called at every block and settles all pending claims. 
func EndBlocker(ctx sdk.Context, k keeper.Keeper) (err error) { - // Telemetry: measure execution time like standard cosmos-sdk modules do that. + // Telemetry: measure the end-block execution time following standard cosmos-sdk practices. defer cosmostelemetry.ModuleMeasureSince(types.ModuleName, cosmostelemetry.Now(), cosmostelemetry.MetricKeyEndBlocker) logger := k.Logger().With("method", "EndBlocker") From fc53e96eb187c9300b18df85da32951e6d7644f9 Mon Sep 17 00:00:00 2001 From: Dmitry K Date: Wed, 30 Oct 2024 13:36:37 -0700 Subject: [PATCH 28/32] address feedback --- config.yml | 4 ++-- localnet/poktrolld/config/config.toml | 4 ++-- x/proof/keeper/msg_server_submit_proof.go | 2 ++ x/tokenomics/keeper/settle_pending_claims.go | 1 + 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/config.yml b/config.yml index 91623b55d..e2cabd9a8 100644 --- a/config.yml +++ b/config.yml @@ -99,8 +99,8 @@ validators: config: moniker: "validator1" consensus: - timeout_commit: "5s" - timeout_propose: "5s" + timeout_commit: "2s" + timeout_propose: "2s" instrumentation: prometheus: true log_level: "info" diff --git a/localnet/poktrolld/config/config.toml b/localnet/poktrolld/config/config.toml index 34b8edb5a..bbfa0366f 100644 --- a/localnet/poktrolld/config/config.toml +++ b/localnet/poktrolld/config/config.toml @@ -409,7 +409,7 @@ version = "v0" wal_file = "data/cs.wal/wal" # How long we wait for a proposal block before prevoting nil -timeout_propose = "5s" +timeout_propose = "2s" # How much timeout_propose increases with each round timeout_propose_delta = "5s" # How long we wait after receiving +2/3 prevotes for “anything” (ie. not a single block or nil) @@ -423,7 +423,7 @@ timeout_precommit_delta = "5s" # How long we wait after committing a block, before starting on the new # height (this gives us a chance to receive some more precommits, even # though we already have +2/3). 
-timeout_commit = "5s" +timeout_commit = "2s" # How many blocks to look back to check existence of the node's consensus votes before joining consensus # When non-zero, the node will panic upon restart diff --git a/x/proof/keeper/msg_server_submit_proof.go b/x/proof/keeper/msg_server_submit_proof.go index 5f48a2cc2..d007e3b88 100644 --- a/x/proof/keeper/msg_server_submit_proof.go +++ b/x/proof/keeper/msg_server_submit_proof.go @@ -316,6 +316,7 @@ func (k Keeper) getProofRequirementSeedBlockHash( } // finalizeSubmitProofTelemetry finalizes telemetry updates for SubmitProof, incrementing counters as needed. +// Meant to run deferred. func (k msgServer) finalizeSubmitProofTelemetry(session *sessiontypes.Session, msg *types.MsgSubmitProof, isExistingProof bool, numRelays, numClaimComputeUnits uint64, err error) { if !isExistingProof { serviceId := session.Header.ServiceId @@ -329,6 +330,7 @@ func (k msgServer) finalizeSubmitProofTelemetry(session *sessiontypes.Session, m } // finalizeProofRequirementTelemetry finalizes telemetry updates for proof requirements. +// Meant to run deferred. func (k Keeper) finalizeProofRequirementTelemetry(requirementReason types.ProofRequirementReason, claim *types.Claim, err error) { telemetry.ProofRequirementCounter( requirementReason, diff --git a/x/tokenomics/keeper/settle_pending_claims.go b/x/tokenomics/keeper/settle_pending_claims.go index 9f8a070be..b8e19a5d4 100644 --- a/x/tokenomics/keeper/settle_pending_claims.go +++ b/x/tokenomics/keeper/settle_pending_claims.go @@ -433,6 +433,7 @@ func (k Keeper) slashSupplierStake( } // finalizeTelemetry logs telemetry metrics for a claim based on its stage (e.g., EXPIRED, SETTLED). +// Meant to run deferred. 
func (k Keeper) finalizeTelemetry( proofStage prooftypes.ClaimProofStage, serviceId string, From a247a8e59090bd5c997dfa1a9c879edb2586ec74 Mon Sep 17 00:00:00 2001 From: Dmitry K Date: Wed, 30 Oct 2024 13:42:12 -0700 Subject: [PATCH 29/32] clarify comments --- telemetry/telemetry.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/telemetry/telemetry.go b/telemetry/telemetry.go index 5185196f7..3fd0ed13d 100644 --- a/telemetry/telemetry.go +++ b/telemetry/telemetry.go @@ -8,7 +8,7 @@ import ( ) // globalTelemetryConfig stores poktroll specific telemetry configurations. -// It is set on initialization and does not change for the lifetime of the program. +// This value is initialized only once at the start of the program and remains unchanged throughout its lifetime. var globalTelemetryConfig PoktrollTelemetryConfig // PoktrollTelemetryConfig represents the telemetry protion of the custom poktroll config section in `app.toml`. From 5f693e5f9bccc4f472e98a259060489ca80e405c Mon Sep 17 00:00:00 2001 From: Dmitry K Date: Wed, 30 Oct 2024 13:43:29 -0700 Subject: [PATCH 30/32] clarify comments --- telemetry/telemetry.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/telemetry/telemetry.go b/telemetry/telemetry.go index 3fd0ed13d..348b73898 100644 --- a/telemetry/telemetry.go +++ b/telemetry/telemetry.go @@ -8,7 +8,7 @@ import ( ) // globalTelemetryConfig stores poktroll specific telemetry configurations. -// This value is initialized only once at the start of the program and remains unchanged throughout its lifetime. +// Set once on initialization and remains constant during runtime. var globalTelemetryConfig PoktrollTelemetryConfig // PoktrollTelemetryConfig represents the telemetry protion of the custom poktroll config section in `app.toml`. 
From a13c2ec52114de650614a0c7a19bbabd95eb02d3 Mon Sep 17 00:00:00 2001 From: Dmitry K Date: Wed, 30 Oct 2024 16:43:00 -0700 Subject: [PATCH 31/32] address feedback --- x/tokenomics/keeper/settle_pending_claims.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/x/tokenomics/keeper/settle_pending_claims.go b/x/tokenomics/keeper/settle_pending_claims.go index 6d7fcda04..c16c55a97 100644 --- a/x/tokenomics/keeper/settle_pending_claims.go +++ b/x/tokenomics/keeper/settle_pending_claims.go @@ -480,7 +480,7 @@ func (k Keeper) getApplicationInitialStakeMap( // finalizeTelemetry logs telemetry metrics for a claim based on its stage (e.g., EXPIRED, SETTLED). // Meant to run deferred. func (k Keeper) finalizeTelemetry( - proofStage prooftypes.ClaimProofStage, + claimProofStage prooftypes.ClaimProofStage, serviceId string, applicationAddress string, supplierOperatorAddress string, @@ -488,7 +488,7 @@ func (k Keeper) finalizeTelemetry( numClaimComputeUnits uint64, err error, ) { - telemetry.ClaimCounter(proofStage, 1, serviceId, applicationAddress, supplierOperatorAddress, err) - telemetry.ClaimRelaysCounter(proofStage, numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) - telemetry.ClaimComputeUnitsCounter(proofStage, numClaimComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimCounter(claimProofStage, 1, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimRelaysCounter(claimProofStage, numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimComputeUnitsCounter(claimProofStage, numClaimComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) } From 6cd43f0fce41045df6a231eebb7b2d6b78de0312 Mon Sep 17 00:00:00 2001 From: Dmitry K Date: Wed, 30 Oct 2024 17:13:54 -0700 Subject: [PATCH 32/32] fix the cycle --- api/poktroll/tokenomics/event.pulsar.go | 2 +- telemetry/event_counters.go | 18 ++++++++---------- 
x/proof/keeper/msg_server_create_claim.go | 7 ++++--- x/proof/keeper/msg_server_submit_proof.go | 9 +++++---- x/tokenomics/keeper/settle_pending_claims.go | 6 +++--- x/tokenomics/types/event.pb.go | 2 +- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/api/poktroll/tokenomics/event.pulsar.go b/api/poktroll/tokenomics/event.pulsar.go index cf18db79d..4a8a3c0e5 100644 --- a/api/poktroll/tokenomics/event.pulsar.go +++ b/api/poktroll/tokenomics/event.pulsar.go @@ -3730,7 +3730,7 @@ func (x *EventSupplierSlashed) GetSlashingAmount() *v1beta1.Coin { } // EventApplicationReimbursementRequest is emitted when an application requests -// a reimbursement +// a reimbursement. type EventApplicationReimbursementRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache diff --git a/telemetry/event_counters.go b/telemetry/event_counters.go index c526d44f9..2a36580be 100644 --- a/telemetry/event_counters.go +++ b/telemetry/event_counters.go @@ -10,8 +10,6 @@ import ( "github.com/cosmos/cosmos-sdk/telemetry" "github.com/hashicorp/go-metrics" - - prooftypes "github.com/pokt-network/poktroll/x/proof/types" ) const ( @@ -57,7 +55,7 @@ func EventSuccessCounter( // probabilistic selection, above compute unit threshold). // If err is not nil, the counter is not incremented but Prometheus will ingest this event. func ProofRequirementCounter( - reason prooftypes.ProofRequirementReason, + reason string, serviceId string, applicationAddress string, supplierOperatorAddress string, @@ -69,7 +67,7 @@ func ProofRequirementCounter( incrementAmount := 1 labels := []metrics.Label{ - {Name: "reason", Value: reason.String()}, + {Name: "reason", Value: reason}, } labels = appendMediumCardinalityLabels(labels, toMetricLabel("service_id", serviceId)) labels = appendHighCardinalityLabels( @@ -94,7 +92,7 @@ func ProofRequirementCounter( // which are represented by on-chain claims at the given ClaimProofStage. 
// If err is not nil, the counter is not incremented but Prometheus will ingest this event. func ClaimComputeUnitsCounter( - claimProofStage prooftypes.ClaimProofStage, + claimProofStage string, numComputeUnits uint64, serviceId string, applicationAddress string, @@ -107,7 +105,7 @@ func ClaimComputeUnitsCounter( incrementAmount := numComputeUnits labels := []metrics.Label{ - {Name: "claim_proof_stage", Value: claimProofStage.String()}, + {Name: "claim_proof_stage", Value: claimProofStage}, } labels = appendMediumCardinalityLabels(labels, toMetricLabel("service_id", serviceId)) labels = appendHighCardinalityLabels( @@ -133,7 +131,7 @@ func ClaimComputeUnitsCounter( // If err is not nil, the counter is not incremented and an "error" label is added // with the error's message. I.e., Prometheus will ingest this event. func ClaimRelaysCounter( - claimProofStage prooftypes.ClaimProofStage, + claimProofStage string, numRelays uint64, serviceId string, applicationAddress string, @@ -146,7 +144,7 @@ func ClaimRelaysCounter( incrementAmount := numRelays labels := []metrics.Label{ - {Name: "claim_proof_stage", Value: claimProofStage.String()}, + {Name: "claim_proof_stage", Value: claimProofStage}, } labels = appendMediumCardinalityLabels(labels, toMetricLabel("service_id", serviceId)) labels = appendHighCardinalityLabels( @@ -171,7 +169,7 @@ func ClaimRelaysCounter( // ClaimProofStage. // If err is not nil, the counter is not incremented but Prometheus will ingest this event. 
func ClaimCounter( - claimProofStage prooftypes.ClaimProofStage, + claimProofStage string, numClaims uint64, serviceId string, applicationAddress string, @@ -184,7 +182,7 @@ func ClaimCounter( incrementAmount := numClaims labels := []metrics.Label{ - {Name: "claim_proof_stage", Value: claimProofStage.String()}, + {Name: "claim_proof_stage", Value: claimProofStage}, } labels = appendMediumCardinalityLabels(labels, toMetricLabel("service_id", serviceId)) diff --git a/x/proof/keeper/msg_server_create_claim.go b/x/proof/keeper/msg_server_create_claim.go index 79fcc8c2c..f95e262b1 100644 --- a/x/proof/keeper/msg_server_create_claim.go +++ b/x/proof/keeper/msg_server_create_claim.go @@ -158,9 +158,10 @@ func (k msgServer) finalizeCreateClaimTelemetry(session *sessiontypes.Session, m serviceId := session.Header.ServiceId applicationAddress := session.Header.ApplicationAddress supplierOperatorAddress := msg.GetSupplierOperatorAddress() + claimProofStage := types.ClaimProofStage_CLAIMED.String() - telemetry.ClaimCounter(types.ClaimProofStage_CLAIMED, 1, serviceId, applicationAddress, supplierOperatorAddress, err) - telemetry.ClaimRelaysCounter(types.ClaimProofStage_CLAIMED, numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) - telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_CLAIMED, numClaimComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimCounter(claimProofStage, 1, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimRelaysCounter(claimProofStage, numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimComputeUnitsCounter(claimProofStage, numClaimComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) } } diff --git a/x/proof/keeper/msg_server_submit_proof.go b/x/proof/keeper/msg_server_submit_proof.go index d007e3b88..c19fbd2e6 100644 --- a/x/proof/keeper/msg_server_submit_proof.go +++ 
b/x/proof/keeper/msg_server_submit_proof.go @@ -322,10 +322,11 @@ func (k msgServer) finalizeSubmitProofTelemetry(session *sessiontypes.Session, m serviceId := session.Header.ServiceId applicationAddress := session.Header.ApplicationAddress supplierOperatorAddress := msg.GetSupplierOperatorAddress() + claimProofStage := types.ClaimProofStage_PROVEN.String() - telemetry.ClaimCounter(types.ClaimProofStage_PROVEN, 1, serviceId, applicationAddress, supplierOperatorAddress, err) - telemetry.ClaimRelaysCounter(types.ClaimProofStage_PROVEN, numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) - telemetry.ClaimComputeUnitsCounter(types.ClaimProofStage_PROVEN, numClaimComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimCounter(claimProofStage, 1, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimRelaysCounter(claimProofStage, numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimComputeUnitsCounter(claimProofStage, numClaimComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) } } @@ -333,7 +334,7 @@ func (k msgServer) finalizeSubmitProofTelemetry(session *sessiontypes.Session, m // Meant to run deferred. 
func (k Keeper) finalizeProofRequirementTelemetry(requirementReason types.ProofRequirementReason, claim *types.Claim, err error) { telemetry.ProofRequirementCounter( - requirementReason, + requirementReason.String(), claim.SessionHeader.ServiceId, claim.SessionHeader.ApplicationAddress, claim.SupplierOperatorAddress, diff --git a/x/tokenomics/keeper/settle_pending_claims.go b/x/tokenomics/keeper/settle_pending_claims.go index c16c55a97..7555563a4 100644 --- a/x/tokenomics/keeper/settle_pending_claims.go +++ b/x/tokenomics/keeper/settle_pending_claims.go @@ -488,7 +488,7 @@ func (k Keeper) finalizeTelemetry( numClaimComputeUnits uint64, err error, ) { - telemetry.ClaimCounter(claimProofStage, 1, serviceId, applicationAddress, supplierOperatorAddress, err) - telemetry.ClaimRelaysCounter(claimProofStage, numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) - telemetry.ClaimComputeUnitsCounter(claimProofStage, numClaimComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimCounter(claimProofStage.String(), 1, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimRelaysCounter(claimProofStage.String(), numRelays, serviceId, applicationAddress, supplierOperatorAddress, err) + telemetry.ClaimComputeUnitsCounter(claimProofStage.String(), numClaimComputeUnits, serviceId, applicationAddress, supplierOperatorAddress, err) } diff --git a/x/tokenomics/types/event.pb.go b/x/tokenomics/types/event.pb.go index 8570fd911..5e69f719f 100644 --- a/x/tokenomics/types/event.pb.go +++ b/x/tokenomics/types/event.pb.go @@ -370,7 +370,7 @@ func (m *EventSupplierSlashed) GetSlashingAmount() *types1.Coin { } // EventApplicationReimbursementRequest is emitted when an application requests -// a reimbursement +// a reimbursement. 
type EventApplicationReimbursementRequest struct { ApplicationAddr string `protobuf:"bytes,1,opt,name=application_addr,json=applicationAddr,proto3" json:"application_addr,omitempty"` SupplierOperatorAddr string `protobuf:"bytes,2,opt,name=supplier_operator_addr,json=supplierOperatorAddr,proto3" json:"supplier_operator_addr,omitempty"`