Skip to content

Commit

Permalink
remote write: increase time threshold for resharding (prometheus#14450)
Browse files Browse the repository at this point in the history
Don't reshard if we haven't successfully sent a sample within the last
shardUpdateDuration.

Signed-off-by: Callum Styan <[email protected]>
Co-authored-by: kushagra Shukla <[email protected]>
  • Loading branch information
cstyan and kushalShukla-web authored Jul 30, 2024
1 parent 84b819a commit 1561815
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
6 changes: 3 additions & 3 deletions storage/remote/queue_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -1109,9 +1109,9 @@ func (t *QueueManager) shouldReshard(desiredShards int) bool {
if desiredShards == t.numShards {
return false
}
// We shouldn't reshard if Prometheus hasn't been able to send to the
// remote endpoint successfully within some period of time.
minSendTimestamp := time.Now().Add(-2 * time.Duration(t.cfg.BatchSendDeadline)).Unix()
// We shouldn't reshard if Prometheus hasn't been able to send
// since the last time it checked if it should reshard.
minSendTimestamp := time.Now().Add(-1 * shardUpdateDuration).Unix()
lsts := t.lastSendTimestamp.Load()
if lsts < minSendTimestamp {
level.Warn(t.logger).Log("msg", "Skipping resharding, last successful send was beyond threshold", "lastSendTimestamp", lsts, "minSendTimestamp", minSendTimestamp)
Expand Down
13 changes: 8 additions & 5 deletions storage/remote/queue_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -703,32 +703,35 @@ func TestShouldReshard(t *testing.T) {
startingShards int
samplesIn, samplesOut, lastSendTimestamp int64
expectedToReshard bool
sendDeadline model.Duration
}
cases := []testcase{
{
// Resharding shouldn't take place if the last successful send was > batch send deadline*2 seconds ago.
// Resharding shouldn't take place if we haven't successfully sent a sample
// within the last shardUpdateDuration, even if the send deadline is very low.
startingShards: 10,
samplesIn: 1000,
samplesOut: 10,
lastSendTimestamp: time.Now().Unix() - int64(3*time.Duration(config.DefaultQueueConfig.BatchSendDeadline)/time.Second),
lastSendTimestamp: time.Now().Unix() - int64(shardUpdateDuration),
expectedToReshard: false,
sendDeadline: model.Duration(100 * time.Millisecond),
},
{
startingShards: 5,
startingShards: 10,
samplesIn: 1000,
samplesOut: 10,
lastSendTimestamp: time.Now().Unix(),
expectedToReshard: true,
sendDeadline: config.DefaultQueueConfig.BatchSendDeadline,
},
}

for _, c := range cases {
_, m := newTestClientAndQueueManager(t, defaultFlushDeadline, config.RemoteWriteProtoMsgV1)
_, m := newTestClientAndQueueManager(t, time.Duration(c.sendDeadline), config.RemoteWriteProtoMsgV1)
m.numShards = c.startingShards
m.dataIn.incr(c.samplesIn)
m.dataOut.incr(c.samplesOut)
m.lastSendTimestamp.Store(c.lastSendTimestamp)

m.Start()

desiredShards := m.calculateDesiredShards()
Expand Down

0 comments on commit 1561815

Please sign in to comment.