Skip to content

Commit

Permalink
Fixing matching:TestCheckIdleTaskList test flackiness
Browse files Browse the repository at this point in the history
For some reason it almost always fails in IDE (4/5 cases, goland),
but passes in console. At the same time, the test clearly had issues:
 - it's been 3 subtests in one
 - only the first subtest checked task-list became stopped when unused
 - the third subtest (the one which has been failing) never succeeded
   with `tlm.AddTask` since writer never started - instead of tlm.Start()
   we used special version which supposed to mimic it, but never started
   the writer.

The test is still problematic since it continues using time.Sleep all
around, but so do all the other tests for matching :-(

More comments and error-messages also bring more clarity to what the
test is actually for.
  • Loading branch information
dkrotx committed Dec 18, 2023
1 parent 59f540d commit c7d4ed1
Showing 1 changed file with 68 additions and 58 deletions.
126 changes: 68 additions & 58 deletions service/matching/taskListManager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ package matching
import (
"context"
"sync"
"sync/atomic"
"testing"
"time"

Expand Down Expand Up @@ -219,68 +218,78 @@ func TestDescribeTaskList(t *testing.T) {
require.Zero(t, taskListStatus.GetBacklogCountHint())
}

func tlMgrStartWithoutNotifyEvent(tlm *taskListManagerImpl) {
// mimic tlm.Start() but avoid calling notifyEvent
tlm.liveness.Start()
tlm.startWG.Done()
go tlm.taskReader.dispatchBufferedTasks(defaultTaskBufferIsolationGroup)
go tlm.taskReader.getTasksPump()
}

func TestCheckIdleTaskList(t *testing.T) {
controller := gomock.NewController(t)
logger := testlogger.New(t)

cfg := NewConfig(dynamicconfig.NewNopCollection(), "some random hostname")
cfg.IdleTasklistCheckInterval = dynamicconfig.GetDurationPropertyFnFilteredByTaskListInfo(10 * time.Millisecond)
logger := testlogger.New(t)

// Idle
tlm := createTestTaskListManagerWithConfig(logger, controller, cfg)
tlMgrStartWithoutNotifyEvent(tlm)
time.Sleep(20 * time.Millisecond)
require.False(t, atomic.CompareAndSwapInt32(&tlm.stopped, 0, 1))

// Active poll-er
tlm = createTestTaskListManagerWithConfig(logger, controller, cfg)
tlMgrStartWithoutNotifyEvent(tlm)
time.Sleep(8 * time.Millisecond)
ctx, cancel := context.WithTimeout(context.Background(), time.Microsecond)
_, _ = tlm.GetTask(ctx, nil)
cancel()
time.Sleep(6 * time.Millisecond)
require.Equal(t, int32(0), tlm.stopped)
tlm.Stop()
require.Equal(t, int32(1), tlm.stopped)
t.Run("Idle task-list", func(t *testing.T) {
ctrl := gomock.NewController(t)
tlm := createTestTaskListManagerWithConfig(logger, ctrl, cfg)
require.NoError(t, tlm.Start())

// Active adding task
domainID := uuid.New()
workflowID := "some random workflowID"
runID := "some random runID"
require.Equal(t, int32(0), tlm.stopped, "idle check interval had not passed yet")
time.Sleep(20 * time.Millisecond)
require.Equal(t, int32(1), tlm.stopped, "idle check interval should have pass")
})

addTaskParam := addTaskParams{
execution: &types.WorkflowExecution{
WorkflowID: workflowID,
RunID: runID,
},
taskInfo: &persistence.TaskInfo{
DomainID: domainID,
WorkflowID: workflowID,
RunID: runID,
ScheduleID: 2,
ScheduleToStartTimeout: 5,
CreatedTime: time.Now(),
},
}
tlm = createTestTaskListManagerWithConfig(logger, controller, cfg)
tlMgrStartWithoutNotifyEvent(tlm)
time.Sleep(8 * time.Millisecond)
ctx, cancel = context.WithTimeout(context.Background(), time.Microsecond)
_, _ = tlm.AddTask(ctx, addTaskParam)
cancel()
time.Sleep(6 * time.Millisecond)
require.Equal(t, int32(0), tlm.stopped)
tlm.Stop()
require.Equal(t, int32(1), tlm.stopped)
t.Run("Active poll-er", func(t *testing.T) {
ctrl := gomock.NewController(t)
tlm := createTestTaskListManagerWithConfig(logger, ctrl, cfg)
require.NoError(t, tlm.Start())

time.Sleep(8 * time.Millisecond)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
_, _ = tlm.GetTask(ctx, nil)
cancel()

// task list manager should have been stopped,
// but GetTask extends auto-stop until the next check-idle-task-list-interval
time.Sleep(6 * time.Millisecond)
require.Equal(t, int32(0), tlm.stopped)

time.Sleep(20 * time.Millisecond)
require.Equal(t, int32(1), tlm.stopped, "check-idle-task-list definitely passed")
})

t.Run("Active adding task", func(t *testing.T) {
domainID := uuid.New()
workflowID := uuid.New()
runID := uuid.New()

addTaskParam := addTaskParams{
execution: &types.WorkflowExecution{
WorkflowID: workflowID,
RunID: runID,
},
taskInfo: &persistence.TaskInfo{
DomainID: domainID,
WorkflowID: workflowID,
RunID: runID,
ScheduleID: 2,
ScheduleToStartTimeout: 5,
CreatedTime: time.Now(),
},
}

ctrl := gomock.NewController(t)
tlm := createTestTaskListManagerWithConfig(logger, ctrl, cfg)
require.NoError(t, tlm.Start())

time.Sleep(8 * time.Millisecond)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
_, err := tlm.AddTask(ctx, addTaskParam)
require.NoError(t, err)
cancel()

// task list manager should have been stopped,
// but AddTask extends auto-stop until the next check-idle-task-list-interval
time.Sleep(6 * time.Millisecond)
require.Equal(t, int32(0), tlm.stopped)

time.Sleep(20 * time.Millisecond)
require.Equal(t, int32(1), tlm.stopped, "check-idle-task-list definitely passed")
})
}

func TestAddTaskStandby(t *testing.T) {
Expand All @@ -291,7 +300,8 @@ func TestAddTaskStandby(t *testing.T) {
cfg.IdleTasklistCheckInterval = dynamicconfig.GetDurationPropertyFnFilteredByTaskListInfo(10 * time.Millisecond)

tlm := createTestTaskListManagerWithConfig(logger, controller, cfg)
tlMgrStartWithoutNotifyEvent(tlm)
require.NoError(t, tlm.Start())

// stop taskWriter so that we can check if there's any call to it
// otherwise the task persist process is async and hard to test
tlm.taskWriter.Stop()
Expand Down

0 comments on commit c7d4ed1

Please sign in to comment.