Skip to content

Commit

Permalink
Fixing matching:TestCheckIdleTaskList test flackiness (#5494)
Browse files Browse the repository at this point in the history
For some reason it almost always fails in IDE (4/5 cases, goland),
but passes in console. At the same time, the test clearly had issues:
 - it's been 3 subtests in one
 - only the first subtest checked task-list became stopped when unused
 - the third subtest (the one which has been failing) never succeeded
   with `tlm.AddTask` since writer never started - instead of tlm.Start()
   we used special version which supposed to mimic it, but never started
   the writer.

The test is still problematic since it continues using time.Sleep all
around, but so do all the other tests for matching :-(

More comments and error-messages also bring more clarity to what the
test is actually for.
  • Loading branch information
dkrotx authored Dec 19, 2023
1 parent e869984 commit 2829cbb
Showing 1 changed file with 67 additions and 57 deletions.
124 changes: 67 additions & 57 deletions service/matching/taskListManager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,68 +219,77 @@ func TestDescribeTaskList(t *testing.T) {
require.Zero(t, taskListStatus.GetBacklogCountHint())
}

func tlMgrStartWithoutNotifyEvent(tlm *taskListManagerImpl) {
// mimic tlm.Start() but avoid calling notifyEvent
tlm.liveness.Start()
tlm.startWG.Done()
go tlm.taskReader.dispatchBufferedTasks(defaultTaskBufferIsolationGroup)
go tlm.taskReader.getTasksPump()
}

func TestCheckIdleTaskList(t *testing.T) {
controller := gomock.NewController(t)
logger := testlogger.New(t)

cfg := NewConfig(dynamicconfig.NewNopCollection(), "some random hostname")
cfg.IdleTasklistCheckInterval = dynamicconfig.GetDurationPropertyFnFilteredByTaskListInfo(10 * time.Millisecond)

// Idle
tlm := createTestTaskListManagerWithConfig(logger, controller, cfg)
tlMgrStartWithoutNotifyEvent(tlm)
time.Sleep(20 * time.Millisecond)
require.False(t, atomic.CompareAndSwapInt32(&tlm.stopped, 0, 1))

// Active poll-er
tlm = createTestTaskListManagerWithConfig(logger, controller, cfg)
tlMgrStartWithoutNotifyEvent(tlm)
time.Sleep(8 * time.Millisecond)
ctx, cancel := context.WithTimeout(context.Background(), time.Microsecond)
_, _ = tlm.GetTask(ctx, nil)
cancel()
time.Sleep(6 * time.Millisecond)
require.Equal(t, int32(0), tlm.stopped)
tlm.Stop()
require.Equal(t, int32(1), tlm.stopped)
t.Run("Idle task-list", func(t *testing.T) {
ctrl := gomock.NewController(t)
tlm := createTestTaskListManagerWithConfig(testlogger.New(t), ctrl, cfg)
require.NoError(t, tlm.Start())

// Active adding task
domainID := uuid.New()
workflowID := "some random workflowID"
runID := "some random runID"
require.EqualValues(t, 0, atomic.LoadInt32(&tlm.stopped), "idle check interval had not passed yet")
time.Sleep(20 * time.Millisecond)
require.EqualValues(t, 1, atomic.LoadInt32(&tlm.stopped), "idle check interval should have pass")
})

addTaskParam := addTaskParams{
execution: &types.WorkflowExecution{
WorkflowID: workflowID,
RunID: runID,
},
taskInfo: &persistence.TaskInfo{
DomainID: domainID,
WorkflowID: workflowID,
RunID: runID,
ScheduleID: 2,
ScheduleToStartTimeout: 5,
CreatedTime: time.Now(),
},
}
tlm = createTestTaskListManagerWithConfig(logger, controller, cfg)
tlMgrStartWithoutNotifyEvent(tlm)
time.Sleep(8 * time.Millisecond)
ctx, cancel = context.WithTimeout(context.Background(), time.Microsecond)
_, _ = tlm.AddTask(ctx, addTaskParam)
cancel()
time.Sleep(6 * time.Millisecond)
require.Equal(t, int32(0), tlm.stopped)
tlm.Stop()
require.Equal(t, int32(1), tlm.stopped)
t.Run("Active poll-er", func(t *testing.T) {
ctrl := gomock.NewController(t)
tlm := createTestTaskListManagerWithConfig(testlogger.New(t), ctrl, cfg)
require.NoError(t, tlm.Start())

time.Sleep(8 * time.Millisecond)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
_, _ = tlm.GetTask(ctx, nil)
cancel()

// task list manager should have been stopped,
// but GetTask extends auto-stop until the next check-idle-task-list-interval
time.Sleep(6 * time.Millisecond)
require.EqualValues(t, 0, atomic.LoadInt32(&tlm.stopped))

time.Sleep(20 * time.Millisecond)
require.EqualValues(t, 1, atomic.LoadInt32(&tlm.stopped), "idle check interval should have pass")
})

t.Run("Active adding task", func(t *testing.T) {
domainID := uuid.New()
workflowID := uuid.New()
runID := uuid.New()

addTaskParam := addTaskParams{
execution: &types.WorkflowExecution{
WorkflowID: workflowID,
RunID: runID,
},
taskInfo: &persistence.TaskInfo{
DomainID: domainID,
WorkflowID: workflowID,
RunID: runID,
ScheduleID: 2,
ScheduleToStartTimeout: 5,
CreatedTime: time.Now(),
},
}

ctrl := gomock.NewController(t)
tlm := createTestTaskListManagerWithConfig(testlogger.New(t), ctrl, cfg)
require.NoError(t, tlm.Start())

time.Sleep(8 * time.Millisecond)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
_, err := tlm.AddTask(ctx, addTaskParam)
require.NoError(t, err)
cancel()

// task list manager should have been stopped,
// but AddTask extends auto-stop until the next check-idle-task-list-interval
time.Sleep(6 * time.Millisecond)
require.EqualValues(t, 0, atomic.LoadInt32(&tlm.stopped))

time.Sleep(20 * time.Millisecond)
require.EqualValues(t, 1, atomic.LoadInt32(&tlm.stopped), "idle check interval should have pass")
})
}

func TestAddTaskStandby(t *testing.T) {
Expand All @@ -291,7 +300,8 @@ func TestAddTaskStandby(t *testing.T) {
cfg.IdleTasklistCheckInterval = dynamicconfig.GetDurationPropertyFnFilteredByTaskListInfo(10 * time.Millisecond)

tlm := createTestTaskListManagerWithConfig(logger, controller, cfg)
tlMgrStartWithoutNotifyEvent(tlm)
require.NoError(t, tlm.Start())

// stop taskWriter so that we can check if there's any call to it
// otherwise the task persist process is async and hard to test
tlm.taskWriter.Stop()
Expand Down

0 comments on commit 2829cbb

Please sign in to comment.