From e480fafab0640dfc0d32ce237df796c164c62415 Mon Sep 17 00:00:00 2001 From: narrieta Date: Fri, 12 Jan 2024 13:06:39 -0800 Subject: [PATCH 1/2] Don't check Agent log from the top after each test suite --- .../orchestrator/lib/agent_test_suite.py | 46 ++++++++----------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 7b2becd59..caa7e54d3 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -573,10 +573,12 @@ def _execute(self) -> None: test_suite_success = False raise + check_log_start_time = datetime.datetime.min + for suite in self._test_suites: log.info("Executing test suite %s", suite.name) self._lisa_log.info("Executing Test Suite %s", suite.name) - case_success = self._execute_test_suite(suite, test_context) + case_success, check_log_start_time = self._execute_test_suite(suite, test_context, check_log_start_time) test_suite_success = case_success and test_suite_success if not case_success: failed_cases.append(suite.name) @@ -611,9 +613,10 @@ def _execute(self) -> None: if not test_suite_success or unexpected_error: raise TestFailedException(self._environment_name, failed_cases) - def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestContext) -> bool: + def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestContext, check_log_start_time: datetime.datetime) -> Tuple[bool, datetime.datetime]: """ - Executes the given test suite and returns True if all the tests in the suite succeeded. + Executes the given test suite and returns a tuple of a bool indicating whether all the tests in the suite succeeded, and the timestamp that should be used + for the next check of the agent log. """ suite_name = suite.name suite_full_name = f"{suite_name}-{self._environment_name}" @@ -631,7 +634,6 @@ def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestConte summary: List[str] = [] ignore_error_rules: List[Dict[str, Any]] = [] - before_timestamp = datetime.datetime.min for test in suite.tests: test_full_name = f"{suite_name}-{test.name}" @@ -705,13 +707,11 @@ def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestConte ignore_error_rules.extend(test_instance.get_ignore_error_rules()) - # If the test has a timestamp before which errors should be ignored in the agent log, use that timestamp - # if multiple tests have this setting, use the earliest timestamp - if test_instance.get_ignore_errors_before_timestamp() != datetime.datetime.min: - if before_timestamp != datetime.datetime.min: - before_timestamp = min(before_timestamp, test_instance.get_ignore_errors_before_timestamp()) - else: - before_timestamp = test_instance.get_ignore_errors_before_timestamp() + # Check if the test is requesting to override the timestamp for the agent log check. + # Note that if multiple tests in the suite provide an override, we'll use the earliest timestamp. + test_check_log_start_time = test_instance.get_ignore_errors_before_timestamp() + if test_check_log_start_time != datetime.datetime.min: + check_log_start_time = min(check_log_start_time, test_check_log_start_time) if not test_success and test.blocks_suite: log.warning("%s failed and blocks the suite. Stopping suite execution.", test.name) @@ -737,11 +737,12 @@ def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestConte if not suite_success: self._mark_log_as_failed() - suite_success = suite_success and self._check_agent_log_on_test_nodes(ignore_error_rules, before_timestamp) + next_check_log_start_time = datetime.datetime.utcnow() + suite_success = suite_success and self._check_agent_log_on_test_nodes(ignore_error_rules, check_log_start_time) - return suite_success + return suite_success, next_check_log_start_time - def _check_agent_log_on_test_nodes(self, ignore_error_rules: List[Dict[str, Any]], before_timestamp: datetime) -> bool: + def _check_agent_log_on_test_nodes(self, ignore_error_rules: List[Dict[str, Any]], check_log_start_time: datetime.datetime) -> bool: """ Checks the agent log on the test nodes for errors; returns true on success (no errors in the logs) """ @@ -759,22 +760,15 @@ def _check_agent_log_on_test_nodes(self, ignore_error_rules: List[Dict[str, Any] start_time: datetime.datetime = datetime.datetime.now() try: - self._lisa_log.info("Checking agent log on the test node %s", node_name) - log.info("Checking agent log on the test node %s", node_name) + message = f"Checking agent log on test node {node_name}, starting at {check_log_start_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ')}" + self._lisa_log.info(message) + log.info(message) output = ssh_client.run_command("check-agent-log.py -j") errors = json.loads(output, object_hook=AgentLogRecord.from_dictionary) - # Individual tests may have rules to ignore known errors; filter those out - if len(ignore_error_rules) > 0: - new = [] - for e in errors: - # Ignore errors that occurred before the timestamp - if e.timestamp < before_timestamp: - continue - if not AgentLog.matches_ignore_rule(e, ignore_error_rules): - new.append(e) - errors = new + # Filter out errors that occurred before the starting timestamp or that match an ignore rule + errors = [e for e in errors if e.timestamp >= check_log_start_time and (len(ignore_error_rules) == 0 or not AgentLog.matches_ignore_rule(e, ignore_error_rules))] if len(errors) == 0: # If no errors, we are done; don't create a log or test result. From 4cb562f1cd25ffbac596394c27c3f78a2059d1cd Mon Sep 17 00:00:00 2001 From: narrieta Date: Tue, 16 Jan 2024 16:31:06 -0800 Subject: [PATCH 2/2] fix initialization of override --- tests_e2e/orchestrator/lib/agent_test_suite.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index caa7e54d3..2a7241d78 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -621,6 +621,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestConte suite_name = suite.name suite_full_name = f"{suite_name}-{self._environment_name}" suite_start_time: datetime.datetime = datetime.datetime.now() + check_log_start_time_override = datetime.datetime.max # tests can override the timestamp for the agent log check with the get_ignore_errors_before_timestamp() method with set_thread_name(suite_full_name): # The thread name is added to the LISA log log_path: Path = self._log_path / f"{suite_full_name}.log" @@ -711,7 +712,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestConte # Note that if multiple tests in the suite provide an override, we'll use the earliest timestamp. test_check_log_start_time = test_instance.get_ignore_errors_before_timestamp() if test_check_log_start_time != datetime.datetime.min: - check_log_start_time = min(check_log_start_time, test_check_log_start_time) + check_log_start_time_override = min(check_log_start_time_override, test_check_log_start_time) if not test_success and test.blocks_suite: log.warning("%s failed and blocks the suite. Stopping suite execution.", test.name) @@ -738,7 +739,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestConte self._mark_log_as_failed() next_check_log_start_time = datetime.datetime.utcnow() - suite_success = suite_success and self._check_agent_log_on_test_nodes(ignore_error_rules, check_log_start_time) + suite_success = suite_success and self._check_agent_log_on_test_nodes(ignore_error_rules, check_log_start_time_override if check_log_start_time_override != datetime.datetime.max else check_log_start_time) return suite_success, next_check_log_start_time