-
Notifications
You must be signed in to change notification settings - Fork 375
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support to execute tests on multiple distros #2725
Changes from all commits
c257464
2a3c87e
7d4200d
d681153
4b6550f
dad59f7
19aa023
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,14 +23,20 @@ RUN \ | |
# \ | ||
# Install basic dependencies \ | ||
# \ | ||
apt-get install -y git python3.10 python3.10-dev curl && \ | ||
apt-get install -y git python3.10 python3.10-dev && \ | ||
ln /usr/bin/python3.10 /usr/bin/python3 && \ | ||
\ | ||
# \ | ||
# Install LISA dependencies \ | ||
# \ | ||
apt-get install -y git gcc libgirepository1.0-dev libcairo2-dev qemu-utils libvirt-dev \ | ||
python3-pip python3-venv && \ | ||
\ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The latest Ubuntu image on Docker Hub doesn't include zip, so it is installed explicitly here. |
||
# \ | ||
# Install test dependencies \ | ||
# \ | ||
apt-get install -y zip && \ | ||
\ | ||
# \ | ||
# Create user waagent, which is used to execute the tests \ | ||
# \ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,9 +14,11 @@ | |
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
from assertpy import assert_that | ||
import re | ||
|
||
from assertpy import fail | ||
from pathlib import Path | ||
from shutil import rmtree | ||
from threading import current_thread, RLock | ||
from typing import List, Type | ||
|
||
# Disable those warnings, since 'lisa' is an external, non-standard, dependency | ||
|
@@ -40,121 +42,145 @@ | |
from tests_e2e.scenarios.lib.logging import log | ||
|
||
|
||
class AgentLisaTestContext(AgentTestContext): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I made this class local to AgentTestSuite |
||
""" | ||
Execution context for LISA tests. | ||
""" | ||
def __init__(self, vm: VmIdentifier, node: Node): | ||
super().__init__( | ||
vm=vm, | ||
paths=AgentTestContext.Paths(remote_working_directory=Path('/home')/node.connection_info['username']), | ||
connection=AgentTestContext.Connection( | ||
ip_address=node.connection_info['address'], | ||
username=node.connection_info['username'], | ||
private_key_file=node.connection_info['private_key_file'], | ||
ssh_port=node.connection_info['port']) | ||
) | ||
self._node = node | ||
|
||
@property | ||
def node(self) -> Node: | ||
return self._node | ||
|
||
|
||
class AgentTestSuite(TestSuite): | ||
""" | ||
Base class for Agent test suites. It provides facilities for setup, execution of tests and reporting results. Derived | ||
classes use the execute() method to run the tests in their corresponding suites. | ||
""" | ||
|
||
class _Context(AgentTestContext): | ||
def __init__(self, vm: VmIdentifier, paths: AgentTestContext.Paths, connection: AgentTestContext.Connection): | ||
super().__init__(vm=vm, paths=paths, connection=connection) | ||
# These are initialized by AgentTestSuite._set_context(). | ||
self.node: Node = None | ||
self.runbook_name: str = None | ||
self.suite_name: str = None | ||
|
||
def __init__(self, metadata: TestSuiteMetadata) -> None: | ||
super().__init__(metadata) | ||
# The context is initialized by execute() | ||
self.__context: AgentLisaTestContext = None | ||
|
||
@property | ||
def context(self) -> AgentLisaTestContext: | ||
if self.__context is None: | ||
raise Exception("The context for the AgentTestSuite has not been initialized") | ||
return self.__context | ||
# The context is initialized by _set_context() via the call to execute() | ||
self.__context: AgentTestSuite._Context = None | ||
|
||
def _set_context(self, node: Node): | ||
connection_info = node.connection_info | ||
node_context = get_node_context(node) | ||
|
||
runbook = node.capability.get_extended_runbook(AzureNodeSchema, AZURE) | ||
# Remove the resource group and node suffix, e.g. "e1-n0" in "lisa-20230110-162242-963-e1-n0" | ||
runbook_name = re.sub(r"-\w+-\w+$", "", runbook.name) | ||
|
||
self.__context = AgentLisaTestContext( | ||
VmIdentifier( | ||
self.__context = AgentTestSuite._Context( | ||
vm=VmIdentifier( | ||
location=runbook.location, | ||
subscription=node.features._platform.subscription_id, | ||
resource_group=node_context.resource_group_name, | ||
name=node_context.vm_name | ||
), | ||
node | ||
) | ||
name=node_context.vm_name), | ||
paths=AgentTestContext.Paths( | ||
# The runbook name is unique on each run, so we will use different working directory every time | ||
working_directory=Path().home()/"tmp"/runbook_name, | ||
remote_working_directory=Path('/home')/connection_info['username']), | ||
connection=AgentTestContext.Connection( | ||
ip_address=connection_info['address'], | ||
username=connection_info['username'], | ||
private_key_file=connection_info['private_key_file'], | ||
ssh_port=connection_info['port'])) | ||
|
||
def _setup(self) -> None: | ||
""" | ||
Prepares the test suite for execution | ||
""" | ||
log.info("Test Node: %s", self.context.vm.name) | ||
log.info("Resource Group: %s", self.context.vm.resource_group) | ||
log.info("Working directory: %s", self.context.working_directory) | ||
self.__context.node = node | ||
self.__context.suite_name = f"{self._metadata.full_name}:{runbook.marketplace.offer}-{runbook.marketplace.sku}" | ||
|
||
if self.context.working_directory.exists(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The cleanup of the working directory was getting in the way of debugging, since sometimes one needs to check its contents. Instead of cleaning up, now I use a unique working directory per runbook execution. The cleanup was mainly targeted to the developer scenario, so with these changes development machines may start accumulating many test files. That can be handled by our helper scripts. |
||
log.info("Removing existing working directory: %s", self.context.working_directory) | ||
try: | ||
rmtree(self.context.working_directory.as_posix()) | ||
except Exception as exception: | ||
log.warning("Failed to remove the working directory: %s", exception) | ||
self.context.working_directory.mkdir() | ||
@property | ||
def context(self): | ||
if self.__context is None: | ||
raise Exception("The context for the AgentTestSuite has not been initialized") | ||
return self.__context | ||
|
||
def _clean_up(self) -> None: | ||
# | ||
# Test suites within the same runbook may be executed concurrently, and setup needs to be done only once. | ||
# We use this lock to allow only 1 thread to do the setup. Setup completion is marked using the 'completed' | ||
# file: the thread doing the setup creates the file and threads that find that the file already exists | ||
# simply skip setup. | ||
# | ||
_setup_lock = RLock() | ||
|
||
def _setup(self) -> None: | ||
""" | ||
Cleans up any leftovers from the test suite run. | ||
Prepares the test suite for execution (currently, it just builds the agent package) | ||
|
||
Returns the path to the agent package. | ||
""" | ||
AgentTestSuite._setup_lock.acquire() | ||
|
||
try: | ||
log.info("Removing %s", self.context.working_directory) | ||
rmtree(self.context.working_directory.as_posix(), ignore_errors=True) | ||
except: # pylint: disable=bare-except | ||
log.exception("Failed to cleanup the test run") | ||
log.info("") | ||
log.info("**************************************** [Build] ****************************************") | ||
log.info("") | ||
completed: Path = self.context.working_directory/"completed" | ||
|
||
def _setup_node(self) -> None: | ||
""" | ||
Prepares the remote node for executing the test suite. | ||
""" | ||
agent_package_path = self._build_agent_package() | ||
self._install_agent_on_node(agent_package_path) | ||
if completed.exists(): | ||
log.info("Found %s. Build has already been done, skipping", completed) | ||
return | ||
|
||
log.info("Creating working directory: %s", self.context.working_directory) | ||
self.context.working_directory.mkdir(parents=True) | ||
self._build_agent_package() | ||
|
||
def _build_agent_package(self) -> Path: | ||
log.info("Completed setup, creating %s", completed) | ||
completed.touch() | ||
|
||
finally: | ||
AgentTestSuite._setup_lock.release() | ||
|
||
def _build_agent_package(self) -> None: | ||
""" | ||
Builds the agent package and returns the path to the package. | ||
""" | ||
build_path = self.context.working_directory/"build" | ||
|
||
log.info("Building agent package to %s", build_path) | ||
log.info("Building agent package to %s", self.context.working_directory) | ||
|
||
makepkg.run(agent_family="Test", output_directory=str(build_path), log=log) | ||
makepkg.run(agent_family="Test", output_directory=str(self.context.working_directory), log=log) | ||
|
||
package_path = build_path/"eggs"/f"WALinuxAgent-{AGENT_VERSION}.zip" | ||
package_path: Path = self._get_agent_package_path() | ||
if not package_path.exists(): | ||
raise Exception(f"Can't find the agent package at {package_path}") | ||
|
||
log.info("Agent package: %s", package_path) | ||
log.info("Built agent package as %s", package_path) | ||
|
||
def _get_agent_package_path(self) -> Path: | ||
""" | ||
Returns the path to the agent package. | ||
""" | ||
return self.context.working_directory/"eggs"/f"WALinuxAgent-{AGENT_VERSION}.zip" | ||
|
||
return package_path | ||
def _clean_up(self) -> None: | ||
""" | ||
Cleans up any leftovers from the test suite run. Currently just an empty placeholder for future use. | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The cleanup used to remove the working directory. Since this is part of the protocol enforced by self.execute(), I left it here as an empty method in case we need it in the future. |
||
|
||
def _install_agent_on_node(self, agent_package: Path) -> None: | ||
def _setup_node(self) -> None: | ||
""" | ||
Prepares the remote node for executing the test suite. | ||
""" | ||
log.info("") | ||
log.info("************************************** [Node Setup] **************************************") | ||
log.info("") | ||
log.info("Test Node: %s", self.context.vm.name) | ||
log.info("Resource Group: %s", self.context.vm.resource_group) | ||
log.info("") | ||
|
||
self._install_agent_on_node() | ||
|
||
def _install_agent_on_node(self) -> None: | ||
""" | ||
Installs the given agent package on the test node. | ||
""" | ||
agent_package_path: Path = self._get_agent_package_path() | ||
|
||
# The install script needs to unzip the agent package; ensure unzip is installed on the test node | ||
log.info("Installing unzip tool on %s", self.context.node.name) | ||
self.context.node.os.install_packages("unzip") | ||
|
||
log.info("Installing %s on %s", agent_package, self.context.node.name) | ||
agent_package_remote_path = self.context.remote_working_directory / agent_package.name | ||
log.info("Copying %s to %s:%s", agent_package, self.context.node.name, agent_package_remote_path) | ||
self.context.node.shell.copy(agent_package, agent_package_remote_path) | ||
log.info("Installing %s on %s", agent_package_path, self.context.node.name) | ||
agent_package_remote_path = self.context.remote_working_directory/agent_package_path.name | ||
log.info("Copying %s to %s:%s", agent_package_path, self.context.node.name, agent_package_remote_path) | ||
self.context.node.shell.copy(agent_package_path, agent_package_remote_path) | ||
self.execute_script_on_node( | ||
self.context.test_source_directory/"orchestrator"/"scripts"/"install-agent", | ||
parameters=f"--package {agent_package_remote_path} --version {AGENT_VERSION}", | ||
|
@@ -172,8 +198,8 @@ def _collect_node_logs(self) -> None: | |
self.execute_script_on_node(self.context.test_source_directory/"orchestrator"/"scripts"/"collect-logs", sudo=True) | ||
|
||
# Copy the tarball to the local logs directory | ||
remote_path = self.context.remote_working_directory / "logs.tgz" | ||
local_path = Path.home()/'logs'/'vm-logs-{0}.tgz'.format(self.context.node.name) | ||
remote_path = "/tmp/waagent-logs.tgz" | ||
local_path = Path.home()/'logs'/'vm-logs-{0}.tgz'.format(self.context.suite_name) | ||
log.info("Copying %s:%s to %s", self.context.node.name, remote_path, local_path) | ||
self.context.node.shell.copy_back(remote_path, local_path) | ||
except: # pylint: disable=bare-except | ||
|
@@ -186,12 +212,11 @@ def execute(self, node: Node, test_suite: List[Type[AgentTest]]) -> None: | |
""" | ||
self._set_context(node) | ||
|
||
log.info("") | ||
log.info("**************************************** [Setup] ****************************************") | ||
log.info("") | ||
|
||
failed: List[str] = [] | ||
|
||
thread_name = current_thread().name | ||
current_thread().name = self.context.suite_name | ||
|
||
try: | ||
self._setup() | ||
|
||
|
@@ -233,15 +258,21 @@ def execute(self, node: Node, test_suite: List[Type[AgentTest]]) -> None: | |
for r in results: | ||
log.info("\t%s", r) | ||
log.info("") | ||
|
||
finally: | ||
self._collect_node_logs() | ||
|
||
except: # pylint: disable=bare-except | ||
# Log the error here so that it is decorated with the thread name, then re-raise | ||
log.exception("Test suite failed") | ||
raise | ||
|
||
finally: | ||
self._clean_up() | ||
current_thread().name = thread_name | ||
|
||
# Fail the entire test suite if any test failed | ||
assert_that(failed).described_as("One or more tests failed").is_length(0) | ||
if len(failed) > 0: | ||
fail(f"{[self.context.suite_name]} One or more tests failed: {failed}") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Using fail() instead of assert_that() improves the formatting of the error message a little bit. |
||
|
||
def execute_script_on_node(self, script_path: Path, parameters: str = "", sudo: bool = False) -> int: | ||
""" | ||
|
@@ -259,17 +290,16 @@ def execute_script_on_node(self, script_path: Path, parameters: str = "", sudo: | |
|
||
result = custom_script.run(parameters=parameters, sudo=sudo) | ||
|
||
if result.exit_code != 0: | ||
output = result.stdout if result.stderr == "" else f"{result.stdout}\n{result.stderr}" | ||
raise Exception(f"[{command_line}] failed:\n{output}") | ||
|
||
if result.stdout != "": | ||
separator = "\n" if "\n" in result.stdout else " " | ||
log.info("stdout:%s%s", separator, result.stdout) | ||
if result.stderr != "": | ||
separator = "\n" if "\n" in result.stderr else " " | ||
log.error("stderr:%s%s", separator, result.stderr) | ||
|
||
if result.exit_code != 0: | ||
raise Exception(f"[{command_line}] failed. Exit code: {result.exit_code}") | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to raise the exception after logging the command's output |
||
return result.exit_code | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,24 +2,33 @@ | |
# | ||
# Collects the logs needed to debug agent issues into a compressed tarball. | ||
# | ||
set -euxo pipefail | ||
|
||
logs_file_name="$HOME/logs.tgz" | ||
# Note that we do "set -euxo pipefail" only after executing "tar". That command exits with code 1 on warnings | ||
# and we do not want to consider those as failures. | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is what was preventing log collection on warnings |
||
logs_file_name="/tmp/waagent-logs.tgz" | ||
|
||
echo "Collecting logs to $logs_file_name ..." | ||
|
||
tar --exclude='journal/*' --exclude='omsbundle' --exclude='omsagent' --exclude='mdsd' --exclude='scx*' \ | ||
--exclude='*.so' --exclude='*__LinuxDiagnostic__*' --exclude='*.zip' --exclude='*.deb' --exclude='*.rpm' \ | ||
--warning=no-file-changed \ | ||
-czf "$logs_file_name" \ | ||
/var/log \ | ||
/var/lib/waagent/ \ | ||
/etc/waagent.conf | ||
|
||
# tar exits with 1 on warnings; ignore those | ||
set -euxo pipefail | ||
|
||
# Ignore warnings (exit code 1) | ||
exit_code=$? | ||
if [ "$exit_code" != "1" ] && [ "$exit_code" != "0" ]; then | ||
|
||
if [ "$exit_code" == "1" ]; then | ||
echo "WARNING: tar exit code is 1" | ||
elif [ "$exit_code" != "0" ]; then | ||
exit $exit_code | ||
fi | ||
|
||
chmod +r "$logs_file_name" | ||
chmod a+r "$logs_file_name" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "a" (i.e. "all") needs to be given explicitly, since Mariner's UMASK turns the read permission off |
||
|
||
ls -l "$logs_file_name" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
curl was previously used to download Poetry in order to set up LISA, but LISA's new installation procedure is not based on Poetry, so curl is no longer needed.