diff --git a/tests_e2e/docker/Dockerfile b/tests_e2e/docker/Dockerfile index 0489d3907..752aa4ff2 100644 --- a/tests_e2e/docker/Dockerfile +++ b/tests_e2e/docker/Dockerfile @@ -23,7 +23,7 @@ RUN \ # \ # Install basic dependencies \ # \ - apt-get install -y git python3.10 python3.10-dev curl && \ + apt-get install -y git python3.10 python3.10-dev && \ ln /usr/bin/python3.10 /usr/bin/python3 && \ \ # \ @@ -31,6 +31,12 @@ RUN \ # \ apt-get install -y git gcc libgirepository1.0-dev libcairo2-dev qemu-utils libvirt-dev \ python3-pip python3-venv && \ + \ + # \ + # Install test dependencies \ + # \ + apt-get install -y zip && \ + \ # \ # Create user waagent, which is used to execute the tests \ # \ diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index c4d94a242..ee2f511a6 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -14,9 +14,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from assertpy import assert_that +import re + +from assertpy import fail from pathlib import Path -from shutil import rmtree +from threading import current_thread, RLock from typing import List, Type # Disable those warnings, since 'lisa' is an external, non-standard, dependency @@ -40,121 +42,145 @@ from tests_e2e.scenarios.lib.logging import log -class AgentLisaTestContext(AgentTestContext): - """ - Execution context for LISA tests. 
- """ - def __init__(self, vm: VmIdentifier, node: Node): - super().__init__( - vm=vm, - paths=AgentTestContext.Paths(remote_working_directory=Path('/home')/node.connection_info['username']), - connection=AgentTestContext.Connection( - ip_address=node.connection_info['address'], - username=node.connection_info['username'], - private_key_file=node.connection_info['private_key_file'], - ssh_port=node.connection_info['port']) - ) - self._node = node - - @property - def node(self) -> Node: - return self._node - - class AgentTestSuite(TestSuite): """ Base class for Agent test suites. It provides facilities for setup, execution of tests and reporting results. Derived classes use the execute() method to run the tests in their corresponding suites. """ + + class _Context(AgentTestContext): + def __init__(self, vm: VmIdentifier, paths: AgentTestContext.Paths, connection: AgentTestContext.Connection): + super().__init__(vm=vm, paths=paths, connection=connection) + # These are initialized by AgentTestSuite._set_context(). + self.node: Node = None + self.runbook_name: str = None + self.suite_name: str = None + def __init__(self, metadata: TestSuiteMetadata) -> None: super().__init__(metadata) - # The context is initialized by execute() - self.__context: AgentLisaTestContext = None - - @property - def context(self) -> AgentLisaTestContext: - if self.__context is None: - raise Exception("The context for the AgentTestSuite has not been initialized") - return self.__context + # The context is initialized by _set_context() via the call to execute() + self.__context: AgentTestSuite._Context = None def _set_context(self, node: Node): + connection_info = node.connection_info node_context = get_node_context(node) - runbook = node.capability.get_extended_runbook(AzureNodeSchema, AZURE) + # Remove the resource group and node suffix, e.g. 
"e1-n0" in "lisa-20230110-162242-963-e1-n0" + runbook_name = re.sub(r"-\w+-\w+$", "", runbook.name) - self.__context = AgentLisaTestContext( - VmIdentifier( + self.__context = AgentTestSuite._Context( + vm=VmIdentifier( location=runbook.location, subscription=node.features._platform.subscription_id, resource_group=node_context.resource_group_name, - name=node_context.vm_name - ), - node - ) + name=node_context.vm_name), + paths=AgentTestContext.Paths( + # The runbook name is unique on each run, so we will use different working directory every time + working_directory=Path().home()/"tmp"/runbook_name, + remote_working_directory=Path('/home')/connection_info['username']), + connection=AgentTestContext.Connection( + ip_address=connection_info['address'], + username=connection_info['username'], + private_key_file=connection_info['private_key_file'], + ssh_port=connection_info['port'])) - def _setup(self) -> None: - """ - Prepares the test suite for execution - """ - log.info("Test Node: %s", self.context.vm.name) - log.info("Resource Group: %s", self.context.vm.resource_group) - log.info("Working directory: %s", self.context.working_directory) + self.__context.node = node + self.__context.suite_name = f"{self._metadata.full_name}:{runbook.marketplace.offer}-{runbook.marketplace.sku}" - if self.context.working_directory.exists(): - log.info("Removing existing working directory: %s", self.context.working_directory) - try: - rmtree(self.context.working_directory.as_posix()) - except Exception as exception: - log.warning("Failed to remove the working directory: %s", exception) - self.context.working_directory.mkdir() + @property + def context(self): + if self.__context is None: + raise Exception("The context for the AgentTestSuite has not been initialized") + return self.__context - def _clean_up(self) -> None: + # + # Test suites within the same runbook may be executed concurrently, and setup needs to be done only once. 
+ # We use this lock to allow only 1 thread to do the setup. Setup completion is marked using the 'completed' + # file: the thread doing the setup creates the file and threads that find that the file already exists + # simply skip setup. + # + _setup_lock = RLock() + + def _setup(self) -> None: """ - Cleans up any leftovers from the test suite run. + Prepares the test suite for execution (currently, it just builds the agent package) + + Returns the path to the agent package. """ + AgentTestSuite._setup_lock.acquire() + try: - log.info("Removing %s", self.context.working_directory) - rmtree(self.context.working_directory.as_posix(), ignore_errors=True) - except: # pylint: disable=bare-except - log.exception("Failed to cleanup the test run") + log.info("") + log.info("**************************************** [Build] ****************************************") + log.info("") + completed: Path = self.context.working_directory/"completed" - def _setup_node(self) -> None: - """ - Prepares the remote node for executing the test suite. - """ - agent_package_path = self._build_agent_package() - self._install_agent_on_node(agent_package_path) + if completed.exists(): + log.info("Found %s. Build has already been done, skipping", completed) + return + + log.info("Creating working directory: %s", self.context.working_directory) + self.context.working_directory.mkdir(parents=True) + self._build_agent_package() - def _build_agent_package(self) -> Path: + log.info("Completed setup, creating %s", completed) + completed.touch() + + finally: + AgentTestSuite._setup_lock.release() + + def _build_agent_package(self) -> None: """ Builds the agent package and returns the path to the package. 
""" - build_path = self.context.working_directory/"build" - - log.info("Building agent package to %s", build_path) + log.info("Building agent package to %s", self.context.working_directory) - makepkg.run(agent_family="Test", output_directory=str(build_path), log=log) + makepkg.run(agent_family="Test", output_directory=str(self.context.working_directory), log=log) - package_path = build_path/"eggs"/f"WALinuxAgent-{AGENT_VERSION}.zip" + package_path: Path = self._get_agent_package_path() if not package_path.exists(): raise Exception(f"Can't find the agent package at {package_path}") - log.info("Agent package: %s", package_path) + log.info("Built agent package as %s", package_path) + + def _get_agent_package_path(self) -> Path: + """ + Returns the path to the agent package. + """ + return self.context.working_directory/"eggs"/f"WALinuxAgent-{AGENT_VERSION}.zip" - return package_path + def _clean_up(self) -> None: + """ + Cleans up any leftovers from the test suite run. Currently just an empty placeholder for future use. + """ - def _install_agent_on_node(self, agent_package: Path) -> None: + def _setup_node(self) -> None: + """ + Prepares the remote node for executing the test suite. + """ + log.info("") + log.info("************************************** [Node Setup] **************************************") + log.info("") + log.info("Test Node: %s", self.context.vm.name) + log.info("Resource Group: %s", self.context.vm.resource_group) + log.info("") + + self._install_agent_on_node() + + def _install_agent_on_node(self) -> None: """ Installs the given agent package on the test node. 
""" + agent_package_path: Path = self._get_agent_package_path() + # The install script needs to unzip the agent package; ensure unzip is installed on the test node log.info("Installing unzip tool on %s", self.context.node.name) self.context.node.os.install_packages("unzip") - log.info("Installing %s on %s", agent_package, self.context.node.name) - agent_package_remote_path = self.context.remote_working_directory / agent_package.name - log.info("Copying %s to %s:%s", agent_package, self.context.node.name, agent_package_remote_path) - self.context.node.shell.copy(agent_package, agent_package_remote_path) + log.info("Installing %s on %s", agent_package_path, self.context.node.name) + agent_package_remote_path = self.context.remote_working_directory/agent_package_path.name + log.info("Copying %s to %s:%s", agent_package_path, self.context.node.name, agent_package_remote_path) + self.context.node.shell.copy(agent_package_path, agent_package_remote_path) self.execute_script_on_node( self.context.test_source_directory/"orchestrator"/"scripts"/"install-agent", parameters=f"--package {agent_package_remote_path} --version {AGENT_VERSION}", @@ -172,8 +198,8 @@ def _collect_node_logs(self) -> None: self.execute_script_on_node(self.context.test_source_directory/"orchestrator"/"scripts"/"collect-logs", sudo=True) # Copy the tarball to the local logs directory - remote_path = self.context.remote_working_directory / "logs.tgz" - local_path = Path.home()/'logs'/'vm-logs-{0}.tgz'.format(self.context.node.name) + remote_path = "/tmp/waagent-logs.tgz" + local_path = Path.home()/'logs'/'vm-logs-{0}.tgz'.format(self.context.suite_name) log.info("Copying %s:%s to %s", self.context.node.name, remote_path, local_path) self.context.node.shell.copy_back(remote_path, local_path) except: # pylint: disable=bare-except @@ -186,12 +212,11 @@ def execute(self, node: Node, test_suite: List[Type[AgentTest]]) -> None: """ self._set_context(node) - log.info("") - 
log.info("**************************************** [Setup] ****************************************") - log.info("") - failed: List[str] = [] + thread_name = current_thread().name + current_thread().name = self.context.suite_name + try: self._setup() @@ -233,15 +258,21 @@ def execute(self, node: Node, test_suite: List[Type[AgentTest]]) -> None: for r in results: log.info("\t%s", r) log.info("") - finally: self._collect_node_logs() + except: # pylint: disable=bare-except + # Log the error here so that it is decorated with the thread name, then re-raise + log.exception("Test suite failed") + raise + finally: self._clean_up() + current_thread().name = thread_name # Fail the entire test suite if any test failed - assert_that(failed).described_as("One or more tests failed").is_length(0) + if len(failed) > 0: + fail(f"{[self.context.suite_name]} One or more tests failed: {failed}") def execute_script_on_node(self, script_path: Path, parameters: str = "", sudo: bool = False) -> int: """ @@ -259,10 +290,6 @@ def execute_script_on_node(self, script_path: Path, parameters: str = "", sudo: result = custom_script.run(parameters=parameters, sudo=sudo) - if result.exit_code != 0: - output = result.stdout if result.stderr == "" else f"{result.stdout}\n{result.stderr}" - raise Exception(f"[{command_line}] failed:\n{output}") - if result.stdout != "": separator = "\n" if "\n" in result.stdout else " " log.info("stdout:%s%s", separator, result.stdout) @@ -270,6 +297,9 @@ def execute_script_on_node(self, script_path: Path, parameters: str = "", sudo: separator = "\n" if "\n" in result.stderr else " " log.error("stderr:%s%s", separator, result.stderr) + if result.exit_code != 0: + raise Exception(f"[{command_line}] failed. 
Exit code: {result.exit_code}") + return result.exit_code diff --git a/tests_e2e/orchestrator/scripts/collect-logs b/tests_e2e/orchestrator/scripts/collect-logs index 46a23aff1..eadf0483a 100755 --- a/tests_e2e/orchestrator/scripts/collect-logs +++ b/tests_e2e/orchestrator/scripts/collect-logs @@ -2,24 +2,33 @@ # # Collects the logs needed to debug agent issues into a compressed tarball. # -set -euxo pipefail -logs_file_name="$HOME/logs.tgz" +# Note that we do "set -euxo pipefail" only after executing "tar". That command exits with code 1 on warnings +# and we do not want to consider those as failures. + +logs_file_name="/tmp/waagent-logs.tgz" echo "Collecting logs to $logs_file_name ..." tar --exclude='journal/*' --exclude='omsbundle' --exclude='omsagent' --exclude='mdsd' --exclude='scx*' \ --exclude='*.so' --exclude='*__LinuxDiagnostic__*' --exclude='*.zip' --exclude='*.deb' --exclude='*.rpm' \ + --warning=no-file-changed \ -czf "$logs_file_name" \ /var/log \ /var/lib/waagent/ \ /etc/waagent.conf -# tar exits with 1 on warnings; ignore those +set -euxo pipefail + +# Ignore warnings (exit code 1) exit_code=$? 
-if [ "$exit_code" != "1" ] && [ "$exit_code" != "0" ]; then + +if [ "$exit_code" == "1" ]; then + echo "WARNING: tar exit code is 1" +elif [ "$exit_code" != "0" ]; then exit $exit_code fi -chmod +r "$logs_file_name" +chmod a+r "$logs_file_name" +ls -l "$logs_file_name" diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index 439bdcec6..0b513569f 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -51,9 +51,22 @@ if [ "$#" -ne 0 ] || [ -z ${package+x} ] || [ -z ${version+x} ]; then fi # -# The service name is walinuxagent in Ubuntu and waagent elsewhere +# Find the command to manage services # -if service walinuxagent status > /dev/null;then +if command -v systemctl &> /dev/null; then + service-status() { systemctl --no-pager -l status $1; } + service-stop() { systemctl stop $1; } + service-start() { systemctl start $1; } +else + service-status() { service $1 status; } + service-stop() { service $1 stop; } + service-start() { service $1 start; } +fi + +# +# Find the service name (walinuxagent in Ubuntu and waagent elsewhere) +# +if service-status walinuxagent > /dev/null 2>&1;then service_name="walinuxagent" else service_name="waagent" @@ -71,16 +84,12 @@ sed -i 's/AutoUpdate.Enabled=n/AutoUpdate.Enabled=y/g' /etc/waagent.conf # Restart the service # echo "Restarting service..." -service $service_name stop +service-stop $service_name # Rename the previous log to ensure the new log starts with the agent we just installed mv /var/log/waagent.log /var/log/waagent."$(date --iso-8601=seconds)".log -if command -v systemctl &> /dev/null; then - systemctl daemon-reload -fi - -service $service_name start +service-start $service_name # # Verify that the new agent is running and output its status. 
Note that the extension handler @@ -108,13 +117,7 @@ else fi waagent --version - printf "\n" - -if command -v systemctl &> /dev/null; then - systemctl --no-pager -l status $service_name -else - service $service_name status -fi +service-status $service_name exit $exit_code diff --git a/tests_e2e/scenarios/lib/agent_test_context.py b/tests_e2e/scenarios/lib/agent_test_context.py index b35e93a80..6c008fe48 100644 --- a/tests_e2e/scenarios/lib/agent_test_context.py +++ b/tests_e2e/scenarios/lib/agent_test_context.py @@ -26,18 +26,20 @@ class AgentTestContext: """ Execution context for agent tests. Defines the test VM, working directories and connection info for the tests. - """ + NOTE: The context is shared by all tests in the same runbook execution. Tests within the same test suite + are executed sequentially, but multiple test suites may be executed concurrently depending on the + concurrency level of the runbook. + """ class Paths: # E1101: Instance of 'list' has no '_path' member (no-member) DEFAULT_TEST_SOURCE_DIRECTORY = Path(tests_e2e.__path__._path[0]) # pylint: disable=E1101 - DEFAULT_WORKING_DIRECTORY = Path().home() / "waagent-tmp" def __init__( self, + working_directory: Path, remote_working_directory: Path, - test_source_directory: Path = DEFAULT_TEST_SOURCE_DIRECTORY, - working_directory: Path = DEFAULT_WORKING_DIRECTORY + test_source_directory: Path = DEFAULT_TEST_SOURCE_DIRECTORY ): self._test_source_directory: Path = test_source_directory self._working_directory: Path = working_directory @@ -87,14 +89,15 @@ def test_source_directory(self) -> Path: @property def working_directory(self) -> Path: """ - Tests create temporary files under this directory + Tests can create temporary files under this directory. + """ return self._paths._working_directory @property def remote_working_directory(self) -> Path: """ - Tests create temporary files under this directory on the test VM + Tests can create temporary files under this directory on the test VM. 
""" return self._paths._remote_working_directory @@ -132,7 +135,7 @@ def from_args(): parser.add_argument('-rw', '--remote-working-directory', dest="remote_working_directory", required=False, default=str(Path('/home')/os.getenv("USER"))) parser.add_argument('-t', '--test-source-directory', dest="test_source_directory", required=False, default=str(AgentTestContext.Paths.DEFAULT_TEST_SOURCE_DIRECTORY)) - parser.add_argument('-w', '--working-directory', dest="working_directory", required=False, default=str(AgentTestContext.Paths.DEFAULT_WORKING_DIRECTORY)) + parser.add_argument('-w', '--working-directory', dest="working_directory", required=False, default=str(Path().home()/"tmp")) parser.add_argument('-a', '--ip-address', dest="ip_address", required=False) # Use the vm name as default parser.add_argument('-u', '--username', required=False, default=os.getenv("USER")) @@ -152,9 +155,9 @@ def from_args(): resource_group=args.group, name=args.vm), paths=AgentTestContext.Paths( + working_directory=working_directory, remote_working_directory=Path(args.remote_working_directory), - test_source_directory=Path(args.test_source_directory), - working_directory=working_directory), + test_source_directory=Path(args.test_source_directory)), connection=AgentTestContext.Connection( ip_address=args.ip_address if args.ip_address is not None else args.vm, username=args.username, diff --git a/tests_e2e/scenarios/lib/logging.py b/tests_e2e/scenarios/lib/logging.py index 2cb523d6b..d0629a21d 100644 --- a/tests_e2e/scenarios/lib/logging.py +++ b/tests_e2e/scenarios/lib/logging.py @@ -32,6 +32,6 @@ log.setLevel(logging.INFO) -formatter = logging.Formatter('%(asctime)s.%(msecs)03d [%(levelname)s] %(message)s', datefmt="%Y-%m-%dT%H:%M:%SZ") +formatter = logging.Formatter('%(asctime)s.%(msecs)03d [%(levelname)s] [%(threadName)s] %(message)s', datefmt="%Y-%m-%dT%H:%M:%SZ") for handler in log.handlers: handler.setFormatter(formatter) diff --git a/tests_e2e/scenarios/runbooks/daily.yml 
b/tests_e2e/scenarios/runbooks/daily.yml index fe723b0f9..016e3695a 100644 --- a/tests_e2e/scenarios/runbooks/daily.yml +++ b/tests_e2e/scenarios/runbooks/daily.yml @@ -1,29 +1,23 @@ -name: azure +name: Daily + +testcase: + - criteria: + area: bvt + extension: - "../testsuites" + variable: - name: location value: "westus2" - name: subscription_id value: "" - - name: resource_group_name - value: "" - # - # Set the vm_name to run on an existing VM - # - - name: vm_name - value: "" - name: marketplace_image - value: "Canonical UbuntuServer 18.04-LTS latest" + value: "" - name: vhd value: "" - name: vm_size value: "" - # - # Turn off deploy to run on an existing VM - # - - name: deploy - value: true - name: keep_environment value: "no" - name: wait_delete @@ -36,16 +30,7 @@ variable: - name: admin_password value: "" is_secret: true - - name: proxy_host - value: "" - - name: proxy_user - value: "" - - name: proxy_identity_file - value: "" - is_secret: true -notifier: - - type: env_stats - - type: junit + platform: - type: azure admin_username: $(user) @@ -53,8 +38,7 @@ platform: admin_password: $(admin_password) keep_environment: $(keep_environment) azure: - resource_group_name: $(resource_group_name) - deploy: $(deploy) + deploy: True subscription_id: $(subscription_id) wait_delete: $(wait_delete) requirement: @@ -64,19 +48,27 @@ platform: marketplace: "$(marketplace_image)" vhd: $(vhd) location: $(location) - name: $(vm_name) vm_size: $(vm_size) -testcase: - - criteria: - area: bvt +combinator: + type: grid + items: + - name: marketplace_image + value: + - "Canonical UbuntuServer 18.04-LTS latest" + - "Debian debian-10 10 latest" + - "OpenLogic CentOS 7_9 latest" + - "SUSE sles-15-sp2-basic gen2 latest" + - "RedHat RHEL 7-RAW latest" + - "microsoftcblmariner cbl-mariner cbl-mariner-1 latest" + - "microsoftcblmariner cbl-mariner cbl-mariner-2 latest" + +concurrency: 10 + +notifier: + - type: env_stats + - type: junit + +include: + - path: ./include/ssh_proxy.yml -# -# 
Set to do SSH proxy jumps -# -#dev: -# mock_tcp_ping: True -# jump_boxes: -# - private_key_file: $(proxy_identity_file) -# address: $(proxy_host) -# username: $(proxy_user) diff --git a/tests_e2e/scenarios/runbooks/include/ssh_proxy.yml b/tests_e2e/scenarios/runbooks/include/ssh_proxy.yml new file mode 100644 index 000000000..84704cb44 --- /dev/null +++ b/tests_e2e/scenarios/runbooks/include/ssh_proxy.yml @@ -0,0 +1,19 @@ +variable: + - name: proxy + value: False + - name: proxy_host + value: "" + - name: proxy_user + value: "foo" + - name: proxy_identity_file + value: "" + is_secret: true + +dev: + enabled: $(proxy) + mock_tcp_ping: $(proxy) + jump_boxes: + - private_key_file: $(proxy_identity_file) + address: $(proxy_host) + username: $(proxy_user) + password: "dummy" diff --git a/tests_e2e/scenarios/runbooks/samples/existing_vm.yml b/tests_e2e/scenarios/runbooks/samples/existing_vm.yml new file mode 100644 index 000000000..2d8057a86 --- /dev/null +++ b/tests_e2e/scenarios/runbooks/samples/existing_vm.yml @@ -0,0 +1,65 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# +# Executes the test suites on an existing VM +# +name: ExistingVM + +testcase: + - criteria: + area: bvt + +extension: + - "../../testsuites" + +variable: + - name: subscription_id + value: "" + + - name: resource_group_name + value: "" + - name: vm_name + value: "" + - name: location + value: "" + + - name: user + value: "" + - name: identity_file + value: "" + is_secret: true + +platform: + - type: azure + admin_username: $(user) + admin_private_key_file: $(identity_file) + azure: + resource_group_name: $(resource_group_name) + deploy: false + subscription_id: $(subscription_id) + requirement: + azure: + location: $(location) + name: $(vm_name) + +notifier: + - type: env_stats + - type: junit + +include: + - path: ../include/ssh_proxy.yml diff --git a/tests_e2e/scenarios/runbooks/samples/hello_world.py b/tests_e2e/scenarios/runbooks/samples/local_machine/hello_world.py similarity index 100% rename from tests_e2e/scenarios/runbooks/samples/hello_world.py rename to tests_e2e/scenarios/runbooks/samples/local_machine/hello_world.py diff --git a/tests_e2e/scenarios/runbooks/samples/local.yml b/tests_e2e/scenarios/runbooks/samples/local_machine/local.yml similarity index 93% rename from tests_e2e/scenarios/runbooks/samples/local.yml rename to tests_e2e/scenarios/runbooks/samples/local_machine/local.yml index f5edec65b..c397159f8 100644 --- a/tests_e2e/scenarios/runbooks/samples/local.yml +++ b/tests_e2e/scenarios/runbooks/samples/local_machine/local.yml @@ -15,6 +15,10 @@ # limitations under the License. # +# +# Executes the test suites on the local machine +# + extension: - "." environment: