Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add retry and improve the log messages in agent update test #2890

Merged
merged 3 commits into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 31 additions & 26 deletions tests_e2e/tests/agent_update/rsm_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from typing import List, Dict, Any

import requests
from assertpy import assert_that
from assertpy import assert_that, fail
from azure.identity import DefaultAzureCredential
from azure.mgmt.compute.models import VirtualMachine
from msrestazure.azure_cloud import Cloud
Expand Down Expand Up @@ -123,24 +123,24 @@ def run(self) -> None:
self._verify_agent_reported_update_status(version)

def _check_rsm_gs(self, requested_version: str) -> None:
# This checks whether the RSM GS is available to the agent after we mock the RSM update request
log.info('Verifying latest GS includes requested version available to the agent')
output = self._ssh_client.run_command(f"agent_update-wait_for_rsm_gs.py --version {requested_version}", use_sudo=True)
log.info('Verified latest GS includes requested version available to the agent. \n%s', output)
# This checks whether the RSM GS is available to the agent after we send the RSM update request
log.info('Executing wait_for_rsm_gs.py remote script to verify latest GS contain requested version after rsm update requested')
self._run_remote_test(f"agent_update-wait_for_rsm_gs.py --version {requested_version}", use_sudo=True)
log.info('Verified latest GS contain requested version after rsm update requested')

def _prepare_agent(self, daemon_version="1.0.0.0", update_config=True) -> None:
"""
This method ensures the agent is ready to accept RSM updates. As part of that, we update the following flags:
1) Change the daemon version, since the daemon has a hard check on the agent version in order to update the agent: it doesn't allow versions that are lower than the daemon version.
2) Update the GAFamily type to "Test" and the GAUpdates flag, to process agent updates on test versions.
"""
log.info('Modifying agent installed version')
output = self._ssh_client.run_command(f"agent_update-modify_agent_version {daemon_version}", use_sudo=True)
log.info('Updated agent installed version \n%s', output)
log.info('Executing modify_agent_version remote script to update agent installed version to lower than requested version')
self._run_remote_test(f"agent_update-modify_agent_version {daemon_version}", use_sudo=True)
log.info('Successfully updated agent installed version')
if update_config:
log.info('Modifying agent update config flags')
output = self._ssh_client.run_command("update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test", use_sudo=True)
log.info('updated agent update required config \n%s', output)
log.info('Executing update-waagent-conf remote script to update agent update config flags to allow and download test versions')
self._run_remote_test("update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test", use_sudo=True)
log.info('Successfully updated agent update config')

@staticmethod
def _verify_agent_update_flag_enabled(vm: VirtualMachineClient) -> bool:
Expand All @@ -161,6 +161,7 @@ def _enable_agent_update_flag(self, vm: VirtualMachineClient) -> None:
}
}
}
log.info("updating the vm with osProfile property:\n%s", osprofile)
vm.update(osprofile)

def _request_rsm_update(self, requested_version: str) -> None:
Expand All @@ -173,7 +174,7 @@ def _request_rsm_update(self, requested_version: str) -> None:
# enable the flag
log.info("Attempting vm update to set the enableVMAgentPlatformUpdates flag")
self._enable_agent_update_flag(vm)
log.info("Set the enableVMAgentPlatformUpdates flag to True")
log.info("Updated the enableVMAgentPlatformUpdates flag to True")
else:
log.info("Already enableVMAgentPlatformUpdates flag set to True")

Expand All @@ -190,29 +191,33 @@ def _request_rsm_update(self, requested_version: str) -> None:
"targetVersion": requested_version
}

log.info("Attempting rsm upgrade post request to endpoint: {0} with data: {1}".format(url, data))
response = requests.post(url, data=json.dumps(data), headers=headers)
if response.status_code == 202:
log.info("RSM upgrade request accepted")
else:
raise Exception("Error occurred while RSM upgrade request. Status code : {0} and msg: {1}".format(response.status_code, response.content))
raise Exception("Error occurred while making RSM upgrade request. Status code : {0} and msg: {1}".format(response.status_code, response.content))

def _verify_guest_agent_update(self, requested_version: str) -> None:
"""
Verify that the currently running agent version is the RSM requested version
"""
def _check_agent_version(requested_version: str) -> bool:
stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
expected_version = f"Goal state agent: {requested_version}"
if expected_version in stdout:
if expected_version in waagent_version:
return True
else:
raise Exception("Guest agent didn't update to requested version {0} but found \n {1}. \n "
"To debug verify if CRP has upgrade operation around that time and also check if agent log has any errors ".format(requested_version, stdout))
return False

log.info("Verifying agent updated to requested version")
retry_if_false(lambda: _check_agent_version(requested_version))
stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
log.info(f"Verified agent updated to requested version. Current agent version running:\n {stdout}")
waagent_version: str = ""
log.info("Verifying agent updated to requested version: {0}".format(requested_version))
success: bool = retry_if_false(lambda: _check_agent_version(requested_version))
if not success:
fail("Guest agent didn't update to requested version {0} but found \n {1}. \n "
"To debug verify if CRP has upgrade operation around that time and also check if agent log has any errors ".format(requested_version, waagent_version))
waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
log.info(f"Successfully verified agent updated to requested version. Current agent version running:\n {waagent_version}")

def _verify_no_guest_agent_update(self, version: str) -> None:
"""
Expand All @@ -228,17 +233,17 @@ def _verify_agent_reported_supported_feature_flag(self):
RSM update relies on the supported feature flag that the agent sends to CRP. So, check from the agent log whether the GA reports the feature flag
"""

log.info("Verifying agent reported supported feature flag")
self._ssh_client.run_command("agent_update-verify_versioning_supported_feature.py", use_sudo=True)
log.info("Agent reported VersioningGovernance supported feature flag")
log.info("Executing verify_versioning_supported_feature.py remote script to verify agent reported supported feature flag, so that CRP can send RSM update request")
self._run_remote_test("agent_update-verify_versioning_supported_feature.py", use_sudo=True)
log.info("Successfully verified that Agent reported VersioningGovernance supported feature flag")

def _verify_agent_reported_update_status(self, version: str):
"""
Verify that the agent reported the update status to CRP after the update was performed
"""

log.info("Verifying agent reported update status for version {0}".format(version))
self._ssh_client.run_command(f"agent_update-verify_agent_reported_update_status.py --version {version}", use_sudo=True)
log.info("Executing verify_agent_reported_update_status.py remote script to verify agent reported update status for version {0}".format(version))
self._run_remote_test(f"agent_update-verify_agent_reported_update_status.py --version {version}", use_sudo=True)
log.info("Successfully Agent reported update status for version {0}".format(version))


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
import argparse
import glob
import json
import logging
import sys

from assertpy import fail

from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.remote_test import run_remote_test
from tests_e2e.tests.lib.retry import retry_if_false


Expand All @@ -33,27 +35,28 @@ def check_agent_reported_update_status(expected_version: str) -> bool:
for file in file_paths:
with open(file, 'r') as f:
data = json.load(f)
logging.info("Agent status file is %s and it's content %s", file, data)
log.info("Agent status file is %s and it's content %s", file, data)
status = data["__status__"]
guest_agent_status = status["aggregateStatus"]["guestAgentStatus"]
if "updateStatus" in guest_agent_status.keys():
if guest_agent_status["updateStatus"]["expectedVersion"] == expected_version:
log.info("we found the expected version %s in agent status file", expected_version)
return True
log.info("we did not find the expected version %s in agent status file", expected_version)
return False


try:
def main():

parser = argparse.ArgumentParser()
parser.add_argument('-v', '--version', required=True)
args = parser.parse_args()

log.info("checking agent status file to verify if agent reported update status")
found: bool = retry_if_false(lambda: check_agent_reported_update_status(args.version))
if not found:
raise Exception("Agent failed to report update status, so skipping rest of the agent update validations")
fail("Agent failed to report update status, so skipping rest of the agent update validations")


except Exception as e:
print(f"{e}", file=sys.stderr)
sys.exit(1)
run_remote_test(main)

sys.exit(0)
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
#
import glob
import json
import logging
import sys

from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.remote_test import run_remote_test
from tests_e2e.tests.lib.retry import retry_if_false


Expand All @@ -32,7 +32,7 @@ def check_agent_supports_versioning() -> bool:
for file in file_paths:
with open(file, 'r') as f:
data = json.load(f)
logging.info("Agent status file is %s and it's content %s", file, data)
log.info("Agent status file is %s and it's content %s", file, data)
status = data["__status__"]
supported_features = status["supportedFeatures"]
for supported_feature in supported_features:
Expand All @@ -41,13 +41,15 @@ def check_agent_supports_versioning() -> bool:
return False


try:
def main():
log.info("checking agent status file for VersioningGovernance supported feature flag")
found: bool = retry_if_false(check_agent_supports_versioning)
if not found:
raise Exception("Agent failed to report supported feature flag, so skipping agent update validations")
raise Exception("Agent failed to report supported feature flag. So, skipping agent update validations "
"since CRP will not send RSM requested version in GS if feature flag not found in status")


run_remote_test(main)


except Exception as e:
print(f"{e}", file=sys.stderr)
sys.exit(1)

sys.exit(0)
23 changes: 10 additions & 13 deletions tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
# Verify that the latest goal state includes the RSM requested version and, if not, retry
#
import argparse
import sys
import logging

from azurelinuxagent.common.protocol.util import get_protocol_util
from azurelinuxagent.common.protocol.goal_state import GoalState, GoalStateProperties
from azurelinuxagent.common.protocol.wire import WireProtocol
from tests_e2e.tests.lib.retry import retry_if_false
from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.remote_test import run_remote_test
from tests_e2e.tests.lib.retry import retry_if_false, retry


def get_requested_version(gs: GoalState) -> str:
Expand All @@ -41,6 +41,7 @@ def get_requested_version(gs: GoalState) -> str:


def verify_rsm_requested_version(wire_protocol: WireProtocol, expected_version: str) -> bool:
log.info("fetching the goal state to check if it includes rsm requested version")
wire_protocol.client.update_goal_state()
goal_state = wire_protocol.client.get_goal_state()
requested_version = get_requested_version(goal_state)
Expand All @@ -50,25 +51,21 @@ def verify_rsm_requested_version(wire_protocol: WireProtocol, expected_version:
return False


try:
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-v', '--version', required=True)
args = parser.parse_args()

protocol = get_protocol_util().get_protocol(init_goal_state=False)
protocol.client.reset_goal_state(
goal_state_properties=GoalStateProperties.ExtensionsGoalState)
retry(lambda: protocol.client.reset_goal_state(
goal_state_properties=GoalStateProperties.ExtensionsGoalState))

found: bool = retry_if_false(lambda: verify_rsm_requested_version(protocol, args.version))

if not found:
raise Exception("Latest GS does not include rsm requested version : {0}.".format(args.version))
raise Exception("The latest goal state didn't contain requested version after we submit the rsm request for: {0}.".format(args.version))
else:
logging.info("Latest GS includes rsm requested version : %s", args.version)
log.info("Successfully verified that latest GS contains rsm requested version : %s", args.version)


except Exception as e:
print(f"{e}", file=sys.stderr)
sys.exit(1)

sys.exit(0)
run_remote_test(main)