diff --git a/azure-pipelines.yml b/azure-pipelines.yml index cd7d1f3cd..2439aff16 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -85,6 +85,8 @@ jobs: sudo pip2 install sonic_platform_common-1.0-py2-none-any.whl sudo pip3 install swsssdk-2.0.1-py3-none-any.whl sudo pip3 install sonic_py_common-1.0-py3-none-any.whl + sudo pip3 install sonic_yang_mgmt-1.0-py3-none-any.whl + sudo pip3 install sonic_yang_models-1.0-py3-none-any.whl sudo pip3 install sonic_config_engine-1.0-py3-none-any.whl sudo pip3 install sonic_platform_common-1.0-py3-none-any.whl workingDirectory: $(Pipeline.Workspace)/target/python-wheels/ diff --git a/sonic-chassisd/scripts/chassis_db_init b/sonic-chassisd/scripts/chassis_db_init new file mode 100644 index 000000000..b1aafb881 --- /dev/null +++ b/sonic-chassisd/scripts/chassis_db_init @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 + +""" + chassis_db_init + Chassis information update tool for SONiC + This tool runs one time at the launch of the platform monitor in order to populate STATE_DB with chassis information such as model, serial number, and revision. +""" + +try: + import os + import sys + + from sonic_py_common import daemon_base, logger + + # If unit testing is occurring, mock swsscommon and module_base + if os.getenv("CHASSIS_DB_INIT_UNIT_TESTING") == "1": + from tests import mock_swsscommon as swsscommon + else: + from swsscommon import swsscommon +except ImportError as e: + raise ImportError(str(e) + " - required module not found") + +# +# Constants +# + +SYSLOG_IDENTIFIER = "chassis_db_init" + +CHASSIS_INFO_TABLE = 'CHASSIS_INFO' +CHASSIS_INFO_KEY_TEMPLATE = 'chassis {}' +CHASSIS_INFO_CARD_NUM_FIELD = 'module_num' +CHASSIS_INFO_SERIAL_FIELD = 'serial' +CHASSIS_INFO_MODEL_FIELD = 'model' +CHASSIS_INFO_REV_FIELD = 'revision' + +CHASSIS_LOAD_ERROR = 1 + +NOT_AVAILABLE = 'N/A' + +# +# Helper functions ============================================================= +# + +# try get information from platform API and return a default value if caught NotImplementedError + + +def try_get(callback, *args, **kwargs): + """ + Handy function to invoke the callback and catch NotImplementedError + :param callback: Callback to be invoked + :param args: Arguments to be passed to callback + :param kwargs: Default return value if exception occur + :return: Default return value if exception occur else return value of the callback + """ + default = kwargs.get('default', NOT_AVAILABLE) + try: + ret = callback(*args) + if ret is None: + ret = default + except NotImplementedError: + ret = default + + return ret + +# +# Functions +# + +def provision_db(platform_chassis, log): + # Init state db connection + state_db = daemon_base.db_connect("STATE_DB") + chassis_table = swsscommon.Table(state_db, CHASSIS_INFO_TABLE) + + # Populate DB with chassis hardware info + fvs = swsscommon.FieldValuePairs([ + (CHASSIS_INFO_SERIAL_FIELD, try_get(platform_chassis.get_serial)), + (CHASSIS_INFO_MODEL_FIELD, try_get(platform_chassis.get_model)), + (CHASSIS_INFO_REV_FIELD, try_get(platform_chassis.get_revision)) + ]) + chassis_table.set(CHASSIS_INFO_KEY_TEMPLATE.format(1), fvs) + log.log_info("STATE_DB provisioned with chassis info.") + + return chassis_table + + +# +# Main +# + +def main(): + log = logger.Logger(SYSLOG_IDENTIFIER) + log.log_info("Provisioning Database with Chassis Info...") + + # Load platform api class + try: + import sonic_platform.platform + platform_chassis = sonic_platform.platform.Platform().get_chassis() + except Exception as e: + log.log_error("Failed to load chassis due to {}".format(repr(e))) + sys.exit(CHASSIS_LOAD_ERROR) + + provision_db(platform_chassis, log) + +if __name__ == '__main__': + main() diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 355382911..771b73f5a 100644 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -44,6 +44,13 @@ CHASSIS_MODULE_INFO_NAME_FIELD = 'name' CHASSIS_MODULE_INFO_DESC_FIELD = 'desc' CHASSIS_MODULE_INFO_SLOT_FIELD = 'slot' CHASSIS_MODULE_INFO_OPERSTATUS_FIELD = 'oper_status' +CHASSIS_MODULE_INFO_NUM_ASICS_FIELD = 'num_asics' +CHASSIS_MODULE_INFO_ASICS = 'asics' + +CHASSIS_ASIC_INFO_TABLE = 'CHASSIS_ASIC_TABLE' +CHASSIS_ASIC = 'asic' +CHASSIS_ASIC_PCI_ADDRESS_FIELD = 'asic_pci_address' +CHASSIS_ASIC_ID_IN_MODULE_FIELD = 'asic_id_in_module' CHASSIS_MIDPLANE_INFO_TABLE = 'CHASSIS_MIDPLANE_TABLE' CHASSIS_MIDPLANE_INFO_KEY_TEMPLATE = 'CHASSIS_MIDPLANE {}' @@ -163,6 +170,9 @@ class ModuleUpdater(logger.Logger): CHASSIS_MODULE_INFO_SLOT_FIELD, CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] + chassis_state_db = daemon_base.db_connect("CHASSIS_STATE_DB") + self.asic_table = swsscommon.Table(chassis_state_db, CHASSIS_ASIC_INFO_TABLE) + self.midplane_initialized = try_get(chassis.init_midplane_switch, default=False) if not self.midplane_initialized: self.log_error("Chassisd midplane intialization failed") @@ -182,6 +192,11 @@ class ModuleUpdater(logger.Logger): if self.chassis_table is not None: self.chassis_table._del(CHASSIS_INFO_KEY_TEMPLATE.format(1)) + if self.asic_table is not None: + asics = list(self.asic_table.getKeys()) + for asic in asics: + self.asic_table._del(asic) + def modules_num_update(self): # Check if module list is populated num_modules = self.chassis.get_num_modules() @@ -194,6 +209,8 @@ class ModuleUpdater(logger.Logger): self.chassis_table.set(CHASSIS_INFO_KEY_TEMPLATE.format(1), fvs) def module_db_update(self): + notOnlineModules = [] + for module_index in range(0, self.num_modules): module_info_dict = self._get_module_info(module_index) if module_info_dict is not None: @@ -211,9 +228,29 @@ class ModuleUpdater(logger.Logger): fvs = swsscommon.FieldValuePairs([(CHASSIS_MODULE_INFO_DESC_FIELD, module_info_dict[CHASSIS_MODULE_INFO_DESC_FIELD]), (CHASSIS_MODULE_INFO_SLOT_FIELD, module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD]), - (CHASSIS_MODULE_INFO_OPERSTATUS_FIELD, module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD])]) + (CHASSIS_MODULE_INFO_OPERSTATUS_FIELD, module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD]), + (CHASSIS_MODULE_INFO_NUM_ASICS_FIELD, str(len(module_info_dict[CHASSIS_MODULE_INFO_ASICS])))]) self.module_table.set(key, fvs) + if module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] != str(ModuleBase.MODULE_STATUS_ONLINE): + notOnlineModules.append(key) + continue + + for asic_id, asic in enumerate(module_info_dict[CHASSIS_MODULE_INFO_ASICS]): + asic_global_id, asic_pci_addr = asic + asic_key = "%s%s" % (CHASSIS_ASIC, asic_global_id) + asic_fvs = swsscommon.FieldValuePairs([(CHASSIS_ASIC_PCI_ADDRESS_FIELD, asic_pci_addr), + (CHASSIS_MODULE_INFO_NAME_FIELD, key), + (CHASSIS_ASIC_ID_IN_MODULE_FIELD, str(asic_id))]) + self.asic_table.set(asic_key, asic_fvs) + + # Asics that are on the "not online" modules need to be cleaned up + asics = list(self.asic_table.getKeys()) + for asic in asics: + fvs = self.asic_table.get(asic) + if fvs[CHASSIS_MODULE_INFO_NAME_FIELD] in notOnlineModules: + self.asic_table._del(asic) + def _get_module_info(self, module_index): """ Retrieves module info of this module @@ -225,11 +262,14 @@ class ModuleUpdater(logger.Logger): slot = try_get(self.chassis.get_module(module_index).get_slot, default=INVALID_SLOT) status = try_get(self.chassis.get_module(module_index).get_oper_status, default=ModuleBase.MODULE_STATUS_OFFLINE) + asics = try_get(self.chassis.get_module(module_index).get_all_asics, + default=[]) module_info_dict[CHASSIS_MODULE_INFO_NAME_FIELD] = name module_info_dict[CHASSIS_MODULE_INFO_DESC_FIELD] = str(desc) module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD] = str(slot) module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] = str(status) + module_info_dict[CHASSIS_MODULE_INFO_ASICS] = asics return module_info_dict diff --git a/sonic-chassisd/setup.py b/sonic-chassisd/setup.py index 271c7a794..6fcfb51f3 100644 --- a/sonic-chassisd/setup.py +++ b/sonic-chassisd/setup.py @@ -15,6 +15,7 @@ ], scripts=[ 'scripts/chassisd', + 'scripts/chassis_db_init' ], setup_requires=[ 'pytest-runner', diff --git a/sonic-chassisd/tests/mock_platform.py b/sonic-chassisd/tests/mock_platform.py index c878fafd6..a1ab20530 100644 --- a/sonic-chassisd/tests/mock_platform.py +++ b/sonic-chassisd/tests/mock_platform.py @@ -19,7 +19,8 @@ def get_serial(self): class MockModule(MockDevice): - def __init__(self, module_index, module_name, module_desc, module_type, module_slot): + def __init__(self, module_index, module_name, module_desc, module_type, module_slot, + asic_list=[]): self.module_index = module_index self.module_name = module_name self.module_desc = module_desc @@ -29,7 +30,8 @@ def __init__(self, module_index, module_name, module_desc, module_type, module_s self.admin_state = 1 self.supervisor_slot = 16 self.midplane_access = False - + self.asic_list = asic_list + def get_name(self): return self.module_name @@ -69,6 +71,9 @@ def is_midplane_reachable(self): def set_midplane_reachable(self, up): self.midplane_access = up + def get_all_asics(self): + return self.asic_list + class MockChassis: def __init__(self): self.module_list = [] @@ -92,3 +97,12 @@ def get_module_index(self, module_name): def init_midplane_switch(self): return True + + def get_serial(self): + return "Serial No" + + def get_model(self): + return "Model A" + + def get_revision(self): + return "Rev C" diff --git a/sonic-chassisd/tests/mock_swsscommon.py b/sonic-chassisd/tests/mock_swsscommon.py index 8e9c68761..589084225 100644 --- a/sonic-chassisd/tests/mock_swsscommon.py +++ b/sonic-chassisd/tests/mock_swsscommon.py @@ -19,6 +19,9 @@ def get(self, key): return self.mock_dict[key] return None + def getKeys(self): + return list(self.mock_dict) + def size(self): return len(self.mock_dict) diff --git a/sonic-chassisd/tests/test_chassis_db_init.py b/sonic-chassisd/tests/test_chassis_db_init.py new file mode 100644 index 000000000..e9a9560ba --- /dev/null +++ b/sonic-chassisd/tests/test_chassis_db_init.py @@ -0,0 +1,38 @@ +import os +import sys +from imp import load_source + +from mock import Mock, MagicMock, patch +from sonic_py_common import daemon_base + +from .mock_platform import MockChassis, MockModule +from .mock_module_base import ModuleBase + +SYSLOG_IDENTIFIER = 'chassis_db_init_test' +NOT_AVAILABLE = 'N/A' + +daemon_base.db_connect = MagicMock() + +test_path = os.path.dirname(os.path.abspath(__file__)) +modules_path = os.path.dirname(test_path) +scripts_path = os.path.join(modules_path, "scripts") +sys.path.insert(0, modules_path) + +os.environ["CHASSIS_DB_INIT_UNIT_TESTING"] = "1" +load_source('chassis_db_init', scripts_path + '/chassis_db_init') +from chassis_db_init import * + + +def test_provision_db(): + chassis = MockChassis() + log = MagicMock() + serial = "Serial No" + model = "Model A" + revision = "Rev C" + + chassis_table = provision_db(chassis, log) + + fvs = chassis_table.get(CHASSIS_INFO_KEY_TEMPLATE.format(1)) + assert serial == fvs[CHASSIS_INFO_SERIAL_FIELD] + assert model == fvs[CHASSIS_INFO_MODEL_FIELD] + assert revision == fvs[CHASSIS_INFO_REV_FIELD] diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index ef3d2fb67..2950abd81 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -31,6 +31,8 @@ CHASSIS_INFO_KEY_TEMPLATE = 'CHASSIS {}' CHASSIS_INFO_CARD_NUM_FIELD = 'module_num' +CHASSIS_ASIC_PCI_ADDRESS_FIELD = 'asic_pci_address' +CHASSIS_ASIC_ID_IN_MODULE_FIELD = 'asic_id_in_module' def setup_function(): ModuleUpdater.log_notice = MagicMock() @@ -366,3 +368,76 @@ def test_midplane_presence_supervisor(): module_updater.deinit() fvs = midplane_table.get(name) assert fvs == None + +def test_asic_presence(): + chassis = MockChassis() + + #Supervisor + index = 0 + name = "SUPERVISOR0" + desc = "Supervisor card" + slot = 16 + module_type = ModuleBase.MODULE_TYPE_SUPERVISOR + supervisor = MockModule(index, name, desc, module_type, slot) + supervisor.set_midplane_ip() + chassis.module_list.append(supervisor) + + #Linecard + index = 1 + name = "LINE-CARD0" + desc = "36 port 400G card" + slot = 1 + module_type = ModuleBase.MODULE_TYPE_LINE + module = MockModule(index, name, desc, module_type, slot) + module.set_midplane_ip() + chassis.module_list.append(module) + + #Fabric-card with asics + index = 1 + name = "FABRIC-CARD0" + desc = "Switch fabric card" + slot = 17 + module_type = ModuleBase.MODULE_TYPE_FABRIC + fabric_asic_list = [("4", "0000:04:00.0"), ("5", "0000:05:00.0")] + fabric = MockModule(index, name, desc, module_type, slot, fabric_asic_list) + chassis.module_list.append(fabric) + + #Run on supervisor + module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater.supervisor_slot = supervisor.get_slot() + module_updater.my_slot = supervisor.get_slot() + module_updater.modules_num_update() + module_updater.module_db_update() + module_updater.check_midplane_reachability() + + #Asic presence on fabric module + fabric.set_oper_status(ModuleBase.MODULE_STATUS_ONLINE) + module_updater.module_db_update() + fabric_asic_table = module_updater.asic_table + assert len(fabric_asic_table.getKeys()) == 2 + + def verify_fabric_asic(asic_name, asic_pci_address, module_name, asic_id_in_module): + fvs = fabric_asic_table.get(asic_name) + assert fvs[CHASSIS_ASIC_PCI_ADDRESS_FIELD] == asic_pci_address + assert fvs[CHASSIS_MODULE_INFO_NAME_FIELD] == module_name + assert fvs[CHASSIS_ASIC_ID_IN_MODULE_FIELD] == asic_id_in_module + + verify_fabric_asic("asic4", "0000:04:00.0", name, "0") + verify_fabric_asic("asic5", "0000:05:00.0", name, "1") + + #Card goes down and asics should be gone + fabric.set_oper_status(ModuleBase.MODULE_STATUS_OFFLINE) + module_updater.module_db_update() + assert len(fabric_asic_table.getKeys()) == 0 + + #Deinit + fabric.set_oper_status(ModuleBase.MODULE_STATUS_ONLINE) + module_updater.module_db_update() + module_updater.deinit() + midplane_table = module_updater.midplane_table + fvs = midplane_table.get(name) + assert fvs == None + fvs = fabric_asic_table.get("asic4") + assert fvs == None + fvs = fabric_asic_table.get("asic5") + assert fvs == None diff --git a/sonic-pcied/pytest.ini b/sonic-pcied/pytest.ini new file mode 100644 index 000000000..d90ee9ed9 --- /dev/null +++ b/sonic-pcied/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = --cov=scripts --cov-report html --cov-report term --cov-report xml --junitxml=test-results.xml -vv diff --git a/sonic-pcied/scripts/pcied b/sonic-pcied/scripts/pcied index d69ddcb92..7265063ff 100644 --- a/sonic-pcied/scripts/pcied +++ b/sonic-pcied/scripts/pcied @@ -5,31 +5,62 @@ PCIe device monitoring daemon for SONiC """ -try: - import os - import signal - import sys - import threading - - import swsssdk - from sonic_py_common import daemon_base, device_info - from swsscommon import swsscommon -except ImportError as e: - raise ImportError(str(e) + " - required module not found") +import os +import signal +import sys +import threading + +from sonic_py_common import daemon_base, device_info +from swsscommon import swsscommon # # Constants ==================================================================== # + +# TODO: Once we no longer support Python 2, we can eliminate this and get the +# name using the 'name' field (e.g., `signal.SIGINT.name`) starting with Python 3.5 +SIGNALS_TO_NAMES_DICT = dict((getattr(signal, n), n) + for n in dir(signal) if n.startswith('SIG') and '_' not in n) + SYSLOG_IDENTIFIER = "pcied" PCIE_RESULT_REGEX = "PCIe Device Checking All Test" -PCIE_TABLE_NAME = "PCIE_STATUS" PCIE_DEVICE_TABLE_NAME = "PCIE_DEVICE" - -PCIE_CONF_FILE = 'pcie.yaml' +PCIE_STATUS_TABLE_NAME = "PCIE_DEVICES" PCIED_MAIN_THREAD_SLEEP_SECS = 60 -REDIS_HOSTIP = "127.0.0.1" + +PCIEUTIL_CONF_FILE_ERROR = 1 +PCIEUTIL_LOAD_ERROR = 2 + +platform_pcieutil = None + +exit_code = 0 + +# wrapper functions to call the platform api +def load_platform_pcieutil(): + _platform_pcieutil = None + (platform_path, _) = device_info.get_paths_to_platform_and_hwsku_dirs() + try: + from sonic_platform.pcie import Pcie + _platform_pcieutil = Pcie(platform_path) + except ImportError as e: + self.log_error("Failed to load platform Pcie module. Error : {}".format(str(e)), True) + try: + from sonic_platform_base.sonic_pcie.pcie_common import PcieUtil + _platform_pcieutil = PcieUtil(platform_path) + except ImportError as e: + self.log_error("Failed to load default PcieUtil module. Error : {}".format(str(e)), True) + return _platform_pcieutil + +def read_id_file(device_name): + id = None + dev_id_path = '/sys/bus/pci/devices/0000:%s/device' % device_name + + if os.path.exists(dev_id_path): + with open(dev_id_path, 'r') as fd: + id = fd.read().strip() + return id # # Daemon ======================================================================= @@ -40,134 +71,129 @@ class DaemonPcied(daemon_base.DaemonBase): def __init__(self, log_identifier): super(DaemonPcied, self).__init__(log_identifier) - (platform_path, _) = device_info.get_paths_to_platform_and_hwsku_dirs() - pciefilePath = os.path.join(platform_path, PCIE_CONF_FILE) - if not os.path.exists(pciefilePath): - self.log_error("Platform pcie configuration file doesn't exist! Exiting ...") - sys.exit("Platform PCIe Configuration file doesn't exist!") - self.timeout = PCIED_MAIN_THREAD_SLEEP_SECS self.stop_event = threading.Event() - - self.state_db = swsssdk.SonicV2Connector(host=REDIS_HOSTIP) - self.state_db.connect("STATE_DB") - state_db = daemon_base.db_connect("STATE_DB") - self.device_table = swsscommon.Table(state_db, PCIE_DEVICE_TABLE_NAME) - - # Load AER-fields into STATEDB - def update_aer_to_statedb(self, device_name, aer_stats): + self.state_db = None + self.device_table = None + self.table = None + self.resultInfo = [] + self.device_name = None + self.aer_stats = {} + + global platform_pcieutil + + platform_pcieutil = load_platform_pcieutil() + if platform_pcieutil is None: + sys.exit(PCIEUTIL_LOAD_ERROR) + + # Connect to STATE_DB and create pcie device table + self.state_db = daemon_base.db_connect("STATE_DB") + self.device_table = swsscommon.Table(self.state_db, PCIE_DEVICE_TABLE_NAME) + self.status_table = swsscommon.Table(self.state_db, PCIE_STATUS_TABLE_NAME) + + def __del__(self): + if self.device_table: + table_keys = self.device_table.getKeys() + for tk in table_keys: + self.device_table._del(tk) + if self.status_table: + stable_keys = self.status_table.getKeys() + for stk in stable_keys: + self.status_table._del(stk) + + # load aer-fields into statedb + def update_aer_to_statedb(self): + if self.aer_stats is None: + self.log_debug("PCIe device {} has no AER Stats".format(device_name)) + return aer_fields = {} - for field, value in aer_stats['correctable'].items(): - correctable_field = "correctable|" + field - aer_fields[correctable_field] = value - - for field, value in aer_stats['fatal'].items(): - fatal_field = "fatal|" + field - aer_fields[fatal_field] = value - - for field, value in aer_stats['non_fatal'].items(): - non_fatal_field = "non_fatal|" + field - aer_fields[non_fatal_field] = value + for key, fv in self.aer_stats.items(): + for field, value in fv.items(): + key_field = "{}|{}".format(key,field) + aer_fields[key_field] = value if aer_fields: formatted_fields = swsscommon.FieldValuePairs(list(aer_fields.items())) - self.device_table.set(device_name, formatted_fields) + self.device_table.set(self.device_name, formatted_fields) else: - self.log_debug("PCIe device {} has no AER attriutes".format(device_name)) + self.log_debug("PCIe device {} has no AER attriutes".format(self.device_name)) - # Check the PCIe devices - def check_pcie_devices(self): - try: - platform_path, _ = device_info.get_paths_to_platform_and_hwsku_dirs() - from sonic_platform_base.sonic_pcie.pcie_common import PcieUtil - platform_pcieutil = PcieUtil(platform_path) - except ImportError as e: - self.log_error("Failed to load default PcieUtil module. Error : {}".format(str(e)), True) - raise e - resultInfo = platform_pcieutil.get_pcie_check() - err = 0 + # Check the PCIe AER Stats + def check_n_update_pcie_aer_stats(self, Bus, Dev, Fn): + self.device_name = "%02x:%02x.%d" % (Bus, Dev, Fn) - for item in resultInfo: - if item["result"] == "Failed": - self.log_warning("PCIe Device: " + item["name"] + " Not Found") - err += 1 + Id = read_id_file(self.device_name) + self.aer_stats = {} + if Id is not None: + self.device_table.set(self.device_name, [('id', Id)]) + self.aer_stats = platform_pcieutil.get_pcie_aer_stats(bus=Bus, dev=Dev, func=Fn) + self.update_aer_to_statedb() + + + # Update the PCIe devices status to DB + def update_pcie_devices_status_db(self, err): if err: - self.update_state_db("PCIE_DEVICES", "status", "FAILED") - self.log_error("PCIe device status check : FAILED") + pcie_status = "FAILED" + self.log_error("PCIe device status check : {}".format(pcie_status)) else: - self.update_state_db("PCIE_DEVICES", "status", "PASSED") - self.log_info("PCIe device status check : PASSED") + pcie_status = "PASSED" + self.log_info("PCIe device status check : {}".format(pcie_status)) + fvs = swsscommon.FieldValuePairs([ + ('status', pcie_status) + ]) - # update AER-attributes to DB - for item in resultInfo: - if item["result"] == "Failed": - continue + self.status_table.set("status", fvs) - Bus = int(item["bus"], 16) - Dev = int(item["dev"], 16) - Fn = int(item["fn"], 16) + # Check the PCIe devices + def check_pcie_devices(self): + self.resultInfo = platform_pcieutil.get_pcie_check() + err = 0 + if self.resultInfo is None: + return - device_name = "%02x:%02x.%d" % (Bus, Dev, Fn) - dev_id_path = '/sys/bus/pci/devices/0000:%s/device' % device_name - with open(dev_id_path, 'r') as fd: - Id = fd.read().strip() + for result in self.resultInfo: + if result["result"] == "Failed": + self.log_warning("PCIe Device: " + result["name"] + " Not Found") + err += 1 + else: + Bus = int(result["bus"], 16) + Dev = int(result["dev"], 16) + Fn = int(result["fn"], 16) + # update AER-attributes to DB + self.check_n_update_pcie_aer_stats(Bus, Dev, Fn) - self.device_table.set(device_name, [('id', Id)]) - aer_stats = platform_pcieutil.get_pcie_aer_stats(bus=Bus, device=Dev, func=Fn) - self.update_aer_to_statedb(device_name, aer_stats) + # update PCIe Device Status to DB + self.update_pcie_devices_status_db(err) - def read_state_db(self, key1, key2): - return self.state_db.get('STATE_DB', key1, key2) + # Override signal handler from DaemonBase + def signal_handler(self, sig, frame): + FATAL_SIGNALS = [signal.SIGINT, signal.SIGTERM] + NONFATAL_SIGNALS = [signal.SIGHUP] - def update_state_db(self, key1, key2, value): - self.state_db.set('STATE_DB', key1, key2, value) + global exit_code - # Signal handler - def signal_handler(self, sig, frame): - if sig == signal.SIGHUP: - self.log_info("Caught SIGHUP - ignoring...") - elif sig == signal.SIGINT: - self.log_info("Caught SIGINT - exiting...") - self.stop_event.set() - elif sig == signal.SIGTERM: - self.log_info("Caught SIGTERM - exiting...") + if sig in FATAL_SIGNALS: + self.log_info("Caught signal '{}' - exiting...".format(SIGNALS_TO_NAMES_DICT[sig])) + exit_code = 128 + sig # Make sure we exit with a non-zero code so that supervisor will try to restart us self.stop_event.set() + elif sig in NONFATAL_SIGNALS: + self.log_info("Caught signal '{}' - ignoring...".format(SIGNALS_TO_NAMES_DICT[sig])) else: - self.log_warning("Caught unhandled signal '" + sig + "'") + self.log_warning("Caught unhandled signal '{}' - ignoring...".format(SIGNALS_TO_NAMES_DICT[sig])) - # Initialize daemon - def init(self): - self.log_info("Start daemon init...") - - # Deinitialize daemon - def deinit(self): - self.log_info("Start daemon deinit...") - - # Run daemon + # Main daemon logic def run(self): - self.log_info("Starting up...") - - # Start daemon initialization sequence - self.init() - - # Start main loop - self.log_info("Start daemon main loop") - - while not self.stop_event.wait(self.timeout): - # Check the Pcie device status - self.check_pcie_devices() - - self.log_info("Stop daemon main loop") + if self.stop_event.wait(self.timeout): + # We received a fatal signal + return False - # Start daemon deinitialization sequence - self.deinit() - - self.log_info("Shutting down...") + self.check_pcie_devices() + return True # # Main ========================================================================= # @@ -175,7 +201,15 @@ class DaemonPcied(daemon_base.DaemonBase): def main(): pcied = DaemonPcied(SYSLOG_IDENTIFIER) - pcied.run() + + pcied.log_info("Starting up...") + + while pcied.run(): + pass + + pcied.log_info("Shutting down...") + + return exit_code if __name__ == '__main__': - main() + sys.exit(main()) diff --git a/sonic-pcied/setup.cfg b/sonic-pcied/setup.cfg new file mode 100644 index 000000000..b7e478982 --- /dev/null +++ b/sonic-pcied/setup.cfg @@ -0,0 +1,2 @@ +[aliases] +test=pytest diff --git a/sonic-pcied/setup.py b/sonic-pcied/setup.py index 0d7d32118..7c7cbc146 100644 --- a/sonic-pcied/setup.py +++ b/sonic-pcied/setup.py @@ -14,8 +14,19 @@ 'scripts/pcied', ], setup_requires=[ + 'pytest-runner', 'wheel' ], + install_requires=[ + 'enum34; python_version < "3.4"', + 'sonic-py-common', + ], + tests_requires=[ + 'mock>=2.0.0; python_version < "3.3"', + 'pytest', + 'pytest-cov', + 'sonic-platform-common' + ], classifiers=[ 'Development Status :: 4 - Beta', 'Environment :: No Input/Output (Daemon)', @@ -29,4 +40,5 @@ 'Topic :: System :: Hardware', ], keywords='sonic SONiC PCIe pcie PCIED pcied', + test_suite='setup.get_test_suite' ) diff --git a/sonic-pcied/tests/__init__.py b/sonic-pcied/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sonic-pcied/tests/mock_platform.py b/sonic-pcied/tests/mock_platform.py new file mode 100644 index 000000000..7d8a32a90 --- /dev/null +++ b/sonic-pcied/tests/mock_platform.py @@ -0,0 +1,48 @@ + +""" + Mock implementation of sonic_platform package for unit testing +""" + +# TODO: Clean this up once we no longer need to support Python 2 +import sys +if sys.version_info.major == 3: + from unittest import mock +else: + import mock + + + +pcie_device_list = \ +""" +[{'bus': '00', 'dev': '01', 'fn': '0', 'id': '1f10', 'name': 'PCI A'}] +""" + +pcie_check_result = \ +""" +[{'bus': '00', 'dev': '01', 'fn': '0', 'id': '1f10', 'name': 'PCI A', 'result': 'Passed'}] +""" + +pcie_aer_stats = \ +""" +{'correctable': {}, 'fatal': {}, 'non_fatal': {}} +""" + +#class MockPcieUtil(PcieUtil): +class MockPcieUtil(): + def __init__(self, + pciList=pcie_device_list, + result=pcie_check_result, + aer_stats=pcie_aer_stats): + super(MockPcieUtil, self).__init__() + self._pciList = pciList + self._result = result + self._aer_stats = aer_stats + + def get_pcie_device(self): + return self._pciList + + def get_pcie_check(self): + return self._result + + def get_pcie_aer_stats(self, domain, bus, dev, fn): + return self._aer_stats diff --git a/sonic-pcied/tests/mocked_libs/swsscommon/__init__.py b/sonic-pcied/tests/mocked_libs/swsscommon/__init__.py new file mode 100644 index 000000000..012af621e --- /dev/null +++ b/sonic-pcied/tests/mocked_libs/swsscommon/__init__.py @@ -0,0 +1,5 @@ +''' + Mock implementation of swsscommon package for unit testing +''' + +from . import swsscommon diff --git a/sonic-pcied/tests/mocked_libs/swsscommon/swsscommon.py b/sonic-pcied/tests/mocked_libs/swsscommon/swsscommon.py new file mode 100644 index 000000000..6947a8601 --- /dev/null +++ b/sonic-pcied/tests/mocked_libs/swsscommon/swsscommon.py @@ -0,0 +1,54 @@ +''' + Mock implementation of swsscommon package for unit testing +''' + +STATE_DB = '' + + +class Table: + def __init__(self, db, table_name): + self.table_name = table_name + self.mock_dict = {} + + def _del(self, key): + del self.mock_dict[key] + pass + + def set(self, key, fvs): + self.mock_dict[key] = fvs.fv_dict + pass + + def get(self, key): + if key in self.mock_dict: + return self.mock_dict[key] + return None + + def get_size(self): + return (len(self.mock_dict)) + + +class FieldValuePairs: + fv_dict = {} + + def __init__(self, tuple_list): + if isinstance(tuple_list, list) and isinstance(tuple_list[0], tuple): + self.fv_dict = dict(tuple_list) + + def __setitem__(self, key, kv_tuple): + self.fv_dict[kv_tuple[0]] = kv_tuple[1] + + def __getitem__(self, key): + return self.fv_dict[key] + + def __eq__(self, other): + if not isinstance(other, FieldValuePairs): + # don't attempt to compare against unrelated types + return NotImplemented + + return self.fv_dict == other.fv_dict + + def __repr__(self): + return repr(self.fv_dict) + + def __str__(self): + return repr(self.fv_dict) diff --git a/sonic-pcied/tests/test_DaemonPcied.py b/sonic-pcied/tests/test_DaemonPcied.py new file mode 100644 index 000000000..24044767e --- /dev/null +++ b/sonic-pcied/tests/test_DaemonPcied.py @@ -0,0 +1,226 @@ +import datetime +import os +import sys +from imp import load_source # Replace with importlib once we no longer need to support Python 2 + +import pytest + +# TODO: Clean this up once we no longer need to support Python 2 +if sys.version_info.major == 3: + from unittest import mock +else: + import mock + +from sonic_py_common import daemon_base + +from .mock_platform import MockPcieUtil + +SYSLOG_IDENTIFIER = 'pcied_test' +NOT_AVAILABLE = 'N/A' + +daemon_base.db_connect = mock.MagicMock() + +tests_path = os.path.dirname(os.path.abspath(__file__)) + +# Add mocked_libs path so that the file under test can load mocked modules from there +mocked_libs_path = os.path.join(tests_path, "mocked_libs") +sys.path.insert(0, mocked_libs_path) + +# Add path to the file under test so that we can load it +modules_path = os.path.dirname(tests_path) +scripts_path = os.path.join(modules_path, "scripts") +sys.path.insert(0, modules_path) +load_source('pcied', os.path.join(scripts_path, 'pcied')) +import pcied + +pcie_no_aer_stats = \ +""" +{'correctable': {}, 'fatal': {}, 'non_fatal': {}} +""" + +pcie_aer_stats_no_err = \ +""" +{'correctable': {'field1': '0', 'field2': '0'}, + 'fatal': {'field3': '0', 'field4': '0'}, + 'non_fatal': {'field5': '0', 'field6': '0'}} +""" + +pcie_aer_stats_err = \ +""" +{'correctable': {'field1': '1', 'field2': '0'}, + 'fatal': {'field3': '0', 'field4': '1'}, + 'non_fatal': {'field5': '0', 'field6': '1'}} +""" + +pcie_device_list = \ +""" +[{'bus': '00', 'dev': '01', 'fn': '0', 'id': '1f10', 'name': 'PCI A'}, + {'bus': '00', 'dev': '02', 'fn': '0', 'id': '1f11', 'name': 'PCI B'}, + {'bus': '00', 'dev': '03', 'fn': '0', 'id': '1f13', 'name': 'PCI C'}] +""" + +pcie_check_result_no = [] + +pcie_check_result_pass = \ +""" +[{'bus': '00', 'dev': '01', 'fn': '0', 'id': '1f10', 'name': 'PCI A', 'result': 'Passed'}, + {'bus': '00', 'dev': '02', 'fn': '0', 'id': '1f11', 'name': 'PCI B', 'result': 'Passed'}, + {'bus': '00', 'dev': '03', 'fn': '0', 'id': '1f12', 'name': 'PCI C', 'result': 'Passed'}] +""" + +pcie_check_result_fail = \ +""" +[{'bus': '00', 'dev': '01', 'fn': '0', 'id': '1f10', 'name': 'PCI A', 'result': 'Passed'}, + {'bus': '00', 'dev': '02', 'fn': '0', 'id': '1f11', 'name': 'PCI B', 'result': 'Passed'}, + {'bus': '00', 'dev': '03', 'fn': '0', 'id': '1f12', 'name': 'PCI C', 'result': 'Failed'}] +""" + +class TestDaemonPcied(object): + """ + Test cases to cover functionality in DaemonPcied class + """ + + @mock.patch('pcied.load_platform_pcieutil', mock.MagicMock()) + def test_signal_handler(self): + daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER) + daemon_pcied.stop_event.set = mock.MagicMock() + daemon_pcied.log_info = mock.MagicMock() + daemon_pcied.log_warning = mock.MagicMock() + + # Test SIGHUP + daemon_pcied.signal_handler(pcied.signal.SIGHUP, None) + assert daemon_pcied.log_info.call_count == 1 + daemon_pcied.log_info.assert_called_with("Caught signal 'SIGHUP' - ignoring...") + assert daemon_pcied.log_warning.call_count == 0 + assert daemon_pcied.stop_event.set.call_count == 0 + assert pcied.exit_code == 0 + + # Reset + daemon_pcied.log_info.reset_mock() + daemon_pcied.log_warning.reset_mock() + daemon_pcied.stop_event.set.reset_mock() + + # Test SIGINT + test_signal = pcied.signal.SIGINT + daemon_pcied.signal_handler(test_signal, None) + assert daemon_pcied.log_info.call_count == 1 + daemon_pcied.log_info.assert_called_with("Caught signal 'SIGINT' - exiting...") + assert daemon_pcied.log_warning.call_count == 0 + assert daemon_pcied.stop_event.set.call_count == 1 + assert pcied.exit_code == (128 + test_signal) + + # Reset + daemon_pcied.log_info.reset_mock() + daemon_pcied.log_warning.reset_mock() + daemon_pcied.stop_event.set.reset_mock() + + # Test SIGTERM + test_signal = pcied.signal.SIGTERM + daemon_pcied.signal_handler(test_signal, None) + assert daemon_pcied.log_info.call_count == 1 + daemon_pcied.log_info.assert_called_with("Caught signal 'SIGTERM' - exiting...") + assert daemon_pcied.log_warning.call_count == 0 + assert daemon_pcied.stop_event.set.call_count == 1 + assert pcied.exit_code == (128 + test_signal) + + # Reset + daemon_pcied.log_info.reset_mock() + daemon_pcied.log_warning.reset_mock() + daemon_pcied.stop_event.set.reset_mock() + pcied.exit_code = 0 + + # Test an unhandled signal + daemon_pcied.signal_handler(pcied.signal.SIGUSR1, None) + assert daemon_pcied.log_warning.call_count == 1 + daemon_pcied.log_warning.assert_called_with("Caught unhandled signal 'SIGUSR1' - ignoring...") + assert daemon_pcied.log_info.call_count == 0 + assert daemon_pcied.stop_event.set.call_count == 0 + assert pcied.exit_code == 0 + + @mock.patch('pcied.load_platform_pcieutil', mock.MagicMock()) + def test_run(self): + daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER) + daemon_pcied.check_pcie_devices = mock.MagicMock() + + daemon_pcied.run() + assert daemon_pcied.check_pcie_devices.call_count == 1 + + @mock.patch('pcied.load_platform_pcieutil', mock.MagicMock()) + def test_check_pcie_devices(self): + daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER) + daemon_pcied.update_pcie_devices_status_db = mock.MagicMock() + daemon_pcied.check_n_update_pcie_aer_stats = mock.MagicMock() + pcied.platform_pcieutil.get_pcie_check = mock.MagicMock() + + daemon_pcied.check_pcie_devices() + assert daemon_pcied.update_pcie_devices_status_db.call_count == 1 + assert daemon_pcied.check_n_update_pcie_aer_stats.call_count == 0 + + + @mock.patch('pcied.load_platform_pcieutil', mock.MagicMock()) + def test_update_pcie_devices_status_db(self): + daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER) + daemon_pcied.status_table = mock.MagicMock() + daemon_pcied.log_info = mock.MagicMock() + daemon_pcied.log_error = mock.MagicMock() + + # test for pass resultInfo + daemon_pcied.update_pcie_devices_status_db(0) + assert daemon_pcied.status_table.set.call_count == 1 + assert daemon_pcied.log_info.call_count == 1 + assert daemon_pcied.log_error.call_count == 0 + + daemon_pcied.status_table.set.reset_mock() + daemon_pcied.log_info.reset_mock() + + # test for resultInfo with 1 device failed to detect + daemon_pcied.update_pcie_devices_status_db(1) + assert daemon_pcied.status_table.set.call_count == 1 + assert daemon_pcied.log_info.call_count == 0 + assert daemon_pcied.log_error.call_count == 1 + + + @mock.patch('pcied.load_platform_pcieutil', mock.MagicMock()) + @mock.patch('pcied.read_id_file') + def test_check_n_update_pcie_aer_stats(self, mock_read): + daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER) + daemon_pcied.device_table = mock.MagicMock() + daemon_pcied.update_aer_to_statedb = mock.MagicMock() + pcied.platform_pcieutil.get_pcie_aer_stats = mock.MagicMock() + + mock_read.return_value = None + daemon_pcied.check_n_update_pcie_aer_stats(0,1,0) + assert daemon_pcied.update_aer_to_statedb.call_count == 0 + assert daemon_pcied.device_table.set.call_count == 0 + assert pcied.platform_pcieutil.get_pcie_aer_stats.call_count == 0 + + mock_read.return_value = '1714' + daemon_pcied.check_n_update_pcie_aer_stats(0,1,0) + assert daemon_pcied.update_aer_to_statedb.call_count == 1 + assert daemon_pcied.device_table.set.call_count == 1 + assert pcied.platform_pcieutil.get_pcie_aer_stats.call_count == 1 + + + @mock.patch('pcied.load_platform_pcieutil', mock.MagicMock()) + def test_update_aer_to_statedb(self): + daemon_pcied = pcied.DaemonPcied(SYSLOG_IDENTIFIER) + daemon_pcied.log_debug = mock.MagicMock() + daemon_pcied.device_table = mock.MagicMock() + daemon_pcied.device_name = mock.MagicMock() + daemon_pcied.aer_stats = mock.MagicMock() + + + mocked_expected_fvp = pcied.swsscommon.FieldValuePairs( + [("correctable|field1", '0'), + ("correctable|field2", '0'), + ("fatal|field3", '0'), + ("fatal|field4", '0'), + ("non_fatal|field5", '0'), + ("non_fatal|field6", '0'), + ]) + + daemon_pcied.update_aer_to_statedb() + assert daemon_pcied.log_debug.call_count == 1 + assert daemon_pcied.device_table.set.call_count == 0 + + daemon_pcied.device_table.set.reset_mock() diff --git a/sonic-pcied/tests/test_pcied.py b/sonic-pcied/tests/test_pcied.py new file mode 100644 index 000000000..f3b3e78e9 --- /dev/null +++ b/sonic-pcied/tests/test_pcied.py @@ -0,0 +1,43 @@ +import os +import sys +from imp import load_source # Replace with importlib once we no longer need to support Python 2 + +import pytest + +# TODO: Clean this up once we no longer need to support Python 2 +if sys.version_info.major == 3: + from unittest import mock +else: + import mock +from sonic_py_common import daemon_base, device_info + +from .mock_platform import MockPcieUtil + +tests_path = os.path.dirname(os.path.abspath(__file__)) + +# Add mocked_libs path so that the file under test can load mocked modules from there +mocked_libs_path = os.path.join(tests_path, "mocked_libs") +sys.path.insert(0, mocked_libs_path) + +# Add path to the file under test so that we can load it +modules_path = os.path.dirname(tests_path) +scripts_path = os.path.join(modules_path, "scripts") +sys.path.insert(0, modules_path) +load_source('pcied', os.path.join(scripts_path, 'pcied')) +import pcied + + +daemon_base.db_connect = mock.MagicMock() + + +SYSLOG_IDENTIFIER = 'pcied_test' +NOT_AVAILABLE = 'N/A' + + +@mock.patch('pcied.load_platform_pcieutil', mock.MagicMock()) +@mock.patch('pcied.DaemonPcied.run') +def test_main(mock_run): + mock_run.return_value = False + + pcied.main() + assert mock_run.call_count == 1 diff --git a/sonic-psud/scripts/psud b/sonic-psud/scripts/psud index 8a41fc054..6be6f2934 100644 --- a/sonic-psud/scripts/psud +++ b/sonic-psud/scripts/psud @@ -48,6 +48,7 @@ PSU_INFO_KEY_TEMPLATE = 'PSU {}' PSU_INFO_PRESENCE_FIELD = 'presence' PSU_INFO_MODEL_FIELD = 'model' PSU_INFO_SERIAL_FIELD = 'serial' +PSU_INFO_REV_FIELD = 'revision' PSU_INFO_STATUS_FIELD = 'status' PSU_INFO_TEMP_FIELD = 'temp' PSU_INFO_TEMP_TH_FIELD = 'temp_threshold' @@ -510,6 +511,7 @@ class DaemonPsud(daemon_base.DaemonBase): fvs = swsscommon.FieldValuePairs( [(PSU_INFO_MODEL_FIELD, str(try_get(psu.get_model, NOT_AVAILABLE))), (PSU_INFO_SERIAL_FIELD, str(try_get(psu.get_serial, NOT_AVAILABLE))), + (PSU_INFO_REV_FIELD, str(try_get(psu.get_revision, NOT_AVAILABLE))), (PSU_INFO_TEMP_FIELD, str(temperature)), (PSU_INFO_TEMP_TH_FIELD, str(temperature_threshold)), (PSU_INFO_VOLTAGE_FIELD, str(voltage)), diff --git a/sonic-psud/tests/mock_platform.py b/sonic-psud/tests/mock_platform.py index 6a0bb3517..8d3b13c52 100644 --- a/sonic-psud/tests/mock_platform.py +++ b/sonic-psud/tests/mock_platform.py @@ -273,6 +273,7 @@ def __init__(self, presence=True, model='Module Model', serial='Module Serial', + revision='Module Revision', status=True, voltage=12.0, current=8.0, @@ -287,6 +288,7 @@ def __init__(self, self._presence = presence self._model = model self._serial = serial + self._revision = revision self._status = status self._position_in_parent = position_in_parent self._replaceable = replaceable @@ -365,6 +367,9 @@ def get_model(self): def get_serial(self): return self._serial + def get_revision(self): + return self._revision + def get_status(self): return self._status diff --git a/sonic-psud/tests/mocked_libs/swsscommon/swsscommon.py b/sonic-psud/tests/mocked_libs/swsscommon/swsscommon.py index 6947a8601..13c49dec1 100644 --- a/sonic-psud/tests/mocked_libs/swsscommon/swsscommon.py +++ b/sonic-psud/tests/mocked_libs/swsscommon/swsscommon.py @@ -2,6 +2,8 @@ Mock implementation of swsscommon package for unit testing ''' +from swsssdk import ConfigDBConnector, SonicDBConfig, SonicV2Connector + STATE_DB = '' diff --git a/sonic-psud/tests/test_DaemonPsud.py b/sonic-psud/tests/test_DaemonPsud.py index 116886657..06033f36e 100644 --- a/sonic-psud/tests/test_DaemonPsud.py +++ b/sonic-psud/tests/test_DaemonPsud.py @@ -10,14 +10,12 @@ from unittest import mock else: import mock -from sonic_py_common import daemon_base from .mock_platform import MockChassis, MockFan, MockPsu SYSLOG_IDENTIFIER = 'psud_test' NOT_AVAILABLE = 'N/A' -daemon_base.db_connect = mock.MagicMock() tests_path = os.path.dirname(os.path.abspath(__file__)) @@ -25,6 +23,9 @@ mocked_libs_path = os.path.join(tests_path, "mocked_libs") sys.path.insert(0, mocked_libs_path) +from sonic_py_common import daemon_base +daemon_base.db_connect = mock.MagicMock() + # Add path to the file under test so that we can load it modules_path = os.path.dirname(tests_path) scripts_path = os.path.join(modules_path, "scripts") @@ -148,13 +149,14 @@ def test_update_single_psu_data(self): psud._wrapper_get_psu_presence.return_value = True psud._wrapper_get_psu_status.return_value = True - psu1 = MockPsu('PSU 1', 0, True, 'Fake Model', '12345678') + psu1 = MockPsu('PSU 1', 0, True, 'Fake Model', '12345678', '1234') psud.platform_chassis = MockChassis() psud.platform_chassis._psu_list.append(psu1) expected_fvp = psud.swsscommon.FieldValuePairs( [(psud.PSU_INFO_MODEL_FIELD, 'Fake Model'), (psud.PSU_INFO_SERIAL_FIELD, '12345678'), + (psud.PSU_INFO_REV_FIELD, '1234'), (psud.PSU_INFO_TEMP_FIELD, '30.0'), (psud.PSU_INFO_TEMP_TH_FIELD, '50.0'), (psud.PSU_INFO_VOLTAGE_FIELD, '12.0'), diff --git a/sonic-psud/tests/test_psud.py b/sonic-psud/tests/test_psud.py index a66a8c29a..405ec30c6 100644 --- a/sonic-psud/tests/test_psud.py +++ b/sonic-psud/tests/test_psud.py @@ -11,7 +11,7 @@ import mock from sonic_py_common import daemon_base -from .mock_platform import MockPsu +from .mock_platform import MockPsu, MockChassis tests_path = os.path.dirname(os.path.abspath(__file__)) @@ -179,6 +179,7 @@ def test_log_on_status_changed(): mock_logger.log_warning.assert_called_with(abnormal_log) +@mock.patch('psud.platform_chassis', mock.MagicMock()) @mock.patch('psud.DaemonPsud.run') def test_main(mock_run): mock_run.return_value = False diff --git a/sonic-syseepromd/tests/mocked_libs/swsscommon/swsscommon.py b/sonic-syseepromd/tests/mocked_libs/swsscommon/swsscommon.py index 8a0a87692..13c49dec1 100644 --- a/sonic-syseepromd/tests/mocked_libs/swsscommon/swsscommon.py +++ b/sonic-syseepromd/tests/mocked_libs/swsscommon/swsscommon.py @@ -2,6 +2,8 @@ Mock implementation of swsscommon package for unit testing ''' +from swsssdk import ConfigDBConnector, SonicDBConfig, SonicV2Connector + STATE_DB = '' @@ -23,6 +25,9 @@ def get(self, key): return self.mock_dict[key] return None + def get_size(self): + return (len(self.mock_dict)) + class FieldValuePairs: fv_dict = {} diff --git a/sonic-syseepromd/tests/test_syseepromd.py b/sonic-syseepromd/tests/test_syseepromd.py index e25b94ce3..4ac106b8a 100644 --- a/sonic-syseepromd/tests/test_syseepromd.py +++ b/sonic-syseepromd/tests/test_syseepromd.py @@ -9,19 +9,19 @@ from unittest import mock else: import mock -from sonic_py_common import daemon_base SYSLOG_IDENTIFIER = 'syseepromd_test' NOT_AVAILABLE = 'N/A' -daemon_base.db_connect = mock.MagicMock() - tests_path = os.path.dirname(os.path.abspath(__file__)) # Add mocked_libs path so that the file under test can load mocked modules from there mocked_libs_path = os.path.join(tests_path, 'mocked_libs') sys.path.insert(0, mocked_libs_path) +from sonic_py_common import daemon_base +daemon_base.db_connect = mock.MagicMock() + # Add path to the file under test so that we can load it modules_path = os.path.dirname(tests_path) scripts_path = os.path.join(modules_path, 'scripts') diff --git a/sonic-thermalctld/scripts/thermalctld b/sonic-thermalctld/scripts/thermalctld index 70012f857..bc25923db 100644 --- a/sonic-thermalctld/scripts/thermalctld +++ b/sonic-thermalctld/scripts/thermalctld @@ -191,7 +191,7 @@ class FanUpdater(logger.Logger): FAN_INFO_TABLE_NAME = 'FAN_INFO' FAN_DRAWER_INFO_TABLE_NAME = 'FAN_DRAWER_INFO' - def __init__(self, chassis): + def __init__(self, chassis, task_stopping_event): """ Initializer for FanUpdater :param chassis: Object representing a platform chassis @@ -199,6 +199,7 @@ class FanUpdater(logger.Logger): super(FanUpdater, self).__init__(SYSLOG_IDENTIFIER) self.chassis = chassis + self.task_stopping_event = task_stopping_event self.fan_status_dict = {} state_db = daemon_base.db_connect("STATE_DB") self.table = swsscommon.Table(state_db, FanUpdater.FAN_INFO_TABLE_NAME) @@ -236,8 +237,12 @@ class FanUpdater(logger.Logger): FanStatus.reset_fan_counter() for drawer_index, drawer in enumerate(self.chassis.get_all_fan_drawers()): + if self.task_stopping_event.is_set(): + return self._refresh_fan_drawer_status(drawer, drawer_index) for fan_index, fan in enumerate(drawer.get_all_fans()): + if self.task_stopping_event.is_set(): + return try: self._refresh_fan_status(drawer, drawer_index, fan, fan_index) except Exception as e: @@ -245,6 +250,8 @@ class FanUpdater(logger.Logger): for psu_index, psu in enumerate(self.chassis.get_all_psus()): for fan_index, fan in enumerate(psu.get_all_fans()): + if self.task_stopping_event.is_set(): + return try: self._refresh_fan_status(psu, psu_index, fan, fan_index, True) except Exception as e: @@ -396,6 +403,8 @@ class FanUpdater(logger.Logger): def _update_led_color(self): for fan_name, fan_status in self.fan_status_dict.items(): + if self.task_stopping_event.is_set(): + return try: fvs = swsscommon.FieldValuePairs([ ('led_status', str(try_get(fan_status.fan.get_status_led))) @@ -408,6 +417,8 @@ class FanUpdater(logger.Logger): self.table.set(fan_name, fvs) for drawer in self.chassis.get_all_fan_drawers(): + if self.task_stopping_event.is_set(): + return drawer_name = try_get(drawer.get_name) if drawer_name == NOT_AVAILABLE: continue @@ -510,7 +521,7 @@ class TemperatureUpdater(logger.Logger): # Temperature information table name in database TEMPER_INFO_TABLE_NAME = 'TEMPERATURE_INFO' - def __init__(self, chassis): + def __init__(self, chassis, task_stopping_event): """ Initializer of TemperatureUpdater :param chassis: Object representing a platform chassis @@ -518,6 +529,7 @@ class TemperatureUpdater(logger.Logger): super(TemperatureUpdater, self).__init__(SYSLOG_IDENTIFIER) self.chassis = chassis + self.task_stopping_event = task_stopping_event self.temperature_status_dict = {} state_db = daemon_base.db_connect("STATE_DB") self.table = swsscommon.Table(state_db, TemperatureUpdater.TEMPER_INFO_TABLE_NAME) @@ -562,6 +574,8 @@ class TemperatureUpdater(logger.Logger): """ self.log_debug("Start temperature updating") for index, thermal in enumerate(self.chassis.get_all_thermals()): + if self.task_stopping_event.is_set(): + return try: self._refresh_temperature_status(CHASSIS_INFO_KEY, thermal, index) except Exception as e: @@ -570,6 +584,8 @@ class TemperatureUpdater(logger.Logger): for psu_index, psu in enumerate(self.chassis.get_all_psus()): parent_name = 'PSU {}'.format(psu_index + 1) for thermal_index, thermal in enumerate(psu.get_all_thermals()): + if self.task_stopping_event.is_set(): + return try: self._refresh_temperature_status(parent_name, thermal, thermal_index) except Exception as e: @@ -578,6 +594,8 @@ class TemperatureUpdater(logger.Logger): for sfp_index, sfp in enumerate(self.chassis.get_all_sfps()): parent_name = 'SFP {}'.format(sfp_index + 1) for thermal_index, thermal in enumerate(sfp.get_all_thermals()): + if self.task_stopping_event.is_set(): + return try: self._refresh_temperature_status(parent_name, thermal, thermal_index) except Exception as e: @@ -686,8 +704,8 @@ class ThermalMonitor(ProcessTaskBase): # Set minimum logging level to INFO self.logger.set_min_log_priority_info() - self.fan_updater = FanUpdater(chassis) - self.temperature_updater = TemperatureUpdater(chassis) + self.fan_updater = FanUpdater(chassis, self.task_stopping_event) + self.temperature_updater = TemperatureUpdater(chassis, self.task_stopping_event) def main(self): begin = time.time() @@ -791,6 +809,8 @@ class ThermalControlDaemon(daemon_base.DaemonBase): if sig in FATAL_SIGNALS: self.log_info("Caught signal '{}' - exiting...".format(SIGNALS_TO_NAMES_DICT[sig])) exit_code = 128 + sig # Make sure we exit with a non-zero code so that supervisor will try to restart us + self.thermal_monitor.task_stop() + self.thermal_manager.stop() self.stop_event.set() elif sig in NONFATAL_SIGNALS: self.log_info("Caught signal '{}' - ignoring...".format(SIGNALS_TO_NAMES_DICT[sig])) diff --git a/sonic-thermalctld/tests/mocked_libs/swsscommon/swsscommon.py b/sonic-thermalctld/tests/mocked_libs/swsscommon/swsscommon.py index 6947a8601..13c49dec1 100644 --- a/sonic-thermalctld/tests/mocked_libs/swsscommon/swsscommon.py +++ b/sonic-thermalctld/tests/mocked_libs/swsscommon/swsscommon.py @@ -2,6 +2,8 @@ Mock implementation of swsscommon package for unit testing ''' +from swsssdk import ConfigDBConnector, SonicDBConfig, SonicV2Connector + STATE_DB = '' diff --git a/sonic-thermalctld/tests/test_thermalctld.py b/sonic-thermalctld/tests/test_thermalctld.py index d2f647384..063e11c22 100644 --- a/sonic-thermalctld/tests/test_thermalctld.py +++ b/sonic-thermalctld/tests/test_thermalctld.py @@ -1,5 +1,6 @@ import os import sys +import multiprocessing from imp import load_source # TODO: Replace with importlib once we no longer need to support Python 2 # TODO: Clean this up once we no longer need to support Python 2 @@ -9,18 +10,24 @@ import mock import pytest -from sonic_py_common import daemon_base - -from .mock_platform import MockChassis, MockFan, MockPsu, MockSfp, MockThermal - -daemon_base.db_connect = mock.MagicMock() - tests_path = os.path.dirname(os.path.abspath(__file__)) # Add mocked_libs path so that the file under test can load mocked modules from there mocked_libs_path = os.path.join(tests_path, 'mocked_libs') sys.path.insert(0, mocked_libs_path) + +import swsscommon +# Check we are using the mocked package +assert len(swsscommon.__path__) == 1 +assert(os.path.samefile(swsscommon.__path__[0], os.path.join(mocked_libs_path, 'swsscommon'))) + +from sonic_py_common import daemon_base + +from .mock_platform import MockChassis, MockFan, MockPsu, MockSfp, MockThermal + +daemon_base.db_connect = mock.MagicMock() + # Add path to the file under test so that we can load it modules_path = os.path.dirname(tests_path) scripts_path = os.path.join(modules_path, 'scripts') @@ -148,7 +155,7 @@ class TestFanUpdater(object): Test cases to cover functionality in FanUpdater class """ def test_deinit(self): - fan_updater = thermalctld.FanUpdater(MockChassis()) + fan_updater = thermalctld.FanUpdater(MockChassis(), multiprocessing.Event()) fan_updater.fan_status_dict = {'key1': 'value1', 'key2': 'value2'} fan_updater.table._del = mock.MagicMock() @@ -161,7 +168,7 @@ def test_deinit(self): @mock.patch('thermalctld.update_entity_info', mock.MagicMock()) def test_refresh_fan_drawer_status_fan_drawer_get_name_not_impl(self): # Test case where fan_drawer.get_name is not implemented - fan_updater = thermalctld.FanUpdater(MockChassis()) + fan_updater = thermalctld.FanUpdater(MockChassis(), multiprocessing.Event()) mock_fan_drawer = mock.MagicMock() fan_updater._refresh_fan_drawer_status(mock_fan_drawer, 1) assert thermalctld.update_entity_info.call_count == 0 @@ -175,7 +182,7 @@ def test_update_fan_with_exception(self): fan.make_over_speed() chassis.get_all_fans().append(fan) - fan_updater = thermalctld.FanUpdater(chassis) + fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event()) fan_updater.update() assert fan.get_status_led() == MockFan.STATUS_LED_COLOR_RED assert fan_updater.log_warning.call_count == 1 @@ -192,7 +199,7 @@ def test_set_fan_led_exception(self): mock_fan = MockFan() mock_fan.set_status_led = mock.MagicMock(side_effect=NotImplementedError) - fan_updater = thermalctld.FanUpdater(MockChassis()) + fan_updater = thermalctld.FanUpdater(MockChassis(), multiprocessing.Event()) fan_updater._set_fan_led(mock_fan_drawer, mock_fan, 'Test Fan', fan_status) assert fan_updater.log_warning.call_count == 1 fan_updater.log_warning.assert_called_with('Failed to set status LED for fan Test Fan, set_status_led not implemented') @@ -200,7 +207,7 @@ def test_set_fan_led_exception(self): def test_fan_absent(self): chassis = MockChassis() chassis.make_absent_fan() - fan_updater = thermalctld.FanUpdater(chassis) + fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event()) fan_updater.update() fan_list = chassis.get_all_fans() assert fan_list[0].get_status_led() == MockFan.STATUS_LED_COLOR_RED @@ -224,7 +231,7 @@ def test_fan_absent(self): def test_fan_faulty(self): chassis = MockChassis() chassis.make_faulty_fan() - fan_updater = thermalctld.FanUpdater(chassis) + fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event()) fan_updater.update() fan_list = chassis.get_all_fans() assert fan_list[0].get_status_led() == MockFan.STATUS_LED_COLOR_RED @@ -248,7 +255,7 @@ def test_fan_faulty(self): def test_fan_under_speed(self): chassis = MockChassis() chassis.make_under_speed_fan() - fan_updater = thermalctld.FanUpdater(chassis) + fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event()) fan_updater.update() fan_list = chassis.get_all_fans() assert fan_list[0].get_status_led() == MockFan.STATUS_LED_COLOR_RED @@ -264,7 +271,7 @@ def test_fan_under_speed(self): def test_fan_over_speed(self): chassis = MockChassis() chassis.make_over_speed_fan() - fan_updater = thermalctld.FanUpdater(chassis) + fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event()) fan_updater.update() fan_list = chassis.get_all_fans() assert fan_list[0].get_status_led() == MockFan.STATUS_LED_COLOR_RED @@ -283,7 +290,7 @@ def test_update_psu_fans(self): mock_fan = MockFan() psu._fan_list.append(mock_fan) chassis._psu_list.append(psu) - fan_updater = thermalctld.FanUpdater(chassis) + fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event()) fan_updater.update() assert fan_updater.log_warning.call_count == 0 @@ -331,7 +338,7 @@ def test_insufficient_fan_number(): chassis = MockChassis() chassis.make_absent_fan() chassis.make_faulty_fan() - fan_updater = thermalctld.FanUpdater(chassis) + fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event()) fan_updater.update() assert fan_updater.log_warning.call_count == 3 expected_calls = [ @@ -415,7 +422,7 @@ class TestTemperatureUpdater(object): """ def test_deinit(self): chassis = MockChassis() - temp_updater = thermalctld.TemperatureUpdater(chassis) + temp_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) temp_updater.temperature_status_dict = {'key1': 'value1', 'key2': 'value2'} temp_updater.table._del = mock.MagicMock() @@ -423,11 +430,12 @@ def test_deinit(self): assert temp_updater.table._del.call_count == 2 expected_calls = [mock.call('key1'), mock.call('key2')] temp_updater.table._del.assert_has_calls(expected_calls, any_order=True) + def test_over_temper(self): chassis = MockChassis() chassis.make_over_temper_thermal() - temperature_updater = thermalctld.TemperatureUpdater(chassis) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) temperature_updater.update() thermal_list = chassis.get_all_thermals() assert temperature_updater.log_warning.call_count == 1 @@ -441,7 +449,7 @@ def test_over_temper(self): def test_under_temper(self): chassis = MockChassis() chassis.make_under_temper_thermal() - temperature_updater = thermalctld.TemperatureUpdater(chassis) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) temperature_updater.update() thermal_list = chassis.get_all_thermals() assert temperature_updater.log_warning.call_count == 1 @@ -458,7 +466,7 @@ def test_update_psu_thermals(self): mock_thermal = MockThermal() psu._thermal_list.append(mock_thermal) chassis._psu_list.append(psu) - temperature_updater = thermalctld.TemperatureUpdater(chassis) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) temperature_updater.update() assert temperature_updater.log_warning.call_count == 0 @@ -478,7 +486,7 @@ def test_update_sfp_thermals(self): mock_thermal = MockThermal() sfp._thermal_list.append(mock_thermal) chassis._sfp_list.append(sfp) - temperature_updater = thermalctld.TemperatureUpdater(chassis) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) temperature_updater.update() assert temperature_updater.log_warning.call_count == 0 @@ -499,7 +507,7 @@ def test_update_thermal_with_exception(self): thermal.make_over_temper() chassis.get_all_thermals().append(thermal) - temperature_updater = thermalctld.TemperatureUpdater(chassis) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) temperature_updater.update() assert temperature_updater.log_warning.call_count == 2 @@ -524,17 +532,17 @@ def test_updater_thermal_check_modular_chassis(): chassis = MockChassis() assert chassis.is_modular_chassis() == False - temperature_updater = thermalctld.TemperatureUpdater(chassis) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) assert temperature_updater.chassis_table == None chassis.set_modular_chassis(True) chassis.set_my_slot(-1) - temperature_updater = thermalctld.TemperatureUpdater(chassis) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) assert temperature_updater.chassis_table == None my_slot = 1 chassis.set_my_slot(my_slot) - temperature_updater = thermalctld.TemperatureUpdater(chassis) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) assert temperature_updater.chassis_table != None assert temperature_updater.chassis_table.table_name == '{}_{}'.format(TEMPER_INFO_TABLE_NAME, str(my_slot)) @@ -547,7 +555,7 @@ def test_updater_thermal_check_chassis_table(): chassis.set_modular_chassis(True) chassis.set_my_slot(1) - temperature_updater = thermalctld.TemperatureUpdater(chassis) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) temperature_updater.update() assert temperature_updater.chassis_table.get_size() == chassis.get_num_thermals() @@ -566,7 +574,7 @@ def test_updater_thermal_check_min_max(): chassis.set_modular_chassis(True) chassis.set_my_slot(1) - temperature_updater = thermalctld.TemperatureUpdater(chassis) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) temperature_updater.update() slot_dict = temperature_updater.chassis_table.get(thermal.get_name()) @@ -580,12 +588,14 @@ def test_signal_handler(): daemon_thermalctld.stop_event.set = mock.MagicMock() daemon_thermalctld.log_info = mock.MagicMock() daemon_thermalctld.log_warning = mock.MagicMock() + daemon_thermalctld.thermal_manager.stop = mock.MagicMock() daemon_thermalctld.signal_handler(thermalctld.signal.SIGHUP, None) daemon_thermalctld.deinit() # Deinit becuase the test will hang if we assert assert daemon_thermalctld.log_info.call_count == 1 daemon_thermalctld.log_info.assert_called_with("Caught signal 'SIGHUP' - ignoring...") assert daemon_thermalctld.log_warning.call_count == 0 assert daemon_thermalctld.stop_event.set.call_count == 0 + assert daemon_thermalctld.thermal_manager.stop.call_count == 0 assert thermalctld.exit_code == thermalctld.ERR_UNKNOWN # Test SIGINT @@ -593,6 +603,7 @@ def test_signal_handler(): daemon_thermalctld.stop_event.set = mock.MagicMock() daemon_thermalctld.log_info = mock.MagicMock() daemon_thermalctld.log_warning = mock.MagicMock() + daemon_thermalctld.thermal_manager.stop = mock.MagicMock() test_signal = thermalctld.signal.SIGINT daemon_thermalctld.signal_handler(test_signal, None) daemon_thermalctld.deinit() # Deinit becuase the test will hang if we assert @@ -600,6 +611,7 @@ def test_signal_handler(): daemon_thermalctld.log_info.assert_called_with("Caught signal 'SIGINT' - exiting...") assert daemon_thermalctld.log_warning.call_count == 0 assert daemon_thermalctld.stop_event.set.call_count == 1 + assert daemon_thermalctld.thermal_manager.stop.call_count == 1 assert thermalctld.exit_code == (128 + test_signal) # Test SIGTERM @@ -608,6 +620,7 @@ def test_signal_handler(): daemon_thermalctld.stop_event.set = mock.MagicMock() daemon_thermalctld.log_info = mock.MagicMock() daemon_thermalctld.log_warning = mock.MagicMock() + daemon_thermalctld.thermal_manager.stop = mock.MagicMock() test_signal = thermalctld.signal.SIGTERM daemon_thermalctld.signal_handler(test_signal, None) daemon_thermalctld.deinit() # Deinit becuase the test will hang if we assert @@ -615,6 +628,7 @@ def test_signal_handler(): daemon_thermalctld.log_info.assert_called_with("Caught signal 'SIGTERM' - exiting...") assert daemon_thermalctld.log_warning.call_count == 0 assert daemon_thermalctld.stop_event.set.call_count == 1 + assert daemon_thermalctld.thermal_manager.stop.call_count == 1 assert thermalctld.exit_code == (128 + test_signal) # Test an unhandled signal @@ -623,12 +637,14 @@ def test_signal_handler(): daemon_thermalctld.stop_event.set = mock.MagicMock() daemon_thermalctld.log_info = mock.MagicMock() daemon_thermalctld.log_warning = mock.MagicMock() + daemon_thermalctld.thermal_manager.stop = mock.MagicMock() daemon_thermalctld.signal_handler(thermalctld.signal.SIGUSR1, None) daemon_thermalctld.deinit() # Deinit becuase the test will hang if we assert assert daemon_thermalctld.log_warning.call_count == 1 daemon_thermalctld.log_warning.assert_called_with("Caught unhandled signal 'SIGUSR1' - ignoring...") assert daemon_thermalctld.log_info.call_count == 0 assert daemon_thermalctld.stop_event.set.call_count == 0 + assert daemon_thermalctld.thermal_manager.stop.call_count == 0 assert thermalctld.exit_code == thermalctld.ERR_UNKNOWN diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index a4d64b68d..507890162 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ -1,10 +1,7 @@ import os import sys -import subprocess -import pytest import unittest -from imp import load_source if sys.version_info >= (3, 3): from unittest.mock import MagicMock, patch else: @@ -12,6 +9,7 @@ from sonic_py_common import daemon_base from swsscommon import swsscommon +from sonic_platform_base.sfp_base import SfpBase from .mock_swsscommon import Table @@ -24,13 +22,12 @@ test_path = os.path.dirname(os.path.abspath(__file__)) modules_path = os.path.dirname(test_path) scripts_path = os.path.join(modules_path, "xcvrd") -helper_file_path = os.path.join(scripts_path, "xcvrd_utilities"+"/y_cable_helper.py") sys.path.insert(0, modules_path) os.environ["XCVRD_UNIT_TESTING"] = "1" -load_source('y_cable_helper', scripts_path + '/xcvrd_utilities/y_cable_helper.py') -from y_cable_helper import * from xcvrd.xcvrd import * +from xcvrd.xcvrd_utilities.y_cable_helper import * +from xcvrd.xcvrd_utilities.sfp_status_helper import * class TestXcvrdScript(object): @@ -219,9 +216,9 @@ def test_init_port_sfp_status_tbl(self): init_port_sfp_status_tbl(stop_event) @patch('xcvrd.xcvrd_utilities.y_cable_helper.y_cable_platform_sfputil', MagicMock(return_value=[0])) - @patch('y_cable_helper.logical_port_name_to_physical_port_list', MagicMock(return_value=[0])) - @patch('y_cable_helper._wrapper_get_presence', MagicMock(return_value=True)) - @patch('y_cable_helper.get_muxcable_info', MagicMock(return_value={'tor_active': 'self', + @patch('xcvrd.xcvrd_utilities.y_cable_helper.logical_port_name_to_physical_port_list', MagicMock(return_value=[0])) + @patch('xcvrd.xcvrd_utilities.y_cable_helper._wrapper_get_presence', MagicMock(return_value=True)) + @patch('xcvrd.xcvrd_utilities.y_cable_helper.get_muxcable_info', MagicMock(return_value={'tor_active': 'self', 'mux_direction': 'self', 'manual_switch_count': '7', 'auto_switch_count': '71', @@ -258,9 +255,9 @@ def test_post_port_mux_info_to_db(self): assert(rc != -1) @patch('xcvrd.xcvrd_utilities.y_cable_helper.y_cable_platform_sfputil', MagicMock(return_value=[0])) - @patch('y_cable_helper.logical_port_name_to_physical_port_list', MagicMock(return_value=[0])) - @patch('y_cable_helper._wrapper_get_presence', MagicMock(return_value=True)) - @patch('y_cable_helper.get_muxcable_static_info', MagicMock(return_value={'read_side': 'self', + @patch('xcvrd.xcvrd_utilities.y_cable_helper.logical_port_name_to_physical_port_list', MagicMock(return_value=[0])) + @patch('xcvrd.xcvrd_utilities.y_cable_helper._wrapper_get_presence', MagicMock(return_value=True)) + @patch('xcvrd.xcvrd_utilities.y_cable_helper.get_muxcable_static_info', MagicMock(return_value={'read_side': 'self', 'nic_lane1_precursor1': '1', 'nic_lane1_precursor2': '-7', 'nic_lane1_maincursor': '-1', @@ -316,5 +313,25 @@ def test_get_media_settings_key(self): # Test a bad 'specification_compliance' value xcvr_info_dict[0]['specification_compliance'] = 'N/A' result = get_media_settings_key(0, xcvr_info_dict) - assert result == ['MOLEX-1064141421', 'QSFP+'] + assert result == ['MOLEX-1064141421', 'QSFP+-*'] # TODO: Ensure that error message was logged + + def test_detect_port_in_error_status(self): + class MockTable: + def get(self, key): + pass + + status_tbl = MockTable() + status_tbl.get = MagicMock(return_value=(True, {'error': 'N/A'})) + assert not detect_port_in_error_status(None, status_tbl) + + status_tbl.get = MagicMock(return_value=(True, {'error': SfpBase.SFP_ERROR_DESCRIPTION_BLOCKING})) + assert detect_port_in_error_status(None, status_tbl) + + def test_is_error_sfp_status(self): + error_values = [7, 11, 19, 35] + for error_value in error_values: + assert is_error_block_eeprom_reading(error_value) + + assert not is_error_block_eeprom_reading(int(SFP_STATUS_INSERTED)) + assert not is_error_block_eeprom_reading(int(SFP_STATUS_REMOVED)) diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index 0ae0a0cfd..563145a8e 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -15,11 +15,11 @@ import threading import time - from enum import Enum from sonic_py_common import daemon_base, device_info, logger from sonic_py_common import multi_asic from swsscommon import swsscommon + from .xcvrd_utilities import sfp_status_helper from .xcvrd_utilities import y_cable_helper except ImportError as e: raise ImportError(str(e) + " - required module not found") @@ -43,18 +43,6 @@ TIME_FOR_SFP_READY_SECS = 1 XCVRD_MAIN_THREAD_SLEEP_SECS = 60 -# SFP status definition, shall be aligned with the definition in get_change_event() of ChassisBase -SFP_STATUS_REMOVED = '0' -SFP_STATUS_INSERTED = '1' - -# SFP error code enum, new elements can be added to the enum if new errors need to be supported. -SFP_STATUS_ERR_ENUM = Enum('SFP_STATUS_ERR_ENUM', ['SFP_STATUS_ERR_I2C_STUCK', 'SFP_STATUS_ERR_BAD_EEPROM', - 'SFP_STATUS_ERR_UNSUPPORTED_CABLE', 'SFP_STATUS_ERR_HIGH_TEMP', - 'SFP_STATUS_ERR_BAD_CABLE'], start=2) - -# Convert the error code to string and store them in a set for convenience -errors_block_eeprom_reading = set(str(error_code.value) for error_code in SFP_STATUS_ERR_ENUM) - EVENT_ON_ALL_SFP = '-1' # events definition SYSTEM_NOT_READY = 'system_not_ready' @@ -188,11 +176,13 @@ def _wrapper_get_transceiver_change_event(timeout): if platform_chassis is not None: try: status, events = platform_chassis.get_change_event(timeout) - sfp_events = events['sfp'] - return status, sfp_events + sfp_events = events.get('sfp') + sfp_errors = events.get('sfp_error') + return status, sfp_events, sfp_errors except NotImplementedError: pass - return platform_sfputil.get_transceiver_change_event(timeout) + status, events = platform_sfputil.get_transceiver_change_event(timeout) + return status, events, None def _wrapper_get_sfp_type(physical_port): @@ -203,10 +193,18 @@ def _wrapper_get_sfp_type(physical_port): pass return None + +def _wrapper_get_sfp_error_description(physical_port): + if platform_chassis: + try: + return platform_chassis.get_sfp(physical_port).get_error_description() + except NotImplementedError: + pass + return None # Remove unnecessary unit from the raw data -def beautify_dom_info_dict(dom_info_dict): +def beautify_dom_info_dict(dom_info_dict, physical_port): dom_info_dict['temperature'] = strip_unit_and_beautify(dom_info_dict['temperature'], TEMP_UNIT) dom_info_dict['voltage'] = strip_unit_and_beautify(dom_info_dict['voltage'], VOLT_UNIT) dom_info_dict['rx1power'] = strip_unit_and_beautify(dom_info_dict['rx1power'], POWER_UNIT) @@ -221,6 +219,19 @@ def beautify_dom_info_dict(dom_info_dict): dom_info_dict['tx2power'] = strip_unit_and_beautify(dom_info_dict['tx2power'], POWER_UNIT) dom_info_dict['tx3power'] = strip_unit_and_beautify(dom_info_dict['tx3power'], POWER_UNIT) dom_info_dict['tx4power'] = strip_unit_and_beautify(dom_info_dict['tx4power'], POWER_UNIT) + if _wrapper_get_sfp_type(physical_port) == 'QSFP_DD': + dom_info_dict['rx5power'] = strip_unit_and_beautify(dom_info_dict['rx5power'], POWER_UNIT) + dom_info_dict['rx6power'] = strip_unit_and_beautify(dom_info_dict['rx6power'], POWER_UNIT) + dom_info_dict['rx7power'] = strip_unit_and_beautify(dom_info_dict['rx7power'], POWER_UNIT) + dom_info_dict['rx8power'] = strip_unit_and_beautify(dom_info_dict['rx8power'], POWER_UNIT) + dom_info_dict['tx5bias'] = strip_unit_and_beautify(dom_info_dict['tx5bias'], BIAS_UNIT) + dom_info_dict['tx6bias'] = strip_unit_and_beautify(dom_info_dict['tx6bias'], BIAS_UNIT) + dom_info_dict['tx7bias'] = strip_unit_and_beautify(dom_info_dict['tx7bias'], BIAS_UNIT) + dom_info_dict['tx8bias'] = strip_unit_and_beautify(dom_info_dict['tx8bias'], BIAS_UNIT) + dom_info_dict['tx5power'] = strip_unit_and_beautify(dom_info_dict['tx5power'], POWER_UNIT) + dom_info_dict['tx6power'] = strip_unit_and_beautify(dom_info_dict['tx6power'], POWER_UNIT) + dom_info_dict['tx7power'] = strip_unit_and_beautify(dom_info_dict['tx7power'], POWER_UNIT) + dom_info_dict['tx8power'] = strip_unit_and_beautify(dom_info_dict['tx8power'], POWER_UNIT) def beautify_dom_threshold_info_dict(dom_info_dict): @@ -399,7 +410,7 @@ def post_port_dom_info_to_db(logical_port_name, table, stop_event=threading.Even try: dom_info_dict = _wrapper_get_transceiver_dom_info(physical_port) if dom_info_dict is not None: - beautify_dom_info_dict(dom_info_dict) + beautify_dom_info_dict(dom_info_dict, physical_port) if _wrapper_get_sfp_type(physical_port) == 'QSFP_DD': fvs = swsscommon.FieldValuePairs( [('temperature', dom_info_dict['temperature']), @@ -540,7 +551,7 @@ def recover_missing_sfp_table_entries(sfp_util, int_tbl, status_tbl, stop_event) continue keys = int_tbl[asic_index].getKeys() - if logical_port_name not in keys and not detect_port_in_error_status(logical_port_name, status_tbl[asic_index]): + if logical_port_name not in keys and not sfp_status_helper.detect_port_in_error_status(logical_port_name, status_tbl[asic_index]): post_port_sfp_info_to_db(logical_port_name, int_tbl[asic_index], transceiver_dict, stop_event) @@ -638,29 +649,36 @@ def get_media_settings_key(physical_port, transceiver_dict): media_len = transceiver_dict[physical_port]['cable_length'] media_compliance_dict_str = transceiver_dict[physical_port]['specification_compliance'] - + media_compliance_code = '' + media_type = '' + media_key = '' media_compliance_dict = {} + try: - media_compliance_dict = ast.literal_eval(media_compliance_dict_str) + if _wrapper_get_sfp_type(physical_port) == 'QSFP_DD': + media_compliance_code = media_compliance_dict_str + else: + media_compliance_dict = ast.literal_eval(media_compliance_dict_str) + if sup_compliance_str in media_compliance_dict: + media_compliance_code = media_compliance_dict[sup_compliance_str] except ValueError as e: helper_logger.log_error("Invalid value for port {} 'specification_compliance': {}".format(physical_port, media_compliance_dict_str)) - media_compliance_code = '' - media_type = '' - - if sup_compliance_str in media_compliance_dict: - media_compliance_code = media_compliance_dict[sup_compliance_str] - media_type = transceiver_dict[physical_port]['type_abbrv_name'] - media_key = '' - if len(media_type) != 0: media_key += media_type if len(media_compliance_code) != 0: media_key += '-' + media_compliance_code - if len(media_len) != 0: - media_key += '-' + media_len + 'M' + if _wrapper_get_sfp_type(physical_port) == 'QSFP_DD': + if media_compliance_code == "passive_copper_media_interface": + if len(media_len) != 0: + media_key += '-' + media_len + 'M' + else: + if len(media_len) != 0: + media_key += '-' + media_len + 'M' + else: + media_key += '-' + '*' return [vendor_key, media_key] @@ -742,8 +760,8 @@ def notify_media_setting(logical_port_name, transceiver_dict, key = get_media_settings_key(physical_port, transceiver_dict) media_dict = get_media_settings_value(physical_port, key) - if(len(media_dict) == 0): - helper_logger.log_error("Error in obtaining media setting") + if len(media_dict) == 0: + helper_logger.log_error("Error in obtaining media setting for {}".format(logical_port_name)) return fvs = swsscommon.FieldValuePairs(len(media_dict)) @@ -771,30 +789,17 @@ def waiting_time_compensation_with_sleep(time_start, time_to_wait): # Update port SFP status table on receiving SFP change event -def update_port_transceiver_status_table(logical_port_name, status_tbl, status): - fvs = swsscommon.FieldValuePairs([('status', status)]) +def update_port_transceiver_status_table(logical_port_name, status_tbl, status, error_descriptions='N/A'): + fvs = swsscommon.FieldValuePairs([('status', status), ('error', error_descriptions)]) status_tbl.set(logical_port_name, fvs) + # Delete port from SFP status table def delete_port_from_status_table(logical_port_name, status_tbl): status_tbl._del(logical_port_name) -# Check whether port in error status - - -def detect_port_in_error_status(logical_port_name, status_tbl): - rec, fvp = status_tbl.get(logical_port_name) - if rec: - status_dict = dict(fvp) - if status_dict['status'] in errors_block_eeprom_reading: - return True - else: - return False - else: - return False - # Init TRANSCEIVER_STATUS table @@ -824,16 +829,16 @@ def init_port_sfp_status_tbl(stop_event=threading.Event()): physical_port_list = logical_port_name_to_physical_port_list(logical_port_name) if physical_port_list is None: helper_logger.log_error("No physical ports found for logical port '{}'".format(logical_port_name)) - update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], SFP_STATUS_REMOVED) + update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_REMOVED) for physical_port in physical_port_list: if stop_event.is_set(): break if not _wrapper_get_presence(physical_port): - update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], SFP_STATUS_REMOVED) + update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_REMOVED) else: - update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], SFP_STATUS_INSERTED) + update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_INSERTED) # # Helper classes =============================================================== @@ -872,7 +877,7 @@ def task_worker(self, y_cable_presence): logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name)) continue - if not detect_port_in_error_status(logical_port_name, status_tbl[asic_index]): + if not sfp_status_helper.detect_port_in_error_status(logical_port_name, status_tbl[asic_index]): post_port_dom_info_to_db(logical_port_name, dom_tbl[asic_index], self.task_stopping_event) post_port_dom_threshold_info_to_db(logical_port_name, dom_tbl[asic_index], self.task_stopping_event) if y_cable_presence[0] is True: @@ -1015,7 +1020,7 @@ def task_worker(self, stopping_event, sfp_error_event, y_cable_presence): while not stopping_event.is_set(): next_state = state time_start = time.time() - status, port_dict = _wrapper_get_transceiver_change_event(timeout) + status, port_dict, error_dict = _wrapper_get_transceiver_change_event(timeout) if not port_dict: continue helper_logger.log_debug("Got event {} {} in state {}".format(status, port_dict, state)) @@ -1075,11 +1080,11 @@ def task_worker(self, stopping_event, sfp_error_event, y_cable_presence): logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port)) continue - if value == SFP_STATUS_INSERTED: + if value == sfp_status_helper.SFP_STATUS_INSERTED: helper_logger.log_info("Got SFP inserted event") # A plugin event will clear the error state. update_port_transceiver_status_table( - logical_port, status_tbl[asic_index], SFP_STATUS_INSERTED) + logical_port, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_INSERTED) helper_logger.log_info("receive plug in and update port sfp status table.") rc = post_port_sfp_info_to_db(logical_port, int_tbl[asic_index], transceiver_dict) # If we didn't get the sfp info, assuming the eeprom is not ready, give a try again. @@ -1091,28 +1096,36 @@ def task_worker(self, stopping_event, sfp_error_event, y_cable_presence): post_port_dom_threshold_info_to_db(logical_port, dom_tbl[asic_index]) notify_media_setting(logical_port, transceiver_dict, app_port_tbl[asic_index]) transceiver_dict.clear() - elif value == SFP_STATUS_REMOVED: + elif value == sfp_status_helper.SFP_STATUS_REMOVED: helper_logger.log_info("Got SFP removed event") update_port_transceiver_status_table( - logical_port, status_tbl[asic_index], SFP_STATUS_REMOVED) - helper_logger.log_info("receive plug out and pdate port sfp status table.") + logical_port, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_REMOVED) + helper_logger.log_info("receive plug out and update port sfp status table.") del_port_sfp_dom_info_from_db(logical_port, int_tbl[asic_index], dom_tbl[asic_index]) - elif value in errors_block_eeprom_reading: - helper_logger.log_info("Got SFP Error event") - # Add port to error table to stop accessing eeprom of it - # If the port already in the error table, the stored error code will - # be updated to the new one. - update_port_transceiver_status_table(logical_port, status_tbl[asic_index], value) - helper_logger.log_info("receive error update port sfp status table.") - # In this case EEPROM is not accessible, so remove the DOM info - # since it will be outdated if long time no update. - # but will keep the interface info in the DB since it static. - del_port_sfp_dom_info_from_db(logical_port, None, dom_tbl[asic_index]) - else: - # SFP return unkown event, just ignore for now. - helper_logger.log_warning("Got unknown event {}, ignored".format(value)) - continue + try: + error_bits = int(value) + helper_logger.log_info("Got SFP error event {}".format(value)) + + error_descriptions = sfp_status_helper.fetch_generic_error_description(error_bits) + + if sfp_status_helper.has_vendor_specific_error(error_bits): + if error_dict: + vendor_specific_error_description = error_dict.get(key) + else: + vendor_specific_error_description = _wrapper_get_sfp_error_description(key) + error_descriptions.append(vendor_specific_error_description) + + # Add error info to database + # Any existing error will be replaced by the new one. + update_port_transceiver_status_table(logical_port, status_tbl[asic_index], value, '|'.join(error_descriptions)) + helper_logger.log_info("Receive error update port sfp status table.") + # In this case EEPROM is not accessible. The DOM info will be removed since it can be out-of-date. + # The interface info remains in the DB since it is static. + if sfp_status_helper.is_error_block_eeprom_reading(error_bits): + del_port_sfp_dom_info_from_db(logical_port, None, dom_tbl[asic_index]) + except (TypeError, ValueError) as e: + logger.log_error("Got unrecognized event {}, ignored".format(value)) # Since ports could be connected to a mux cable, if there is a change event process the change for being on a Y cable Port y_cable_helper.change_ports_status_for_y_cable_change_event( @@ -1333,6 +1346,8 @@ def deinit(self): if self.y_cable_presence[0] is True: y_cable_helper.delete_ports_status_for_y_cable() + del globals()['platform_chassis'] + # Run daemon def run(self): diff --git a/sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py b/sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py new file mode 100644 index 000000000..789b761e4 --- /dev/null +++ b/sonic-xcvrd/xcvrd/xcvrd_utilities/sfp_status_helper.py @@ -0,0 +1,37 @@ +from sonic_platform_base.sfp_base import SfpBase + +# SFP status definition, shall be aligned with the definition in get_change_event() of ChassisBase +SFP_STATUS_REMOVED = '0' +SFP_STATUS_INSERTED = '1' + +# SFP error code dictinary, new elements can be added if new errors need to be supported. +SFP_ERRORS_BLOCKING_MASK = 0x02 +SFP_ERRORS_GENERIC_MASK = 0x0000FFFE +SFP_ERRORS_VENDOR_SPECIFIC_MASK = 0xFFFF0000 + +def is_error_block_eeprom_reading(error_bits): + return 0 != (error_bits & SFP_ERRORS_BLOCKING_MASK) + + +def has_vendor_specific_error(error_bits): + return 0 != (error_bits & SFP_ERRORS_VENDOR_SPECIFIC_MASK) + + +def fetch_generic_error_description(error_bits): + generic_error_bits = (error_bits & SFP_ERRORS_GENERIC_MASK) + error_descriptions = [] + if generic_error_bits: + for error_bit, error_description in SfpBase.SFP_ERROR_BIT_TO_DESCRIPTION_DICT.items(): + if error_bit & generic_error_bits: + error_descriptions.append(error_description) + return error_descriptions + + +def detect_port_in_error_status(logical_port_name, status_tbl): + rec, fvp = status_tbl.get(logical_port_name) + if rec: + status_dict = dict(fvp) + error = status_dict.get('error') + return SfpBase.SFP_ERROR_DESCRIPTION_BLOCKING in error + return False + diff --git a/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py b/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py index aacc264b3..4ac4b4de0 100644 --- a/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py +++ b/sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py @@ -3,13 +3,14 @@ helper utlities configuring y_cable for xcvrd daemon """ +import datetime import threading -import time from sonic_py_common import daemon_base, logger from sonic_py_common import multi_asic from sonic_y_cable import y_cable from swsscommon import swsscommon +from . import sfp_status_helper SELECT_TIMEOUT = 1000 @@ -21,27 +22,6 @@ helper_logger = logger.Logger(SYSLOG_IDENTIFIER) - -# SFP status definition, shall be aligned with the definition in get_change_event() of ChassisBase -SFP_STATUS_REMOVED = '0' -SFP_STATUS_INSERTED = '1' - -# SFP error codes, stored as strings. Can add more as needed. -SFP_STATUS_ERR_I2C_STUCK = '2' -SFP_STATUS_ERR_BAD_EEPROM = '3' -SFP_STATUS_ERR_UNSUPPORTED_CABLE = '4' -SFP_STATUS_ERR_HIGH_TEMP = '5' -SFP_STATUS_ERR_BAD_CABLE = '6' - -# Store the error codes in a set for convenience -errors_block_eeprom_reading = { - SFP_STATUS_ERR_I2C_STUCK, - SFP_STATUS_ERR_BAD_EEPROM, - SFP_STATUS_ERR_UNSUPPORTED_CABLE, - SFP_STATUS_ERR_HIGH_TEMP, - SFP_STATUS_ERR_BAD_CABLE -} - Y_CABLE_STATUS_NO_TOR_ACTIVE = 0 Y_CABLE_STATUS_TORA_ACTIVE = 1 Y_CABLE_STATUS_TORB_ACTIVE = 2 @@ -88,6 +68,8 @@ def update_table_mux_status_for_response_tbl(table_name, status, logical_port_na fvs = swsscommon.FieldValuePairs([('response', status)]) table_name.set(logical_port_name, fvs) + helper_logger.log_debug("Y_CABLE_DEBUG: Successful in returning probe port status {}".format(logical_port_name)) + def update_table_mux_status_for_statedb_port_tbl(table_name, status, read_side, active_side, logical_port_name): fvs = swsscommon.FieldValuePairs([('state', status), @@ -98,6 +80,8 @@ def update_table_mux_status_for_statedb_port_tbl(table_name, status, read_side, def y_cable_toggle_mux_torA(physical_port): update_status = y_cable.toggle_mux_to_torA(physical_port) + + helper_logger.log_debug("Y_CABLE_DEBUG: Status of toggling mux to ToR A for port {} {}".format(physical_port, update_status)) if update_status is True: return 1 else: @@ -108,6 +92,8 @@ def y_cable_toggle_mux_torA(physical_port): def y_cable_toggle_mux_torB(physical_port): update_status = y_cable.toggle_mux_to_torB(physical_port) + + helper_logger.log_debug("Y_CABLE_DEBUG: Status of toggling mux to ToR B for port {} {}".format(physical_port, update_status)) if update_status is True: return 2 else: @@ -193,6 +179,8 @@ def update_appdb_port_mux_cable_response_table(logical_port_name, asic_index, ap helper_logger.log_warning( "Error: Could not get state for mux cable port probe command logical port {} and physical port {}".format(logical_port_name, physical_port)) + helper_logger.log_debug("Y_CABLE_DEBUG: notifying a probe for port status {} {}".format(logical_port_name, status)) + update_table_mux_status_for_response_tbl(y_cable_response_tbl[asic_index], status, logical_port_name) else: @@ -282,9 +270,9 @@ def check_identifier_presence_and_update_mux_table_entry(state_db, port_tbl, y_c else: # Convert list of tuples to a dictionary mux_table_dict = dict(fvs) - if "mux_cable" in mux_table_dict: - val = mux_table_dict.get("mux_cable", None) - if val == "true": + if "state" in mux_table_dict: + val = mux_table_dict.get("state", None) + if val in ["active", "auto"]: y_cable_asic_table = y_cable_tbl.get(asic_index, None) mux_asic_table = mux_tbl.get(asic_index, None) @@ -314,6 +302,9 @@ def check_identifier_presence_and_update_mux_table_entry(state_db, port_tbl, y_c logical_port_name, y_cable_tbl[asic_index]) post_port_mux_info_to_db(logical_port_name, mux_tbl[asic_index]) post_port_mux_static_info_to_db(logical_port_name, static_tbl[asic_index]) + else: + helper_logger.log_warning( + "Could not retreive active or auto value for state kvp for {}, inside MUX_CABLE table".format(logical_port_name)) def check_identifier_presence_and_delete_mux_table_entry(state_db, port_tbl, asic_index, logical_port_name, y_cable_presence, delete_change_event): @@ -364,17 +355,21 @@ def init_ports_status_for_y_cable(platform_sfp, platform_chassis, y_cable_presen config_db, state_db, port_tbl, y_cable_tbl = {}, {}, {}, {} static_tbl, mux_tbl = {}, {} port_table_keys = {} + xcvrd_log_tbl = {} y_cable_platform_sfputil = platform_sfp y_cable_platform_chassis = platform_chassis + fvs_updated = swsscommon.FieldValuePairs([('enable_log', 'false')]) # Get the namespaces in the platform namespaces = multi_asic.get_front_end_namespaces() for namespace in namespaces: asic_id = multi_asic.get_asic_index_from_namespace(namespace) config_db[asic_id] = daemon_base.db_connect("CONFIG_DB", namespace) - port_tbl[asic_id] = swsscommon.Table(config_db[asic_id], "PORT") + port_tbl[asic_id] = swsscommon.Table(config_db[asic_id], "MUX_CABLE") port_table_keys[asic_id] = port_tbl[asic_id].getKeys() + xcvrd_log_tbl[asic_id] = swsscommon.Table(config_db[asic_id], "XCVRD_LOG") + xcvrd_log_tbl[asic_id].set("Y_CABLE", fvs_updated ) # Init PORT_STATUS table if ports are on Y cable logical_port_list = y_cable_platform_sfputil.logical @@ -414,7 +409,7 @@ def change_ports_status_for_y_cable_change_event(port_dict, y_cable_presence, st for namespace in namespaces: asic_id = multi_asic.get_asic_index_from_namespace(namespace) config_db[asic_id] = daemon_base.db_connect("CONFIG_DB", namespace) - port_tbl[asic_id] = swsscommon.Table(config_db[asic_id], "PORT") + port_tbl[asic_id] = swsscommon.Table(config_db[asic_id], "MUX_CABLE") port_table_keys[asic_id] = port_tbl[asic_id].getKeys() # Init PORT_STATUS table if ports are on Y cable and an event is received @@ -435,15 +430,23 @@ def change_ports_status_for_y_cable_change_event(port_dict, y_cable_presence, st continue if logical_port_name in port_table_keys[asic_index]: - if value == SFP_STATUS_INSERTED: + if value == sfp_status_helper.SFP_STATUS_INSERTED: helper_logger.log_info("Got SFP inserted event") check_identifier_presence_and_update_mux_table_entry( state_db, port_tbl, y_cable_tbl, static_tbl, mux_tbl, asic_index, logical_port_name, y_cable_presence) - elif value == SFP_STATUS_REMOVED or value in errors_block_eeprom_reading: + elif value == sfp_status_helper.SFP_STATUS_REMOVED: check_identifier_presence_and_delete_mux_table_entry( state_db, port_tbl, asic_index, logical_port_name, y_cable_presence, delete_change_event) else: + try: + # Now that the value is in bitmap format, let's convert it to number + event_bits = int(value) + if sfp_status_helper.is_error_block_eeprom_reading(event_bits): + check_identifier_presence_and_delete_mux_table_entry( + state_db, port_tbl, asic_index, logical_port_name, y_cable_presence, delete_change_event) + except: + pass # SFP return unkown event, just ignore for now. helper_logger.log_warning("Got unknown event {}, ignored".format(value)) continue @@ -518,9 +521,9 @@ def check_identifier_presence_and_update_mux_info_entry(state_db, mux_tbl, asic_ else: # Convert list of tuples to a dictionary mux_table_dict = dict(fvs) - if "mux_cable" in mux_table_dict: - val = mux_table_dict.get("mux_cable", None) - if val == "true": + if "state" in mux_table_dict: + val = mux_table_dict.get("state", None) + if val in ["active", "auto"]: if mux_tbl.get(asic_index, None) is not None: # fill in the newly found entry @@ -535,6 +538,9 @@ def check_identifier_presence_and_update_mux_info_entry(state_db, mux_tbl, asic_ mux_tbl[asic_id] = swsscommon.Table(state_db[asic_id], MUX_CABLE_INFO_TABLE) # fill the newly found entry post_port_mux_info_to_db(logical_port_name, mux_tbl[asic_index]) + else: + helper_logger.log_warning( + "Could not retreive active or auto value for state kvp for {}, inside MUX_CABLE table".format(logical_port_name)) def get_firmware_dict(physical_port, target, side, mux_info_dict): @@ -1002,9 +1008,11 @@ def __init__(self): def task_worker(self): # Connect to STATE_DB and APPL_DB and get both the HW_MUX_STATUS_TABLE info - appl_db, state_db, status_tbl, y_cable_tbl = {}, {}, {}, {} + appl_db, state_db, config_db, status_tbl, y_cable_tbl = {}, {}, {}, {}, {} y_cable_tbl_keys = {} mux_cable_command_tbl, y_cable_command_tbl = {}, {} + mux_metrics_tbl = {} + xcvrd_log_tbl = {} sel = swsscommon.Select() @@ -1014,6 +1022,7 @@ def task_worker(self): # Open a handle to the Application database, in all namespaces asic_id = multi_asic.get_asic_index_from_namespace(namespace) appl_db[asic_id] = daemon_base.db_connect("APPL_DB", namespace) + config_db[asic_id] = daemon_base.db_connect("CONFIG_DB", namespace) status_tbl[asic_id] = swsscommon.SubscriberStateTable( appl_db[asic_id], swsscommon.APP_HW_MUX_CABLE_TABLE_NAME) mux_cable_command_tbl[asic_id] = swsscommon.SubscriberStateTable( @@ -1023,20 +1032,20 @@ def task_worker(self): state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) y_cable_tbl[asic_id] = swsscommon.Table( state_db[asic_id], swsscommon.STATE_HW_MUX_CABLE_TABLE_NAME) + mux_metrics_tbl[asic_id] = swsscommon.Table( + state_db[asic_id], swsscommon.STATE_MUX_METRICS_TABLE_NAME) + xcvrd_log_tbl[asic_id] = swsscommon.SubscriberStateTable( + config_db[asic_id], "XCVRD_LOG") y_cable_tbl_keys[asic_id] = y_cable_tbl[asic_id].getKeys() sel.addSelectable(status_tbl[asic_id]) sel.addSelectable(mux_cable_command_tbl[asic_id]) + sel.addSelectable(xcvrd_log_tbl[asic_id]) # Listen indefinitely for changes to the HW_MUX_CABLE_TABLE in the Application DB's while True: # Use timeout to prevent ignoring the signals we want to handle # in signal_handler() (e.g. SIGTERM for graceful shutdown) - # A brief sleep appears necessary in this loop or any spawned - # update threads will get stuck. Appears to be due to the sel.select() call. - # TODO: Eliminate the need for this sleep. - time.sleep(0.1) - (state, selectableObj) = sel.select(SELECT_TIMEOUT) if state == swsscommon.Select.TIMEOUT: @@ -1056,8 +1065,15 @@ def task_worker(self): while True: (port, op, fvp) = status_tbl[asic_index].pop() + if not port: break + + helper_logger.log_debug("Y_CABLE_DEBUG: received an event for port transition {}".format(port)) + + # entering this section signifies a start for xcvrd state + # change request from swss so initiate recording in mux_metrics table + time_start = datetime.datetime.utcnow().strftime("%Y-%b-%d %H:%M:%S.%f") if fvp: # This check might be redundant, to check, the presence of this Port in keys # in logical_port_list but keep for now for coherency @@ -1079,6 +1095,7 @@ def task_worker(self): old_status = mux_port_dict.get("state") read_side = mux_port_dict.get("read_side") # Now whatever is the state requested, toggle the mux appropriately + helper_logger.log_debug("Y_CABLE_DEBUG: xcvrd trying to transition port {} from {} to {}".format(port, old_status, new_status)) active_side = update_tor_active_side(read_side, new_status, port) if active_side == -1: helper_logger.log_warning("ERR: Got a change event for toggle but could not toggle the mux-direction for port {} state from {} to {}, writing unknown".format( @@ -1089,16 +1106,24 @@ def task_worker(self): ('read_side', read_side), ('active_side', str(active_side))]) y_cable_tbl[asic_index].set(port, fvs_updated) + helper_logger.log_debug("Y_CABLE_DEBUG: xcvrd successful to transition port {} from {} to {} and write back to the DB".format(port, old_status, new_status)) helper_logger.log_info("Got a change event for toggle the mux-direction active side for port {} state from {} to {}".format( port, old_status, new_status)) + time_end = datetime.datetime.utcnow().strftime("%Y-%b-%d %H:%M:%S.%f") + fvs_metrics = swsscommon.FieldValuePairs([('xcvrd_switch_{}_start'.format(new_status), str(time_start)), + ('xcvrd_switch_{}_end'.format(new_status), str(time_end))]) + mux_metrics_tbl[asic_index].set(port, fvs_metrics) else: helper_logger.log_info("Got a change event on port {} of table {} that does not contain state".format( port, swsscommon.APP_HW_MUX_CABLE_TABLE_NAME)) while True: (port_m, op_m, fvp_m) = mux_cable_command_tbl[asic_index].pop() + if not port_m: break + helper_logger.log_debug("Y_CABLE_DEBUG: received a probe for port status {}".format(port_m)) + if fvp_m: if port_m not in y_cable_tbl_keys[asic_index]: @@ -1120,6 +1145,30 @@ def task_worker(self): read_side = mux_port_dict.get("read_side") update_appdb_port_mux_cable_response_table(port_m, asic_index, appl_db, int(read_side)) + while True: + (key, op_m, fvp_m) = xcvrd_log_tbl[asic_index].pop() + + if not key: + break + + helper_logger.log_notice("Y_CABLE_DEBUG: trying to enable/disable debug logs") + if fvp_m: + + if key is "Y_CABLE": + continue + + fvp_dict = dict(fvp_m) + if "log_verbosity" in fvp_dict: + # check if xcvrd got a probe command + probe_identifier = fvp_dict["log_verbosity"] + + if probe_identifier == "debug": + helper_logger.set_min_log_priority_debug() + + elif probe_identifier == "notice": + helper_logger.set_min_log_priority_notice() + + def task_run(self): self.task_thread = threading.Thread(target=self.task_worker) self.task_thread.start()