From e8b4c2a1f40c35c283e2560e90d8356c3ad771af Mon Sep 17 00:00:00 2001 From: Junchao-Mellanox <57339448+Junchao-Mellanox@users.noreply.github.com> Date: Mon, 25 Oct 2021 12:59:06 +0800 Subject: [PATCH] [Mellanox] Refactor Mellanox platform API to support dynamic port configuration (#8422) - Why I did it * To support systems with dynamic port configuration * Apply lazy initialization to speed up loading of the platform API - How I did it * Add module.py to implement dynamic port configuration (aka line card model) * Adjust chassis.py, platform.py, thermal.py, sfp.py to support dynamic port configuration * Optimize existing code - How to verify it Platform regression on MSN4700, MSN3800 and MSN2700, 100% pass Unit tests cover all new changes. --- .gitignore | 7 + .../mellanox/mlnx-platform-api/.gitignore | 4 + .../mellanox/mlnx-platform-api/pytest.ini | 1 + .../sonic_platform/chassis.py | 870 +++++++++++------- .../sonic_platform/device_data.py | 299 +++--- .../sonic_platform/eeprom.py | 31 +- .../mlnx-platform-api/sonic_platform/fan.py | 384 ++++---- .../sonic_platform/fan_drawer.py | 47 +- .../mlnx-platform-api/sonic_platform/led.py | 171 ++-- .../sonic_platform/module.py | 249 +++++ .../sonic_platform/platform.py | 20 +- .../mlnx-platform-api/sonic_platform/psu.py | 442 ++++----- .../mlnx-platform-api/sonic_platform/sfp.py | 483 ++++++---- .../sonic_platform/sfp_event.py | 31 +- .../sonic_platform/thermal.py | 824 ++++++----------- .../sonic_platform/thermal_actions.py | 8 +- .../mlnx-platform-api/sonic_platform/utils.py | 136 ++- .../sonic_platform/vpd_parser.py | 84 ++ .../mlnx-platform-api/tests/conftest.py | 44 + .../mlnx-platform-api/tests/mock_eeprom_data | Bin 0 -> 606 bytes .../mlnx-platform-api/tests/mock_psu_vpd | 10 + .../mlnx-platform-api/tests/test_chassis.py | 271 ++++++ .../mlnx-platform-api/tests/test_eeprom.py | 108 +++ .../mlnx-platform-api/tests/test_fan_api.py | 187 +++- .../mlnx-platform-api/tests/test_led.py | 145 +++ 
.../mlnx-platform-api/tests/test_module.py | 185 ++++ .../mlnx-platform-api/tests/test_psu.py | 112 +++ .../mlnx-platform-api/tests/test_sfp.py | 153 +-- .../mlnx-platform-api/tests/test_sfp_event.py | 5 +- .../mlnx-platform-api/tests/test_thermal.py | 239 +++++ .../tests/test_thermal_policy.py | 29 +- .../mlnx-platform-api/tests/test_utils.py | 118 +++ 32 files changed, 3661 insertions(+), 2036 deletions(-) create mode 100644 platform/mellanox/mlnx-platform-api/sonic_platform/module.py create mode 100644 platform/mellanox/mlnx-platform-api/sonic_platform/vpd_parser.py create mode 100644 platform/mellanox/mlnx-platform-api/tests/conftest.py create mode 100644 platform/mellanox/mlnx-platform-api/tests/mock_eeprom_data create mode 100644 platform/mellanox/mlnx-platform-api/tests/mock_psu_vpd create mode 100644 platform/mellanox/mlnx-platform-api/tests/test_chassis.py create mode 100644 platform/mellanox/mlnx-platform-api/tests/test_eeprom.py create mode 100644 platform/mellanox/mlnx-platform-api/tests/test_led.py create mode 100644 platform/mellanox/mlnx-platform-api/tests/test_module.py create mode 100644 platform/mellanox/mlnx-platform-api/tests/test_psu.py create mode 100644 platform/mellanox/mlnx-platform-api/tests/test_thermal.py create mode 100644 platform/mellanox/mlnx-platform-api/tests/test_utils.py diff --git a/.gitignore b/.gitignore index 58d18cf69435..49861324aa1d 100644 --- a/.gitignore +++ b/.gitignore @@ -83,6 +83,13 @@ dockers/**/buildinfo platform/**/buildinfo sonic-slave*/**/buildinfo +# pytest coverage files +.coverage +coverage.xml +test-results.xml +htmlcov/ + # Dev tools .vscode/ .idea/ + diff --git a/platform/mellanox/mlnx-platform-api/.gitignore b/platform/mellanox/mlnx-platform-api/.gitignore index 07f8a98e1f4a..2a94e25395bf 100644 --- a/platform/mellanox/mlnx-platform-api/.gitignore +++ b/platform/mellanox/mlnx-platform-api/.gitignore @@ -1,2 +1,6 @@ *.pyc .cache/ +*/test-results.xml +*/htmlcov/ +*/coverage.xml +*/.coverage diff --git 
a/platform/mellanox/mlnx-platform-api/pytest.ini b/platform/mellanox/mlnx-platform-api/pytest.ini index 4c699c515652..081d3f0671f6 100644 --- a/platform/mellanox/mlnx-platform-api/pytest.ini +++ b/platform/mellanox/mlnx-platform-api/pytest.ini @@ -15,5 +15,6 @@ ## limitations under the License. ## [pytest] +addopts = --cov=sonic_platform --cov-report html --cov-report term --cov-report xml --junitxml=test-results.xml -vv filterwarnings = ignore::DeprecationWarning diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py b/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py index e3d826358702..50dd6cef53e0 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py @@ -24,22 +24,18 @@ try: from sonic_platform_base.chassis_base import ChassisBase - from sonic_platform_base.component_base import ComponentBase - from sonic_py_common import device_info from sonic_py_common.logger import Logger - from os import listdir - from os.path import isfile, join - import sys - import io - import re - import syslog + import os + from functools import reduce + + from . 
import utils + from .device_data import DeviceDataManager + from .sfp import SFP, deinitialize_sdk_handle except ImportError as e: raise ImportError (str(e) + "- required module not found") MAX_SELECT_DELAY = 3600 -MLNX_NUM_PSU = 2 - DMI_FILE = '/sys/firmware/dmi/entries/2-0/raw' DMI_HEADER_LEN = 15 DMI_PRODUCT_NAME = "Product Name" @@ -57,15 +53,8 @@ DMI_LOC: 5 } -EEPROM_CACHE_ROOT = '/var/cache/sonic/decode-syseeprom' -EEPROM_CACHE_FILE = 'syseeprom_cache' - HWMGMT_SYSTEM_ROOT = '/var/run/hw-management/system/' -MST_DEVICE_NAME_PATTERN = '/dev/mst/mt[0-9]*_pciconf0' -MST_DEVICE_RE_PATTERN = '/dev/mst/mt([0-9]*)_pciconf0' -SPECTRUM1_CHIP_ID = '52100' - #reboot cause related definitions REBOOT_CAUSE_ROOT = HWMGMT_SYSTEM_ROOT @@ -74,14 +63,6 @@ # Global logger class instance logger = Logger() -# magic code defnition for port number, qsfp port position of each Platform -# port_position_tuple = (PORT_START, QSFP_PORT_START, PORT_END, PORT_IN_BLOCK, EEPROM_OFFSET) -platform_dict_port = {'x86_64-mlnx_msn2010-r0': 3, 'x86_64-mlnx_msn2100-r0': 1, 'x86_64-mlnx_msn2410-r0': 2, - 'x86_64-mlnx_msn2700-r0': 0, 'x86_64-mlnx_lssn2700': 0, 'x86_64-mlnx_msn2740-r0': 0, - 'x86_64-mlnx_msn3420-r0': 5, 'x86_64-mlnx_msn3700-r0': 0, 'x86_64-mlnx_msn3700c-r0': 0, - 'x86_64-mlnx_msn3800-r0': 4, 'x86_64-mlnx_msn4600-r0': 4, 'x86_64-mlnx_msn4600c-r0': 4, - 'x86_64-mlnx_msn4700-r0': 0, 'x86_64-mlnx_msn4410-r0': 0} -port_position_tuple_list = [(0, 0, 31, 32, 1), (0, 0, 15, 16, 1), (0, 48, 55, 56, 1), (0, 18, 21, 22, 1), (0, 0, 63, 64, 1), (0, 48, 59, 60, 1)] class Chassis(ChassisBase): """Platform-specific Chassis class""" @@ -92,12 +73,6 @@ class Chassis(ChassisBase): def __init__(self): super(Chassis, self).__init__() - self.name = "Undefined" - self.model = "Undefined" - - # Initialize Platform name - self.platform_name = device_info.get_platform() - # Initialize DMI data self.dmi_data = None @@ -130,165 +105,161 @@ def __init__(self): # - False: All SFP modules have not been created # - 
True: All SFP modules have been created # - self.sfp_module_partial_initialized = False - self.sfp_module_full_initialized = False - self.sfp_event_initialized = False + self.sfp_initialized_count = 0 + self.sfp_event = None self.reboot_cause_initialized = False - self.sdk_handle = None - self.deinitialize_sdk_handle = None logger.log_info("Chassis loaded successfully") - def __del__(self): - if self.sfp_event_initialized: + if self.sfp_event: self.sfp_event.deinitialize() - if self.deinitialize_sdk_handle: - self.deinitialize_sdk_handle(self.sdk_handle) - + if SFP.shared_sdk_handle: + deinitialize_sdk_handle(SFP.shared_sdk_handle) + + ############################################## + # PSU methods + ############################################## def initialize_psu(self): - from sonic_platform.psu import Psu - # Initialize PSU list - self.psu_module = Psu - for index in range(MLNX_NUM_PSU): - psu = Psu(index, self.platform_name) - self._psu_list.append(psu) - - - def initialize_fan(self): - from .device_data import DEVICE_DATA - from sonic_platform.fan import Fan - from .fan_drawer import RealDrawer, VirtualDrawer - - fan_data = DEVICE_DATA[self.platform_name]['fans'] - drawer_num = fan_data['drawer_num'] - drawer_type = fan_data['drawer_type'] - fan_num_per_drawer = fan_data['fan_num_per_drawer'] - drawer_ctor = RealDrawer if drawer_type == 'real' else VirtualDrawer - fan_index = 0 - for drawer_index in range(drawer_num): - drawer = drawer_ctor(drawer_index, fan_data) - self._fan_drawer_list.append(drawer) - for index in range(fan_num_per_drawer): - fan = Fan(fan_index, drawer, index + 1) - fan_index += 1 - drawer._fan_list.append(fan) - - - def initialize_single_sfp(self, index): - if not self._sfp_list[index]: - if index >= self.QSFP_PORT_START and index < self.PORTS_IN_BLOCK: - sfp_module = self.sfp_module(index, 'QSFP', self.get_sdk_handle, self.platform_name) - else: - sfp_module = self.sfp_module(index, 'SFP', self.get_sdk_handle, self.platform_name) - - 
self._sfp_list[index] = sfp_module - - - def initialize_sfp(self, index=None): - from sonic_platform.sfp import SFP - - self.sfp_module = SFP - - # Initialize SFP list - port_position_tuple = self._get_port_position_tuple_by_platform_name() - self.PORT_START = port_position_tuple[0] - self.QSFP_PORT_START = port_position_tuple[1] - self.PORT_END = port_position_tuple[2] - self.PORTS_IN_BLOCK = port_position_tuple[3] - - if index is not None: - if not self.sfp_module_partial_initialized: - if index >= self.PORT_START and index < self.PORT_END: - self._sfp_list = list([None]*(self.PORT_END + 1)) + if not self._psu_list: + from .psu import Psu, FixedPsu + psu_count = DeviceDataManager.get_psu_count() + hot_swapable = DeviceDataManager.is_psu_hotswapable() + + # Initialize PSU list + for index in range(psu_count): + if hot_swapable: + psu = Psu(index) else: - raise IndexError("{} is not a valid index of SPF modules. Valid index range:[{}, {}]".format( - index, self.PORT_START + 1, self.PORT_END + 1)) - self.sfp_module_partial_initialized = True - else: - if not self.sfp_module_partial_initialized: - self._sfp_list = list([None]*(self.PORT_END + 1)) - self.sfp_module_partial_initialized = True - for index in range(self.PORT_START, self.PORT_END + 1): - self.initialize_single_sfp(index) - - self.sfp_module_full_initialized = True - - - def get_sdk_handle(self): - if not self.sdk_handle: - from sonic_platform.sfp import initialize_sdk_handle, deinitialize_sdk_handle - self.sdk_handle = initialize_sdk_handle() - if self.sdk_handle is None: - logger.log_error('Failed to open SDK handle') - else: - self.deinitialize_sdk_handle = deinitialize_sdk_handle - return self.sdk_handle + psu = FixedPsu(index) + self._psu_list.append(psu) + def get_num_psus(self): + """ + Retrieves the number of power supply units available on this chassis - def initialize_thermals(self): - from sonic_platform.thermal import initialize_chassis_thermals - # Initialize thermals - 
initialize_chassis_thermals(self.platform_name, self._thermal_list) + Returns: + An integer, the number of power supply units available on this + chassis + """ + self.initialize_psu() + return len(self._psu_list) + def get_all_psus(self): + """ + Retrieves all power supply units available on this chassis - def initialize_eeprom(self): - from .eeprom import Eeprom - # Initialize EEPROM - self._eeprom = Eeprom() - # Get chassis name and model from eeprom - self.name = self._eeprom.get_product_name() - self.model = self._eeprom.get_part_number() + Returns: + A list of objects derived from PsuBase representing all power + supply units available on this chassis + """ + self.initialize_psu() + return self._psu_list + def get_psu(self, index): + """ + Retrieves power supply unit represented by (0-based) index - def initialize_components(self): - # Initialize component list - from sonic_platform.component import ComponentONIE, ComponentSSD, ComponentBIOS, ComponentCPLD - self._component_list.append(ComponentONIE()) - self._component_list.append(ComponentSSD()) - self._component_list.append(ComponentBIOS()) - self._component_list.extend(ComponentCPLD.get_component_list()) + Args: + index: An integer, the index (0-based) of the power supply unit to + retrieve - def initizalize_system_led(self): - from .led import SystemLed - Chassis._led = SystemLed() + Returns: + An object dervied from PsuBase representing the specified power + supply unit + """ + self.initialize_psu() + return super(Chassis, self).get_psu(index) + ############################################## + # Fan methods + ############################################## - def get_name(self): - """ - Retrieves the name of the device + def initialize_fan(self): + if not self._fan_drawer_list: + from .fan import Fan + from .fan_drawer import RealDrawer, VirtualDrawer + + hot_swapable = DeviceDataManager.is_fan_hotswapable() + drawer_num = DeviceDataManager.get_fan_drawer_count() + fan_num = 
DeviceDataManager.get_fan_count() + fan_num_per_drawer = fan_num // drawer_num + drawer_ctor = RealDrawer if hot_swapable else VirtualDrawer + fan_index = 0 + for drawer_index in range(drawer_num): + drawer = drawer_ctor(drawer_index) + self._fan_drawer_list.append(drawer) + for index in range(fan_num_per_drawer): + fan = Fan(fan_index, drawer, index + 1) + fan_index += 1 + drawer._fan_list.append(fan) + + def get_num_fan_drawers(self): + """ + Retrieves the number of fan drawers available on this chassis Returns: - string: The name of the device + An integer, the number of fan drawers available on this chassis """ - return self.name - + return DeviceDataManager.get_fan_drawer_count() - def get_model(self): + def get_all_fan_drawers(self): """ - Retrieves the model number (or part number) of the device + Retrieves all fan drawers available on this chassis Returns: - string: Model/part number of device + A list of objects derived from FanDrawerBase representing all fan + drawers available on this chassis """ - return self.model + self.initialize_fan() + return self._fan_drawer_list - def get_revision(self): + def get_fan_drawer(self, index): """ - Retrieves the hardware revision of the device - + Retrieves fan drawers represented by (0-based) index + + Args: + index: An integer, the index (0-based) of the fan drawer to + retrieve + Returns: - string: Revision value of device + An object dervied from FanDrawerBase representing the specified fan + drawer """ - if self.dmi_data is None: - self.dmi_data = self._parse_dmi(DMI_FILE) + self.initialize_fan() + return super(Chassis, self).get_fan_drawer(index) - return self.dmi_data.get(DMI_VERSION, "N/A") - ############################################## # SFP methods ############################################## + + def initialize_single_sfp(self, index): + sfp_count = self.get_num_sfps() + if index < sfp_count: + if not self._sfp_list: + self._sfp_list = [None] * sfp_count + + if not self._sfp_list[index]: + from .sfp 
import SFP + self._sfp_list[index] = SFP(index) + self.sfp_initialized_count += 1 + + def initialize_sfp(self): + if not self._sfp_list: + from .sfp import SFP + sfp_count = self.get_num_sfps() + for index in range(sfp_count): + sfp_module = SFP(index) + self._sfp_list.append(sfp_module) + self.sfp_initialized_count = sfp_count + elif self.sfp_initialized_count != len(self._sfp_list): + from .sfp import SFP + for index in range(len(self._sfp_list)): + if self._sfp_list[index] is None: + self._sfp_list[index] = SFP(index) + self.sfp_initialized_count = len(self._sfp_list) + def get_num_sfps(self): """ Retrieves the number of sfps available on this chassis @@ -296,24 +267,19 @@ def get_num_sfps(self): Returns: An integer, the number of sfps available on this chassis """ - if not self.sfp_module_full_initialized: - self.initialize_sfp() - return len(self._sfp_list) - + return DeviceDataManager.get_sfp_count() def get_all_sfps(self): """ Retrieves all sfps available on this chassis Returns: - A list of objects derived from SfpBase representing all sfps + A list of objects derived from SfpBase representing all sfps available on this chassis """ - if not self.sfp_module_full_initialized: - self.initialize_sfp() + self.initialize_sfp() return self._sfp_list - def get_sfp(self, index): """ Retrieves sfp represented by (1-based) index @@ -327,69 +293,182 @@ def get_sfp(self, index): Returns: An object dervied from SfpBase representing the specified sfp """ - sfp = None - index -= 1 + index = index - 1 + self.initialize_single_sfp(index) + return super(Chassis, self).get_sfp(index) + + def get_change_event(self, timeout=0): + """ + Returns a nested dictionary containing all devices which have + experienced a change at chassis level - try: - if not self.sfp_module_partial_initialized: - self.initialize_sfp(index) - - sfp = self._sfp_list[index] - if not sfp: - self.initialize_single_sfp(index) - sfp = self._sfp_list[index] - except IndexError: - sys.stderr.write("SFP index {} 
out of range (0-{})\n".format( - index, len(self._sfp_list)-1)) - - return sfp - - - def _extract_num_of_fans_and_fan_drawers(self): - num_of_fan = 0 - num_of_drawer = 0 - for f in listdir(self.fan_path): - if isfile(join(self.fan_path, f)): - match_obj = re.match('fan(\d+)_speed_get', f) - if match_obj != None: - if int(match_obj.group(1)) > num_of_fan: - num_of_fan = int(match_obj.group(1)) - else: - match_obj = re.match('fan(\d+)_status', f) - if match_obj != None and int(match_obj.group(1)) > num_of_drawer: - num_of_drawer = int(match_obj.group(1)) + Args: + timeout: Timeout in milliseconds (optional). If timeout == 0, + this method will block until a change is detected. - return num_of_fan, num_of_drawer + Returns: + (bool, dict): + - True if call successful, False if not; + - A nested dictionary where key is a device type, + value is a dictionary with key:value pairs in the format of + {'device_id':'device_event'}, + where device_id is the device ID for this device and + device_event, + status='1' represents device inserted, + status='0' represents device removed. + Ex. {'fan':{'0':'0', '2':'1'}, 'sfp':{'11':'0'}} + indicates that fan 0 has been removed, fan 2 + has been inserted and sfp 11 has been removed. 
+ """ + self.initialize_sfp() + # Initialize SFP event first + if not self.sfp_event: + from .sfp_event import sfp_event + self.sfp_event = sfp_event() + self.sfp_event.initialize() - def _get_port_position_tuple_by_platform_name(self): - position_tuple = port_position_tuple_list[platform_dict_port[self.platform_name]] - return position_tuple + wait_for_ever = (timeout == 0) + port_dict = {} + error_dict = {} + if wait_for_ever: + timeout = MAX_SELECT_DELAY + while True: + status = self.sfp_event.check_sfp_status(port_dict, error_dict, timeout) + if bool(port_dict): + break + else: + status = self.sfp_event.check_sfp_status(port_dict, error_dict, timeout) + if status: + self.reinit_sfps(port_dict) + result_dict = {'sfp':port_dict} + if error_dict: + result_dict['sfp_error'] = error_dict + return True, result_dict + else: + return True, {'sfp':{}} - def get_watchdog(self): + def reinit_sfps(self, port_dict): """ - Retrieves hardware watchdog device on this chassis + Re-initialize SFP if there is any newly inserted SFPs + :param port_dict: SFP event data + :return: + """ + from . import sfp + for index, status in port_dict.items(): + if status == sfp.SFP_STATUS_INSERTED: + try: + self._sfp_list[index - 1].reinit() + except Exception as e: + logger.log_error("Fail to re-initialize SFP {} - {}".format(index, repr(e))) + + def _show_capabilities(self): + """ + This function is for debug purpose + Some features require a xSFP module to support some capabilities but it's unrealistic to + check those modules one by one. + So this function is introduce to show some capabilities of all xSFP modules mounted on the device. 
+ """ + self.initialize_sfp() + for s in self._sfp_list: + try: + print("index {} tx disable {} dom {} calibration {} temp {} volt {} power (tx {} rx {})".format(s.index, + s.dom_tx_disable_supported, + s.dom_supported, + s.calibration, + s.dom_temp_supported, + s.dom_volt_supported, + s.dom_rx_power_supported, + s.dom_tx_power_supported + )) + except: + print("fail to retrieve capabilities for module index {}".format(s.index)) + + ############################################## + # THERMAL methods + ############################################## + + def initialize_thermals(self): + if not self._thermal_list: + from .thermal import initialize_chassis_thermals + # Initialize thermals + self._thermal_list = initialize_chassis_thermals() + + def get_num_thermals(self): + """ + Retrieves the number of thermals available on this chassis + + Returns: + An integer, the number of thermals available on this chassis + """ + self.initialize_thermals() + return len(self._thermal_list) + + def get_all_thermals(self): + """ + Retrieves all thermals available on this chassis + + Returns: + A list of objects derived from ThermalBase representing all thermals + available on this chassis + """ + self.initialize_thermals() + return self._thermal_list + + def get_thermal(self, index): + """ + Retrieves thermal unit represented by (0-based) index + + Args: + index: An integer, the index (0-based) of the thermal to + retrieve + + Returns: + An object dervied from ThermalBase representing the specified thermal + """ + self.initialize_thermals() + return super(Chassis, self).get_thermal(index) + + ############################################## + # EEPROM methods + ############################################## + + def initialize_eeprom(self): + if not self._eeprom: + from .eeprom import Eeprom + # Initialize EEPROM + self._eeprom = Eeprom() + + def get_eeprom(self): + """ + Retreives eeprom device on this chassis Returns: An object derived from WatchdogBase representing the hardware - 
watchdog device + eeprom device + """ + self.initialize_eeprom() + return self._eeprom - Note: - We overload this method to ensure that watchdog is only initialized - when it is referenced. Currently, only one daemon can open the watchdog. - To initialize watchdog in the constructor causes multiple daemon - try opening watchdog when loading and constructing a chassis object - and fail. By doing so we can eliminate that risk. + def get_name(self): """ - try: - if self._watchdog is None: - from sonic_platform.watchdog import get_watchdog - self._watchdog = get_watchdog() - except Exception as e: - logger.log_info("Fail to load watchdog due to {}".format(repr(e))) + Retrieves the name of the device - return self._watchdog + Returns: + string: The name of the device + """ + self.initialize_eeprom() + return self._eeprom.get_product_name() + + def get_model(self): + """ + Retrieves the model number (or part number) of the device + Returns: + string: Model/part number of device + """ + self.initialize_eeprom() + return self._eeprom.get_part_number() def get_base_mac(self): """ @@ -399,9 +478,9 @@ def get_base_mac(self): A string containing the MAC address in the format 'XX:XX:XX:XX:XX:XX' """ + self.initialize_eeprom() return self._eeprom.get_base_mac() - def get_serial(self): """ Retrieves the hardware serial number for the chassis @@ -409,9 +488,9 @@ def get_serial(self): Returns: A string containing the hardware serial number for this chassis. """ + self.initialize_eeprom() return self._eeprom.get_serial_number() - def get_system_eeprom_info(self): """ Retrieves the full content of system EEPROM information for the chassis @@ -421,23 +500,128 @@ def get_system_eeprom_info(self): OCP ONIE TlvInfo EEPROM format and values are their corresponding values. 
""" + self.initialize_eeprom() return self._eeprom.get_system_eeprom_info() + ############################################## + # Component methods + ############################################## + + def initialize_components(self): + if not utils.is_host(): + return + if not self._component_list: + # Initialize component list + from .component import ComponentONIE, ComponentSSD, ComponentBIOS, ComponentCPLD + self._component_list.append(ComponentONIE()) + self._component_list.append(ComponentSSD()) + self._component_list.append(ComponentBIOS()) + self._component_list.extend(ComponentCPLD.get_component_list()) + + def get_num_components(self): + """ + Retrieves the number of components available on this chassis + + Returns: + An integer, the number of components available on this chassis + """ + self.initialize_components() + return len(self._component_list) + + def get_all_components(self): + """ + Retrieves all components available on this chassis + + Returns: + A list of objects derived from ComponentBase representing all components + available on this chassis + """ + self.initialize_components() + return self._component_list + + def get_component(self, index): + """ + Retrieves component represented by (0-based) index + + Args: + index: An integer, the index (0-based) of the component to retrieve + + Returns: + An object dervied from ComponentBase representing the specified component + """ + self.initialize_components() + return super(Chassis, self).get_component(index) + + ############################################## + # System LED methods + ############################################## + + def initizalize_system_led(self): + if not Chassis._led: + from .led import SystemLed + Chassis._led = SystemLed() + + def set_status_led(self, color): + """ + Sets the state of the system LED + + Args: + color: A string representing the color with which to set the + system LED - def _read_generic_file(self, filename, len): + Returns: + bool: True if system LED state is 
set successfully, False if not """ - Read a generic file, returns the contents of the file + self.initizalize_system_led() + return False if not Chassis._led else Chassis._led.set_status(color) + + def get_status_led(self): + """ + Gets the state of the system LED + + Returns: + A string, one of the valid LED color strings which could be vendor + specified. + """ + self.initizalize_system_led() + return None if not Chassis._led else Chassis._led.get_status() + + def get_watchdog(self): + """ + Retrieves hardware watchdog device on this chassis + + Returns: + An object derived from WatchdogBase representing the hardware + watchdog device + + Note: + We overload this method to ensure that watchdog is only initialized + when it is referenced. Currently, only one daemon can open the watchdog. + To initialize watchdog in the constructor causes multiple daemon + try opening watchdog when loading and constructing a chassis object + and fail. By doing so we can eliminate that risk. """ - result = '' try: - fileobj = io.open(filename) - result = fileobj.read(len) - fileobj.close() - return result + if self._watchdog is None: + from .watchdog import get_watchdog + self._watchdog = get_watchdog() except Exception as e: - logger.log_info("Fail to read file {} due to {}".format(filename, repr(e))) - return '0' + logger.log_info("Fail to load watchdog due to {}".format(repr(e))) + + return self._watchdog + + + def get_revision(self): + """ + Retrieves the hardware revision of the device + + Returns: + string: Revision value of device + """ + if self.dmi_data is None: + self.dmi_data = self._parse_dmi(DMI_FILE) + return self.dmi_data.get(DMI_VERSION, "N/A") def _parse_dmi(self, filename): """ @@ -448,9 +632,8 @@ def _parse_dmi(self, filename): """ result = {} try: - fileobj = open(filename, "rb") - data = fileobj.read() - fileobj.close() + with open(filename, "rb") as fileobj: + data = fileobj.read() body = data[DMI_HEADER_LEN:] records = body.split(b'\x00') @@ -463,15 +646,13 @@ 
def _parse_dmi(self, filename): return result - def _verify_reboot_cause(self, filename): ''' Open and read the reboot cause file in /var/run/hwmanagement/system (which is defined as REBOOT_CAUSE_ROOT) If a reboot cause file doesn't exists, returns '0'. ''' - return bool(int(self._read_generic_file(join(REBOOT_CAUSE_ROOT, filename), REBOOT_CAUSE_FILE_LENGTH).rstrip('\n'))) - + return bool(utils.read_int_from_file(os.path.join(REBOOT_CAUSE_ROOT, filename), log_func=None)) def initialize_reboot_cause(self): self.reboot_major_cause_dict = { @@ -498,7 +679,6 @@ def initialize_reboot_cause(self): self.reboot_by_software = 'reset_sw_reset' self.reboot_cause_initialized = True - def get_reboot_cause(self): """ Retrieves the cause of the previous reboot @@ -529,139 +709,161 @@ def get_reboot_cause(self): return self.REBOOT_CAUSE_NON_HARDWARE, '' + def get_thermal_manager(self): + from .thermal_manager import ThermalManager + return ThermalManager - def _show_capabilities(self): + def get_position_in_parent(self): """ - This function is for debug purpose - Some features require a xSFP module to support some capabilities but it's unrealistic to - check those modules one by one. - So this function is introduce to show some capabilities of all xSFP modules mounted on the device. + Retrieves 1-based relative physical position in parent device. 
If the agent cannot determine the parent-relative position + for some reason, or if the associated value of entPhysicalContainedIn is '0', then the value '-1' is returned + Returns: + integer: The 1-based relative physical position in parent device or -1 if cannot determine the position + """ + return -1 + + def is_replaceable(self): """ - for s in self._sfp_list: - try: - print("index {} tx disable {} dom {} calibration {} temp {} volt {} power (tx {} rx {})".format(s.index, - s.dom_tx_disable_supported, - s.dom_supported, - s.calibration, - s.dom_temp_supported, - s.dom_volt_supported, - s.dom_rx_power_supported, - s.dom_tx_power_supported - )) - except: - print("fail to retrieve capabilities for module index {}".format(s.index)) + Indicate whether this device is replaceable. + Returns: + bool: True if it is replaceable. + """ + return False - def get_change_event(self, timeout=0): - """ - Returns a nested dictionary containing all devices which have - experienced a change at chassis level +class ModularChassis(Chassis): + def __init__(self): + super(ModularChassis, self).__init__() + self.module_initialized_count = 0 - Args: - timeout: Timeout in milliseconds (optional). If timeout == 0, - this method will block until a change is detected. + def is_modular_chassis(self): + """ + Retrieves whether the sonic instance is part of modular chassis Returns: - (bool, dict): - - True if call successful, False if not; - - A nested dictionary where key is a device type, - value is a dictionary with key:value pairs in the format of - {'device_id':'device_event'}, - where device_id is the device ID for this device and - device_event, - status='1' represents device inserted, - status='0' represents device removed. - Ex. {'fan':{'0':'0', '2':'1'}, 'sfp':{'11':'0'}} - indicates that fan 0 has been removed, fan 2 - has been inserted and sfp 11 has been removed. + A bool value, should return False by default or for fixed-platforms. 
+ Should return True for supervisor-cards, line-cards etc running as part + of modular-chassis. """ - # Initialize SFP event first - if not self.sfp_event_initialized: - from sonic_platform.sfp_event import sfp_event - self.sfp_event = sfp_event() - self.sfp_event.initialize() - self.MAX_SELECT_EVENT_RETURNED = self.PORT_END - self.sfp_event_initialized = True + return True - wait_for_ever = (timeout == 0) - port_dict = {} - error_dict = {} - if wait_for_ever: - timeout = MAX_SELECT_DELAY - while True: - status = self.sfp_event.check_sfp_status(port_dict, error_dict, timeout) - if bool(port_dict): - break - else: - status = self.sfp_event.check_sfp_status(port_dict, error_dict, timeout) + ############################################## + # Module methods + ############################################## + def initialize_single_module(self, index): + count = self.get_num_modules() + if index < count: + if not self._module_list: + self._module_list = [None] * count + + if not self._module_list[index]: + from .module import Module + self._module_list[index] = Module(index + 1) + self.module_initialized_count += 1 + + def initialize_modules(self): + if not self._module_list: + from .module import Module + count = self.get_num_modules() + for index in range(1, count + 1): + self._module_list.append(Module(index)) + self.module_initialized_count = count + elif self.module_initialized_count != len(self._module_list): + from .module import Module + for index in range(len(self._module_list)): + if self._module_list[index] is None: + self._module_list[index] = Module(index + 1) + self.module_initialized_count = len(self._module_list) + + def get_num_modules(self): + """ + Retrieves the number of modules available on this chassis - if status: - self.reinit_sfps(port_dict) - result_dict = {'sfp':port_dict} - if error_dict: - result_dict['sfp_error'] = error_dict - return True, result_dict - else: - return True, {'sfp':{}} + Returns: + An integer, the number of modules available 
on this chassis + """ + return DeviceDataManager.get_linecard_count() - def reinit_sfps(self, port_dict): + def get_all_modules(self): """ - Re-initialize SFP if there is any newly inserted SFPs - :param port_dict: SFP event data - :return: + Retrieves all modules available on this chassis + + Returns: + A list of objects derived from ModuleBase representing all + modules available on this chassis """ - # SFP not initialize yet, do nothing - if not self.sfp_module_full_initialized: - return + self.initialize_modules() + return self._module_list - from . import sfp - for index, status in port_dict.items(): - if status == sfp.SFP_STATUS_INSERTED: - try: - self.get_sfp(index).reinit() - except Exception as e: - logger.log_error("Fail to re-initialize SFP {} - {}".format(index, repr(e))) + def get_module(self, index): + """ + Retrieves module represented by (0-based) index - def get_thermal_manager(self): - from .thermal_manager import ThermalManager - return ThermalManager + Args: + index: An integer, the index (0-based) of the module to + retrieve - def set_status_led(self, color): + Returns: + An object derived from ModuleBase representing the specified + module """ - Sets the state of the system LED + self.initialize_single_module(index) + return super(ModularChassis, self).get_module(index) + + @utils.default_return(-1) + def get_module_index(self, module_name): + """ + Retrieves module index from the module name Args: - color: A string representing the color with which to set the - system LED + module_name: A string, prefixed by SUPERVISOR, LINE-CARD or FABRIC-CARD + Ex.
SUPERVISOR0, LINE-CARD1, FABRIC-CARD5 Returns: - bool: True if system LED state is set successfully, False if not + An integer, the index of the ModuleBase object in the module_list """ - return False if not Chassis._led else Chassis._led.set_status(color) + return int(module_name[len('LINE-CARD')-1:]) - def get_status_led(self): + ############################################## + # SFP methods + ############################################## + + def get_num_sfps(self): """ - Gets the state of the system LED + Retrieves the number of sfps available on this chassis Returns: - A string, one of the valid LED color strings which could be vendor - specified. + An integer, the number of sfps available on this chassis """ - return None if not Chassis._led else Chassis._led.get_status() + return reduce(lambda x, y: x + y, (x.get_num_sfps() for x in self.get_all_modules())) - def get_position_in_parent(self): + def get_all_sfps(self): """ - Retrieves 1-based relative physical position in parent device. If the agent cannot determine the parent-relative position - for some reason, or if the associated value of entPhysicalContainedIn is '0', then the value '-1' is returned - Returns: - integer: The 1-based relative physical position in parent device or -1 if cannot determine the position - """ - return -1 + Retrieves all sfps available on this chassis - def is_replaceable(self): + Returns: + A list of objects derived from SfpBase representing all sfps + available on this chassis """ - Indicate whether this device is replaceable. + return reduce(lambda x, y: x + y, (x.get_all_sfps() for x in self.get_all_modules())) + + def get_sfp(self, index): + """ + Retrieves sfp represented by (1-based) index + + Args: + index: An integer, the index (1-based) of the sfp to retrieve. + The index should be the sequence of a physical port in a chassis, + starting from 1. + For example, 1 for Ethernet0, 2 for Ethernet4 and so on. + Returns: - bool: True if it is replaceable. 
+ An object derived from SfpBase representing the specified sfp """ - return False + sfp_index = index % DeviceDataManager.get_linecard_max_port_count() - 1 + slot_id = int((index - sfp_index - 1) / 16) + 1 + module = self.get_module(slot_id - 1) + if not module: + return None + + return module.get_sfp(sfp_index - 1) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py b/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py index d35a4dd37568..bebd97f810b9 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py @@ -14,26 +14,23 @@ # See the License for the specific language governing permissions and # limitations under the License. # + +import glob +import os +from sonic_py_common import device_info + +from . import utils + DEVICE_DATA = { 'x86_64-mlnx_msn2700-r0': { 'thermal': { 'minimum_table': { "unk_trust": {"-127:30":13, "31:40":14 , "41:120":15}, "unk_untrust": {"-127:25":13, "26:30":14 , "31:35":15, "36:120":16} + }, + "capability": { + "comex_amb": False } - }, - 'fans': { - 'drawer_num': 4, - 'drawer_type': 'real', - 'fan_num_per_drawer': 2, - 'support_fan_direction': True, - 'hot_swappable': True - }, - 'psus': { - 'psu_num': 2, - 'fan_num_per_psu': 1, - 'hot_swappable': True, - 'led_num': 1 } }, 'x86_64-mlnx_msn2740-r0': { @@ -41,20 +38,11 @@ 'minimum_table': { "unk_trust": {"-127:120":13}, "unk_untrust": {"-127:15":13, "16:25":14 , "26:30":15, "31:120":17}, + }, + "capability": { + "cpu_pack": False, + "comex_amb": False } - }, - 'fans': { - 'drawer_num': 4, - 'drawer_type': 'real', - 'fan_num_per_drawer': 1, - 'support_fan_direction': True, - 'hot_swappable': True - }, - 'psus': { - 'psu_num': 2, - 'fan_num_per_psu': 1, - 'hot_swappable': True, - 'led_num': 1 } }, 'x86_64-mlnx_msn2100-r0': { @@ -62,20 +50,11 @@ 'minimum_table': { "unk_trust": {"-127:40":12, "41:120":13}, "unk_untrust": {"-127:15":12, "16:25":13,
"26:30":14, "31:35":15, "36:120":16} + }, + "capability": { + "cpu_pack": False, + "comex_amb": False } - }, - 'fans': { - 'drawer_num': 1, - 'drawer_type': 'virtual', - 'fan_num_per_drawer': 4, - 'support_fan_direction': True, - 'hot_swappable': False - }, - 'psus': { - 'psu_num': 2, - 'fan_num_per_psu': 1, - 'hot_swappable': False, - 'led_num': 2 } }, 'x86_64-mlnx_msn2410-r0': { @@ -83,20 +62,10 @@ 'minimum_table': { "unk_trust": {"-127:30":13, "31:40":14 , "41:120":15}, "unk_untrust": {"-127:25":13, "26:30":14 , "31:35":15, "36:120":16} + }, + "capability": { + "comex_amb": False } - }, - 'fans': { - 'drawer_num': 4, - 'drawer_type': 'real', - 'fan_num_per_drawer': 2, - 'support_fan_direction': True, - 'hot_swappable': True - }, - 'psus': { - 'psu_num': 2, - 'fan_num_per_psu': 1, - 'hot_swappable': True, - 'led_num': 1 } }, 'x86_64-mlnx_msn2010-r0': { @@ -104,20 +73,11 @@ 'minimum_table': { "unk_trust": {"-127:120":12}, "unk_untrust": {"-127:15":12, "16:20":13 , "21:30":14, "31:35":15, "36:120":16} + }, + "capability": { + "cpu_pack": False, + "comex_amb": False } - }, - 'fans': { - 'drawer_num': 1, - 'drawer_type': 'virtual', - 'fan_num_per_drawer': 4, - 'support_fan_direction': True, - 'hot_swappable': False - }, - 'psus': { - 'psu_num': 2, - 'fan_num_per_psu': 1, - 'hot_swappable': False, - 'led_num': 2 } }, 'x86_64-mlnx_msn3700-r0': { @@ -126,19 +86,6 @@ "unk_trust": {"-127:25":12, "26:40":13 , "41:120":14}, "unk_untrust": {"-127:15":12, "16:30":13 , "31:35":14, "36:40":15, "41:120":16}, } - }, - 'fans': { - 'drawer_num': 6, - 'drawer_type': 'real', - 'fan_num_per_drawer': 2, - 'support_fan_direction': True, - 'hot_swappable': True - }, - 'psus': { - 'psu_num': 2, - 'fan_num_per_psu': 1, - 'hot_swappable': True, - 'led_num': 1 } }, 'x86_64-mlnx_msn3700c-r0': { @@ -147,19 +94,6 @@ "unk_trust": {"-127:40":12, "41:120":13}, "unk_untrust": {"-127:10":12, "11:20":13 , "21:30":14, "31:35":15, "36:120":16}, } - }, - 'fans': { - 'drawer_num': 4, - 'drawer_type': 
'real', - 'fan_num_per_drawer': 2, - 'support_fan_direction': True, - 'hot_swappable': True - }, - 'psus': { - 'psu_num': 2, - 'fan_num_per_psu': 1, - 'hot_swappable': True, - 'led_num': 1 } }, 'x86_64-mlnx_msn3800-r0': { @@ -168,19 +102,6 @@ "unk_trust": {"-127:30":12, "31:40":13 , "41:120":14}, "unk_untrust": {"-127:0":12, "1:10":13 , "11:15":14, "16:20":15, "21:35":16, "36:120":17}, } - }, - 'fans': { - 'drawer_num': 3, - 'drawer_type': 'real', - 'fan_num_per_drawer': 1, - 'support_fan_direction': True, - 'hot_swappable': True - }, - 'psus': { - 'psu_num': 2, - 'fan_num_per_psu': 1, - 'hot_swappable': True, - 'led_num': 1 } }, 'x86_64-mlnx_msn4700-r0': { @@ -189,19 +110,6 @@ "unk_trust": {"-127:35":14, "36:120":15}, "unk_untrust": {"-127:35":14, "36:120":15}, } - }, - 'fans': { - 'drawer_num': 6, - 'drawer_type': 'real', - 'fan_num_per_drawer': 2, - 'support_fan_direction': True, - 'hot_swappable': True - }, - 'psus': { - 'psu_num': 2, - 'fan_num_per_psu': 1, - 'hot_swappable': True, - 'led_num': 1 } }, 'x86_64-mlnx_msn4410-r0': { @@ -210,19 +118,6 @@ "unk_trust": {"-127:40":12, "41:120":13}, "unk_untrust": {"-127:10":12, "11:20":13, "21:30":14, "31:35":15, "36:120":16}, } - }, - 'fans': { - 'drawer_num': 6, - 'drawer_type': 'real', - 'fan_num_per_drawer': 2, - 'support_fan_direction': True, - 'hot_swappable': True - }, - 'psus': { - 'psu_num': 2, - 'fan_num_per_psu': 1, - 'hot_swappable': True, - 'led_num': 1 } }, 'x86_64-mlnx_msn3420-r0': { @@ -231,19 +126,6 @@ "unk_trust": {"-127:120":12}, "unk_untrust": {"-127:25":12, "26:35":13, "36:40":14, "41:120":16}, } - }, - 'fans': { - 'drawer_num': 5, - 'drawer_type': 'real', - 'fan_num_per_drawer': 2, - 'support_fan_direction': True, - 'hot_swappable': True - }, - 'psus': { - 'psu_num': 2, - 'fan_num_per_psu': 1, - 'hot_swappable': True, - 'led_num': 1 } }, 'x86_64-mlnx_msn4600c-r0': { @@ -252,19 +134,6 @@ "unk_trust": {"-127:40":12, "41:120":13}, "unk_untrust": {"-127:5":12, "6:20":13, "21:30":14, "31:35":15, 
"36:40":16, "41:120":17}, } - }, - 'fans': { - 'drawer_num': 3, - 'drawer_type': 'real', - 'fan_num_per_drawer': 1, - 'support_fan_direction': True, - 'hot_swappable': True - }, - 'psus': { - 'psu_num': 2, - 'fan_num_per_psu': 1, - 'hot_swappable': True, - 'led_num': 1 } }, 'x86_64-mlnx_msn4600-r0': { @@ -273,19 +142,113 @@ "unk_trust": {"-127:40": 12, "41:120": 13}, "unk_untrust": {"-127:5": 12, "6:20": 13, "21:30": 14, "31:35": 15, "36:40": 16, "41:120": 17}, } + } + }, + 'x86_64-mlnx_msn4800-r0': { + 'thermal': { + "capability": { + "comex_amb": False + } }, - 'fans': { - 'drawer_num': 3, - 'drawer_type': 'real', - 'fan_num_per_drawer': 1, - 'support_fan_direction': True, - 'hot_swappable': True - }, - 'psus': { - 'psu_num': 2, - 'fan_num_per_psu': 1, - 'hot_swappable': True, - 'led_num': 1 + 'sfp': { + 'max_port_per_line_card': 16 } } } + + +class DeviceDataManager: + @classmethod + @utils.read_only_cache() + def get_platform_name(cls): + return device_info.get_platform() + + @classmethod + @utils.read_only_cache() + def get_fan_drawer_count(cls): + # Here we don't read from /run/hw-management/config/hotplug_fans because the value in it is not + # always correct. 
+ return len(glob.glob('/run/hw-management/thermal/fan*_status')) if cls.is_fan_hotswapable() else 1 + + @classmethod + @utils.read_only_cache() + def get_fan_count(cls): + return len(glob.glob('/run/hw-management/thermal/fan*_speed_get')) + + @classmethod + @utils.read_only_cache() + def is_fan_hotswapable(cls): + return utils.read_int_from_file('/run/hw-management/config/hotplug_fans') > 0 + + @classmethod + @utils.read_only_cache() + def get_psu_count(cls): + psu_count = utils.read_int_from_file('/run/hw-management/config/hotplug_psus') + # If psu_count == 0, the platform has fixed PSU + return psu_count if psu_count > 0 else len(glob.glob('/run/hw-management/config/psu*_i2c_addr')) + + @classmethod + @utils.read_only_cache() + def is_psu_hotswapable(cls): + return utils.read_int_from_file('/run/hw-management/config/hotplug_psus') > 0 + + @classmethod + @utils.read_only_cache() + def get_sfp_count(cls): + return utils.read_int_from_file('/run/hw-management/config/sfp_counter') + + @classmethod + def get_linecard_sfp_count(cls, lc_index): + return utils.read_int_from_file('/run/hw-management/lc{}/config/module_counter'.format(lc_index), log_func=None) + + @classmethod + def get_gearbox_count(cls, sysfs_folder): + return utils.read_int_from_file(os.path.join(sysfs_folder, 'gearbox_counter'), log_func=None) + + @classmethod + @utils.read_only_cache() + def get_cpu_thermal_count(cls): + return len(glob.glob('run/hw-management/thermal/cpu_core[!_]')) + + @classmethod + @utils.read_only_cache() + def get_minimum_table(cls): + platform_data = DEVICE_DATA.get(cls.get_platform_name(), None) + if not platform_data: + return None + + thermal_data = platform_data.get('thermal', None) + if not thermal_data: + return None + + return thermal_data.get('minimum_table', None) + + @classmethod + @utils.read_only_cache() + def get_thermal_capability(cls): + platform_data = DEVICE_DATA.get(cls.get_platform_name(), None) + if not platform_data: + return None + + thermal_data = 
platform_data.get('thermal', None) + if not thermal_data: + return None + + return thermal_data.get('capability', None) + + @classmethod + @utils.read_only_cache() + def get_linecard_count(cls): + return utils.read_int_from_file('/run/hw-management/config/hotplug_linecards', log_func=None) + + @classmethod + @utils.read_only_cache() + def get_linecard_max_port_count(cls): + platform_data = DEVICE_DATA.get(cls.get_platform_name(), None) + if not platform_data: + return 0 + + sfp_data = platform_data.get('sfp', None) + if not sfp_data: + return 0 + return sfp_data.get('max_port_per_line_card', 0) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/eeprom.py b/platform/mellanox/mlnx-platform-api/sonic_platform/eeprom.py index bd2711633d87..17f14b04430f 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/eeprom.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/eeprom.py @@ -22,17 +22,16 @@ # ############################################################################# import os -import time import subprocess from sonic_py_common.logger import Logger -from sonic_py_common.device_info import get_platform, get_path_to_platform_dir try: from sonic_platform_base.sonic_eeprom import eeprom_tlvinfo except ImportError as e: raise ImportError (str(e) + "- required module not found") -from .utils import default_return +from .device_data import DeviceDataManager +from .utils import default_return, is_host logger = Logger() @@ -41,30 +40,23 @@ # should this be moved to chass.py or here, which better? 
# EEPROM_SYMLINK = "/var/run/hw-management/eeprom/vpd_info" - -platform_name = get_platform() -if 'simx' in platform_name: - platform_path = get_path_to_platform_dir() - +platform_name = DeviceDataManager.get_platform_name() +if platform_name and 'simx' in platform_name: if not os.path.exists(EEPROM_SYMLINK): + if is_host(): + platform_path = os.path.join('/usr/share/sonic/device', platform_name) + else: + platform_path = '/usr/share/sonic/platform' if not os.path.exists(os.path.dirname(EEPROM_SYMLINK)): os.makedirs(os.path.dirname(EEPROM_SYMLINK)) - subprocess.check_call(['/usr/bin/xxd', '-r', '-p', 'syseeprom.hex', EEPROM_SYMLINK], cwd=platform_path) -class Eeprom(eeprom_tlvinfo.TlvInfoDecoder): - RETRIES = 3 +class Eeprom(eeprom_tlvinfo.TlvInfoDecoder): def __init__(self): - for attempt in range(self.RETRIES): - if not os.path.islink(EEPROM_SYMLINK): - time.sleep(1) - else: - break - if not os.path.exists(EEPROM_SYMLINK): - logger.log_error("Nowhere to read syseeprom from! No symlink or cache file found") - raise RuntimeError("No syseeprom symlink or cache file found") + logger.log_error("Nowhere to read syseeprom from! 
No symlink found") + raise RuntimeError("No syseeprom symlink found") self.eeprom_path = EEPROM_SYMLINK super(Eeprom, self).__init__(self.eeprom_path, 0, '', True) @@ -123,7 +115,6 @@ def get_system_eeprom_info(self): """ if self._eeprom_info_dict is None: self._eeprom_info_dict = {} - # Try get from DB first db_initialized = self._redis_hget('EEPROM_INFO|State', 'Initialized') if db_initialized == '1': diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py b/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py index 0128bbb54282..1da6fd8e5c69 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py @@ -27,11 +27,15 @@ try: from sonic_platform_base.fan_base import FanBase - from .led import FanLed, ComponentFaultyIndicator - from .utils import read_int_from_file, read_str_from_file, write_file + from sonic_py_common.logger import Logger + from .led import ComponentFaultyIndicator + from . import utils except ImportError as e: raise ImportError (str(e) + "- required module not found") +# Global logger class instance +logger = Logger() + PWM_MAX = 255 FAN_PATH = "/var/run/hw-management/thermal/" @@ -42,110 +46,19 @@ FAN_DIR_VALUE_INTAKE = 1 COOLING_STATE_PATH = "/var/run/hw-management/thermal/cooling_cur_state" -class Fan(FanBase): - """Platform-specific Fan class""" - STATUS_LED_COLOR_ORANGE = "orange" - min_cooling_level = 2 +class MlnxFan(FanBase): MIN_VALID_COOLING_LEVEL = 1 MAX_VALID_COOLING_LEVEL = 10 - # PSU fan speed vector - PSU_FAN_SPEED = ['0x3c', '0x3c', '0x3c', '0x3c', '0x3c', - '0x3c', '0x3c', '0x46', '0x50', '0x5a', '0x64'] - - def __init__(self, fan_index, fan_drawer, position, psu_fan = False, psu=None): - super(Fan, self).__init__() - # API index is starting from 0, Mellanox platform index is starting from 1 + def __init__(self, fan_index, position): + super(MlnxFan, self).__init__() self.index = fan_index + 1 - self.fan_drawer = fan_drawer 
self.position = position - self.is_psu_fan = psu_fan - self.psu = psu - if self.fan_drawer: - self.led = ComponentFaultyIndicator(self.fan_drawer.get_led()) - elif self.is_psu_fan: - from .psu import Psu - self.led = ComponentFaultyIndicator(Psu.get_shared_led()) - else: - self.led = FanLed(self.index) - - if not self.is_psu_fan: - self.fan_speed_get_path = "fan{}_speed_get".format(self.index) - self.fan_speed_set_path = "fan{}_speed_set".format(self.index) - self.fan_max_speed_path = os.path.join(FAN_PATH, "fan{}_max".format(self.index)) - self.fan_min_speed_path = os.path.join(FAN_PATH, "fan{}_min".format(self.index)) - self._name = "fan{}".format(self.index) - else: - self.fan_speed_get_path = "psu{}_fan1_speed_get".format(self.index) - self.fan_presence_path = "psu{}_fan1_speed_get".format(self.index) - self._name = 'psu{}_fan{}'.format(self.index, 1) - self.fan_max_speed_path = os.path.join(FAN_PATH, "psu{}_fan_max".format(self.index)) - self.fan_min_speed_path = os.path.join(FAN_PATH, "psu{}_fan_min".format(self.index)) - self.psu_i2c_bus_path = os.path.join(CONFIG_PATH, 'psu{0}_i2c_bus'.format(self.index)) - self.psu_i2c_addr_path = os.path.join(CONFIG_PATH, 'psu{0}_i2c_addr'.format(self.index)) - self.psu_i2c_command_path = os.path.join(CONFIG_PATH, 'fan_command') - - self.fan_status_path = "fan{}_fault".format(self.index) - self.fan_pwm_path = "pwm1" - - - def get_direction(self): - """ - Retrieves the fan's direction - - Returns: - A string, either FAN_DIRECTION_INTAKE or FAN_DIRECTION_EXHAUST - depending on fan direction - - Notes: - What Mellanox calls forward: - Air flows from fans side to QSFP side, for example: MSN2700-CS2F - which means intake in community - What Mellanox calls reverse: - Air flow from QSFP side to fans side, for example: MSN2700-CS2R - which means exhaust in community - According to hw-mgmt: - 1 stands for forward, in other words intake - 0 stands for reverse, in other words exhaust - """ - if self.is_psu_fan: - return 
self.FAN_DIRECTION_NOT_APPLICABLE - else: - return self.fan_drawer.get_direction() - def get_name(self): return self._name - def get_status(self): - """ - Retrieves the operational status of fan - - Returns: - bool: True if fan is operating properly, False if not - """ - status = 0 - if self.is_psu_fan: - status = 0 - else: - status = read_int_from_file(os.path.join(FAN_PATH, self.fan_status_path), 1) - - return status == 0 - - - def get_presence(self): - """ - Retrieves the presence status of fan - - Returns: - bool: True if fan is present, False if not - """ - if self.is_psu_fan: - return self.psu.get_presence() and self.psu.get_powergood_status() and os.path.exists(os.path.join(FAN_PATH, self.fan_presence_path)) - else: - return self.fan_drawer.get_presence() - def get_speed(self): """ Retrieves the speed of fan @@ -154,9 +67,9 @@ def get_speed(self): int: percentage of the max fan speed """ speed = 0 - speed_in_rpm = read_int_from_file(os.path.join(FAN_PATH, self.fan_speed_get_path)) + speed_in_rpm = utils.read_int_from_file(self.fan_speed_get_path) - max_speed_in_rpm = read_int_from_file(self.fan_max_speed_path) + max_speed_in_rpm = utils.read_int_from_file(self.fan_max_speed_path) if max_speed_in_rpm == 0: return speed_in_rpm @@ -166,72 +79,6 @@ def get_speed(self): return speed - - def get_target_speed(self): - """ - Retrieves the expected speed of fan - - Returns: - int: percentage of the max fan speed - """ - if self.is_psu_fan: - try: - # Get PSU fan target speed according to current system cooling level - cooling_level = self.get_cooling_level() - return int(self.PSU_FAN_SPEED[cooling_level], 16) - except Exception: - return self.get_speed() - - pwm = read_int_from_file(os.path.join(FAN_PATH, self.fan_speed_set_path)) - return int(round(pwm*100.0/PWM_MAX)) - - - def set_speed(self, speed): - """ - Set fan speed to expected value - - Args: - speed: An integer, the percentage of full fan speed to set fan to, - in the range 0 (off) to 100 (full speed) - - 
Returns: - bool: True if set success, False if fail. - """ - status = True - - if self.is_psu_fan: - if not self.get_presence(): - return False - from .thermal import logger - try: - bus = read_str_from_file(self.psu_i2c_bus_path, raise_exception=True) - addr = read_str_from_file(self.psu_i2c_addr_path, raise_exception=True) - command = read_str_from_file(self.psu_i2c_command_path, raise_exception=True) - speed = Fan.PSU_FAN_SPEED[int(speed // 10)] - command = "i2cset -f -y {0} {1} {2} {3} wp".format(bus, addr, command, speed) - subprocess.check_call(command, shell = True, universal_newlines=True) - return True - except subprocess.CalledProcessError as ce: - logger.log_error('Failed to call command {}, return code={}, command output={}'.format(ce.cmd, ce.returncode, ce.output)) - return False - except Exception as e: - logger.log_error('Failed to set PSU FAN speed - {}'.format(e)) - return False - - try: - cooling_level = int(speed // 10) - if cooling_level < self.min_cooling_level: - cooling_level = self.min_cooling_level - speed = self.min_cooling_level * 10 - self.set_cooling_level(cooling_level, cooling_level) - pwm = int(round(PWM_MAX*speed/100.0)) - write_file(os.path.join(FAN_PATH, self.fan_speed_set_path), pwm, raise_exception=True) - except (ValueError, IOError): - status = False - - return status - - def set_status_led(self, color): """ Set led to expected color @@ -245,7 +92,6 @@ def set_status_led(self, color): """ return self.led.set_status(color) - def get_status_led(self): """ Gets the state of the fan status LED @@ -255,7 +101,6 @@ def get_status_led(self): """ return self.led.get_status() - def get_speed_tolerance(self): """ Retrieves the speed tolerance of the fan @@ -289,9 +134,6 @@ def set_cooling_level(cls, level, cur_state): Change cooling level. The input level should be an integer value [1, 10]. 1 means 10%, 2 means 20%, 10 means 100%. 
""" - if not isinstance(level, int): - raise RuntimeError("Failed to set cooling level, input parameter must be integer") - if level < cls.MIN_VALID_COOLING_LEVEL or level > cls.MAX_VALID_COOLING_LEVEL: raise RuntimeError("Failed to set cooling level, level value must be in range [{}, {}], got {}".format( cls.MIN_VALID_COOLING_LEVEL, @@ -303,16 +145,214 @@ def set_cooling_level(cls, level, cur_state): # Reset FAN cooling level vector. According to low level team, # if we need set cooling level to X, we need first write a (10+X) # to cooling_cur_state file to reset the cooling level vector. - write_file(COOLING_STATE_PATH, level + 10, raise_exception=True) + utils.write_file(COOLING_STATE_PATH, level + 10, raise_exception=True) # We need set cooling level after resetting the cooling level vector - write_file(COOLING_STATE_PATH, cur_state, raise_exception=True) + utils.write_file(COOLING_STATE_PATH, cur_state, raise_exception=True) except (ValueError, IOError) as e: raise RuntimeError("Failed to set cooling level - {}".format(e)) @classmethod def get_cooling_level(cls): try: - return read_int_from_file(COOLING_STATE_PATH, raise_exception=True) + return utils.read_int_from_file(COOLING_STATE_PATH, raise_exception=True) except (ValueError, IOError) as e: raise RuntimeError("Failed to get cooling level - {}".format(e)) + + +class PsuFan(MlnxFan): + # PSU fan speed vector + PSU_FAN_SPEED = ['0x3c', '0x3c', '0x3c', '0x3c', '0x3c', + '0x3c', '0x3c', '0x46', '0x50', '0x5a', '0x64'] + + def __init__(self, fan_index, position, psu): + super(PsuFan, self).__init__(fan_index, position) + self._name = 'psu{}_fan{}'.format(self.index, position) + self.psu = psu + + from .psu import Psu + self.led = ComponentFaultyIndicator(Psu.get_shared_led()) + self.fan_speed_get_path = os.path.join(FAN_PATH, "psu{}_fan1_speed_get".format(self.index)) + self.fan_presence_path = os.path.join(FAN_PATH, "psu{}_fan1_speed_get".format(self.index)) + self.fan_max_speed_path = os.path.join(FAN_PATH, 
"psu{}_fan_max".format(self.index)) + self.fan_min_speed_path = os.path.join(FAN_PATH, "psu{}_fan_min".format(self.index)) + self.psu_i2c_bus_path = os.path.join(CONFIG_PATH, 'psu{0}_i2c_bus'.format(self.index)) + self.psu_i2c_addr_path = os.path.join(CONFIG_PATH, 'psu{0}_i2c_addr'.format(self.index)) + self.psu_i2c_command_path = os.path.join(CONFIG_PATH, 'fan_command') + + def get_direction(self): + """ + Retrieves the fan's direction + + Returns: + A string, either FAN_DIRECTION_INTAKE or FAN_DIRECTION_EXHAUST + depending on fan direction + + Notes: + What Mellanox calls forward: + Air flows from fans side to QSFP side, for example: MSN2700-CS2F + which means intake in community + What Mellanox calls reverse: + Air flow from QSFP side to fans side, for example: MSN2700-CS2R + which means exhaust in community + According to hw-mgmt: + 1 stands for forward, in other words intake + 0 stands for reverse, in other words exhaust + """ + return self.FAN_DIRECTION_NOT_APPLICABLE + + def get_status(self): + """ + Retrieves the operational status of fan + + Returns: + bool: True if fan is operating properly, False if not + """ + return True + + def get_presence(self): + """ + Retrieves the presence status of fan + + Returns: + bool: True if fan is present, False if not + """ + return self.psu.get_presence() and self.psu.get_powergood_status() and os.path.exists(self.fan_presence_path) + + def get_target_speed(self): + """ + Retrieves the expected speed of fan + + Returns: + int: percentage of the max fan speed + """ + try: + # Get PSU fan target speed according to current system cooling level + cooling_level = self.get_cooling_level() + return int(self.PSU_FAN_SPEED[cooling_level], 16) + except Exception: + return self.get_speed() + + def set_speed(self, speed): + """ + Set fan speed to expected value + + Args: + speed: An integer, the percentage of full fan speed to set fan to, + in the range 0 (off) to 100 (full speed) + + Returns: + bool: True if set success, False if 
fail. + """ + if not self.get_presence(): + return False + + try: + bus = utils.read_str_from_file(self.psu_i2c_bus_path, raise_exception=True) + addr = utils.read_str_from_file(self.psu_i2c_addr_path, raise_exception=True) + command = utils.read_str_from_file(self.psu_i2c_command_path, raise_exception=True) + speed = self.PSU_FAN_SPEED[int(speed // 10)] + command = "i2cset -f -y {0} {1} {2} {3} wp".format(bus, addr, command, speed) + subprocess.check_call(command, shell = True, universal_newlines=True) + return True + except subprocess.CalledProcessError as ce: + logger.log_error('Failed to call command {}, return code={}, command output={}'.format(ce.cmd, ce.returncode, ce.output)) + return False + except Exception as e: + logger.log_error('Failed to set PSU FAN speed - {}'.format(e)) + return False + +class Fan(MlnxFan): + """Platform-specific Fan class""" + + min_cooling_level = 2 + + def __init__(self, fan_index, fan_drawer, position): + super(Fan, self).__init__(fan_index, position) + + self.fan_drawer = fan_drawer + self.led = ComponentFaultyIndicator(self.fan_drawer.get_led()) + + self._name = "fan{}".format(self.index) + self.fan_speed_get_path = os.path.join(FAN_PATH, "fan{}_speed_get".format(self.index)) + self.fan_speed_set_path = os.path.join(FAN_PATH, "fan{}_speed_set".format(self.index)) + self.fan_max_speed_path = os.path.join(FAN_PATH, "fan{}_max".format(self.index)) + self.fan_min_speed_path = os.path.join(FAN_PATH, "fan{}_min".format(self.index)) + + self.fan_status_path = os.path.join(FAN_PATH, "fan{}_fault".format(self.index)) + + def get_direction(self): + """ + Retrieves the fan's direction + + Returns: + A string, either FAN_DIRECTION_INTAKE or FAN_DIRECTION_EXHAUST + depending on fan direction + + Notes: + What Mellanox calls forward: + Air flows from fans side to QSFP side, for example: MSN2700-CS2F + which means intake in community + What Mellanox calls reverse: + Air flow from QSFP side to fans side, for example: MSN2700-CS2R + which 
means exhaust in community + According to hw-mgmt: + 1 stands for forward, in other words intake + 0 stands for reverse, in other words exhaust + """ + return self.fan_drawer.get_direction() + + def get_status(self): + """ + Retrieves the operational status of fan + + Returns: + bool: True if fan is operating properly, False if not + """ + + return utils.read_int_from_file(self.fan_status_path, 1) == 0 + + def get_presence(self): + """ + Retrieves the presence status of fan + + Returns: + bool: True if fan is present, False if not + """ + return self.fan_drawer.get_presence() + + def get_target_speed(self): + """ + Retrieves the expected speed of fan + + Returns: + int: percentage of the max fan speed + """ + pwm = utils.read_int_from_file(self.fan_speed_set_path) + return int(round(pwm*100.0/PWM_MAX)) + + def set_speed(self, speed): + """ + Set fan speed to expected value + + Args: + speed: An integer, the percentage of full fan speed to set fan to, + in the range 0 (off) to 100 (full speed) + + Returns: + bool: True if set success, False if fail. 
+ """ + status = True + + try: + cooling_level = int(speed // 10) + if cooling_level < self.min_cooling_level: + cooling_level = self.min_cooling_level + speed = self.min_cooling_level * 10 + self.set_cooling_level(cooling_level, cooling_level) + pwm = int(PWM_MAX*speed/100.0) + utils.write_file(self.fan_speed_set_path, pwm, raise_exception=True) + except (ValueError, IOError): + status = False + + return status diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/fan_drawer.py b/platform/mellanox/mlnx-platform-api/sonic_platform/fan_drawer.py index 23ecba001946..848abb04aca3 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/fan_drawer.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/fan_drawer.py @@ -27,18 +27,21 @@ try: from sonic_platform_base.fan_drawer_base import FanDrawerBase from sonic_platform_base.fan_base import FanBase + from sonic_py_common.logger import Logger from .led import FanLed, SharedLed - from .utils import read_int_from_file + from . 
import utils except ImportError as e: raise ImportError (str(e) + "- required module not found") +# Global logger class instance +logger = Logger() + class MellanoxFanDrawer(FanDrawerBase): - def __init__(self, index, fan_data): + def __init__(self, index): from .fan import FAN_PATH super(MellanoxFanDrawer, self).__init__() self._index = index + 1 - self._fan_data = fan_data self._presence_path = os.path.join(FAN_PATH, 'fan{}_status'.format(self._index)) self._led = None @@ -49,33 +52,25 @@ def get_led(self): return self._led def get_presence(self): - if not self._fan_data['hot_swappable']: - return True - - status = 0 - try: - with open(self._presence_path, 'r') as presence_status: - status = int(presence_status.read()) - except (ValueError, IOError) as e: - status = 0 - - return status == 1 + return utils.read_int_from_file(self._presence_path) == 1 def get_direction(self): - if not self._fan_data['support_fan_direction'] or not self.get_presence(): + if not self.get_presence(): return FanBase.FAN_DIRECTION_NOT_APPLICABLE try: from .fan import FAN_DIR, FAN_DIR_VALUE_INTAKE, FAN_DIR_VALUE_EXHAUST - fan_dir = read_int_from_file(FAN_DIR.format(self._index), raise_exception=True) + fan_dir = utils.read_int_from_file(FAN_DIR.format(self._index), raise_exception=True) if fan_dir == FAN_DIR_VALUE_INTAKE: return FanBase.FAN_DIRECTION_INTAKE elif fan_dir == FAN_DIR_VALUE_EXHAUST: return FanBase.FAN_DIRECTION_EXHAUST else: - raise RuntimeError("Got wrong value {} for fan direction {}".format(fan_dir, self._index)) + logger.log_error("Got wrong value {} for fan direction {}".format(fan_dir, self._index)) + return FanBase.FAN_DIRECTION_NOT_APPLICABLE except (ValueError, IOError) as e: - raise RuntimeError("Failed to read fan direction status to {}".format(repr(e))) + logger.log_error("Failed to read fan direction status to {}".format(repr(e))) + return FanBase.FAN_DIRECTION_NOT_APPLICABLE def set_status_led(self, color): """ @@ -113,12 +108,12 @@ def is_replaceable(self): 
Returns: bool: True if it is replaceable. """ - return self._fan_data['hot_swappable'] + return True class RealDrawer(MellanoxFanDrawer): - def __init__(self, index, fan_data): - super(RealDrawer, self).__init__(index, fan_data) + def __init__(self, index): + super(RealDrawer, self).__init__(index) self._name = 'drawer{}'.format(self._index) self._led = SharedLed(FanLed(self._index)) @@ -127,9 +122,15 @@ def get_name(self): class VirtualDrawer(MellanoxFanDrawer): - def __init__(self, index, fan_data): - super(VirtualDrawer, self).__init__(index, fan_data) + def __init__(self, index): + super(VirtualDrawer, self).__init__(index) self._led = SharedLed(FanLed(None)) def get_name(self): return 'N/A' + + def get_presence(self): + return True + + def is_replaceable(self): + return False diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/led.py b/platform/mellanox/mlnx-platform-api/sonic_platform/led.py index 2d8b16177016..2f27386814c7 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/led.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/led.py @@ -15,7 +15,11 @@ # limitations under the License. # import os +from sonic_py_common.logger import Logger +from . 
import utils + +logger = Logger() class Led(object): STATUS_LED_COLOR_GREEN = 'green' @@ -26,7 +30,7 @@ class Led(object): STATUS_LED_COLOR_ORANGE_BLINK = 'orange_blink' STATUS_LED_COLOR_OFF = 'off' - LED_ON = '1' + LED_ON = '255' LED_OFF = '0' LED_BLINK = '50' @@ -42,12 +46,11 @@ def set_status(self, color): self._stop_blink(led_cap_list) blink_pos = color.find('blink') if blink_pos != -1: - return self._set_status_blink(color, blink_pos, led_cap_list) + return self._set_status_blink(color, led_cap_list) if color == Led.STATUS_LED_COLOR_GREEN: - with open(self.get_green_led_path(), 'w') as led: - led.write(Led.LED_ON) - status = True + utils.write_file(self.get_green_led_path(), Led.LED_ON) + status = True elif color == Led.STATUS_LED_COLOR_RED: # Some led don't support red led but support orange led, in this case we set led to orange if Led.STATUS_LED_COLOR_RED in led_cap_list: @@ -57,19 +60,15 @@ def set_status(self, color): else: return False - with open(led_path, 'w') as led: - led.write(Led.LED_ON) - status = True + utils.write_file(led_path, Led.LED_ON) + status = True elif color == Led.STATUS_LED_COLOR_OFF: if Led.STATUS_LED_COLOR_GREEN in led_cap_list: - with open(self.get_green_led_path(), 'w') as led: - led.write(Led.LED_OFF) + utils.write_file(self.get_green_led_path(), Led.LED_OFF) if Led.STATUS_LED_COLOR_RED in led_cap_list: - with open(self.get_red_led_path(), 'w') as led: - led.write(Led.LED_OFF) + utils.write_file(self.get_red_led_path(), Led.LED_OFF) if Led.STATUS_LED_COLOR_ORANGE in led_cap_list: - with open(self.get_orange_led_path(), 'w') as led: - led.write(Led.LED_OFF) + utils.write_file(self.get_orange_led_path(), Led.LED_OFF) status = True else: @@ -79,7 +78,7 @@ def set_status(self, color): return status - def _set_status_blink(self, color, blink_pos, led_cap_list): + def _set_status_blink(self, color, led_cap_list): if color not in led_cap_list: if color == Led.STATUS_LED_COLOR_RED_BLINK and Led.STATUS_LED_COLOR_ORANGE_BLINK in 
led_cap_list: color = Led.STATUS_LED_COLOR_ORANGE_BLINK @@ -89,16 +88,14 @@ def _set_status_blink(self, color, blink_pos, led_cap_list): return False if Led.STATUS_LED_COLOR_GREEN_BLINK == color: - self._set_led_blink_status(self.get_green_led_delay_on_path(), self.get_green_led_delay_off_path(), Led.LED_BLINK) + return self._set_led_blink_status(self.get_green_led_delay_on_path(), self.get_green_led_delay_off_path(), Led.LED_BLINK) elif Led.STATUS_LED_COLOR_RED_BLINK == color: - self._set_led_blink_status(self.get_red_led_delay_on_path(), self.get_red_led_delay_off_path(), Led.LED_BLINK) + return self._set_led_blink_status(self.get_red_led_delay_on_path(), self.get_red_led_delay_off_path(), Led.LED_BLINK) elif Led.STATUS_LED_COLOR_ORANGE_BLINK == color: - self._set_led_blink_status(self.get_orange_led_delay_on_path(), self.get_orange_led_delay_off_path(), Led.LED_BLINK) + return self._set_led_blink_status(self.get_orange_led_delay_on_path(), self.get_orange_led_delay_off_path(), Led.LED_BLINK) else: return False - return True - def _stop_blink(self, led_cap_list): try: if Led.STATUS_LED_COLOR_GREEN_BLINK in led_cap_list: @@ -111,10 +108,9 @@ def _stop_blink(self, led_cap_list): return def _set_led_blink_status(self, delay_on_file, delay_off_file, value): - with open(delay_on_file, 'w') as led: - led.write(value) - with open(delay_off_file, 'w') as led: - led.write(value) + utils.write_file(delay_on_file, value) + utils.write_file(delay_off_file, value) + return True def get_status(self): led_cap_list = self.get_capability() @@ -126,18 +122,15 @@ def get_status(self): if blink_status is not None: return blink_status - with open(self.get_green_led_path(), 'r') as led: - if Led.LED_OFF != led.read().rstrip('\n'): - return Led.STATUS_LED_COLOR_GREEN + if utils.read_str_from_file(self.get_green_led_path()) != Led.LED_OFF: + return Led.STATUS_LED_COLOR_GREEN if Led.STATUS_LED_COLOR_RED in led_cap_list: - with open(self.get_red_led_path(), 'r') as led: - if Led.LED_OFF 
!= led.read().rstrip('\n'): - return Led.STATUS_LED_COLOR_RED + if utils.read_str_from_file(self.get_red_led_path()) != Led.LED_OFF: + return Led.STATUS_LED_COLOR_RED if Led.STATUS_LED_COLOR_ORANGE in led_cap_list: - with open(self.get_orange_led_path(), 'r') as led: - if Led.LED_OFF != led.read().rstrip('\n'): - return Led.STATUS_LED_COLOR_RED + if utils.read_str_from_file(self.get_orange_led_path()) != Led.LED_OFF: + return Led.STATUS_LED_COLOR_RED except (ValueError, IOError) as e: raise RuntimeError("Failed to read led status due to {}".format(repr(e))) @@ -148,6 +141,7 @@ def _get_blink_status(self, led_cap_list): if Led.STATUS_LED_COLOR_GREEN_BLINK in led_cap_list: if self._is_led_blinking(self.get_green_led_delay_on_path(), self.get_green_led_delay_off_path()): return Led.STATUS_LED_COLOR_GREEN_BLINK + if Led.STATUS_LED_COLOR_RED_BLINK in led_cap_list: if self._is_led_blinking(self.get_red_led_delay_on_path(), self.get_red_led_delay_off_path()): return Led.STATUS_LED_COLOR_RED_BLINK @@ -160,126 +154,73 @@ def _get_blink_status(self, led_cap_list): return None def _is_led_blinking(self, delay_on_file, delay_off_file): - with open(delay_on_file, 'r') as led: - delay_on = led.read().rstrip('\n') - with open(delay_off_file, 'r') as led: - delay_off = led.read().rstrip('\n') + delay_on = utils.read_str_from_file(delay_on_file, default=Led.LED_OFF, log_func=None) + delay_off = utils.read_str_from_file(delay_off_file, default=Led.LED_OFF, log_func=None) return delay_on != Led.LED_OFF and delay_off != Led.LED_OFF def get_capability(self): - cap_list = None - try: - with open(self.get_led_cap_path(), 'r') as led_cap: - caps = led_cap.read() - cap_list = set(caps.split()) - except (ValueError, IOError): - pass - - return cap_list + caps = utils.read_str_from_file(self.get_led_cap_path()) + return set(caps.split()) def get_green_led_path(self): - pass + return os.path.join(Led.LED_PATH, 'led_{}_green'.format(self._led_id)) def get_green_led_delay_off_path(self): - 
return '{}_delay_off'.format(self.get_green_led_path()) + return os.path.join(Led.LED_PATH, 'led_{}_green_delay_off'.format(self._led_id)) def get_green_led_delay_on_path(self): - return '{}_delay_on'.format(self.get_green_led_path()) + return os.path.join(Led.LED_PATH, 'led_{}_green_delay_on'.format(self._led_id)) + + def get_green_led_trigger(self): + return os.path.join(Led.LED_PATH, 'led_{}_green_trigger'.format(self._led_id)) def get_red_led_path(self): - pass + return os.path.join(Led.LED_PATH, 'led_{}_red'.format(self._led_id)) def get_red_led_delay_off_path(self): - return '{}_delay_off'.format(self.get_red_led_path()) + return os.path.join(Led.LED_PATH, 'led_{}_red_delay_off'.format(self._led_id)) def get_red_led_delay_on_path(self): - return '{}_delay_on'.format(self.get_red_led_path()) + return os.path.join(Led.LED_PATH, 'led_{}_red_delay_on'.format(self._led_id)) + + def get_red_led_trigger(self): + return os.path.join(Led.LED_PATH, 'led_{}_red_trigger'.format(self._led_id)) def get_orange_led_path(self): - pass + return os.path.join(Led.LED_PATH, 'led_{}_orange'.format(self._led_id)) def get_orange_led_delay_off_path(self): - return '{}_delay_off'.format(self.get_orange_led_path()) + return os.path.join(Led.LED_PATH, 'led_{}_orange_delay_off'.format(self._led_id)) def get_orange_led_delay_on_path(self): - return '{}_delay_on'.format(self.get_orange_led_path()) + return os.path.join(Led.LED_PATH, 'led_{}_orange_delay_on'.format(self._led_id)) + + def get_orange_led_trigger(self): + return os.path.join(Led.LED_PATH, 'led_{}_orange_trigger'.format(self._led_id)) def get_led_cap_path(self): - pass + return os.path.join(Led.LED_PATH, 'led_{}_capability'.format(self._led_id)) - -class FanLed(Led): - LED_PATH = "/var/run/hw-management/led/" +class FanLed(Led): def __init__(self, index): if index is not None: - self._green_led_path = os.path.join(Led.LED_PATH, "led_fan{}_green".format(index)) - self._red_led_path = os.path.join(Led.LED_PATH, 
"led_fan{}_red".format(index)) - self._orange_led_path = os.path.join(Led.LED_PATH, "led_fan{}_orange".format(index)) - self._led_cap_path = os.path.join(Led.LED_PATH, "led_fan{}_capability".format(index)) + self._led_id = 'fan{}'.format(index) else: - self._green_led_path = os.path.join(Led.LED_PATH, "led_fan_green") - self._red_led_path = os.path.join(Led.LED_PATH, "led_fan_red") - self._orange_led_path = os.path.join(Led.LED_PATH, "led_fan_orange") - self._led_cap_path = os.path.join(Led.LED_PATH, "led_fan_capability") - - def get_green_led_path(self): - return self._green_led_path - - def get_red_led_path(self): - return self._red_led_path - - def get_orange_led_path(self): - return self._orange_led_path - - def get_led_cap_path(self): - return self._led_cap_path + self._led_id = 'fan' class PsuLed(Led): def __init__(self, index): if index is not None: - self._green_led_path = os.path.join(Led.LED_PATH, "led_psu{}_green".format(index)) - self._red_led_path = os.path.join(Led.LED_PATH, "led_psu{}_red".format(index)) - self._orange_led_path = os.path.join(Led.LED_PATH, "led_psu{}_orange".format(index)) - self._led_cap_path = os.path.join(Led.LED_PATH, "led_psu{}_capability".format(index)) + self._led_id = 'psu{}'.format(index) else: - self._green_led_path = os.path.join(Led.LED_PATH, "led_psu_green") - self._red_led_path = os.path.join(Led.LED_PATH, "led_psu_red") - self._orange_led_path = os.path.join(Led.LED_PATH, "led_psu_orange") - self._led_cap_path = os.path.join(Led.LED_PATH, "led_psu_capability") - - def get_green_led_path(self): - return self._green_led_path - - def get_red_led_path(self): - return self._red_led_path - - def get_orange_led_path(self): - return self._orange_led_path - - def get_led_cap_path(self): - return self._led_cap_path + self._led_id = 'psu' class SystemLed(Led): def __init__(self): - self._green_led_path = os.path.join(Led.LED_PATH, "led_status_green") - self._red_led_path = os.path.join(Led.LED_PATH, "led_status_red") - 
self._orange_led_path = os.path.join(Led.LED_PATH, "led_status_orange") - self._led_cap_path = os.path.join(Led.LED_PATH, "led_status_capability") - - def get_green_led_path(self): - return self._green_led_path - - def get_red_led_path(self): - return self._red_led_path - - def get_orange_led_path(self): - return self._orange_led_path - - def get_led_cap_path(self): - return self._led_cap_path + self._led_id = 'status' class SharedLed(object): diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/module.py b/platform/mellanox/mlnx-platform-api/sonic_platform/module.py new file mode 100644 index 000000000000..8cd6ffa9b8db --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/module.py @@ -0,0 +1,249 @@ +# +# Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. +# Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import redis +import threading +from sonic_platform_base.module_base import ModuleBase +from sonic_py_common.logger import Logger + +from . 
import utils +from .device_data import DeviceDataManager +from .vpd_parser import VpdParser + +# Global logger class instance +logger = Logger() + + +class Module(ModuleBase): + STATE_ACTIVATED = 1 + STATE_DEACTIVATED = 0 + + STATE_DB = 6 + STATE_MODULAR_CHASSIS_SLOT_TABLE = 'MODULAR_CHASSIS_SLOT|{}' + FIELD_SEQ_NO = 'seq_no' + redis_client = redis.Redis(db = STATE_DB) + + def __init__(self, slot_id): + super(Module, self).__init__() + self.slot_id = slot_id + self.seq_no = 0 + self.current_state = Module.STATE_DEACTIVATED + self.lock = threading.Lock() + + self.sfp_initialized_count = 0 + self.sfp_count = 0 + self.vpd_parser = VpdParser('/run/hw-management/lc{}/eeprom/vpd_parsed') + + def get_name(self): + return 'LINE-CARD{}'.format(self.slot_id) + + def get_model(self): + """ + Retrieves the model number (or part number) of the device + + Returns: + string: Model/part number of device + """ + return self.vpd_parser.get_model() + + def get_serial(self): + """ + Retrieves the serial number of the device + + Returns: + string: Serial number of device + """ + return self.vpd_parser.get_serial() + + def get_revision(self): + """ + Retrieves the hardware revision of the device + + Returns: + string: Revision value of device + """ + return self.vpd_parser.get_revision() + + def get_type(self): + return ModuleBase.MODULE_TYPE_LINE + + def get_slot(self): + return self.slot_id + + def get_presence(self): + return utils.read_int_from_file('/run/hw-management/system/lc{}_present'.format(self.slot_id)) == 1 + + def get_position_in_parent(self): + return self.slot_id + + def is_replaceable(self): + return True + + def get_oper_status(self): # TODO: read from DB? 
+ if utils.read_int_from_file('/run/hw-management/system/lc{}_active'.format(self.slot_id)) == 1: + return ModuleBase.MODULE_STATUS_ONLINE + elif utils.read_int_from_file('/run/hw-management/system/lc{}_present'.format(self.slot_id)) == 1: + return ModuleBase.MODULE_STATUS_PRESENT + elif utils.read_int_from_file('/run/hw-management/system/lc{}_present'.format(self.slot_id)) == 0: + return ModuleBase.MODULE_STATUS_EMPTY + else: + return ModuleBase.MODULE_STATUS_FAULT + + def _check_state(self): + """Check Module status change: + 1. If status sysfs file value has been changed TODO: read from DB? + 2. If sequence NO has been changed which means line card has been removed and inserted again. + """ + seq_no = self._get_seq_no() + state = utils.read_int_from_file('/run/hw-management/system/lc{}_powered'.format(self.slot_id), log_func=None) + if state != self.current_state: + self._re_init() + elif seq_no != self.seq_no: + if state == Module.STATE_ACTIVATED: # LC has been replaced, need re-initialize + self._re_init() + self.current_state = state + self.seq_no = seq_no + + def _get_seq_no(self): + try: + seq_no = Module.redis_client.hget(Module.STATE_MODULAR_CHASSIS_SLOT_TABLE.format(self.slot_id), Module.FIELD_SEQ_NO) + seq_no = seq_no.decode().strip() + except Exception as e: + seq_no = 0 + return seq_no + + def _re_init(self): + self._thermal_list = [] + self._sfp_list = [] + self._sfp_count = 0 + + + ############################################## + # THERMAL methods + ############################################## + + def initialize_thermals(self): + self._check_state() + if self.current_state == Module.STATE_ACTIVATED and not self._thermal_list: + from .thermal import initialize_linecard_thermals + self._thermal_list = initialize_linecard_thermals(self.get_name(), self.slot_id) # TODO: add presence_cb? 
+ + def get_num_thermals(self): + """ + Retrieves the number of thermals available on this module + + Returns: + An integer, the number of thermals available on this module + """ + return DeviceDataManager.get_gearbox_count('/run/hw-management/lc{}/config'.format(self.slot_id)) + + def get_all_thermals(self): + """ + Retrieves all thermals available on this module + + Returns: + A list of objects derived from ThermalBase representing all thermals + available on this module + """ + with self.lock: + self.initialize_thermals() + return self._thermal_list + + def get_thermal(self, index): + """ + Retrieves thermal unit represented by (0-based) index + + Args: + index: An integer, the index (0-based) of the thermal to + retrieve + + Returns: + An object dervied from ThermalBase representing the specified thermal + """ + with self.lock: + self.initialize_thermals() + return super(Module, self).get_thermal(index) + + ############################################## + # SFP methods + ############################################## + def _create_sfp_object(self, index): + from .sfp import SFP + return SFP(index, slot_id=self.slot_id, linecard_port_count=self.sfp_count, lc_name=self.get_name()) + + def initialize_single_sfp(self, index): + self._check_state() + if self.current_state == Module.STATE_ACTIVATED: + sfp_count = self.get_num_sfps() + if index < sfp_count: + if not self._sfp_list: + self._sfp_list = [None] * sfp_count + + if not self._sfp_list[index]: + self._sfp_list[index] = self._create_sfp_object(index) + self.sfp_initialized_count += 1 + + def initialize_sfps(self): + self._check_state() + if self.current_state == Module.STATE_ACTIVATED: + if not self._sfp_list: + sfp_count = self.get_num_sfps() + for index in range(sfp_count): + self._sfp_list.append(self._create_sfp_object(index)) + self.sfp_initialized_count = sfp_count + elif self.sfp_initialized_count != len(self._sfp_list): + for index in range(len(self._sfp_list)): + if self._sfp_list[index] is None: + 
self._sfp_list[index] = self._create_sfp_object(index) + self.sfp_initialized_count = len(self._sfp_list) + + def get_num_sfps(self): + """ + Retrieves the number of sfps available on this module + + Returns: + An integer, the number of sfps available on this module + """ + if self.sfp_count == 0: + self.sfp_count = DeviceDataManager.get_linecard_sfp_count(self.slot_id) + return self.sfp_count + + def get_all_sfps(self): + """ + Retrieves all sfps available on this module + + Returns: + A list of objects derived from PsuBase representing all sfps + available on this module + """ + with self.lock: + self.initialize_sfps() + return self._sfp_list + + def get_sfp(self, index): + """ + Retrieves sfp represented by (0-based) index + + Args: + index: An integer, the index (0-based) of the sfp to retrieve + + Returns: + An object dervied from SfpBase representing the specified sfp + """ + with self.lock: + self.initialize_single_sfp(index) + return super(Module, self).get_sfp(index) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/platform.py b/platform/mellanox/mlnx-platform-api/sonic_platform/platform.py index 3b26e7ede403..9100e8fb2f26 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/platform.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/platform.py @@ -22,23 +22,15 @@ try: from sonic_platform_base.platform_base import PlatformBase - from sonic_platform.chassis import Chassis - from sonic_py_common.device_info import get_platform - from . 
import utils + from .chassis import Chassis, ModularChassis + from .device_data import DeviceDataManager except ImportError as e: raise ImportError(str(e) + "- required module not found") class Platform(PlatformBase): def __init__(self): PlatformBase.__init__(self) - self._chassis = Chassis() - self._chassis.initialize_eeprom() - platform_name = get_platform() - if "simx" not in platform_name: - self._chassis.initialize_psu() - if utils.is_host(): - self._chassis.initialize_components() - self._chassis.initizalize_system_led() - else: - self._chassis.initialize_fan() - self._chassis.initialize_thermals() + if DeviceDataManager.get_linecard_count() == 0: + self._chassis = Chassis() + else: + self._chassis = ModularChassis() diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py b/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py index 8dd8bea2dc24..a71b8fc308fd 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py @@ -23,12 +23,12 @@ ############################################################################# try: - import os.path + import os from sonic_platform_base.psu_base import PsuBase from sonic_py_common.logger import Logger - from sonic_platform.fan import Fan from .led import PsuLed, SharedLed, ComponentFaultyIndicator - from .device_data import DEVICE_DATA + from . import utils + from .vpd_parser import VpdParser except ImportError as e: raise ImportError (str(e) + "- required module not found") @@ -36,210 +36,28 @@ # Global logger class instance logger = Logger() -psu_list = [] - -PSU_CURRENT = "current" -PSU_VOLTAGE = "voltage" -PSU_POWER = "power" -PSU_VPD = "vpd" - -SN_VPD_FIELD = "SN_VPD_FIELD" -PN_VPD_FIELD = "PN_VPD_FIELD" -REV_VPD_FIELD = "REV_VPD_FIELD" - -# in most platforms the file psuX_curr, psuX_volt and psuX_power contain current, voltage and power data respectively. 
-# but there are exceptions which will be handled by the following dictionary - -platform_dict_psu = {'x86_64-mlnx_msn3420-r0': 1, 'x86_64-mlnx_msn3700-r0': 1, 'x86_64-mlnx_msn3700c-r0': 1, - 'x86_64-mlnx_msn3800-r0': 1, 'x86_64-mlnx_msn4600-r0': 1, 'x86_64-mlnx_msn4600c-r0': 1, - 'x86_64-mlnx_msn4700-r0': 1, 'x86_64-mlnx_msn4410-r0': 1, 'x86_64-mlnx_msn2010-r0' : 2, - 'x86_64-mlnx_msn2100-r0': 2} - -psu_profile_list = [ - # default filename convention - { - PSU_CURRENT : "power/psu{}_curr", - PSU_VOLTAGE : "power/psu{}_volt", - PSU_POWER : "power/psu{}_power", - PSU_VPD : "eeprom/psu{}_vpd" - }, - # for 3420, 3700, 3700c, 3800, 4600c, 4700 - { - PSU_CURRENT : "power/psu{}_curr", - PSU_VOLTAGE : "power/psu{}_volt_out2", - PSU_POWER : "power/psu{}_power", - PSU_VPD : "eeprom/psu{}_vpd" - }, - # for fixed platforms 2100, 2010 - { - PSU_CURRENT : "power/psu{}_curr", - PSU_VOLTAGE : "power/psu{}_volt_out2", - PSU_POWER : "power/psu{}_power", - PSU_VPD : None - } -] - -class Psu(PsuBase): - """Platform-specific Psu class""" +PSU_PATH = '/var/run/hw-management/' - shared_led = None - def __init__(self, psu_index, platform): - global psu_list - PsuBase.__init__(self) - # PSU is 1-based on Mellanox platform +class FixedPsu(PsuBase): + def __init__(self, psu_index): + super(FixedPsu, self).__init__() self.index = psu_index + 1 - psu_list.append(self.index) - self.psu_path = "/var/run/hw-management/" - psu_oper_status = "thermal/psu{}_pwr_status".format(self.index) - #psu_oper_status should always be present for all platforms - self.psu_oper_status = os.path.join(self.psu_path, psu_oper_status) - self._name = "PSU {}".format(psu_index + 1) - - if platform in platform_dict_psu: - filemap = psu_profile_list[platform_dict_psu[platform]] - else: - filemap = psu_profile_list[0] - - self.psu_data = DEVICE_DATA[platform]['psus'] - psu_vpd = filemap[PSU_VPD] - - self.model = "N/A" - self.serial = "N/A" - self.rev = "N/A" - - if psu_vpd is not None: - self.psu_vpd = 
os.path.join(self.psu_path, psu_vpd.format(self.index)) - self.vpd_data = self._read_vpd_file(self.psu_vpd) - - if PN_VPD_FIELD in self.vpd_data: - self.model = self.vpd_data[PN_VPD_FIELD] - else: - logger.log_error("Fail to read PSU{} model number: No key {} in VPD {}".format(self.index, PN_VPD_FIELD, self.psu_vpd)) - - if SN_VPD_FIELD in self.vpd_data: - self.serial = self.vpd_data[SN_VPD_FIELD] - else: - logger.log_error("Fail to read PSU{} serial number: No key {} in VPD {}".format(self.index, SN_VPD_FIELD, self.psu_vpd)) - - if REV_VPD_FIELD in self.vpd_data: - self.rev = self.vpd_data[REV_VPD_FIELD] - else: - logger.log_error("Fail to read PSU{} serial number: No key {} in VPD {}".format(self.index, REV_VPD_FIELD, self.psu_vpd)) - - else: - logger.log_info("Not reading PSU{} VPD data: Platform is fixed".format(self.index)) - - if not self.psu_data['hot_swappable']: - self.always_present = True - self.psu_voltage = None - self.psu_current = None - self.psu_power = None - self.psu_presence = None - self.psu_temp = None - self.psu_temp_threshold = None - else: - self.always_present = False - psu_voltage = filemap[PSU_VOLTAGE].format(self.index) - psu_voltage = os.path.join(self.psu_path, psu_voltage) - self.psu_voltage = psu_voltage - - psu_current = filemap[PSU_CURRENT].format(self.index) - psu_current = os.path.join(self.psu_path, psu_current) - self.psu_current = psu_current - - psu_power = filemap[PSU_POWER].format(self.index) - psu_power = os.path.join(self.psu_path, psu_power) - self.psu_power = psu_power - - psu_presence = "thermal/psu{}_status".format(self.index) - psu_presence = os.path.join(self.psu_path, psu_presence) - self.psu_presence = psu_presence - - self.psu_temp = os.path.join(self.psu_path, 'thermal/psu{}_temp'.format(self.index)) - self.psu_temp_threshold = os.path.join(self.psu_path, 'thermal/psu{}_temp_max'.format(self.index)) - - # unplugable PSU has no FAN - if self.psu_data['hot_swappable']: - fan = Fan(psu_index, None, 1, True, self) - 
self._fan_list.append(fan) - - if self.psu_data['led_num'] == 1: - self.led = ComponentFaultyIndicator(Psu.get_shared_led()) - else: # 2010/2100 - self.led = PsuLed(self.index) - - # initialize thermal for PSU - from .thermal import initialize_psu_thermals - initialize_psu_thermals(platform, self._thermal_list, self.index, self.get_power_available_status) - + self._name = "PSU {}".format(self.index) + self.psu_oper_status = os.path.join(PSU_PATH, "thermal/psu{}_pwr_status".format(self.index)) + self._led = None def get_name(self): return self._name - - def _read_vpd_file(self, filename): - """ - Read a vpd file parsed from eeprom with keys and values. - Returns a dictionary. - """ - result = {} - try: - if not os.path.exists(filename): - return result - with open(filename, 'r') as fileobj: - for line in fileobj.readlines(): - key, val = line.split(":") - result[key.strip()] = val.strip() - except Exception as e: - logger.log_error("Fail to read VPD file {} due to {}".format(filename, repr(e))) - return result - - - def _read_generic_file(self, filename, len): - """ - Read a generic file, returns the contents of the file - """ - result = 0 - try: - if not os.path.exists(filename): - return result - with open(filename, 'r') as fileobj: - result = int(fileobj.read().strip()) - except Exception as e: - logger.log_info("Fail to read file {} due to {}".format(filename, repr(e))) - return result - - def get_model(self): - """ - Retrieves the model number (or part number) of the device - - Returns: - string: Model/part number of device - """ - return self.model - + return 'N/A' def get_serial(self): - """ - Retrieves the serial number of the device - - Returns: - string: Serial number of device - """ - return self.serial - + return 'N/A' def get_revision(self): - """ - Retrieves the hardware revision of the device - - Returns: - string: Revision value of device - """ - return self.rev - + return 'N/A' def get_powergood_status(self): """ @@ -248,10 +66,7 @@ def 
get_powergood_status(self): Returns: bool: True if PSU is operating properly, False if not """ - status = self._read_generic_file(os.path.join(self.psu_path, self.psu_oper_status), 0) - - return status == 1 - + return utils.read_int_from_file(self.psu_oper_status) == 1 def get_presence(self): """ @@ -260,12 +75,7 @@ def get_presence(self): Returns: bool: True if PSU is present, False if not """ - if self.always_present: - return self.always_present - else: - status = self._read_generic_file(self.psu_presence, 0) - return status == 1 - + return True def get_voltage(self): """ @@ -275,12 +85,7 @@ def get_voltage(self): A float number, the output voltage in volts, e.g. 12.1 """ - if self.psu_voltage is not None and self.get_powergood_status(): - voltage = self._read_generic_file(self.psu_voltage, 0) - return float(voltage) / 1000 - else: - return None - + return None def get_current(self): """ @@ -289,11 +94,7 @@ def get_current(self): Returns: A float number, the electric current in amperes, e.g 15.4 """ - if self.psu_current is not None and self.get_powergood_status(): - amperes = self._read_generic_file(self.psu_current, 0) - return float(amperes) / 1000 - else: - return None + return None def get_power(self): """ @@ -302,11 +103,13 @@ def get_power(self): Returns: A float number, the power in watts, e.g. 
302.6 """ - if self.psu_power is not None and self.get_powergood_status(): - power = self._read_generic_file(self.psu_power, 0) - return float(power) / 1000000 - else: - return None + return None + + @property + def led(self): + if not self._led: + self._led = PsuLed(self.index) + return self._led def set_status_led(self, color): """ @@ -324,7 +127,6 @@ def set_status_led(self, color): """ return self.led.set_status(color) - def get_status_led(self): """ Gets the state of the PSU status LED @@ -332,11 +134,7 @@ def get_status_led(self): Returns: A string, one of the predefined STATUS_LED_COLOR_* strings above """ - if self.psu_data['led_num'] == 1: - return Psu.get_shared_led().get_status() - else: - return self.led.get_status() - + return self.led.get_status() def get_power_available_status(self): """ @@ -368,13 +166,7 @@ def is_replaceable(self): Returns: bool: True if it is replaceable. """ - return self.psu_data['hot_swappable'] - - @classmethod - def get_shared_led(cls): - if not cls.shared_led: - cls.shared_led = SharedLed(PsuLed(None)) - return cls.shared_led + return False def get_temperature(self): """ @@ -384,13 +176,6 @@ def get_temperature(self): A float number of current temperature in Celsius up to nearest thousandth of one degree Celsius, e.g. 30.125 """ - if self.psu_temp is not None and self.get_powergood_status(): - try: - temp = self._read_generic_file(self.psu_temp, 0) - return float(temp) / 1000 - except Exception as e: - logger.log_info("Fail to get temperature for PSU {} due to - {}".format(self._name, repr(e))) - return None def get_temperature_high_threshold(self): @@ -401,33 +186,172 @@ def get_temperature_high_threshold(self): A float number, the high threshold temperature of PSU in Celsius up to nearest thousandth of one degree Celsius, e.g. 
30.125 """ - if self.psu_temp_threshold is not None and self.get_powergood_status(): - try: - temp_threshold = self._read_generic_file(self.psu_temp_threshold, 0) - return float(temp_threshold) / 1000 - except Exception as e: - logger.log_info("Fail to get temperature threshold for PSU {} due to - {}".format(self._name, repr(e))) - return None - def get_voltage_high_threshold(self): + +class Psu(FixedPsu): + """Platform-specific Psu class""" + PSU_CURRENT = "power/psu{}_curr" + PSU_VOLTAGE = "power/psu{}_volt" + PSU_VOLTAGE1 = "power/psu{}_volt_out2" + PSU_POWER = "power/psu{}_power" + PSU_VPD = "eeprom/psu{}_vpd" + shared_led = None + + def __init__(self, psu_index): + super(Psu, self).__init__(psu_index) + + psu_voltage = os.path.join(PSU_PATH, self.PSU_VOLTAGE1.format(self.index)) + # Workaround for psu voltage sysfs file as the file name differs among platforms + if os.path.exists(psu_voltage): + self.psu_voltage = os.path.join(PSU_PATH, self.PSU_VOLTAGE1.format(self.index)) + else: + self.psu_voltage = os.path.join(PSU_PATH, self.PSU_VOLTAGE.format(self.index)) + + self.psu_current = os.path.join(PSU_PATH, self.PSU_CURRENT.format(self.index)) + self.psu_power = os.path.join(PSU_PATH, self.PSU_POWER.format(self.index)) + self.psu_presence = os.path.join(PSU_PATH, "thermal/psu{}_status".format(self.index)) + + self.psu_temp = os.path.join(PSU_PATH, 'thermal/psu{}_temp'.format(self.index)) + self.psu_temp_threshold = os.path.join(PSU_PATH, 'thermal/psu{}_temp_max'.format(self.index)) + + from .fan import PsuFan + self._fan_list.append(PsuFan(psu_index, 1, self)) + + self.vpd_parser = VpdParser(os.path.join(PSU_PATH, self.PSU_VPD.format(self.index))) + + # initialize thermal for PSU + from .thermal import initialize_psu_thermal + self._thermal_list = initialize_psu_thermal(psu_index, self.get_power_available_status) + + def get_model(self): """ - Retrieves the high threshold PSU voltage output + Retrieves the model number (or part number) of the device Returns: - 
A float number, the high threshold output voltage in volts, - e.g. 12.1 + string: Model/part number of device + """ + return self.vpd_parser.get_model() + + def get_serial(self): + """ + Retrieves the serial number of the device + + Returns: + string: Serial number of device + """ + return self.vpd_parser.get_serial() + + def get_revision(self): + """ + Retrieves the hardware revision of the device + + Returns: + string: Revision value of device """ - # hw-management doesn't expose those sysfs for now - raise NotImplementedError + return self.vpd_parser.get_revision() - def get_voltage_low_threshold(self): + def get_presence(self): """ - Retrieves the low threshold PSU voltage output + Retrieves the presence status of power supply unit (PSU) defined Returns: - A float number, the low threshold output voltage in volts, + bool: True if PSU is present, False if not + """ + return utils.read_int_from_file(self.psu_presence) == 1 + + def get_voltage(self): + """ + Retrieves current PSU voltage output + + Returns: + A float number, the output voltage in volts, e.g. 12.1 """ - # hw-management doesn't expose those sysfs for now - raise NotImplementedError + if self.get_powergood_status(): + # TODO: should we pass log_func=None here? Otherwise, right after a PSU is powered back on, some PSU-related + # sysfs files may not be ready yet, so read_int_from_file would hit an exception and log it. + voltage = utils.read_int_from_file(self.psu_voltage, log_func=logger.log_info) + return float(voltage) / 1000 + return None + + def get_current(self): + """ + Retrieves present electric current supplied by PSU + + Returns: + A float number, the electric current in amperes, e.g 15.4 + """ + if self.get_powergood_status(): + amperes = utils.read_int_from_file(self.psu_current, log_func=logger.log_info) + return float(amperes) / 1000 + return None + + def get_power(self): + """ + Retrieves current energy supplied by PSU + + Returns: + A float number, the power in watts, e.g.
302.6 + """ + if self.get_powergood_status(): + power = utils.read_int_from_file(self.psu_power, log_func=logger.log_info) + return float(power) / 1000000 + return None + + @classmethod + def get_shared_led(cls): + if not cls.shared_led: + cls.shared_led = SharedLed(PsuLed(None)) + return cls.shared_led + + @property + def led(self): + if not self._led: + self._led = ComponentFaultyIndicator(Psu.get_shared_led()) + return self._led + + def get_status_led(self): + """ + Gets the state of the PSU status LED + + Returns: + A string, one of the predefined STATUS_LED_COLOR_* strings above + """ + return Psu.get_shared_led().get_status() + + def is_replaceable(self): + """ + Indicate whether this device is replaceable. + Returns: + bool: True if it is replaceable. + """ + return True + + def get_temperature(self): + """ + Retrieves current temperature reading from PSU + + Returns: + A float number of current temperature in Celsius up to nearest thousandth + of one degree Celsius, e.g. 30.125 + """ + if self.get_powergood_status(): + temp = utils.read_int_from_file(self.psu_temp, log_func=logger.log_info) + return float(temp) / 1000 + + return None + + def get_temperature_high_threshold(self): + """ + Retrieves the high threshold temperature of PSU + + Returns: + A float number, the high threshold temperature of PSU in Celsius + up to nearest thousandth of one degree Celsius, e.g. 
30.125 + """ + if self.get_powergood_status(): + temp_threshold = utils.read_int_from_file(self.psu_temp_threshold, log_func=logger.log_info) + return float(temp_threshold) / 1000 + + return None diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py index 874ebaa8dee0..0650cf20741c 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py @@ -36,6 +36,7 @@ from sonic_platform_base.sonic_sfp.qsfp_dd import qsfp_dd_Dom from sonic_py_common.logger import Logger from . import utils + from .device_data import DeviceDataManager except ImportError as e: raise ImportError (str(e) + "- required module not found") @@ -354,9 +355,25 @@ def __exit__(self, exc_type, exc_val, exc_tb): deinitialize_sdk_handle(self.sdk_handle) +class SfpCapability: + def __init__(self): + self.dom_supported = False + self.dom_temp_supported = False + self.dom_volt_supported = False + self.dom_rx_power_supported = False + self.dom_tx_bias_power_supported = False + self.dom_tx_power_supported = False + self.dom_tx_disable_supported = False + self.dom_thresholds_supported = False + self.dom_rx_tx_power_bias_supported = False + self.calibration = 0 + self.qsfp_page3_available = False + self.second_application_list = False + + class SFP(SfpBase): """Platform-specific SFP class""" - + shared_sdk_handle = None SFP_MLNX_ERROR_DESCRIPTION_LONGRANGE_NON_MLNX_CABLE = 'Long range for non-Mellanox cable or module' SFP_MLNX_ERROR_DESCRIPTION_ENFORCE_PART_NUMBER_LIST = 'Enforce part number list' SFP_MLNX_ERROR_DESCRIPTION_PMD_TYPE_NOT_ENABLED = 'PMD type not enabled' @@ -369,119 +386,69 @@ class SFP(SfpBase): SFP_MLNX_ERROR_BIT_PCIE_POWER_SLOT_EXCEEDED = 0x00080000 SFP_MLNX_ERROR_BIT_RESERVED = 0x80000000 - def __init__(self, sfp_index, sfp_type, sdk_handle_getter, platform): - SfpBase.__init__(self) - self.index = sfp_index + 1 - self.sfp_eeprom_path = 
"qsfp{}".format(self.index) - self.sfp_status_path = "qsfp{}_status".format(self.index) - self._detect_sfp_type(sfp_type) - self.dom_tx_disable_supported = False - self._dom_capability_detect() - self.sdk_handle_getter = sdk_handle_getter - self.sdk_index = sfp_index - - # initialize SFP thermal list - from .thermal import initialize_sfp_thermals - initialize_sfp_thermals(platform, self._thermal_list, self.index) - + def __init__(self, sfp_index, slot_id=0, linecard_port_count=0, lc_name=None): + super(SFP, self).__init__() + + if slot_id == 0: # For non-modular chassis + self.index = sfp_index + 1 + self.sdk_index = sfp_index + + from .thermal import initialize_sfp_thermal + self._thermal_list = initialize_sfp_thermal(sfp_index) + else: # For modular chassis + # (slot_id % MAX_LC_CONUNT - 1) * MAX_PORT_COUNT + (sfp_index + 1) * (MAX_PORT_COUNT / LC_PORT_COUNT) + max_linecard_count = DeviceDataManager.get_linecard_count() + max_linecard_port_count = DeviceDataManager.get_linecard_max_port_count() + self.index = (slot_id % max_linecard_count - 1) * max_linecard_port_count + sfp_index * (max_linecard_port_count / linecard_port_count) + 1 + self.sdk_index = sfp_index + + from .thermal import initialize_linecard_sfp_thermal + self._thermal_list = initialize_linecard_sfp_thermal(lc_name, slot_id, sfp_index) + + self.slot_id = slot_id + self._sfp_type = None + self._sfp_capability = None + @property def sdk_handle(self): - return self.sdk_handle_getter() - - def reinit(self): - - """ - Re-initialize this SFP object when a new SFP inserted - :return: - """ - self._detect_sfp_type(self.sfp_type) - self._dom_capability_detect() - - def get_presence(self): - """ - Retrieves the presence of the device - - Returns: - bool: True if device is present, False if not - """ - presence = False - ethtool_cmd = "ethtool -m sfp{} hex on offset 0 length 1 2>/dev/null".format(self.index) - try: - proc = subprocess.Popen(ethtool_cmd, - stdout=subprocess.PIPE, - shell=True, - 
stderr=subprocess.STDOUT, - universal_newlines=True) - stdout = proc.communicate()[0] - proc.wait() - result = stdout.rstrip('\n') - if result != '': - presence = True + if not SFP.shared_sdk_handle: + SFP.shared_sdk_handle = initialize_sdk_handle() + if not SFP.shared_sdk_handle: + logger.log_error('Failed to open SDK handle') + return SFP.shared_sdk_handle - except OSError as e: - raise OSError("Cannot detect sfp") - - return presence - - - # Read out any bytes from any offset - def _read_eeprom_specific_bytes(self, offset, num_bytes): - eeprom_raw = [] - ethtool_cmd = "ethtool -m sfp{} hex on offset {} length {} 2>/dev/null".format(self.index, offset, num_bytes) - try: - output = subprocess.check_output(ethtool_cmd, - shell=True, - universal_newlines=True) - output_lines = output.splitlines() - first_line_raw = output_lines[0] - if "Offset" in first_line_raw: - for line in output_lines[2:]: - line_split = line.split() - eeprom_raw = eeprom_raw + line_split[1:] - except subprocess.CalledProcessError as e: - return None - - return eeprom_raw - - - def _detect_sfp_type(self, sfp_type): - eeprom_raw = [] - eeprom_raw = self._read_eeprom_specific_bytes(XCVR_TYPE_OFFSET, XCVR_TYPE_WIDTH) - if eeprom_raw: - if eeprom_raw[0] in SFP_TYPE_CODE_LIST: - self.sfp_type = SFP_TYPE - elif eeprom_raw[0] in QSFP_TYPE_CODE_LIST: - self.sfp_type = QSFP_TYPE - elif eeprom_raw[0] in QSFP_DD_TYPE_CODE_LIST: - self.sfp_type = QSFP_DD_TYPE - else: - # we don't regonize this identifier value, treat the xSFP module as the default type - self.sfp_type = sfp_type - logger.log_info("Identifier value of {} module {} is {} which isn't regonized and will be treated as default type ({})".format( - sfp_type, self.index, eeprom_raw[0], sfp_type - )) + @property + def sfp_type(self): + if not self._sfp_type: + eeprom_raw = [] + eeprom_raw = self._read_eeprom_specific_bytes(XCVR_TYPE_OFFSET, XCVR_TYPE_WIDTH) + if eeprom_raw: + if eeprom_raw[0] in SFP_TYPE_CODE_LIST: + self._sfp_type = SFP_TYPE + 
elif eeprom_raw[0] in QSFP_TYPE_CODE_LIST: + self._sfp_type = QSFP_TYPE + elif eeprom_raw[0] in QSFP_DD_TYPE_CODE_LIST: + self._sfp_type = QSFP_DD_TYPE + + # the identifier value could not be read or is not recognized: raise instead of assuming a default type + if not self._sfp_type: + raise RuntimeError("Failed to detect SFP type for SFP {}".format(self.index)) else: - # eeprom_raw being None indicates the module is not present. - # in this case we treat it as the default type according to the SKU - self.sfp_type = sfp_type - + return self._sfp_type def _dom_capability_detect(self): + if self._sfp_capability: + return + + self._sfp_capability = SfpCapability() if not self.get_presence(): - self.dom_supported = False - self.dom_temp_supported = False - self.dom_volt_supported = False - self.dom_rx_power_supported = False - self.dom_tx_bias_power_supported = False - self.dom_tx_power_supported = False - self.calibration = 0 return if self.sfp_type == QSFP_TYPE: - self.calibration = 1 + self._sfp_capability.calibration = 1 sfpi_obj = sff8436InterfaceId() if sfpi_obj is None: - self.dom_supported = False + self._sfp_capability.dom_supported = False offset = 128 # QSFP capability byte parse, through this byte can know whether it support tx_power or not.
@@ -494,72 +461,72 @@ def _dom_capability_detect(self): qsfp_version_compliance = int(qsfp_version_compliance_raw[0], 16) dom_capability = sfpi_obj.parse_dom_capability(qsfp_dom_capability_raw, 0) if qsfp_version_compliance >= 0x08: - self.dom_temp_supported = dom_capability['data']['Temp_support']['value'] == 'On' - self.dom_volt_supported = dom_capability['data']['Voltage_support']['value'] == 'On' - self.dom_rx_power_supported = dom_capability['data']['Rx_power_support']['value'] == 'On' - self.dom_tx_power_supported = dom_capability['data']['Tx_power_support']['value'] == 'On' + self._sfp_capability.dom_temp_supported = dom_capability['data']['Temp_support']['value'] == 'On' + self._sfp_capability.dom_volt_supported = dom_capability['data']['Voltage_support']['value'] == 'On' + self._sfp_capability.dom_rx_power_supported = dom_capability['data']['Rx_power_support']['value'] == 'On' + self._sfp_capability.dom_tx_power_supported = dom_capability['data']['Tx_power_support']['value'] == 'On' else: - self.dom_temp_supported = True - self.dom_volt_supported = True - self.dom_rx_power_supported = dom_capability['data']['Rx_power_support']['value'] == 'On' - self.dom_tx_power_supported = True - self.dom_supported = True - self.calibration = 1 + self._sfp_capability.dom_temp_supported = True + self._sfp_capability.dom_volt_supported = True + self._sfp_capability.dom_rx_power_supported = dom_capability['data']['Rx_power_support']['value'] == 'On' + self._sfp_capability.dom_tx_power_supported = True + self._sfp_capability.dom_supported = True + self._sfp_capability.calibration = 1 sfpd_obj = sff8436Dom() if sfpd_obj is None: return None qsfp_option_value_raw = self._read_eeprom_specific_bytes(QSFP_OPTION_VALUE_OFFSET, QSFP_OPTION_VALUE_WIDTH) if qsfp_option_value_raw is not None: optional_capability = sfpd_obj.parse_option_params(qsfp_option_value_raw, 0) - self.dom_tx_disable_supported = optional_capability['data']['TxDisable']['value'] == 'On' + 
self._sfp_capability.dom_tx_disable_supported = optional_capability['data']['TxDisable']['value'] == 'On' dom_status_indicator = sfpd_obj.parse_dom_status_indicator(qsfp_version_compliance_raw, 1) - self.qsfp_page3_available = dom_status_indicator['data']['FlatMem']['value'] == 'Off' + self._sfp_capability.qsfp_page3_available = dom_status_indicator['data']['FlatMem']['value'] == 'Off' else: - self.dom_supported = False - self.dom_temp_supported = False - self.dom_volt_supported = False - self.dom_rx_power_supported = False - self.dom_tx_power_supported = False - self.calibration = 0 - self.qsfp_page3_available = False + self._sfp_capability.dom_supported = False + self._sfp_capability.dom_temp_supported = False + self._sfp_capability.dom_volt_supported = False + self._sfp_capability.dom_rx_power_supported = False + self._sfp_capability.dom_tx_power_supported = False + self._sfp_capability.calibration = 0 + self._sfp_capability.qsfp_page3_available = False elif self.sfp_type == QSFP_DD_TYPE: sfpi_obj = qsfp_dd_InterfaceId() if sfpi_obj is None: - self.dom_supported = False + self._sfp_capability.dom_supported = False offset = 0 # two types of QSFP-DD cable types supported: Copper and Optical. 
qsfp_dom_capability_raw = self._read_eeprom_specific_bytes((offset + XCVR_DOM_CAPABILITY_OFFSET_QSFP_DD), XCVR_DOM_CAPABILITY_WIDTH_QSFP_DD) if qsfp_dom_capability_raw is not None: - self.dom_temp_supported = True - self.dom_volt_supported = True + self._sfp_capability.dom_temp_supported = True + self._sfp_capability.dom_volt_supported = True dom_capability = sfpi_obj.parse_dom_capability(qsfp_dom_capability_raw, 0) if dom_capability['data']['Flat_MEM']['value'] == 'Off': - self.dom_supported = True - self.second_application_list = True - self.dom_rx_power_supported = True - self.dom_tx_power_supported = True - self.dom_tx_bias_power_supported = True - self.dom_thresholds_supported = True - self.dom_rx_tx_power_bias_supported = True + self._sfp_capability.dom_supported = True + self._sfp_capability.second_application_list = True + self._sfp_capability.dom_rx_power_supported = True + self._sfp_capability.dom_tx_power_supported = True + self._sfp_capability.dom_tx_bias_power_supported = True + self._sfp_capability.dom_thresholds_supported = True + self._sfp_capability.dom_rx_tx_power_bias_supported = True else: - self.dom_supported = False - self.second_application_list = False - self.dom_rx_power_supported = False - self.dom_tx_power_supported = False - self.dom_tx_bias_power_supported = False - self.dom_thresholds_supported = False - self.dom_rx_tx_power_bias_supported = False + self._sfp_capability.dom_supported = False + self._sfp_capability.second_application_list = False + self._sfp_capability.dom_rx_power_supported = False + self._sfp_capability.dom_tx_power_supported = False + self._sfp_capability.dom_tx_bias_power_supported = False + self._sfp_capability.dom_thresholds_supported = False + self._sfp_capability.dom_rx_tx_power_bias_supported = False else: - self.dom_supported = False - self.dom_temp_supported = False - self.dom_volt_supported = False - self.dom_rx_power_supported = False - self.dom_tx_power_supported = False - self.dom_tx_bias_power_supported 
= False - self.dom_thresholds_supported = False - self.dom_rx_tx_power_bias_supported = False + self._sfp_capability.dom_supported = False + self._sfp_capability.dom_temp_supported = False + self._sfp_capability.dom_volt_supported = False + self._sfp_capability.dom_rx_power_supported = False + self._sfp_capability.dom_tx_power_supported = False + self._sfp_capability.dom_tx_bias_power_supported = False + self._sfp_capability.dom_thresholds_supported = False + self._sfp_capability.dom_rx_tx_power_bias_supported = False elif self.sfp_type == SFP_TYPE: sfpi_obj = sff8472InterfaceId() @@ -568,32 +535,145 @@ def _dom_capability_detect(self): sfp_dom_capability_raw = self._read_eeprom_specific_bytes(XCVR_DOM_CAPABILITY_OFFSET, XCVR_DOM_CAPABILITY_WIDTH) if sfp_dom_capability_raw is not None: sfp_dom_capability = int(sfp_dom_capability_raw[0], 16) - self.dom_supported = (sfp_dom_capability & 0x40 != 0) - if self.dom_supported: - self.dom_temp_supported = True - self.dom_volt_supported = True - self.dom_rx_power_supported = True - self.dom_tx_power_supported = True + self._sfp_capability.dom_supported = (sfp_dom_capability & 0x40 != 0) + if self._sfp_capability.dom_supported: + self._sfp_capability.dom_temp_supported = True + self._sfp_capability.dom_volt_supported = True + self._sfp_capability.dom_rx_power_supported = True + self._sfp_capability.dom_tx_power_supported = True if sfp_dom_capability & 0x20 != 0: - self.calibration = 1 + self._sfp_capability.calibration = 1 elif sfp_dom_capability & 0x10 != 0: - self.calibration = 2 + self._sfp_capability.calibration = 2 else: - self.calibration = 0 + self._sfp_capability.calibration = 0 else: - self.dom_temp_supported = False - self.dom_volt_supported = False - self.dom_rx_power_supported = False - self.dom_tx_power_supported = False - self.calibration = 0 - self.dom_tx_disable_supported = (int(sfp_dom_capability_raw[1], 16) & 0x40 != 0) + self._sfp_capability.dom_temp_supported = False + 
self._sfp_capability.dom_volt_supported = False + self._sfp_capability.dom_rx_power_supported = False + self._sfp_capability.dom_tx_power_supported = False + self._sfp_capability.calibration = 0 + self._sfp_capability.dom_tx_disable_supported = (int(sfp_dom_capability_raw[1], 16) & 0x40 != 0) else: - self.dom_supported = False - self.dom_temp_supported = False - self.dom_volt_supported = False - self.dom_rx_power_supported = False - self.dom_tx_power_supported = False + self._sfp_capability.dom_supported = False + self._sfp_capability.dom_temp_supported = False + self._sfp_capability.dom_volt_supported = False + self._sfp_capability.dom_rx_power_supported = False + self._sfp_capability.dom_tx_power_supported = False + @property + @utils.pre_initialize(_dom_capability_detect) + def dom_supported(self): + return self._sfp_capability.dom_supported + + @property + @utils.pre_initialize(_dom_capability_detect) + def dom_temp_supported(self): + return self._sfp_capability.dom_temp_supported + + @property + @utils.pre_initialize(_dom_capability_detect) + def dom_volt_supported(self): + return self._sfp_capability.dom_volt_supported + + @property + @utils.pre_initialize(_dom_capability_detect) + def dom_rx_power_supported(self): + return self._sfp_capability.dom_rx_power_supported + + @property + @utils.pre_initialize(_dom_capability_detect) + def dom_tx_power_supported(self): + return self._sfp_capability.dom_tx_power_supported + + @property + @utils.pre_initialize(_dom_capability_detect) + def calibration(self): + return self._sfp_capability.calibration + + @property + @utils.pre_initialize(_dom_capability_detect) + def dom_tx_bias_power_supported(self): + return self._sfp_capability.dom_tx_bias_power_supported + + @property + @utils.pre_initialize(_dom_capability_detect) + def dom_tx_disable_supported(self): + return self._sfp_capability.dom_tx_disable_supported + + @property + @utils.pre_initialize(_dom_capability_detect) + def qsfp_page3_available(self): + return 
self._sfp_capability.qsfp_page3_available + + @property + @utils.pre_initialize(_dom_capability_detect) + def second_application_list(self): + return self._sfp_capability.second_application_list + + @property + @utils.pre_initialize(_dom_capability_detect) + def dom_thresholds_supported(self): + return self._sfp_capability.dom_thresholds_supported + + @property + @utils.pre_initialize(_dom_capability_detect) + def dom_rx_tx_power_bias_supported(self): + return self._sfp_capability.dom_rx_tx_power_bias_supported + + def reinit(self): + + """ + Re-initialize this SFP object when a new SFP inserted + :return: + """ + self._sfp_type = None + self._sfp_capability = None + + def get_presence(self): + """ + Retrieves the presence of the device + + Returns: + bool: True if device is present, False if not + """ + presence = False + ethtool_cmd = "ethtool -m sfp{} hex on offset 0 length 1 2>/dev/null".format(self.index) + try: + proc = subprocess.Popen(ethtool_cmd, + stdout=subprocess.PIPE, + shell=True, + stderr=subprocess.STDOUT, + universal_newlines=True) + stdout = proc.communicate()[0] + proc.wait() + result = stdout.rstrip('\n') + if result != '': + presence = True + + except OSError as e: + raise OSError("Cannot detect sfp") + + return presence + + # Read out any bytes from any offset + def _read_eeprom_specific_bytes(self, offset, num_bytes): + eeprom_raw = [] + ethtool_cmd = "ethtool -m sfp{} hex on offset {} length {}".format(self.index, offset, num_bytes) + try: + output = subprocess.check_output(ethtool_cmd, + shell=True, + universal_newlines=True) + output_lines = output.splitlines() + first_line_raw = output_lines[0] + if "Offset" in first_line_raw: + for line in output_lines[2:]: + line_split = line.split() + eeprom_raw = eeprom_raw + line_split[1:] + except subprocess.CalledProcessError as e: + return None + + return eeprom_raw def _convert_string_to_num(self, value_str): if "-inf" in value_str: @@ -615,7 +695,6 @@ def _convert_string_to_num(self, value_str): 
else: return 'N/A' - def get_transceiver_info(self): """ Retrieves transceiver info of this SFP @@ -706,9 +785,7 @@ def get_transceiver_info(self): elif self.sfp_type == QSFP_TYPE: offset = 128 vendor_rev_width = XCVR_HW_REV_WIDTH_QSFP - cable_length_width = XCVR_CABLE_LENGTH_WIDTH_QSFP interface_info_bulk_width = XCVR_INTFACE_BULK_WIDTH_QSFP - sfp_type = 'QSFP' sfpi_obj = sff8436InterfaceId() if sfpi_obj is None: @@ -833,9 +910,7 @@ def get_transceiver_info(self): else: offset = 0 vendor_rev_width = XCVR_HW_REV_WIDTH_SFP - cable_length_width = XCVR_CABLE_LENGTH_WIDTH_SFP interface_info_bulk_width = XCVR_INTFACE_BULK_WIDTH_SFP - sfp_type = 'SFP' sfpi_obj = sff8472InterfaceId() if sfpi_obj is None: @@ -1527,13 +1602,13 @@ def get_tx_disable_channel(self): @classmethod - def mgmt_phy_mod_pwr_attr_get(cls, power_attr_type, sdk_handle, sdk_index): + def mgmt_phy_mod_pwr_attr_get(cls, power_attr_type, sdk_handle, sdk_index, slot_id): sx_mgmt_phy_mod_pwr_attr_p = new_sx_mgmt_phy_mod_pwr_attr_t_p() sx_mgmt_phy_mod_pwr_attr = sx_mgmt_phy_mod_pwr_attr_t() sx_mgmt_phy_mod_pwr_attr.power_attr_type = power_attr_type sx_mgmt_phy_mod_pwr_attr_t_p_assign(sx_mgmt_phy_mod_pwr_attr_p, sx_mgmt_phy_mod_pwr_attr) module_id_info = sx_mgmt_module_id_info_t() - module_id_info.slot_id = 0 + module_id_info.slot_id = slot_id module_id_info.module_id = sdk_index try: rc = sx_mgmt_phy_module_pwr_attr_get(sdk_handle, module_id_info, sx_mgmt_phy_mod_pwr_attr_p) @@ -1558,30 +1633,31 @@ def get_lpmode(self): # call class level method to avoid initialize the whole sonic platform API get_lpmode_code = 'from sonic_platform import sfp;\n' \ 'with sfp.SdkHandleContext() as sdk_handle:' \ - 'print(sfp.SFP._get_lpmode(sdk_handle, {}))'.format(self.sdk_index) + 'print(sfp.SFP._get_lpmode(sdk_handle, {}, {}))'.format(self.sdk_index, self.slot_id) lpm_cmd = "docker exec pmon python3 -c \"{}\"".format(get_lpmode_code) try: output = subprocess.check_output(lpm_cmd, shell=True, universal_newlines=True) return 
'True' in output except subprocess.CalledProcessError as e: - print("Error! Unable to get LPM for {}, rc = {}, err msg: {}".format(self.index, e.returncode, e.output)) + print("Error! Unable to get LPM for {}, rc = {}, err msg: {}".format(self.sdk_index, e.returncode, e.output)) return False else: - return self._get_lpmode(self.sdk_handle, self.sdk_index) + return self._get_lpmode(self.sdk_handle, self.sdk_index, self.slot_id) @classmethod - def _get_lpmode(cls, sdk_handle, sdk_index): + def _get_lpmode(cls, sdk_handle, sdk_index, slot_id): """Class level method to get low power mode. Args: sdk_handle: SDK handle sdk_index (integer): SDK port index + slot_id (integer): Slot ID Returns: [boolean]: True if low power mode is on else off """ - _, oper_pwr_mode = cls.mgmt_phy_mod_pwr_attr_get(SX_MGMT_PHY_MOD_PWR_ATTR_PWR_MODE_E, sdk_handle, sdk_index) + _, oper_pwr_mode = cls.mgmt_phy_mod_pwr_attr_get(SX_MGMT_PHY_MOD_PWR_ATTR_PWR_MODE_E, sdk_handle, sdk_index, slot_id) return oper_pwr_mode == SX_MGMT_PHY_MOD_PWR_MODE_LOW_E @@ -1759,7 +1835,7 @@ def get_tx_bias(self): if sfpd_obj is None: return None - if dom_tx_bias_power_supported: + if self.dom_tx_bias_power_supported: dom_tx_bias_raw = self._read_eeprom_specific_bytes((offset + QSFP_DD_TX_BIAS_OFFSET), QSFP_DD_TX_BIAS_WIDTH) if dom_tx_bias_raw is not None: dom_tx_bias_data = sfpd_obj.parse_dom_tx_bias(dom_tx_bias_raw, 0) @@ -1961,28 +2037,28 @@ def reset(self): # call class level method to avoid initialize the whole sonic platform API reset_code = 'from sonic_platform import sfp;\n' \ 'with sfp.SdkHandleContext() as sdk_handle:' \ - 'print(sfp.SFP._reset(sdk_handle, {}))' \ - .format(self.sdk_index) + 'print(sfp.SFP._reset(sdk_handle, {}, {}))' \ + .format(self.sdk_index, self.slot_id) reset_cmd = "docker exec pmon python3 -c \"{}\"".format(reset_code) try: output = subprocess.check_output(reset_cmd, shell=True, universal_newlines=True) return 'True' in output except subprocess.CalledProcessError as e: - 
print("Error! Unable to set LPM for {}, rc = {}, err msg: {}".format(self.index, e.returncode, e.output)) + print("Error! Unable to set LPM for {}, rc = {}, err msg: {}".format(self.sdk_index, e.returncode, e.output)) return False else: - return self._reset(self.sdk_handle, self.sdk_index) + return self._reset(self.sdk_handle, self.sdk_index, self.slot_id) @classmethod - def _reset(cls, sdk_handle, sdk_index): + def _reset(cls, sdk_handle, sdk_index, slot_id): module_id_info = sx_mgmt_module_id_info_t() - module_id_info.slot_id = 0 + module_id_info.slot_id = slot_id module_id_info.module_id = sdk_index rc = sx_mgmt_phy_module_reset(sdk_handle, module_id_info) if rc != SX_STATUS_SUCCESS: - logger.log_error("Error occurred when resetting SFP module {}, error code {}".format(sdk_index, rc)) + logger.log_error("Error occurred when resetting SFP module {}, slot {}, error code {}".format(sdk_index, slot_id, rc)) return rc == SX_STATUS_SUCCESS @@ -2046,10 +2122,7 @@ def is_port_admin_status_up(cls, sdk_handle, log_port): delete_sx_port_admin_state_t_p(admin_state_p) delete_sx_port_module_state_t_p(module_state_p) - if admin_state == SX_PORT_ADMIN_STATUS_UP: - return True - else: - return False + return admin_state == SX_PORT_ADMIN_STATUS_UP @classmethod @@ -2062,7 +2135,7 @@ def set_port_admin_status_by_log_port(cls, sdk_handle, log_port, admin_status): @classmethod - def get_logical_ports(cls, sdk_handle, sdk_index): + def get_logical_ports(cls, sdk_handle, sdk_index, slot_id): # Get all the ports related to the sfp, if port admin status is up, put it to list port_attributes_list = new_sx_port_attributes_t_arr(SX_PORT_ATTR_ARR_SIZE) port_cnt_p = new_uint32_t_p() @@ -2078,6 +2151,7 @@ def get_logical_ports(cls, sdk_handle, sdk_index): if not cls.is_nve(int(port_attributes.log_port)) \ and not cls.is_cpu(int(port_attributes.log_port)) \ and port_attributes.port_mapping.module_port == sdk_index \ + and port_attributes.port_mapping.slot == slot_id \ and 
cls.is_port_admin_status_up(sdk_handle, port_attributes.log_port): log_port_list.append(port_attributes.log_port) @@ -2087,7 +2161,7 @@ def get_logical_ports(cls, sdk_handle, sdk_index): @classmethod - def mgmt_phy_mod_pwr_attr_set(cls, sdk_handle, sdk_index, power_attr_type, admin_pwr_mode): + def mgmt_phy_mod_pwr_attr_set(cls, sdk_handle, sdk_index, slot_id, power_attr_type, admin_pwr_mode): result = False sx_mgmt_phy_mod_pwr_attr = sx_mgmt_phy_mod_pwr_attr_t() sx_mgmt_phy_mod_pwr_mode_attr = sx_mgmt_phy_mod_pwr_mode_attr_t() @@ -2097,12 +2171,12 @@ def mgmt_phy_mod_pwr_attr_set(cls, sdk_handle, sdk_index, power_attr_type, admin sx_mgmt_phy_mod_pwr_attr_p = new_sx_mgmt_phy_mod_pwr_attr_t_p() sx_mgmt_phy_mod_pwr_attr_t_p_assign(sx_mgmt_phy_mod_pwr_attr_p, sx_mgmt_phy_mod_pwr_attr) module_id_info = sx_mgmt_module_id_info_t() - module_id_info.slot_id = 0 + module_id_info.slot_id = slot_id module_id_info.module_id = sdk_index try: rc = sx_mgmt_phy_module_pwr_attr_set(sdk_handle, SX_ACCESS_CMD_SET, module_id_info, sx_mgmt_phy_mod_pwr_attr_p) if SX_STATUS_SUCCESS != rc: - logger.log_error("Error occurred when setting power mode for SFP module {}, error code {}".format(sdk_index, rc)) + logger.log_error("Error occurred when setting power mode for SFP module {}, slot {}, error code {}".format(sdk_index, slot_id, rc)) result = False else: result = True @@ -2113,10 +2187,10 @@ def mgmt_phy_mod_pwr_attr_set(cls, sdk_handle, sdk_index, power_attr_type, admin @classmethod - def _set_lpmode_raw(cls, sdk_handle, sdk_index, ports, attr_type, power_mode): + def _set_lpmode_raw(cls, sdk_handle, sdk_index, slot_id, ports, attr_type, power_mode): result = False # Check if the module already works in the same mode - admin_pwr_mode, oper_pwr_mode = cls.mgmt_phy_mod_pwr_attr_get(attr_type, sdk_handle, sdk_index) + admin_pwr_mode, oper_pwr_mode = cls.mgmt_phy_mod_pwr_attr_get(attr_type, sdk_handle, sdk_index, slot_id) if (power_mode == SX_MGMT_PHY_MOD_PWR_MODE_LOW_E and oper_pwr_mode == 
SX_MGMT_PHY_MOD_PWR_MODE_LOW_E) \ or (power_mode == SX_MGMT_PHY_MOD_PWR_MODE_AUTO_E and admin_pwr_mode == SX_MGMT_PHY_MOD_PWR_MODE_AUTO_E): return True @@ -2125,7 +2199,7 @@ def _set_lpmode_raw(cls, sdk_handle, sdk_index, ports, attr_type, power_mode): for port in ports: cls.set_port_admin_status_by_log_port(sdk_handle, port, SX_PORT_ADMIN_STATUS_DOWN) # Set the desired power mode - result = cls.mgmt_phy_mod_pwr_attr_set(sdk_handle, sdk_index, attr_type, power_mode) + result = cls.mgmt_phy_mod_pwr_attr_set(sdk_handle, sdk_index, slot_id, attr_type, power_mode) finally: # Bring the port up for port in ports: @@ -2150,8 +2224,8 @@ def set_lpmode(self, lpmode): # call class level method to avoid initialize the whole sonic platform API set_lpmode_code = 'from sonic_platform import sfp;\n' \ 'with sfp.SdkHandleContext() as sdk_handle:' \ - 'print(sfp.SFP._set_lpmode({}, sdk_handle, {}))' \ - .format('True' if lpmode else 'False', self.sdk_index) + 'print(sfp.SFP._set_lpmode({}, sdk_handle, {}, {}))' \ + .format('True' if lpmode else 'False', self.sdk_index, self.slot_id) lpm_cmd = "docker exec pmon python3 -c \"{}\"".format(set_lpmode_code) # Set LPM @@ -2159,22 +2233,23 @@ def set_lpmode(self, lpmode): output = subprocess.check_output(lpm_cmd, shell=True, universal_newlines=True) return 'True' in output except subprocess.CalledProcessError as e: - print("Error! Unable to set LPM for {}, rc = {}, err msg: {}".format(self.index, e.returncode, e.output)) + print("Error! 
Unable to set LPM for {}, rc = {}, err msg: {}".format(self.sdk_index, e.returncode, e.output)) return False else: - return self._set_lpmode(lpmode, self.sdk_handle, self.sdk_index) + return self._set_lpmode(lpmode, self.sdk_handle, self.sdk_index, self.slot_id) @classmethod - def _set_lpmode(cls, lpmode, sdk_handle, sdk_index): - log_port_list = cls.get_logical_ports(sdk_handle, sdk_index) + def _set_lpmode(cls, lpmode, sdk_handle, sdk_index, slot_id): + log_port_list = cls.get_logical_ports(sdk_handle, sdk_index, slot_id) sdk_lpmode = SX_MGMT_PHY_MOD_PWR_MODE_LOW_E if lpmode else SX_MGMT_PHY_MOD_PWR_MODE_AUTO_E cls._set_lpmode_raw(sdk_handle, sdk_index, + slot_id, log_port_list, SX_MGMT_PHY_MOD_PWR_ATTR_PWR_MODE_E, sdk_lpmode) - logger.log_info("{} low power mode for module {}".format("Enabled" if lpmode else "Disabled", sdk_index)) + logger.log_info("{} low power mode for module {}, slot {}".format("Enabled" if lpmode else "Disabled", sdk_index, slot_id)) return True diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py index 6f00d4252982..669c49db8131 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py @@ -23,14 +23,19 @@ import os import time import select -if 'MLNX_PLATFORM_API_UNIT_TESTING' not in os.environ: - from python_sdk_api.sx_api import * -else: - from mock import MagicMock - class MockSxFd(object): - fd = 99 - new_sx_fd_t_p = MagicMock(return_value=MockSxFd()) - new_sx_user_channel_t_p = MagicMock() + +from .device_data import DeviceDataManager +try: + if 'PLATFORM_API_UNIT_TESTING' not in os.environ: + from python_sdk_api.sx_api import * + else: + from mock import MagicMock + class MockSxFd(object): + fd = 99 + new_sx_fd_t_p = MagicMock(return_value=MockSxFd()) + new_sx_user_channel_t_p = MagicMock() +except KeyError: + pass from sonic_py_common.logger import Logger 
from .sfp import SFP @@ -253,6 +258,7 @@ def check_sfp_status(self, port_change, error_dict, timeout): try: read, _, _ = select.select([self.rx_fd_p.fd], [], [], timeout) + print(read) except select.error as err: rc, msg = err if rc == errno.EAGAIN or rc == errno.EINTR: @@ -263,6 +269,7 @@ def check_sfp_status(self, port_change, error_dict, timeout): for fd in read: if fd == self.rx_fd_p.fd: success, port_list, module_state, error_type = self.on_pmpe(self.rx_fd_p) + print('success = ', success) if not success: logger.log_error("failed to read from {}".format(fd)) break @@ -339,6 +346,7 @@ def on_pmpe(self, fd_p): module_state = pmpe_t.module_state error_type = pmpe_t.error_type module_id = pmpe_t.module_id + slot_id = pmpe_t.slot_id # For non-modular chassis, it should return 0 if module_state == SDK_SFP_STATE_ERR: logger.log_error("Receive PMPE error event on module {}: status {} error type {}".format(module_id, module_state, error_type)) @@ -352,13 +360,16 @@ def on_pmpe(self, fd_p): logical_port = sx_port_log_id_t_arr_getitem(logical_port_list, i) rc = sx_api_port_device_get(self.handle, 1 , 0, port_attributes_list, port_cnt_p) port_cnt = uint32_t_p_value(port_cnt_p) - + x = 0 # x is the port index within a LC for i in range(port_cnt): port_attributes = sx_port_attributes_t_arr_getitem(port_attributes_list,i) if port_attributes.log_port == logical_port: - label_port = port_attributes.port_mapping.module_port + label_port = slot_id * DeviceDataManager.get_linecard_max_port_count() + x + 1 break + if port_attributes.port_mapping.slot_id == slot_id: + x += 1 + if label_port is not None: label_port_list.append(label_port) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py index 85523fd5ac0e..56348998ac68 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py @@ -25,406 +25,241 @@ try: from 
sonic_platform_base.thermal_base import ThermalBase from sonic_py_common.logger import Logger - from os import listdir - from os.path import isfile, join - import io - import os.path + import copy + import os + import glob + + from .device_data import DeviceDataManager + from . import utils except ImportError as e: raise ImportError (str(e) + "- required module not found") # Global logger class instance logger = Logger() -THERMAL_DEV_CATEGORY_CPU_CORE = "cpu_core" -THERMAL_DEV_CATEGORY_CPU_PACK = "cpu_pack" -THERMAL_DEV_CATEGORY_MODULE = "module" -THERMAL_DEV_CATEGORY_PSU = "psu" -THERMAL_DEV_CATEGORY_GEARBOX = "gearbox" -THERMAL_DEV_CATEGORY_AMBIENT = "ambient" - -THERMAL_DEV_ASIC_AMBIENT = "asic_amb" -THERMAL_DEV_FAN_AMBIENT = "fan_amb" -THERMAL_DEV_PORT_AMBIENT = "port_amb" -THERMAL_DEV_COMEX_AMBIENT = "comex_amb" -THERMAL_DEV_BOARD_AMBIENT = "board_amb" - -THERMAL_API_GET_TEMPERATURE = "get_temperature" -THERMAL_API_GET_HIGH_THRESHOLD = "get_high_threshold" -THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD = "get_high_critical_threshold" - -THERMAL_API_INVALID_HIGH_THRESHOLD = 0.0 - -HW_MGMT_THERMAL_ROOT = "/var/run/hw-management/thermal/" - -THERMAL_ZONE_ASIC_PATH = "/var/run/hw-management/thermal/mlxsw/" -THERMAL_ZONE_MODULE_PATH = "/var/run/hw-management/thermal/mlxsw-module{}/" -THERMAL_ZONE_GEARBOX_PATH = "/var/run/hw-management/thermal/mlxsw-gearbox{}/" -THERMAL_ZONE_MODE = "thermal_zone_mode" -THERMAL_ZONE_POLICY = "thermal_zone_policy" -THERMAL_ZONE_TEMPERATURE = "thermal_zone_temp" -THERMAL_ZONE_NORMAL_TEMPERATURE = "temp_trip_high" - -MODULE_TEMPERATURE_FAULT_PATH = "/var/run/hw-management/thermal/module{}_temp_fault" - -thermal_api_handler_asic = { - THERMAL_API_GET_TEMPERATURE: 'asic', - THERMAL_API_GET_HIGH_THRESHOLD: 'mlxsw/temp_trip_hot', - THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD: 'mlxsw/temp_trip_crit' -} - -thermal_api_handler_cpu_core = { - THERMAL_API_GET_TEMPERATURE:"cpu_core{}", - THERMAL_API_GET_HIGH_THRESHOLD:"cpu_core{}_max", - 
THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"cpu_core{}_crit" -} -thermal_api_handler_cpu_pack = { - THERMAL_API_GET_TEMPERATURE:"cpu_pack", - THERMAL_API_GET_HIGH_THRESHOLD:"cpu_pack_max", - THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"cpu_pack_crit" -} -thermal_api_handler_module = { - THERMAL_API_GET_TEMPERATURE:"module{}_temp_input", - THERMAL_API_GET_HIGH_THRESHOLD:"module{}_temp_crit", - THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"module{}_temp_emergency" -} -thermal_api_handler_psu = { - THERMAL_API_GET_TEMPERATURE:"psu{}_temp", - THERMAL_API_GET_HIGH_THRESHOLD:"psu{}_temp_max", - THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:None -} -thermal_api_handler_gearbox = { - THERMAL_API_GET_TEMPERATURE:"gearbox{}_temp_input", - THERMAL_API_GET_HIGH_THRESHOLD:"mlxsw-gearbox{}/temp_trip_hot", - THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"mlxsw-gearbox{}/temp_trip_crit" -} -thermal_ambient_apis = { - THERMAL_DEV_ASIC_AMBIENT : thermal_api_handler_asic, - THERMAL_DEV_PORT_AMBIENT : "port_amb", - THERMAL_DEV_FAN_AMBIENT : "fan_amb", - THERMAL_DEV_COMEX_AMBIENT : "comex_amb", - THERMAL_DEV_BOARD_AMBIENT : "board_amb" -} -thermal_ambient_name = { - THERMAL_DEV_ASIC_AMBIENT : 'ASIC', - THERMAL_DEV_PORT_AMBIENT : "Ambient Port Side Temp", - THERMAL_DEV_FAN_AMBIENT : "Ambient Fan Side Temp", - THERMAL_DEV_COMEX_AMBIENT : "Ambient COMEX Temp", - THERMAL_DEV_BOARD_AMBIENT : "Ambient Board Temp" -} -thermal_api_handlers = { - THERMAL_DEV_CATEGORY_CPU_CORE : thermal_api_handler_cpu_core, - THERMAL_DEV_CATEGORY_CPU_PACK : thermal_api_handler_cpu_pack, - THERMAL_DEV_CATEGORY_MODULE : thermal_api_handler_module, - THERMAL_DEV_CATEGORY_PSU : thermal_api_handler_psu, - THERMAL_DEV_CATEGORY_GEARBOX : thermal_api_handler_gearbox -} -thermal_name = { - THERMAL_DEV_CATEGORY_CPU_CORE : "CPU Core {} Temp", - THERMAL_DEV_CATEGORY_CPU_PACK : "CPU Pack Temp", - THERMAL_DEV_CATEGORY_MODULE : "xSFP module {} Temp", - THERMAL_DEV_CATEGORY_PSU : "PSU-{} Temp", - THERMAL_DEV_CATEGORY_GEARBOX : "Gearbox {} Temp" -} - 
-thermal_device_categories_all = [ - THERMAL_DEV_CATEGORY_AMBIENT, - THERMAL_DEV_CATEGORY_CPU_PACK, - THERMAL_DEV_CATEGORY_CPU_CORE, - THERMAL_DEV_CATEGORY_GEARBOX, -] - -thermal_device_categories_singleton = [ - THERMAL_DEV_CATEGORY_CPU_PACK, - THERMAL_DEV_CATEGORY_AMBIENT -] -thermal_api_names = [ - THERMAL_API_GET_TEMPERATURE, - THERMAL_API_GET_HIGH_THRESHOLD -] - -platform_dict_thermal = {'x86_64-mlnx_msn2700-r0': 0, 'x86_64-mlnx_lssn2700-r0': 0, 'x86_64-mlnx_msn2740-r0': 3, - 'x86_64-mlnx_msn2100-r0': 1, 'x86_64-mlnx_msn2410-r0': 2, 'x86_64-mlnx_msn2010-r0': 4, - 'x86_64-mlnx_msn3420-r0': 9, 'x86_64-mlnx_msn3700-r0': 5, 'x86_64-mlnx_msn3700c-r0': 6, - 'x86_64-mlnx_msn3800-r0': 7, 'x86_64-mlnx_msn4600-r0': 12, 'x86_64-mlnx_msn4600c-r0': 9, - 'x86_64-mlnx_msn4700-r0': 8, 'x86_64-mlnx_msn4410-r0': 8} -thermal_profile_list = [ - # 0 2700 - { - THERMAL_DEV_CATEGORY_CPU_CORE:(0, 2), - THERMAL_DEV_CATEGORY_MODULE:(1, 32), - THERMAL_DEV_CATEGORY_PSU:(1, 2), - THERMAL_DEV_CATEGORY_CPU_PACK:(0,1), - THERMAL_DEV_CATEGORY_GEARBOX:(0,0), - THERMAL_DEV_CATEGORY_AMBIENT:(0, - [ - THERMAL_DEV_ASIC_AMBIENT, - THERMAL_DEV_PORT_AMBIENT, - THERMAL_DEV_FAN_AMBIENT - ] - ) - }, - # 1 2100 - { - THERMAL_DEV_CATEGORY_CPU_CORE:(0, 4), - THERMAL_DEV_CATEGORY_MODULE:(1, 16), - THERMAL_DEV_CATEGORY_PSU:(0, 0), - THERMAL_DEV_CATEGORY_CPU_PACK:(0,0), - THERMAL_DEV_CATEGORY_GEARBOX:(0,0), - THERMAL_DEV_CATEGORY_AMBIENT:(0, - [ - THERMAL_DEV_ASIC_AMBIENT, - THERMAL_DEV_PORT_AMBIENT, - THERMAL_DEV_FAN_AMBIENT, - ] - ) - }, - # 2 2410 - { - THERMAL_DEV_CATEGORY_CPU_CORE:(0, 2), - THERMAL_DEV_CATEGORY_MODULE:(1, 56), - THERMAL_DEV_CATEGORY_PSU:(1, 2), - THERMAL_DEV_CATEGORY_CPU_PACK:(0,1), - THERMAL_DEV_CATEGORY_GEARBOX:(0,0), - THERMAL_DEV_CATEGORY_AMBIENT:(0, - [ - THERMAL_DEV_ASIC_AMBIENT, - THERMAL_DEV_PORT_AMBIENT, - THERMAL_DEV_FAN_AMBIENT, - ] - ) - }, - # 3 2740 - { - THERMAL_DEV_CATEGORY_CPU_CORE:(0, 4), - THERMAL_DEV_CATEGORY_MODULE:(1, 32), - THERMAL_DEV_CATEGORY_PSU:(1, 2), - 
THERMAL_DEV_CATEGORY_CPU_PACK:(0,0), - THERMAL_DEV_CATEGORY_GEARBOX:(0,0), - THERMAL_DEV_CATEGORY_AMBIENT:(0, - [ - THERMAL_DEV_ASIC_AMBIENT, - THERMAL_DEV_PORT_AMBIENT, - THERMAL_DEV_FAN_AMBIENT, - ] - ) - }, - # 4 2010 - { - THERMAL_DEV_CATEGORY_CPU_CORE:(0, 4), - THERMAL_DEV_CATEGORY_MODULE:(1, 22), - THERMAL_DEV_CATEGORY_PSU:(0, 0), - THERMAL_DEV_CATEGORY_CPU_PACK:(0,0), - THERMAL_DEV_CATEGORY_GEARBOX:(0,0), - THERMAL_DEV_CATEGORY_AMBIENT:(0, - [ - THERMAL_DEV_ASIC_AMBIENT, - THERMAL_DEV_PORT_AMBIENT, - THERMAL_DEV_FAN_AMBIENT, - ] - ) - }, - # 5 3700 - { - THERMAL_DEV_CATEGORY_CPU_CORE:(0, 4), - THERMAL_DEV_CATEGORY_MODULE:(1, 32), - THERMAL_DEV_CATEGORY_PSU:(1, 2), - THERMAL_DEV_CATEGORY_CPU_PACK:(0,1), - THERMAL_DEV_CATEGORY_GEARBOX:(0,0), - THERMAL_DEV_CATEGORY_AMBIENT:(0, - [ - THERMAL_DEV_ASIC_AMBIENT, - THERMAL_DEV_COMEX_AMBIENT, - THERMAL_DEV_PORT_AMBIENT, - THERMAL_DEV_FAN_AMBIENT - ] - ) - }, - # 6 3700c - { - THERMAL_DEV_CATEGORY_CPU_CORE:(0, 2), - THERMAL_DEV_CATEGORY_MODULE:(1, 32), - THERMAL_DEV_CATEGORY_PSU:(1, 2), - THERMAL_DEV_CATEGORY_CPU_PACK:(0,1), - THERMAL_DEV_CATEGORY_GEARBOX:(0,0), - THERMAL_DEV_CATEGORY_AMBIENT:(0, - [ - THERMAL_DEV_ASIC_AMBIENT, - THERMAL_DEV_COMEX_AMBIENT, - THERMAL_DEV_PORT_AMBIENT, - THERMAL_DEV_FAN_AMBIENT - ] - ) - }, - # 7 3800 +""" +The most important information for creating a Thermal object is 3 sysfs files: temperature file, high threshold file and +high critical threshold file. There is no common naming rule for thermal objects on Nvidia platform. There are two types +of thermal object: single and indexable: + 1. Single. Such as asic, port_amb... + 2. Indexable. Such as cpu_core0, cpu_core1, psu1_temp, psu2_temp + +Thermal objects can be created according to a pre-defined naming rule. 
The naming rule contains the following fields + +Field Name Mandatory Default Description +name M Thermal object name template +temperature M Temperature file name +high_threshold O None High threshold file name +high_critical_threshold O None High critical threshold file name +type O single Thermal object type +start_index O 1 Thermal object start index, only used by indexable thermal object +""" +THERMAL_NAMING_RULE = { + "sfp thermals": { - THERMAL_DEV_CATEGORY_CPU_CORE:(0, 4), - THERMAL_DEV_CATEGORY_MODULE:(1, 64), - THERMAL_DEV_CATEGORY_PSU:(1, 2), - THERMAL_DEV_CATEGORY_CPU_PACK:(0,1), - THERMAL_DEV_CATEGORY_GEARBOX:(1,32), - THERMAL_DEV_CATEGORY_AMBIENT:(0, - [ - THERMAL_DEV_ASIC_AMBIENT, - THERMAL_DEV_COMEX_AMBIENT, - THERMAL_DEV_PORT_AMBIENT, - THERMAL_DEV_FAN_AMBIENT - ] - ) + "name": "xSFP module {} Temp", + "temperature": "module{}_temp_input", + "high_threshold": "module{}_temp_crit", + "high_critical_threshold": "module{}_temp_emergency", + "type": "indexable" }, - # 8 4700 + "psu thermals": { - THERMAL_DEV_CATEGORY_CPU_CORE:(0, 4), - THERMAL_DEV_CATEGORY_MODULE:(1, 32), - THERMAL_DEV_CATEGORY_PSU:(1, 2), - THERMAL_DEV_CATEGORY_CPU_PACK:(0,1), - THERMAL_DEV_CATEGORY_GEARBOX:(0,0), - THERMAL_DEV_CATEGORY_AMBIENT:(0, - [ - THERMAL_DEV_ASIC_AMBIENT, - THERMAL_DEV_COMEX_AMBIENT, - THERMAL_DEV_PORT_AMBIENT, - THERMAL_DEV_FAN_AMBIENT - ] - ) + "name": "PSU-{} Temp", + "temperature": "psu{}_temp", + "high_threshold": "psu{}_temp_max", + "type": "indexable" }, - # 9 3420 - { - THERMAL_DEV_CATEGORY_CPU_CORE:(0, 2), - THERMAL_DEV_CATEGORY_MODULE:(1, 60), - THERMAL_DEV_CATEGORY_PSU:(1, 2), - THERMAL_DEV_CATEGORY_CPU_PACK:(0,1), - THERMAL_DEV_CATEGORY_GEARBOX:(0,0), - THERMAL_DEV_CATEGORY_AMBIENT:(0, - [ - THERMAL_DEV_ASIC_AMBIENT, - THERMAL_DEV_COMEX_AMBIENT, - THERMAL_DEV_PORT_AMBIENT, - THERMAL_DEV_FAN_AMBIENT - ] - ) - }, - # 10 4600C - { - THERMAL_DEV_CATEGORY_CPU_CORE:(0, 4), - THERMAL_DEV_CATEGORY_MODULE:(1, 64), - THERMAL_DEV_CATEGORY_PSU:(1, 2), - 
THERMAL_DEV_CATEGORY_CPU_PACK:(0,1), - THERMAL_DEV_CATEGORY_GEARBOX:(0,0), - THERMAL_DEV_CATEGORY_AMBIENT:(0, - [ - THERMAL_DEV_ASIC_AMBIENT, - THERMAL_DEV_COMEX_AMBIENT, - THERMAL_DEV_PORT_AMBIENT, - THERMAL_DEV_FAN_AMBIENT - ] - ) - }, - # 11 4410 - { - THERMAL_DEV_CATEGORY_CPU_CORE:(0, 4), - THERMAL_DEV_CATEGORY_MODULE:(1, 32), - THERMAL_DEV_CATEGORY_PSU:(1, 2), - THERMAL_DEV_CATEGORY_CPU_PACK:(0,1), - THERMAL_DEV_CATEGORY_GEARBOX:(0,0), - THERMAL_DEV_CATEGORY_AMBIENT:(0, - [ - THERMAL_DEV_ASIC_AMBIENT, - THERMAL_DEV_COMEX_AMBIENT, - THERMAL_DEV_PORT_AMBIENT, - THERMAL_DEV_FAN_AMBIENT - ] - ) - }, - # 12 4600 - { - THERMAL_DEV_CATEGORY_CPU_CORE:(0, 4), - THERMAL_DEV_CATEGORY_MODULE:(1, 64), - THERMAL_DEV_CATEGORY_PSU:(1, 2), - THERMAL_DEV_CATEGORY_CPU_PACK:(0,1), - THERMAL_DEV_CATEGORY_GEARBOX:(0,0), - THERMAL_DEV_CATEGORY_AMBIENT:(0, - [ - THERMAL_DEV_ASIC_AMBIENT, - THERMAL_DEV_COMEX_AMBIENT, - THERMAL_DEV_PORT_AMBIENT, - THERMAL_DEV_FAN_AMBIENT - ] - ) + "chassis thermals": [ + { + "name": "ASIC", + "temperature": "asic", + "high_threshold": "mlxsw/temp_trip_hot", + "high_critical_threshold": "mlxsw/temp_trip_crit" + }, + { + "name": "Ambient Port Side Temp", + "temperature": "port_amb" + }, + { + "name": "Ambient Fan Side Temp", + "temperature": "fan_amb" + }, + { + "name": "Ambient COMEX Temp", + "temperature": "comex_amb" + }, + { + "name": "CPU Pack Temp", + "temperature": "cpu_pack", + "high_threshold": "cpu_pack_max", + "high_critical_threshold": "cpu_pack_crit" + }, + { + "name": "CPU Core {} Temp", + "temperature": "cpu_core{}", + "high_threshold": "cpu_core{}_max", + "high_critical_threshold": "cpu_core{}_crit", + "type": "indexable", + "start_index": 0 + }, + { + "name": "Gearbox {} Temp", + "temperature": "gearbox{}_temp_input", + "high_threshold": "mlxsw-gearbox{}/temp_trip_hot", + "high_critical_threshold": "mlxsw-gearbox{}/temp_trip_crit", + "type": "indexable" + } + ], + 'linecard thermals': { + "name": "Gearbox {} Temp", + "temperature": 
"gearbox{}_temp_input", + "high_threshold": "mlxsw-gearbox{}/temp_trip_hot", + "high_critical_threshold": "mlxsw-gearbox{}/temp_trip_crit", + "type": "indexable" } -] - -def initialize_psu_thermals(platform, thermal_list, psu_index, dependency): - tp_index = platform_dict_thermal[platform] - thermal_profile = thermal_profile_list[tp_index] - _, count = thermal_profile[THERMAL_DEV_CATEGORY_PSU] - if count == 0: - return - thermal = Thermal(THERMAL_DEV_CATEGORY_PSU, psu_index, True, 1, dependency) - thermal_list.append(thermal) - - -def initialize_sfp_thermals(platform, thermal_list, sfp_index): - thermal = Thermal(THERMAL_DEV_CATEGORY_MODULE, sfp_index, True, 1) - thermal_list.append(thermal) - - -def initialize_chassis_thermals(platform, thermal_list): - # create thermal objects for all categories of sensors - tp_index = platform_dict_thermal[platform] - thermal_profile = thermal_profile_list[tp_index] - Thermal.thermal_profile = thermal_profile +} + +CHASSIS_THERMAL_SYSFS_FOLDER = '/run/hw-management/thermal' +THERMAL_ZONE_FOLDER_WILDCARD = '/run/hw-management/thermal/mlxsw*' +THERMAL_ZONE_POLICY_FILE = 'thermal_zone_policy' +THERMAL_ZONE_MODE_FILE = 'thermal_zone_mode' +THERMAL_ZONE_TEMP_FILE = 'thermal_zone_temp' +THERMAL_ZONE_THRESHOLD_FILE = 'temp_trip_high' +MODULE_TEMP_FAULT_WILDCARRD = '/run/hw-management/thermal/module*_temp_fault' +MAX_AMBIENT_TEMP = 120 + + +def initialize_chassis_thermals(): + thermal_list = [] + rules = THERMAL_NAMING_RULE['chassis thermals'] position = 1 - for category in thermal_device_categories_all: - if category == THERMAL_DEV_CATEGORY_AMBIENT: - count, ambient_list = thermal_profile[category] - for ambient in ambient_list: - thermal = Thermal(category, ambient, True, position) - thermal_list.append(thermal), + for rule in rules: + if 'type' in rule and rule['type'] == 'indexable': + count = 0 + if 'Gearbox' in rule['name']: + count = DeviceDataManager.get_gearbox_count('/run/hw-management/config') + elif 'CPU Core' in 
rule['name']: + count = DeviceDataManager.get_cpu_thermal_count() + if count == 0: + logger.log_debug('Failed to get thermal object count for {}'.format(rule['name'])) + continue + + for index in range(count): + thermal_list.append(create_indexable_thermal(rule, index, CHASSIS_THERMAL_SYSFS_FOLDER, position)) position += 1 else: - start, count = 0, 0 - if category in thermal_profile: - start, count = thermal_profile[category] - if count == 0: - continue - if count == 1: - thermal = Thermal(category, 0, False, position) - thermal_list.append(thermal) + thermal_object = create_single_thermal(rule, CHASSIS_THERMAL_SYSFS_FOLDER, position) + if thermal_object: + thermal_list.append(thermal_object) position += 1 - else: - for index in range(count): - thermal = Thermal(category, start + index, True, position) - thermal_list.append(thermal) - position += 1 + return thermal_list + + +def initialize_psu_thermal(psu_index, presence_cb): + """Initialize PSU thermal object + + Args: + psu_index (int): PSU index, 0-based + presence_cb (function): A callback function to indicate if the thermal is present. When removing a PSU, the related + thermal sysfs files will be removed from system, presence_cb is used to check such situation and avoid printing + error logs. 
+ + Returns: + [list]: A list of thermal objects + """ + return [create_indexable_thermal(THERMAL_NAMING_RULE['psu thermals'], psu_index, CHASSIS_THERMAL_SYSFS_FOLDER, 1, presence_cb)] + + +def initialize_sfp_thermal(sfp_index): + return [create_indexable_thermal(THERMAL_NAMING_RULE['sfp thermals'], sfp_index, CHASSIS_THERMAL_SYSFS_FOLDER, 1)] + + +def initialize_linecard_thermals(lc_name, lc_index): + thermal_list = [] + rule = THERMAL_NAMING_RULE['linecard thermals'] + rule = copy.deepcopy(rule) + rule['name'] = '{} {}'.format(lc_name, rule['name']) + sysfs_folder = '/run/hw-management/lc{}/thermal'.format(lc_index) + count = DeviceDataManager.get_gearbox_count('/run/hw-management/lc{}/config'.format(lc_index)) + for index in range(count): + thermal_list.append(create_indexable_thermal(rule, index, sysfs_folder, index + 1)) + return thermal_list + + +def initialize_linecard_sfp_thermal(lc_name, lc_index, sfp_index): + rule = THERMAL_NAMING_RULE['sfp thermals'] + rule = copy.deepcopy(rule) + rule['name'] = '{} {}'.format(lc_name, rule['name']) + sysfs_folder = '/run/hw-management/lc{}/thermal'.format(lc_index) + return [create_indexable_thermal(rule, sfp_index, sysfs_folder, 1)] + + +def create_indexable_thermal(rule, index, sysfs_folder, position, presence_cb=None): + index += rule.get('start_index', 1) + name = rule['name'].format(index) + temp_file = os.path.join(sysfs_folder, rule['temperature'].format(index)) + _check_thermal_sysfs_existence(temp_file) + if 'high_threshold' in rule: + high_th_file = os.path.join(sysfs_folder, rule['high_threshold'].format(index)) + _check_thermal_sysfs_existence(high_th_file) + else: + high_th_file = None + if 'high_critical_threshold' in rule: + high_crit_th_file = os.path.join(sysfs_folder, rule['high_critical_threshold'].format(index)) + _check_thermal_sysfs_existence(high_crit_th_file) + else: + high_crit_th_file = None + if not presence_cb: + return Thermal(name, temp_file, high_th_file, high_crit_th_file, position) + 
else: + return RemovableThermal(name, temp_file, high_th_file, high_crit_th_file, position, presence_cb) + + +def create_single_thermal(rule, sysfs_folder, position, presence_cb=None): + temp_file = rule['temperature'] + thermal_capability = DeviceDataManager.get_thermal_capability() + if thermal_capability: + if not thermal_capability.get(temp_file, True): + return None + temp_file = os.path.join(sysfs_folder, temp_file) + _check_thermal_sysfs_existence(temp_file) + if 'high_threshold' in rule: + high_th_file = os.path.join(sysfs_folder, rule['high_threshold']) + _check_thermal_sysfs_existence(high_th_file) + else: + high_th_file = None + if 'high_critical_threshold' in rule: + high_crit_th_file = os.path.join(sysfs_folder, rule['high_critical_threshold']) + _check_thermal_sysfs_existence(high_crit_th_file) + else: + high_crit_th_file = None + name = rule['name'] + if not presence_cb: + return Thermal(name, temp_file, high_th_file, high_crit_th_file, position) + else: + return RemovableThermal(name, temp_file, high_th_file, high_crit_th_file, position, presence_cb) + + +def _check_thermal_sysfs_existence(file_path): + if not os.path.exists(file_path): + logger.log_error('Thermal sysfs {} does not exist'.format(file_path)) class Thermal(ThermalBase): - thermal_profile = None thermal_algorithm_status = False - def __init__(self, category, index, has_index, position, dependency = None): + def __init__(self, name, temp_file, high_th_file, high_crit_th_file, position): """ index should be a string for category ambient and int for other categories """ super(Thermal, self).__init__() - if category == THERMAL_DEV_CATEGORY_AMBIENT: - self.name = thermal_ambient_name[index] - self.index = index - elif has_index: - self.name = thermal_name[category].format(index) - self.index = index - else: - self.name = thermal_name[category] - self.index = 0 - - self.category = category + self.name = name self.position = position - self.temperature = 
self._get_file_from_api(THERMAL_API_GET_TEMPERATURE) - self.high_threshold = self._get_file_from_api(THERMAL_API_GET_HIGH_THRESHOLD) - self.high_critical_threshold = self._get_file_from_api(THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD) - self.dependency = dependency - + self.temperature = temp_file + self.high_threshold = high_th_file + self.high_critical_threshold = high_crit_th_file def get_name(self): """ @@ -435,45 +270,6 @@ def get_name(self): """ return self.name - - @classmethod - def _read_generic_file(cls, filename, len): - """ - Read a generic file, returns the contents of the file - """ - result = None - try: - with open(filename, 'r') as fileobj: - result = fileobj.read().strip() - except Exception as e: - logger.log_info("Fail to read file {} due to {}".format(filename, repr(e))) - return result - - - def _get_file_from_api(self, api_name): - if self.category == THERMAL_DEV_CATEGORY_AMBIENT: - handler = thermal_ambient_apis[self.index] - if isinstance(handler, str): - if api_name == THERMAL_API_GET_TEMPERATURE: - filename = thermal_ambient_apis[self.index] - else: - return None - elif isinstance(handler, dict): - filename = handler[api_name] - else: - return None - else: - handler = thermal_api_handlers[self.category][api_name] - if self.category in thermal_device_categories_singleton: - filename = handler - else: - if handler: - filename = handler.format(self.index) - else: - return None - return join(HW_MGMT_THERMAL_ROOT, filename) - - def get_temperature(self): """ Retrieves current temperature reading from thermal @@ -482,19 +278,8 @@ def get_temperature(self): A float number of current temperature in Celsius up to nearest thousandth of one degree Celsius, e.g. 
30.125 """ - if self.dependency: - status, hint = self.dependency() - if not status: - logger.log_debug("get_temperature for {} failed due to {}".format(self.name, hint)) - return None - value_str = self._read_generic_file(self.temperature, 0) - if value_str is None: - return None - value_float = float(value_str) - if self.category == THERMAL_DEV_CATEGORY_MODULE and value_float == THERMAL_API_INVALID_HIGH_THRESHOLD: - return None - return value_float / 1000.0 - + value = utils.read_float_from_file(self.temperature, None, log_func=logger.log_info) + return value / 1000.0 if (value is not None and value != 0) else None def get_high_threshold(self): """ @@ -506,19 +291,8 @@ def get_high_threshold(self): """ if self.high_threshold is None: return None - if self.dependency: - status, hint = self.dependency() - if not status: - logger.log_debug("get_high_threshold for {} failed due to {}".format(self.name, hint)) - return None - value_str = self._read_generic_file(self.high_threshold, 0) - if value_str is None: - return None - value_float = float(value_str) - if self.category == THERMAL_DEV_CATEGORY_MODULE and value_float == THERMAL_API_INVALID_HIGH_THRESHOLD: - return None - return value_float / 1000.0 - + value = utils.read_float_from_file(self.high_threshold, None, log_func=logger.log_info) + return value / 1000.0 if (value is not None and value != 0) else None def get_high_critical_threshold(self): """ @@ -530,18 +304,8 @@ def get_high_critical_threshold(self): """ if self.high_critical_threshold is None: return None - if self.dependency: - status, hint = self.dependency() - if not status: - logger.log_debug("get_high_critical_threshold for {} failed due to {}".format(self.name, hint)) - return None - value_str = self._read_generic_file(self.high_critical_threshold, 0) - if value_str is None: - return None - value_float = float(value_str) - if self.category == THERMAL_DEV_CATEGORY_MODULE and value_float == THERMAL_API_INVALID_HIGH_THRESHOLD: - return None - return 
value_float / 1000.0 + value = utils.read_float_from_file(self.high_critical_threshold, None, log_func=logger.log_info) + return value / 1000.0 if (value is not None and value != 0) else None def get_position_in_parent(self): """ @@ -559,20 +323,6 @@ def is_replaceable(self): """ return False - @classmethod - def _write_generic_file(cls, filename, content): - """ - Generic functions to write content to a specified file path if - the content has changed. - """ - try: - with open(filename, 'w+') as file_obj: - origin_content = file_obj.read() - if origin_content != content: - file_obj.write(content) - except Exception as e: - logger.log_info("Fail to write file {} due to {}".format(filename, repr(e))) - @classmethod def set_thermal_algorithm_status(cls, status, force=True): """ @@ -589,31 +339,18 @@ def set_thermal_algorithm_status(cls, status, force=True): Returns: True if thermal algorithm status changed. """ - if not cls.thermal_profile: - raise Exception("Fail to get thermal profile for this switch") - if not force and cls.thermal_algorithm_status == status: return False cls.thermal_algorithm_status = status - content = "enabled" if status else "disabled" + mode = "enabled" if status else "disabled" policy = "step_wise" if status else "user_space" - cls._write_generic_file(join(THERMAL_ZONE_ASIC_PATH, THERMAL_ZONE_MODE), content) - cls._write_generic_file(join(THERMAL_ZONE_ASIC_PATH, THERMAL_ZONE_POLICY), policy) - - if THERMAL_DEV_CATEGORY_MODULE in cls.thermal_profile: - start, count = cls.thermal_profile[THERMAL_DEV_CATEGORY_MODULE] - if count != 0: - for index in range(count): - cls._write_generic_file(join(THERMAL_ZONE_MODULE_PATH.format(start + index), THERMAL_ZONE_MODE), content) - cls._write_generic_file(join(THERMAL_ZONE_MODULE_PATH.format(start + index), THERMAL_ZONE_POLICY), policy) - - if THERMAL_DEV_CATEGORY_GEARBOX in cls.thermal_profile: - start, count = cls.thermal_profile[THERMAL_DEV_CATEGORY_GEARBOX] - if count != 0: - for index in range(count): 
- cls._write_generic_file(join(THERMAL_ZONE_GEARBOX_PATH.format(start + index), THERMAL_ZONE_MODE), content) - cls._write_generic_file(join(THERMAL_ZONE_GEARBOX_PATH.format(start + index), THERMAL_ZONE_POLICY), policy) + for thermal_zone_folder in glob.iglob(THERMAL_ZONE_FOLDER_WILDCARD): + policy_file = os.path.join(thermal_zone_folder, THERMAL_ZONE_POLICY_FILE) + utils.write_file(policy_file, policy) + mode_file = os.path.join(thermal_zone_folder, THERMAL_ZONE_MODE_FILE) + utils.write_file(mode_file, mode) + return True @classmethod @@ -624,64 +361,91 @@ def check_thermal_zone_temperature(cls): Returns: True if all thermal zones current temperature less or equal than normal temperature """ - if not cls.thermal_profile: - raise Exception("Fail to get thermal profile for this switch") - - if not cls._check_thermal_zone_temperature(THERMAL_ZONE_ASIC_PATH): - return False - - if THERMAL_DEV_CATEGORY_MODULE in cls.thermal_profile: - start, count = cls.thermal_profile[THERMAL_DEV_CATEGORY_MODULE] - if count != 0: - for index in range(count): - if not cls._check_thermal_zone_temperature(THERMAL_ZONE_MODULE_PATH.format(start + index)): - return False - - if THERMAL_DEV_CATEGORY_GEARBOX in cls.thermal_profile: - start, count = cls.thermal_profile[THERMAL_DEV_CATEGORY_GEARBOX] - if count != 0: - for index in range(count): - if not cls._check_thermal_zone_temperature(THERMAL_ZONE_GEARBOX_PATH.format(start + index)): - return False + for thermal_zone_folder in glob.iglob(THERMAL_ZONE_FOLDER_WILDCARD): + if not cls._check_thermal_zone_temperature(thermal_zone_folder): + return False return True @classmethod def _check_thermal_zone_temperature(cls, thermal_zone_path): - normal_temp_path = join(thermal_zone_path, THERMAL_ZONE_NORMAL_TEMPERATURE) - current_temp_path = join(thermal_zone_path, THERMAL_ZONE_TEMPERATURE) - normal = None - current = None - try: - with open(normal_temp_path, 'r') as file_obj: - normal = float(file_obj.read()) - - with open(current_temp_path, 'r') as 
file_obj: - current = float(file_obj.read()) - - return current <= normal + threshold_path = os.path.join(thermal_zone_path, THERMAL_ZONE_THRESHOLD_FILE) + current_temp_path = os.path.join(thermal_zone_path, THERMAL_ZONE_TEMP_FILE) + + try: + threshold = utils.read_int_from_file(threshold_path, raise_exception=True) + current = utils.read_int_from_file(current_temp_path, raise_exception=True) + return current <= threshold except Exception as e: logger.log_info("Fail to check thermal zone temperature for file {} due to {}".format(thermal_zone_path, repr(e))) + return False @classmethod def check_module_temperature_trustable(cls): - if not cls.thermal_profile: - raise Exception("Fail to get thermal profile for this switch") - - start, count = cls.thermal_profile[THERMAL_DEV_CATEGORY_MODULE] - for index in range(count): - fault_file_path = MODULE_TEMPERATURE_FAULT_PATH.format(index + start) - fault = cls._read_generic_file(fault_file_path, 0) - if fault.strip() != '0': + for file_path in glob.iglob(MODULE_TEMP_FAULT_WILDCARRD): + fault = utils.read_int_from_file(file_path) + if fault != 0: return 'untrust' return 'trust' @classmethod def get_min_amb_temperature(cls): - fan_ambient_path = join(HW_MGMT_THERMAL_ROOT, THERMAL_DEV_FAN_AMBIENT) - port_ambient_path = join(HW_MGMT_THERMAL_ROOT, THERMAL_DEV_PORT_AMBIENT) + fan_ambient_path = os.path.join(CHASSIS_THERMAL_SYSFS_FOLDER, 'fan_amb') + port_ambient_path = os.path.join(CHASSIS_THERMAL_SYSFS_FOLDER, 'port_amb') - # if there is any exception, let it raise - fan_ambient_temp = int(cls._read_generic_file(fan_ambient_path, 0)) - port_ambient_temp = int(cls._read_generic_file(port_ambient_path, 0)) - return fan_ambient_temp if fan_ambient_temp < port_ambient_temp else port_ambient_temp + try: + fan_ambient_temp = utils.read_int_from_file(fan_ambient_path, raise_exception=True) + port_ambient_temp = utils.read_int_from_file(port_ambient_path, raise_exception=True) + return fan_ambient_temp if fan_ambient_temp < 
port_ambient_temp else port_ambient_temp + except Exception as e: + # Can't get ambient temperature, return maximum + logger.log_error('Failed to get minimum ambient temperature, use pessimistic instead') + return MAX_AMBIENT_TEMP + + +class RemovableThermal(Thermal): + def __init__(self, name, temp_file, high_th_file, high_crit_th_file, position, presence_cb): + super(RemovableThermal, self).__init__(name, temp_file, high_th_file, high_crit_th_file, position) + self.presence_cb = presence_cb + + def get_temperature(self): + """ + Retrieves current temperature reading from thermal + + Returns: + A float number of current temperature in Celsius up to nearest thousandth + of one degree Celsius, e.g. 30.125 + """ + status, hint = self.presence_cb() + if not status: + logger.log_debug("get_temperature for {} failed due to {}".format(self.name, hint)) + return None + return super(RemovableThermal, self).get_temperature() + + def get_high_threshold(self): + """ + Retrieves the high threshold temperature of thermal + + Returns: + A float number, the high threshold temperature of thermal in Celsius + up to nearest thousandth of one degree Celsius, e.g. 30.125 + """ + status, hint = self.presence_cb() + if not status: + logger.log_debug("get_high_threshold for {} failed due to {}".format(self.name, hint)) + return None + return super(RemovableThermal, self).get_high_threshold() + + def get_high_critical_threshold(self): + """ + Retrieves the high critical threshold temperature of thermal + + Returns: + A float number, the high critical threshold temperature of thermal in Celsius + up to nearest thousandth of one degree Celsius, e.g. 
30.125 + """ + status, hint = self.presence_cb() + if not status: + logger.log_debug("get_high_critical_threshold for {} failed due to {}".format(self.name, hint)) + return None + return super(RemovableThermal, self).get_high_critical_threshold() diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_actions.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_actions.py index f9e2c55328b0..e5bc9e8b25a4 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_actions.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_actions.py @@ -166,19 +166,19 @@ def execute(self, thermal_info_dict): class ChangeMinCoolingLevelAction(ThermalPolicyActionBase): UNKNOWN_SKU_COOLING_LEVEL = 6 def execute(self, thermal_info_dict): - from .device_data import DEVICE_DATA + from .device_data import DeviceDataManager from .fan import Fan from .thermal_infos import ChassisInfo from .thermal_conditions import MinCoolingLevelChangeCondition from .thermal_conditions import UpdateCoolingLevelToMinCondition - chassis = thermal_info_dict[ChassisInfo.INFO_NAME].get_chassis() - if chassis.platform_name not in DEVICE_DATA or 'thermal' not in DEVICE_DATA[chassis.platform_name] or 'minimum_table' not in DEVICE_DATA[chassis.platform_name]['thermal']: + minimum_table = DeviceDataManager.get_minimum_table() + if not minimum_table: Fan.min_cooling_level = ChangeMinCoolingLevelAction.UNKNOWN_SKU_COOLING_LEVEL else: trust_state = MinCoolingLevelChangeCondition.trust_state temperature = MinCoolingLevelChangeCondition.temperature - minimum_table = DEVICE_DATA[chassis.platform_name]['thermal']['minimum_table']['unk_{}'.format(trust_state)] + minimum_table = minimum_table['unk_{}'.format(trust_state)] for key, cooling_level in minimum_table.items(): temp_range = key.split(':') diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py b/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py index 0dd29e11dbc8..0650d9af1a1c 100644 
--- a/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py @@ -16,23 +16,27 @@ # import functools import subprocess +from sonic_py_common.logger import Logger -# flags to indicate whether this process is running in docker or host -_is_host = None +logger = Logger() -def read_str_from_file(file_path, default='', raise_exception=False): +def read_from_file(file_path, target_type, default='', raise_exception=False, log_func=logger.log_error): """ - Read string content from file + Read content from file and convert to target type :param file_path: File path + :param target_type: target type :param default: Default return value if any exception occur :param raise_exception: Raise exception to caller if True else just return default value + :param log_func: function to log the error :return: String content of the file """ try: with open(file_path, 'r') as f: - value = f.read().strip() + value = target_type(f.read().strip()) except (ValueError, IOError) as e: + if log_func: + log_func('Failed to read from file {} - {}'.format(file_path, repr(e))) if not raise_exception: value = default else: @@ -41,27 +45,65 @@ def read_str_from_file(file_path, default='', raise_exception=False): return value -def read_int_from_file(file_path, default=0, raise_exception=False): +def read_str_from_file(file_path, default='', raise_exception=False, log_func=logger.log_error): + """ + Read string content from file + :param file_path: File path + :param default: Default return value if any exception occur + :param raise_exception: Raise exception to caller if True else just return default value + :param log_func: function to log the error + :return: String content of the file + """ + return read_from_file(file_path=file_path, target_type=str, default=default, raise_exception=raise_exception, log_func=log_func) + + +def read_int_from_file(file_path, default=0, raise_exception=False, log_func=logger.log_error): """ Read 
content from file and cast it to integer :param file_path: File path :param default: Default return value if any exception occur :param raise_exception: Raise exception to caller if True else just return default value + :param log_func: function to log the error :return: Integer value of the file content """ - try: - with open(file_path, 'r') as f: - value = int(f.read().strip()) - except (ValueError, IOError) as e: - if not raise_exception: - value = default - else: - raise e + return read_from_file(file_path=file_path, target_type=int, default=default, raise_exception=raise_exception, log_func=log_func) - return value + +def read_float_from_file(file_path, default=0.0, raise_exception=False, log_func=logger.log_error): + """ + Read content from file and cast it to float + :param file_path: File path + :param default: Default return value if any exception occur + :param raise_exception: Raise exception to caller if True else just return default value + :param log_func: function to log the error + :return: Float value of the file content + """ + return read_from_file(file_path=file_path, target_type=float, default=default, raise_exception=raise_exception, log_func=log_func) + + +def _key_value_converter(content): + ret = {} + for line in content.splitlines(): + k,v = line.split(':') + ret[k.strip()] = v.strip() + return ret + + +def read_key_value_file(file_path, default={}, raise_exception=False, log_func=logger.log_error): + """Read file content and parse the content to a dict. The file content should look like: + key1:value1 + key2:value2 + + Args: + file_path (str): file path + default (dict, optional): default return value. Defaults to {}. + raise_exception (bool, optional): If exception should be raised or hidden. Defaults to False. + log_func (optional): logger function. Defaults to logger.log_error.
+ """ + return read_from_file(file_path=file_path, target_type=_key_value_converter, default=default, raise_exception=raise_exception, log_func=log_func) -def write_file(file_path, content, raise_exception=False): +def write_file(file_path, content, raise_exception=False, log_func=logger.log_error): """ Write the given value to a file :param file_path: File path @@ -73,6 +115,8 @@ def write_file(file_path, content, raise_exception=False): with open(file_path, 'w') as f: f.write(str(content)) except (ValueError, IOError) as e: + if log_func: + log_func('Failed to write {} to file {} - {}'.format(content, file_path, repr(e))) if not raise_exception: return False else: @@ -80,16 +124,50 @@ def write_file(file_path, content, raise_exception=False): return True +def pre_initialize(init_func): + def decorator(method): + @functools.wraps(method) + def _impl(self, *args, **kwargs): + init_func(self) + return method(self, *args, **kwargs) + return _impl + return decorator + + +def pre_initialize_one(init_func): + def decorator(method): + @functools.wraps(method) + def _impl(self, index): + init_func(self, index) + return method(self, index) + return _impl + return decorator + + +def read_only_cache(): + """Decorator to cache return value for a method/function once. + This decorator should be used for method/function when: + 1. Executing the method/function takes time. e.g. reading sysfs. + 2. The return value of this method/function never changes. 
+ """ + def decorator(method): + method.return_value = None + + @functools.wraps(method) + def _impl(*args, **kwargs): + if not method.return_value: + method.return_value = method(*args, **kwargs) + return method.return_value + return _impl + return decorator + + +@read_only_cache() def is_host(): """ Test whether current process is running on the host or an docker return True for host and False for docker - """ - global _is_host - if _is_host is not None: - return _is_host - - _is_host = False + """ try: proc = subprocess.Popen("docker --version 2>/dev/null", stdout=subprocess.PIPE, @@ -99,22 +177,20 @@ def is_host(): stdout = proc.communicate()[0] proc.wait() result = stdout.rstrip('\n') - if result != '': - _is_host = True - + return result != '' except OSError as e: - pass - - return _is_host + return False -def default_return(return_value): +def default_return(return_value, log_func=logger.log_debug): def wrapper(method): @functools.wraps(method) def _impl(*args, **kwargs): try: return method(*args, **kwargs) - except: + except Exception as e: + if log_func: + log_func('Faield to execute method {} - {}'.format(method.__name__, repr(e))) return return_value return _impl return wrapper diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/vpd_parser.py b/platform/mellanox/mlnx-platform-api/sonic_platform/vpd_parser.py new file mode 100644 index 000000000000..ea66234e4bff --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/vpd_parser.py @@ -0,0 +1,84 @@ +# +# Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. +# Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +from sonic_py_common.logger import Logger + +from . import utils + +logger = Logger() +SN_VPD_FIELD = "SN_VPD_FIELD" +PN_VPD_FIELD = "PN_VPD_FIELD" +REV_VPD_FIELD = "REV_VPD_FIELD" + + +class VpdParser: + def __init__(self, file_path): + self.vpd_data = {} + self.vpd_file = file_path + self.vpd_file_last_mtime = None + + def _get_data(self): + if not os.path.exists(self.vpd_file): + self.vpd_data = {} + return False + + try: + mtime = os.stat(self.vpd_file).st_mtime + if mtime != self.vpd_file_last_mtime: + self.vpd_file_last_mtime = mtime + self.vpd_data = utils.read_key_value_file(self.vpd_file) + return True + except Exception as e: + self.vpd_data = {} + return False + + def get_model(self): + """ + Retrieves the model number (or part number) of the device + + Returns: + string: Model/part number of device + """ + if self._get_data() and PN_VPD_FIELD not in self.vpd_data: + logger.log_error("Fail to read model number: No key {} in VPD {}".format(PN_VPD_FIELD, self.vpd_file)) + return 'N/A' + return self.vpd_data.get(PN_VPD_FIELD, 'N/A') + + def get_serial(self): + """ + Retrieves the serial number of the device + + Returns: + string: Serial number of device + """ + if self._get_data() and SN_VPD_FIELD not in self.vpd_data: + logger.log_error("Fail to read serial number: No key {} in VPD {}".format(SN_VPD_FIELD, self.vpd_file)) + return 'N/A' + return self.vpd_data.get(SN_VPD_FIELD, 'N/A') + + def get_revision(self): + """ + Retrieves the hardware revision of the device + + Returns: + string: Revision value of device + """ + if 
self._get_data() and REV_VPD_FIELD not in self.vpd_data: + logger.log_error("Fail to read revision: No key {} in VPD {}".format(REV_VPD_FIELD, self.vpd_file)) + return 'N/A' + return self.vpd_data.get(REV_VPD_FIELD, 'N/A') diff --git a/platform/mellanox/mlnx-platform-api/tests/conftest.py b/platform/mellanox/mlnx-platform-api/tests/conftest.py new file mode 100644 index 000000000000..f232c55c47a2 --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/tests/conftest.py @@ -0,0 +1,44 @@ +# +# Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. +# Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import pytest +import sys + +test_path = os.path.dirname(os.path.abspath(__file__)) +modules_path = os.path.dirname(test_path) +sys.path.insert(0, modules_path) + +os.environ["PLATFORM_API_UNIT_TESTING"] = "1" + +from sonic_platform import utils + +@pytest.fixture(scope='function', autouse=True) +def auto_recover_mock(): + """Auto used fixture to recover some critical mocked functions + """ + origin_os_path_exists = os.path.exists + origin_read_int_from_file = utils.read_int_from_file + origin_read_str_from_file = utils.read_str_from_file + origin_read_float_from_file = utils.read_float_from_file + origin_write_file = utils.write_file + yield + os.path.exists = origin_os_path_exists + utils.read_int_from_file = origin_read_int_from_file + utils.read_str_from_file = origin_read_str_from_file + utils.write_file = origin_write_file + utils.read_float_from_file = origin_read_float_from_file diff --git a/platform/mellanox/mlnx-platform-api/tests/mock_eeprom_data b/platform/mellanox/mlnx-platform-api/tests/mock_eeprom_data new file mode 100644 index 0000000000000000000000000000000000000000..b8b939944f26c9c6daca094216147b614bb64fa0 GIT binary patch literal 606 zcmWH^Df7%r%V%I@3RZOR4fZp(Ffd>s7buB9P0)1?HgfZa8lx=X8)9f_Y#w1?U}|iR zY_!2dO&C;#4b1fo4E2o+j0_YEjjc>gtPBj)7#X#g82)K<_@?IMB 0 + assert len(m._thermal_list) > 0 + + # seq number changes, but state keeps deactivated, no need re-init module + m._get_seq_no = mock.MagicMock(return_value=1) + m._check_state() + assert len(m._sfp_list) > 0 + assert len(m._thermal_list) > 0 + + # seq number not change, state changes from deactivated to activated, need re-init module + utils.read_int_from_file = mock.MagicMock(return_value=1) + m._check_state() + assert len(m._sfp_list) == 0 + assert len(m._thermal_list) == 0 + + # seq number changes, state keeps activated, which means the module has been replaced, need re-init module + m._sfp_list.append(1) + m._thermal_list.append(1) + m._get_seq_no = 
mock.MagicMock(return_value=2) + m._check_state() + assert len(m._sfp_list) == 0 + assert len(m._thermal_list) == 0 + + # seq number not change, state changes from activated to deactivated, need re-init module + m._sfp_list.append(1) + m._thermal_list.append(1) + utils.read_int_from_file = mock.MagicMock(return_value=0) + m._check_state() + assert len(m._sfp_list) == 0 + assert len(m._thermal_list) == 0 + + def test_module_vpd(self): + m = Module(1) + m.vpd_parser.vpd_file = os.path.join(test_path, 'mock_psu_vpd') + + assert m.get_model() == 'MTEF-PSF-AC-C' + assert m.get_serial() == 'MT1946X07684' + assert m.get_revision() == 'A3' + + m.vpd_parser.vpd_file = 'not exists' + assert m.get_model() == 'N/A' + assert m.get_serial() == 'N/A' + assert m.get_revision() == 'N/A' + + m.vpd_parser.vpd_file_last_mtime = None + m.vpd_parser.vpd_file = os.path.join(test_path, 'mock_psu_vpd') + assert m.get_model() == 'MTEF-PSF-AC-C' + assert m.get_serial() == 'MT1946X07684' + assert m.get_revision() == 'A3' diff --git a/platform/mellanox/mlnx-platform-api/tests/test_psu.py b/platform/mellanox/mlnx-platform-api/tests/test_psu.py new file mode 100644 index 000000000000..6de042e7bd8b --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/tests/test_psu.py @@ -0,0 +1,112 @@ +# +# Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. +# Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import sys +if sys.version_info.major == 3: + from unittest import mock +else: + import mock + +test_path = os.path.dirname(os.path.abspath(__file__)) +modules_path = os.path.dirname(test_path) +sys.path.insert(0, modules_path) + +from sonic_platform import utils +from sonic_platform.psu import FixedPsu, Psu + + +class TestPsu: + def test_fixed_psu(self): + psu = FixedPsu(0) + assert psu.get_name() == 'PSU 1' + assert psu.get_model() == 'N/A' + assert psu.get_serial() == 'N/A' + assert psu.get_revision() == 'N/A' + utils.read_int_from_file = mock.MagicMock(return_value=1) + assert psu.get_powergood_status() + utils.read_int_from_file = mock.MagicMock(return_value=0) + assert not psu.get_powergood_status() + assert psu.get_presence() + assert psu.get_voltage() is None + assert psu.get_current() is None + assert psu.get_power() is None + assert psu.get_position_in_parent() == 1 + assert psu.is_replaceable() is False + assert psu.get_temperature() is None + assert psu.get_temperature_high_threshold() is None + + def test_psu(self): + psu = Psu(0) + assert len(psu._fan_list) == 1 + assert psu.get_fan(0).get_name() == 'psu1_fan1' + mock_sysfs_content = { + psu.psu_presence: 1, + psu.psu_oper_status: 1, + psu.psu_voltage: 10234, + psu.psu_current: 20345, + psu.psu_power: 30456, + psu.psu_temp: 40567, + psu.psu_temp_threshold: 50678 + } + + def mock_read_int_from_file(file_path, **kwargs): + return mock_sysfs_content[file_path] + + utils.read_int_from_file = mock_read_int_from_file + assert psu.get_presence() is True + mock_sysfs_content[psu.psu_presence] = 0 + assert psu.get_presence() is False + + assert psu.get_powergood_status() is True + mock_sysfs_content[psu.psu_oper_status] = 0 + assert psu.get_powergood_status() is False + + assert psu.get_voltage() is None + assert psu.get_current() is None + assert psu.get_power() is None + assert psu.get_temperature() is None + assert psu.get_temperature_high_threshold() is None + + 
mock_sysfs_content[psu.psu_oper_status] = 1 + assert psu.get_voltage() == 10.234 + assert psu.get_current() == 20.345 + assert psu.get_power() == 0.030456 + assert psu.get_temperature() == 40.567 + assert psu.get_temperature_high_threshold() == 50.678 + + assert psu.get_position_in_parent() == 1 + assert psu.is_replaceable() is True + + def test_psu_vpd(self): + psu = Psu(0) + psu.vpd_parser.vpd_file = os.path.join(test_path, 'mock_psu_vpd') + + assert psu.get_model() == 'MTEF-PSF-AC-C' + assert psu.get_serial() == 'MT1946X07684' + assert psu.get_revision() == 'A3' + + psu.vpd_parser.vpd_file = 'not exists' + assert psu.get_model() == 'N/A' + assert psu.get_serial() == 'N/A' + assert psu.get_revision() == 'N/A' + + psu.vpd_parser.vpd_file_last_mtime = None + psu.vpd_parser.vpd_file = os.path.join(test_path, 'mock_psu_vpd') + assert psu.get_model() == 'MTEF-PSF-AC-C' + assert psu.get_serial() == 'MT1946X07684' + assert psu.get_revision() == 'A3' diff --git a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py index f85d1891ae7e..0ad9537430b9 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py @@ -16,126 +16,67 @@ # import os import sys -import pytest -from mock import MagicMock -from .mock_platform import MockFan +if sys.version_info.major == 3: + from unittest import mock +else: + import mock test_path = os.path.dirname(os.path.abspath(__file__)) modules_path = os.path.dirname(test_path) sys.path.insert(0, modules_path) -os.environ["PLATFORM_API_UNIT_TESTING"] = "1" - -from sonic_py_common import device_info from sonic_platform.sfp import SFP, SX_PORT_MODULE_STATUS_INITIALIZING, SX_PORT_MODULE_STATUS_PLUGGED, SX_PORT_MODULE_STATUS_UNPLUGGED, SX_PORT_MODULE_STATUS_PLUGGED_WITH_ERROR, SX_PORT_MODULE_STATUS_PLUGGED_DISABLED - from sonic_platform.chassis import Chassis +class TestSfp: + 
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_linecard_count', mock.MagicMock(return_value=8)) + @mock.patch('sonic_platform.device_data.DeviceDataManager.get_linecard_max_port_count') + def test_sfp_index(self, mock_max_port): + sfp = SFP(0) + assert sfp.sdk_index == 0 + assert sfp.index == 1 -def mock_get_platform(): - return 'x86_64-mlnx_msn2410-r0' - - -def mock_read_eeprom_specific_bytes(self, offset, num_bytes): - return None - - -def mock_get_sdk_handle(self): - if not self.sdk_handle: - self.sdk_handle = 1 - return self.sdk_handle - - -def mock_get_sfp_error_code(self): - return self.oper_code, self.error_code - - -device_info.get_platform = mock_get_platform -SFP._read_eeprom_specific_bytes = mock_read_eeprom_specific_bytes -SFP._get_error_code = mock_get_sfp_error_code -Chassis.get_sdk_handle = mock_get_sdk_handle - + mock_max_port.return_value = 16 + sfp = SFP(sfp_index=0, slot_id=1, linecard_port_count=16, lc_name='LINE-CARD1') + assert sfp.sdk_index == 0 + assert sfp.index == 1 -def test_sfp_partial_and_then_full_initialize(): - """ - Verify SFP initialization flow (partial and then full): - 1. get_sfp to tirgger a partial initialization - 2. get_sfp for another SPF module and verify the partial initialization isn't executed again - 3. 
get_all_sfps to trigger a full initialization - """ - chassis = Chassis() + sfp = SFP(sfp_index=5, slot_id=3, linecard_port_count=16, lc_name='LINE-CARD1') + assert sfp.sdk_index == 5 + assert sfp.index == 38 - # Fetch a sfp - # This should trigger SFP modules be partial initialized - sfp1 = chassis.get_sfp(1) - # Verify the SFP list has been created - assert len(chassis._sfp_list) == chassis.PORT_END + 1 - assert chassis.sfp_module_partial_initialized == True - assert chassis.sfp_module_full_initialized == False + sfp = SFP(sfp_index=1, slot_id=1, linecard_port_count=4, lc_name='LINE-CARD1') + assert sfp.sdk_index == 1 + assert sfp.index == 5 - # Fetch another SFP module - sfp2 = chassis.get_sfp(2) - # Verify the previous SFP module isn't changed - assert sfp1 == chassis.get_sfp(1) + @mock.patch('sonic_platform.sfp.SFP._read_eeprom_specific_bytes', mock.MagicMock(return_value=None)) + @mock.patch('sonic_platform.sfp.SFP._get_error_code') + @mock.patch('sonic_platform.chassis.Chassis.get_num_sfps', mock.MagicMock(return_value=2)) + def test_sfp_get_error_status(self, mock_get_error_code): + chassis = Chassis() - # Fetch all SFP modules - allsfp = chassis.get_all_sfps() - # Verify sfp1 and sfp2 aren't changed - assert sfp1 == chassis.get_sfp(1) - assert sfp2 == chassis.get_sfp(2) - # Verify the SFP has been fully initialized - assert chassis.sfp_module_partial_initialized == True - assert chassis.sfp_module_full_initialized == True + # Fetch an SFP module to test + sfp = chassis.get_sfp(1) + description_dict = sfp._get_error_description_dict() + for error in description_dict.keys(): + mock_get_error_code.return_value = (SX_PORT_MODULE_STATUS_PLUGGED_WITH_ERROR, error) + description = sfp.get_error_description() -def test_sfp_full_initialize_without_partial(): - """ - Verify SFP initialization flow (full): - 1. get_all_sfps to trigger a full initialization - 2. 
get_sfp for a certain SFP module and verify the partial initialization isn't executed again - """ - chassis = Chassis() + assert description == description_dict[error] - # Fetch all SFP modules - allsfp = chassis.get_all_sfps() - # Verify the SFP has been fully initialized - assert chassis.sfp_module_partial_initialized == True - assert chassis.sfp_module_full_initialized == True - for sfp in allsfp: - assert sfp is not None - - # Verify when get_sfp is called, the SFP modules won't be initialized again - sfp1 = allsfp[0] - assert sfp1 == chassis.get_sfp(1) - - -def test_sfp_get_error_status(): - chassis = Chassis() - - # Fetch an SFP module to test - sfp = chassis.get_sfp(1) - - description_dict = sfp._get_error_description_dict() - - sfp.oper_code = SX_PORT_MODULE_STATUS_PLUGGED_WITH_ERROR - for error in description_dict.keys(): - sfp.error_code = error + mock_get_error_code.return_value = (SX_PORT_MODULE_STATUS_PLUGGED_WITH_ERROR, -1) description = sfp.get_error_description() - - assert description == description_dict[sfp.error_code] - - sfp.error_code = -1 - description = sfp.get_error_description() - assert description == "Unknown error (-1)" - - expected_description_list = [ - (SX_PORT_MODULE_STATUS_INITIALIZING, "Initializing"), - (SX_PORT_MODULE_STATUS_PLUGGED, "OK"), - (SX_PORT_MODULE_STATUS_UNPLUGGED, "Unplugged"), - (SX_PORT_MODULE_STATUS_PLUGGED_DISABLED, "Disabled") - ] - for oper_code, expected_description in expected_description_list: - sfp.oper_code = oper_code - description = sfp.get_error_description() - - assert description == expected_description + assert description == "Unknown error (-1)" + + expected_description_list = [ + (SX_PORT_MODULE_STATUS_INITIALIZING, "Initializing"), + (SX_PORT_MODULE_STATUS_PLUGGED, "OK"), + (SX_PORT_MODULE_STATUS_UNPLUGGED, "Unplugged"), + (SX_PORT_MODULE_STATUS_PLUGGED_DISABLED, "Disabled") + ] + for oper_code, expected_description in expected_description_list: + mock_get_error_code.return_value = (oper_code, -1) 
+ description = sfp.get_error_description() + + assert description == expected_description diff --git a/platform/mellanox/mlnx-platform-api/tests/test_sfp_event.py b/platform/mellanox/mlnx-platform-api/tests/test_sfp_event.py index 461b2417e598..ef4820ecfd8f 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_sfp_event.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_sfp_event.py @@ -15,10 +15,9 @@ # limitations under the License. # import os -import select import sys -from mock import MagicMock +from mock import MagicMock, patch test_path = os.path.dirname(os.path.abspath(__file__)) modules_path = os.path.dirname(test_path) @@ -30,8 +29,8 @@ class TestSfpEvent(object): @classmethod def setup_class(cls): os.environ["MLNX_PLATFORM_API_UNIT_TESTING"] = "1" - select.select = MagicMock(return_value=([99], None, None)) + @patch('select.select', MagicMock(return_value=([99], None, None))) def test_check_sfp_status(self): from sonic_platform.sfp_event import SDK_SFP_STATE_IN, SDK_SFP_STATE_OUT, SDK_SFP_STATE_ERR from sonic_platform.sfp_event import SDK_ERRORS_TO_ERROR_BITS, SDK_ERRORS_TO_DESCRIPTION, SDK_SFP_BLOCKING_ERRORS diff --git a/platform/mellanox/mlnx-platform-api/tests/test_thermal.py b/platform/mellanox/mlnx-platform-api/tests/test_thermal.py new file mode 100644 index 000000000000..a3e90d7c56cd --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/tests/test_thermal.py @@ -0,0 +1,239 @@ +# +# Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. +# Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import glob
+import os
+import sys
+if sys.version_info.major == 3:
+    from unittest import mock
+else:
+    import mock
+
+test_path = os.path.dirname(os.path.abspath(__file__))
+modules_path = os.path.dirname(test_path)
+sys.path.insert(0, modules_path)
+
+from sonic_platform.chassis import Chassis
+from sonic_platform.device_data import DeviceDataManager
+
+
+class TestThermal:
+    def test_chassis_thermal(self):
+        from sonic_platform.thermal import THERMAL_NAMING_RULE
+        os.path.exists = mock.MagicMock(return_value=True)
+        DeviceDataManager.get_gearbox_count = mock.MagicMock(return_value=2)
+        DeviceDataManager.get_cpu_thermal_count = mock.MagicMock(return_value=2)
+        DeviceDataManager.get_platform_name = mock.MagicMock(return_value='x86_64-mlnx_msn2700-r0')
+        chassis = Chassis()
+        thermal_list = chassis.get_all_thermals()
+        assert thermal_list
+        thermal_dict = {thermal.get_name(): thermal for thermal in thermal_list}
+        gearbox_thermal_rule = None
+        cpu_thermal_rule = None
+        for rule in THERMAL_NAMING_RULE['chassis thermals']:
+            thermal_type = rule.get('type', 'single')
+            if thermal_type == 'single':
+                thermal_name = rule['name']
+                if rule['temperature'] == 'comex_amb':
+                    assert thermal_name not in thermal_dict
+                    continue
+                assert thermal_name in thermal_dict
+                thermal = thermal_dict[thermal_name]
+                assert rule['temperature'] in thermal.temperature
+                assert rule['high_threshold'] in thermal.high_threshold if 'high_threshold' in rule else thermal.high_threshold is None
+                assert rule['high_critical_threshold'] in thermal.high_critical_threshold if 'high_critical_threshold' in rule else thermal.high_critical_threshold is None
+            else:
+                if 'Gearbox' in rule['name']:
+                    gearbox_thermal_rule = rule
+                elif 'CPU Core' in rule['name']:
+                    cpu_thermal_rule = rule
+
+        gearbox_thermal_count = 0
+        cpu_thermal_count = 0
+        for thermal in thermal_list:
+            if 'Gearbox' in thermal.get_name():
+                start_index = gearbox_thermal_rule.get('start_index', 1)
+                start_index += gearbox_thermal_count
+                assert thermal.get_name() == gearbox_thermal_rule['name'].format(start_index)
+                assert gearbox_thermal_rule['temperature'].format(start_index) in thermal.temperature
+                assert gearbox_thermal_rule['high_threshold'].format(start_index) in thermal.high_threshold
+                assert gearbox_thermal_rule['high_critical_threshold'].format(start_index) in thermal.high_critical_threshold
+                gearbox_thermal_count += 1
+            elif 'CPU Core' in thermal.get_name():
+                start_index = cpu_thermal_rule.get('start_index', 1)
+                start_index += cpu_thermal_count
+                assert thermal.get_name() == cpu_thermal_rule['name'].format(start_index)
+                assert cpu_thermal_rule['temperature'].format(start_index) in thermal.temperature
+                assert cpu_thermal_rule['high_threshold'].format(start_index) in thermal.high_threshold
+                assert cpu_thermal_rule['high_critical_threshold'].format(start_index) in thermal.high_critical_threshold
+                cpu_thermal_count += 1
+
+        assert gearbox_thermal_count == 2
+        assert cpu_thermal_count == 2
+
+    def test_psu_thermal(self):
+        from sonic_platform.thermal import initialize_psu_thermal, THERMAL_NAMING_RULE
+        os.path.exists = mock.MagicMock(return_value=True)
+        presence_cb = mock.MagicMock(return_value=(True, ''))
+        thermal_list = initialize_psu_thermal(0, presence_cb)
+        assert len(thermal_list) == 1
+        thermal = thermal_list[0]
+        rule = THERMAL_NAMING_RULE['psu thermals']
+        start_index = rule.get('start_index', 1)
+        assert thermal.get_name() == rule['name'].format(start_index)
+        assert rule['temperature'].format(start_index) in thermal.temperature
+        assert rule['high_threshold'].format(start_index) in thermal.high_threshold
+        assert thermal.high_critical_threshold is None
+        assert thermal.get_position_in_parent() == 1
+        assert thermal.is_replaceable() == False
+
+        presence_cb = mock.MagicMock(return_value=(False, 'Not present'))
+        thermal_list = initialize_psu_thermal(0, presence_cb)
+        assert len(thermal_list) == 1
+        thermal = thermal_list[0]
+        assert thermal.get_temperature() is None
+        assert thermal.get_high_threshold() is None
+        assert thermal.get_high_critical_threshold() is None
+
+    def test_sfp_thermal(self):
+        from sonic_platform.thermal import initialize_sfp_thermal, THERMAL_NAMING_RULE
+        os.path.exists = mock.MagicMock(return_value=True)
+        thermal_list = initialize_sfp_thermal(0)
+        assert len(thermal_list) == 1
+        thermal = thermal_list[0]
+        rule = THERMAL_NAMING_RULE['sfp thermals']
+        start_index = rule.get('start_index', 1)
+        assert thermal.get_name() == rule['name'].format(start_index)
+        assert rule['temperature'].format(start_index) in thermal.temperature
+        assert rule['high_threshold'].format(start_index) in thermal.high_threshold
+        assert rule['high_critical_threshold'].format(start_index) in thermal.high_critical_threshold
+        assert thermal.get_position_in_parent() == 1
+        assert thermal.is_replaceable() == False
+
+    def test_get_temperature(self):
+        from sonic_platform.thermal import Thermal
+        from sonic_platform import utils
+        thermal = Thermal('test', 'temp_file', None, None, 1)
+        utils.read_float_from_file = mock.MagicMock(return_value=35727)
+        assert thermal.get_temperature() == 35.727
+
+        utils.read_float_from_file = mock.MagicMock(return_value=0.0)
+        assert thermal.get_temperature() is None
+
+        utils.read_float_from_file = mock.MagicMock(return_value=None)
+        assert thermal.get_temperature() is None
+
+    def test_get_high_threshold(self):
+        from sonic_platform.thermal import Thermal
+        from sonic_platform import utils
+        thermal = Thermal('test', None, None, None, 1)
+        assert thermal.get_high_threshold() is None
+
+        thermal.high_threshold = 'high_th_file'
+        utils.read_float_from_file = mock.MagicMock(return_value=25833)
+        assert thermal.get_high_threshold() == 25.833  # exercise the threshold getter, not get_temperature()
+
+        utils.read_float_from_file = mock.MagicMock(return_value=0.0)
+        assert thermal.get_high_threshold() is None
+
+        utils.read_float_from_file = mock.MagicMock(return_value=None)
+        assert thermal.get_high_threshold() is None
+
+    def test_get_high_critical_threshold(self):
+        from sonic_platform.thermal import Thermal
+        from sonic_platform import utils
+        thermal = Thermal('test', None, None, None, 1)
+        assert thermal.get_high_critical_threshold() is None
+
+        thermal.high_critical_threshold = 'high_th_file'
+        utils.read_float_from_file = mock.MagicMock(return_value=120839)
+        assert thermal.get_high_critical_threshold() == 120.839
+
+        utils.read_float_from_file = mock.MagicMock(return_value=0.0)
+        assert thermal.get_high_critical_threshold() is None
+
+        utils.read_float_from_file = mock.MagicMock(return_value=None)
+        assert thermal.get_high_critical_threshold() is None
+
+    def test_set_thermal_algorithm_status(self):
+        from sonic_platform.thermal import Thermal, THERMAL_ZONE_FOLDER_WILDCARD, THERMAL_ZONE_POLICY_FILE, THERMAL_ZONE_MODE_FILE
+        from sonic_platform import utils
+        glob.iglob = mock.MagicMock(return_value=['thermal_zone1', 'thermal_zone2'])
+        utils.write_file = mock.MagicMock()
+        assert Thermal.set_thermal_algorithm_status(True, False)
+
+        for folder in glob.iglob(THERMAL_ZONE_FOLDER_WILDCARD):
+            utils.write_file.assert_any_call(os.path.join(folder, THERMAL_ZONE_POLICY_FILE), 'step_wise')
+            utils.write_file.assert_any_call(os.path.join(folder, THERMAL_ZONE_MODE_FILE), 'enabled')
+
+        assert Thermal.set_thermal_algorithm_status(False, False)
+        for folder in glob.iglob(THERMAL_ZONE_FOLDER_WILDCARD):
+            utils.write_file.assert_any_call(os.path.join(folder, THERMAL_ZONE_POLICY_FILE), 'user_space')
+            utils.write_file.assert_any_call(os.path.join(folder, THERMAL_ZONE_MODE_FILE), 'disabled')
+
+        assert not Thermal.set_thermal_algorithm_status(False, False)
+
+        assert Thermal.set_thermal_algorithm_status(False)
+
+    def test_check_thermal_zone_temperature(self):
+        from sonic_platform.thermal import Thermal, THERMAL_ZONE_FOLDER_WILDCARD, THERMAL_ZONE_THRESHOLD_FILE, THERMAL_ZONE_TEMP_FILE
+        from sonic_platform import utils
+        glob.iglob = mock.MagicMock(return_value=['thermal_zone1', 'thermal_zone2'])
+
+        utils.read_int_from_file = mock.MagicMock(side_effect=Exception(''))
+        assert not Thermal.check_thermal_zone_temperature()
+
+        mock_file_content = {}
+        def mock_read_int_from_file(file_path, default=0, raise_exception=False):
+            return mock_file_content[file_path]
+
+        utils.read_int_from_file = mock_read_int_from_file
+        mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_THRESHOLD_FILE)] = 25
+        mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_TEMP_FILE)] = 30
+        mock_file_content[os.path.join('thermal_zone2', THERMAL_ZONE_THRESHOLD_FILE)] = 25
+        mock_file_content[os.path.join('thermal_zone2', THERMAL_ZONE_TEMP_FILE)] = 24
+        assert not Thermal.check_thermal_zone_temperature()
+
+        mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_TEMP_FILE)] = 24
+        assert Thermal.check_thermal_zone_temperature()
+
+    def test_check_module_temperature_trustable(self):
+        from sonic_platform.thermal import Thermal
+        from sonic_platform import utils
+        glob.iglob = mock.MagicMock(return_value=['thermal_zone1', 'thermal_zone2'])
+
+        utils.read_int_from_file = mock.MagicMock(return_value=1)
+        assert Thermal.check_module_temperature_trustable() == 'untrust'
+
+        utils.read_int_from_file = mock.MagicMock(return_value=0)
+        assert Thermal.check_module_temperature_trustable() == 'trust'
+
+    def test_get_min_amb_temperature(self):
+        from sonic_platform.thermal import Thermal, MAX_AMBIENT_TEMP, CHASSIS_THERMAL_SYSFS_FOLDER
+        from sonic_platform import utils
+
+        utils.read_int_from_file = mock.MagicMock(side_effect=Exception(''))
+        assert Thermal.get_min_amb_temperature() == MAX_AMBIENT_TEMP
+
+        mock_file_content = {}
+        def mock_read_int_from_file(file_path, default=0, raise_exception=False):
+            return mock_file_content[file_path]
+
+        utils.read_int_from_file = mock_read_int_from_file
+        mock_file_content[os.path.join(CHASSIS_THERMAL_SYSFS_FOLDER, 'fan_amb')] = 50
+        mock_file_content[os.path.join(CHASSIS_THERMAL_SYSFS_FOLDER, 'port_amb')] = 40
+        assert Thermal.get_min_amb_temperature() == 40
diff --git a/platform/mellanox/mlnx-platform-api/tests/test_thermal_policy.py b/platform/mellanox/mlnx-platform-api/tests/test_thermal_policy.py
index 27f512f048eb..21189d9defdd 100644
--- a/platform/mellanox/mlnx-platform-api/tests/test_thermal_policy.py
+++ b/platform/mellanox/mlnx-platform-api/tests/test_thermal_policy.py
@@ -29,9 +29,20 @@
 from sonic_platform.thermal_infos import FanInfo, PsuInfo
 from sonic_platform.fan import Fan
 from sonic_platform.thermal import Thermal
+from sonic_platform.device_data import DeviceDataManager
 
-Thermal.check_thermal_zone_temperature = MagicMock()
-Thermal.set_thermal_algorithm_status = MagicMock()
+
+@pytest.fixture(scope='module', autouse=True)
+def configure_mocks():
+    check_thermal_zone_temperature = Thermal.check_thermal_zone_temperature
+    set_thermal_algorithm_status = Thermal.set_thermal_algorithm_status
+    Thermal.check_thermal_zone_temperature = MagicMock()
+    Thermal.set_thermal_algorithm_status = MagicMock()
+
+    yield
+
+    Thermal.check_thermal_zone_temperature = check_thermal_zone_temperature
+    Thermal.set_thermal_algorithm_status = set_thermal_algorithm_status
 
 
 @pytest.fixture(scope='session', autouse=True)
@@ -489,7 +500,7 @@ def check_minimum_table_data(platform, minimum_table):
 def test_dynamic_minimum_policy(thermal_manager):
     from sonic_platform.thermal_conditions import MinCoolingLevelChangeCondition
     from sonic_platform.thermal_actions import ChangeMinCoolingLevelAction
-    from sonic_platform.thermal_infos import ChassisInfo
+    from sonic_platform.thermal_infos import ChassisInfo, FanInfo
     from sonic_platform.thermal import Thermal
     from sonic_platform.fan import Fan
     ThermalManager.initialize()
@@ -516,10 +527,15 @@ def test_dynamic_minimum_policy(thermal_manager):
     assert MinCoolingLevelChangeCondition.temperature == 25
 
     chassis = MockChassis()
-    chassis.platform_name = 'invalid'
     info = ChassisInfo()
     info._chassis = chassis
-    thermal_info_dict = {ChassisInfo.INFO_NAME: info}
+    fan_info = FanInfo()
+
+    thermal_info_dict = {
+        ChassisInfo.INFO_NAME: info,
+        FanInfo.INFO_NAME: fan_info
+    }
+    DeviceDataManager.get_platform_name = MagicMock(return_value=None)
     Fan.get_cooling_level = MagicMock(return_value=5)
     Fan.set_cooling_level = MagicMock()
     action.execute(thermal_info_dict)
@@ -527,7 +543,8 @@ def test_dynamic_minimum_policy(thermal_manager):
     Fan.set_cooling_level.assert_called_with(6, 6)
     Fan.set_cooling_level.call_count = 0
 
-    chassis.platform_name = 'x86_64-mlnx_msn2700-r0'
+    DeviceDataManager.get_platform_name = MagicMock(return_value='x86_64-mlnx_msn2700-r0')
+    print('Before execute')
     action.execute(thermal_info_dict)
     assert Fan.min_cooling_level == 3
     Fan.set_cooling_level.assert_called_with(3, 5)
diff --git a/platform/mellanox/mlnx-platform-api/tests/test_utils.py b/platform/mellanox/mlnx-platform-api/tests/test_utils.py
new file mode 100644
index 000000000000..7da17dc5e7bc
--- /dev/null
+++ b/platform/mellanox/mlnx-platform-api/tests/test_utils.py
@@ -0,0 +1,118 @@
+#
+# Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
+# Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import pytest
+import sys
+if sys.version_info.major == 3:
+    from unittest import mock
+else:
+    import mock
+
+test_path = os.path.dirname(os.path.abspath(__file__))
+modules_path = os.path.dirname(test_path)
+sys.path.insert(0, modules_path)
+
+from sonic_platform import utils
+
+
+class TestUtils:
+    def test_read_file(self):
+        ret = utils.read_str_from_file('not exist', 'default return')
+        assert ret == 'default return'
+
+        with pytest.raises(IOError):
+            # raise_exception=True: the read error must propagate instead of yielding the default
+            utils.read_str_from_file('not exist', 'default return', raise_exception=True)
+
+        ret = utils.read_int_from_file('not exist', 100)
+        assert ret == 100
+
+        with pytest.raises(IOError):
+            # raise_exception=True: the read error must propagate instead of yielding the default
+            utils.read_int_from_file('not exist', 200, raise_exception=True)
+
+        ret = utils.read_float_from_file('not exist', 3.14)
+        assert ret == 3.14
+
+        with pytest.raises(IOError):
+            # raise_exception=True: the read error must propagate instead of yielding the default
+            utils.read_float_from_file('not exist', 2.25, raise_exception=True)
+
+    def test_write_file(self):
+        file_path = '/tmp/test.txt'
+        utils.write_file(file_path, ' hello ')
+        assert utils.read_str_from_file(file_path) == 'hello'
+
+        utils.write_file(file_path, '123 ')
+        assert utils.read_int_from_file(file_path) == 123
+
+        utils.write_file(file_path, '3.14 ')
+        assert utils.read_float_from_file(file_path) == 3.14
+
+        with pytest.raises(IOError):
+            utils.write_file('/not/exist/file', '123', raise_exception=True)
+
+    def test_pre_initialize(self):
+        mock_call = mock.MagicMock()
+
+        class A:
+            @utils.pre_initialize(mock_call)
+            def func(self):
+                pass
+
+        A().func()
+        assert mock_call.call_count == 1
+
+    def test_pre_initialize_one(self):
+        mock_call = mock.MagicMock()
+
+        class A:
+            @utils.pre_initialize_one(mock_call)
+            def func(self, index):
+                pass
+
+        a = A()
+        a.func(34)
+        mock_call.assert_called_once_with(a, 34)
+
+    def test_read_only_cache(self):
+        value = 100
+
+        def func():
+            return value
+
+        assert func() == 100
+        value = 1000
+        assert func() == 1000
+
+        @utils.read_only_cache()
+        def func():
+            return value
+
+        assert func() == 1000
+        value = 10000
+        assert func() == 1000  # decorated func keeps returning its first result after value changes
+
+    @mock.patch('sonic_py_common.logger.Logger.log_debug')
+    def test_default_return(self, mock_log):
+        @utils.default_return(100, log_func=mock_log)
+        def func():
+            raise RuntimeError('')
+
+        assert func() == 100
+        assert mock_log.call_count == 1