diff --git a/orchagent/aclorch.cpp b/orchagent/aclorch.cpp index 116d315f7f4..84e7a12b69a 100644 --- a/orchagent/aclorch.cpp +++ b/orchagent/aclorch.cpp @@ -2311,7 +2311,11 @@ void AclOrch::init(vector& connectors, PortsOrch *portOrch, Mirr else { SWSS_LOG_ERROR("Failed to get ACL entry priority min/max values, rv:%d", status); - throw "AclOrch initialization failure"; + task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status); + if (handle_status != task_process_status::task_success) + { + throw "AclOrch initialization failure"; + } } queryAclActionCapability(); diff --git a/orchagent/copporch.cpp b/orchagent/copporch.cpp index 403fcb98d91..34d83dd2746 100644 --- a/orchagent/copporch.cpp +++ b/orchagent/copporch.cpp @@ -179,7 +179,11 @@ void CoppOrch::initDefaultTrapGroup() if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to get default trap group, rv:%d", status); - throw "CoppOrch initialization failure"; + task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status); + if (handle_status != task_process_status::task_success) + { + throw "CoppOrch initialization failure"; + } } SWSS_LOG_INFO("Get default trap group"); diff --git a/orchagent/crmorch.cpp b/orchagent/crmorch.cpp index 659d35fb728..829d7a9ac6b 100644 --- a/orchagent/crmorch.cpp +++ b/orchagent/crmorch.cpp @@ -474,7 +474,11 @@ void CrmOrch::getResAvailableCounters() break; } SWSS_LOG_ERROR("Failed to get switch attribute %u , rv:%d", attr.id, status); - break; + task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status); + if (handle_status != task_process_status::task_success) + { + break; + } } res.second.countersMap[CRM_COUNTERS_TABLE_KEY].availableCounter = attr.value.u32; @@ -500,7 +504,11 @@ void CrmOrch::getResAvailableCounters() if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to get switch attribute %u , rv:%d", attr.id, status); - break; + task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, 
status); + if (handle_status != task_process_status::task_success) + { + break; + } } for (uint32_t i = 0; i < attr.value.aclresource.count; i++) diff --git a/orchagent/fdborch.cpp b/orchagent/fdborch.cpp index cd4888bc1cc..ae432773905 100644 --- a/orchagent/fdborch.cpp +++ b/orchagent/fdborch.cpp @@ -503,7 +503,11 @@ bool FdbOrch::getPort(const MacAddress& mac, uint16_t vlan, Port& port) { SWSS_LOG_ERROR("Failed to get bridge port ID for FDB entry %s, rv:%d", mac.to_string().c_str(), status); - return false; + task_process_status handle_status = handleSaiGetStatus(SAI_API_FDB, status); + if (handle_status != task_process_status::task_success) + { + return false; + } } if (!m_portsOrch->getPortByBridgePortId(attr.value.oid, port)) diff --git a/orchagent/fgnhgorch.cpp b/orchagent/fgnhgorch.cpp index de791678ac0..4111665e09d 100644 --- a/orchagent/fgnhgorch.cpp +++ b/orchagent/fgnhgorch.cpp @@ -294,11 +294,15 @@ bool FgNhgOrch::createFineGrainedNextHopGroup(FGNextHopGroupEntry &syncd_fg_rout { SWSS_LOG_ERROR("Failed to query next hop group %s SAI_NEXT_HOP_GROUP_ATTR_REAL_SIZE, rv:%d", nextHops.to_string().c_str(), status); - if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry)) + task_process_status handle_status = handleSaiGetStatus(SAI_API_NEXT_HOP_GROUP, status); + if (handle_status != task_process_status::task_success) { - SWSS_LOG_ERROR("Failed to clean-up after next hop group real_size query failure"); + if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry)) + { + SWSS_LOG_ERROR("Failed to clean-up after next hop group real_size query failure"); + } + return false; } - return false; } fgNhgEntry->real_bucket_size = nhg_attr.value.u32; } diff --git a/orchagent/orch.cpp b/orchagent/orch.cpp index d6d1eb6840a..44a21ac7372 100644 --- a/orchagent/orch.cpp +++ b/orchagent/orch.cpp @@ -765,6 +765,35 @@ task_process_status Orch::handleSaiRemoveStatus(sai_api_t api, sai_status_t stat return task_need_retry; } +task_process_status 
Orch::handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context) +{ + /* + * This function aims to provide coarse handling of failures in sairedis get + * operation (i.e., notify users by throwing exceptions when failures happen). + * Return value: task_success - Handled the status successfully. No need to retry this SAI operation. + * task_need_retry - Cannot handle the status. Need to retry the SAI operation. + * task_failed - Failed to handle the status but another attempt is unlikely to resolve the failure. + * TODO: 1. Add general handling logic for specific statuses + * 2. Develop fine-grain failure handling mechanisms and replace this coarse handling + * in each orch. + * 3. Take the type of sai api into consideration. + */ + switch (status) + { + case SAI_STATUS_SUCCESS: + SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiGetStatus"); + return task_success; + case SAI_STATUS_NOT_IMPLEMENTED: + SWSS_LOG_ERROR("Encountered failure in get operation due to the function is not implemented, exiting orchagent, SAI API: %s", + sai_serialize_api(api).c_str()); + throw std::logic_error("SAI get function not implemented"); + default: + SWSS_LOG_ERROR("Encountered failure in get operation, SAI API: %s, status: %s", + sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); + } + return task_failed; +} + bool Orch::parseHandleSaiStatusFailure(task_process_status status) { /* diff --git a/orchagent/orch.h b/orchagent/orch.h index b61cdb53e2b..766d02c7661 100644 --- a/orchagent/orch.h +++ b/orchagent/orch.h @@ -240,6 +240,7 @@ class Orch virtual task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, void *context = nullptr); virtual task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void *context = nullptr); virtual task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context = nullptr); + virtual task_process_status handleSaiGetStatus(sai_api_t api, 
sai_status_t status, void *context = nullptr); bool parseHandleSaiStatusFailure(task_process_status status); private: void removeMeFromObjsReferencedByMe(type_map &type_maps, const std::string &table, const std::string &obj_name, const std::string &field, const std::string &old_referenced_obj_name); diff --git a/orchagent/portsorch.cpp b/orchagent/portsorch.cpp index 049b80fda9a..31076d70ead 100755 --- a/orchagent/portsorch.cpp +++ b/orchagent/portsorch.cpp @@ -328,7 +328,11 @@ PortsOrch::PortsOrch(DBConnector *db, vector &tableNames) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to get CPU port, rv:%d", status); - throw runtime_error("PortsOrch initialization failure"); + task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status); + if (handle_status != task_process_status::task_success) + { + throw runtime_error("PortsOrch initialization failure"); + } } m_cpuPort = Port("CPU", Port::CPU); @@ -343,7 +347,11 @@ PortsOrch::PortsOrch(DBConnector *db, vector &tableNames) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to get port number, rv:%d", status); - throw runtime_error("PortsOrch initialization failure"); + task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status); + if (handle_status != task_process_status::task_success) + { + throw runtime_error("PortsOrch initialization failure"); + } } m_portCount = attr.value.u32; @@ -361,7 +369,11 @@ PortsOrch::PortsOrch(DBConnector *db, vector &tableNames) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to get port list, rv:%d", status); - throw runtime_error("PortsOrch initialization failure"); + task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status); + if (handle_status != task_process_status::task_success) + { + throw runtime_error("PortsOrch initialization failure"); + } } /* Get port hardware lane info */ @@ -376,7 +388,11 @@ PortsOrch::PortsOrch(DBConnector *db, vector &tableNames) if (status != SAI_STATUS_SUCCESS) 
{ SWSS_LOG_ERROR("Failed to get hardware lane list pid:%" PRIx64, port_list[i]); - throw runtime_error("PortsOrch initialization failure"); + task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status); + if (handle_status != task_process_status::task_success) + { + throw runtime_error("PortsOrch initialization failure"); + } } set tmp_lane_set; @@ -407,7 +423,11 @@ PortsOrch::PortsOrch(DBConnector *db, vector &tableNames) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to get default 1Q bridge and/or default VLAN, rv:%d", status); - throw runtime_error("PortsOrch initialization failure"); + task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status); + if (handle_status != task_process_status::task_success) + { + throw runtime_error("PortsOrch initialization failure"); + } } m_default1QBridge = attrs[0].value.oid; @@ -437,7 +457,11 @@ void PortsOrch::removeDefaultVlanMembers() if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to get VLAN member list in default VLAN, rv:%d", status); - throw runtime_error("PortsOrch initialization failure"); + task_process_status handle_status = handleSaiGetStatus(SAI_API_VLAN, status); + if (handle_status != task_process_status::task_success) + { + throw runtime_error("PortsOrch initialization failure"); + } } /* Remove VLAN members in default VLAN */ @@ -471,7 +495,11 @@ void PortsOrch::removeDefaultBridgePorts() if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to get bridge port list in default 1Q bridge, rv:%d", status); - throw runtime_error("PortsOrch initialization failure"); + task_process_status handle_status = handleSaiGetStatus(SAI_API_BRIDGE, status); + if (handle_status != task_process_status::task_success) + { + throw runtime_error("PortsOrch initialization failure"); + } } auto bridge_port_count = attr.value.objlist.count; @@ -486,7 +514,11 @@ void PortsOrch::removeDefaultBridgePorts() if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to get 
bridge port type, rv:%d", status); - throw runtime_error("PortsOrch initialization failure"); + task_process_status handle_status = handleSaiGetStatus(SAI_API_BRIDGE, status); + if (handle_status != task_process_status::task_success) + { + throw runtime_error("PortsOrch initialization failure"); + } } if (attr.value.s32 == SAI_BRIDGE_PORT_TYPE_PORT) { @@ -880,7 +912,11 @@ bool PortsOrch::getPortAdminStatus(sai_object_id_t id, bool &up) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to get admin status for port pid:%" PRIx64, id); - return false; + task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status); + if (handle_status != task_process_status::task_success) + { + return false; + } } up = attr.value.booldata; @@ -1805,9 +1841,19 @@ bool PortsOrch::getPortSpeed(sai_object_id_t id, sai_uint32_t &speed) status = sai_port_api->get_port_attribute(id, 1, &attr); if (status == SAI_STATUS_SUCCESS) + { speed = attr.value.u32; + } + else + { + task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status); + if (handle_status != task_process_status::task_success) + { + return false; + } + } - return status == SAI_STATUS_SUCCESS; + return true; } bool PortsOrch::setPortAdvSpeed(sai_object_id_t port_id, sai_uint32_t speed) @@ -1847,7 +1893,11 @@ bool PortsOrch::getQueueTypeAndIndex(sai_object_id_t queue_id, string &type, uin if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to get queue type and index for queue %" PRIu64 " rv:%d", queue_id, status); - return false; + task_process_status handle_status = handleSaiGetStatus(SAI_API_QUEUE, status); + if (handle_status != task_process_status::task_success) + { + return false; + } } switch (attr[0].value.s32) @@ -3473,7 +3523,11 @@ void PortsOrch::initializeQueues(Port &port) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to get number of queues for port %s rv:%d", port.m_alias.c_str(), status); - throw runtime_error("PortsOrch initialization failure."); + 
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status); + if (handle_status != task_process_status::task_success) + { + throw runtime_error("PortsOrch initialization failure."); + } } SWSS_LOG_INFO("Get %d queues for port %s", attr.value.u32, port.m_alias.c_str()); @@ -3493,7 +3547,11 @@ void PortsOrch::initializeQueues(Port &port) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to get queue list for port %s rv:%d", port.m_alias.c_str(), status); - throw runtime_error("PortsOrch initialization failure."); + task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status); + if (handle_status != task_process_status::task_success) + { + throw runtime_error("PortsOrch initialization failure."); + } } SWSS_LOG_INFO("Get queues for port %s", port.m_alias.c_str()); @@ -3509,7 +3567,11 @@ void PortsOrch::initializePriorityGroups(Port &port) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to get number of priority groups for port %s rv:%d", port.m_alias.c_str(), status); - throw runtime_error("PortsOrch initialization failure."); + task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status); + if (handle_status != task_process_status::task_success) + { + throw runtime_error("PortsOrch initialization failure."); + } } SWSS_LOG_INFO("Get %d priority groups for port %s", attr.value.u32, port.m_alias.c_str()); @@ -3530,7 +3592,11 @@ void PortsOrch::initializePriorityGroups(Port &port) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Fail to get priority group list for port %s rv:%d", port.m_alias.c_str(), status); - throw runtime_error("PortsOrch initialization failure."); + task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status); + if (handle_status != task_process_status::task_success) + { + throw runtime_error("PortsOrch initialization failure."); + } } SWSS_LOG_INFO("Get priority groups for port %s", port.m_alias.c_str()); } @@ -4844,7 +4910,11 @@ bool 
PortsOrch::setPortSerdesAttribute(sai_object_id_t port_id, { SWSS_LOG_ERROR("Failed to get port attr serdes id %d to port pid:0x%" PRIx64, port_attr.id, port_id); - return false; + task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status); + if (handle_status != task_process_status::task_success) + { + return false; + } } if (port_attr.value.oid != SAI_NULL_OBJECT_ID) diff --git a/orchagent/qosorch.cpp b/orchagent/qosorch.cpp index 31e61b5433e..c2e15aa7631 100644 --- a/orchagent/qosorch.cpp +++ b/orchagent/qosorch.cpp @@ -933,7 +933,11 @@ sai_object_id_t QosOrch::getSchedulerGroup(const Port &port, const sai_object_id if (SAI_STATUS_SUCCESS != sai_status) { SWSS_LOG_ERROR("Failed to get number of scheduler groups for port:%s", port.m_alias.c_str()); - return SAI_NULL_OBJECT_ID; + task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, sai_status); + if (handle_status != task_process_status::task_success) + { + return SAI_NULL_OBJECT_ID; + } } /* Get total groups list on the port */ @@ -947,7 +951,11 @@ sai_object_id_t QosOrch::getSchedulerGroup(const Port &port, const sai_object_id if (SAI_STATUS_SUCCESS != sai_status) { SWSS_LOG_ERROR("Failed to get scheduler group list for port:%s", port.m_alias.c_str()); - return SAI_NULL_OBJECT_ID; + task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, sai_status); + if (handle_status != task_process_status::task_success) + { + return SAI_NULL_OBJECT_ID; + } } m_scheduler_group_port_info[port.m_port_id] = { @@ -969,7 +977,11 @@ sai_object_id_t QosOrch::getSchedulerGroup(const Port &port, const sai_object_id if (SAI_STATUS_SUCCESS != sai_status) { SWSS_LOG_ERROR("Failed to get child count for scheduler group:0x%" PRIx64 " of port:%s", group_id, port.m_alias.c_str()); - return SAI_NULL_OBJECT_ID; + task_process_status handle_status = handleSaiGetStatus(SAI_API_SCHEDULER_GROUP, sai_status); + if (handle_status != task_process_status::task_success) + { + return SAI_NULL_OBJECT_ID; + } 
} uint32_t child_count = attr.value.u32; @@ -988,7 +1000,11 @@ sai_object_id_t QosOrch::getSchedulerGroup(const Port &port, const sai_object_id if (SAI_STATUS_SUCCESS != sai_status) { SWSS_LOG_ERROR("Failed to get child list for scheduler group:0x%" PRIx64 " of port:%s", group_id, port.m_alias.c_str()); - return SAI_NULL_OBJECT_ID; + task_process_status handle_status = handleSaiGetStatus(SAI_API_SCHEDULER_GROUP, sai_status); + if (handle_status != task_process_status::task_success) + { + return SAI_NULL_OBJECT_ID; + } } m_scheduler_group_port_info[port.m_port_id].child_groups[ii] = std::move(child_groups);