Skip to content

Commit

Permalink
Add failure handling for SAI get operations (#1768)
Browse files Browse the repository at this point in the history
What I did
Add failure handling for SAI get operations. The function allows handling failures in SAI get operations according to the orch type, SAI type, SAI status.

Why I did it
Enable custom failure handling for SAI get operations.
  • Loading branch information
shi-su authored and judyjoseph committed Aug 10, 2021
1 parent 8e6a04e commit e99beb6
Show file tree
Hide file tree
Showing 12 changed files with 233 additions and 44 deletions.
6 changes: 5 additions & 1 deletion orchagent/aclorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2309,7 +2309,11 @@ void AclOrch::init(vector<TableConnector>& connectors, PortsOrch *portOrch, Mirr
else
{
SWSS_LOG_ERROR("Failed to get ACL entry priority min/max values, rv:%d", status);
throw "AclOrch initialization failure";
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
throw "AclOrch initialization failure";
}
}

queryAclActionCapability();
Expand Down
6 changes: 5 additions & 1 deletion orchagent/copporch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,11 @@ void CoppOrch::initDefaultTrapGroup()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get default trap group, rv:%d", status);
throw "CoppOrch initialization failure";
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
throw "CoppOrch initialization failure";
}
}

SWSS_LOG_INFO("Get default trap group");
Expand Down
12 changes: 10 additions & 2 deletions orchagent/crmorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,11 @@ void CrmOrch::getResAvailableCounters()
break;
}
SWSS_LOG_ERROR("Failed to get switch attribute %u , rv:%d", attr.id, status);
break;
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
break;
}
}

res.second.countersMap[CRM_COUNTERS_TABLE_KEY].availableCounter = attr.value.u32;
Expand Down Expand Up @@ -517,7 +521,11 @@ void CrmOrch::getResAvailableCounters()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get switch attribute %u , rv:%d", attr.id, status);
break;
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
break;
}
}

for (uint32_t i = 0; i < attr.value.aclresource.count; i++)
Expand Down
36 changes: 30 additions & 6 deletions orchagent/fabricportsorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,11 @@ int FabricPortsOrch::getFabricPortList()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get fabric port number, rv:%d", status);
return FABRIC_PORT_ERROR;
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
return FABRIC_PORT_ERROR;
}
}
m_fabricPortCount = attr.value.u32;
SWSS_LOG_NOTICE("Get %d fabric ports", m_fabricPortCount);
Expand All @@ -101,7 +105,11 @@ int FabricPortsOrch::getFabricPortList()
status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr);
if (status != SAI_STATUS_SUCCESS)
{
throw runtime_error("FabricPortsOrch get port list failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("FabricPortsOrch get port list failure");
}
}

for (i = 0; i < m_fabricPortCount; i++)
Expand All @@ -113,7 +121,11 @@ int FabricPortsOrch::getFabricPortList()
status = sai_port_api->get_port_attribute(fabric_port_list[i], 1, &attr);
if (status != SAI_STATUS_SUCCESS)
{
throw runtime_error("FabricPortsOrch get port lane failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("FabricPortsOrch get port lane failure");
}
}
int lane = attr.value.u32list.list[0];
m_fabricLanePortMap[lane] = fabric_port_list[i];
Expand Down Expand Up @@ -198,7 +210,11 @@ void FabricPortsOrch::updateFabricPortState()
{
// Port may not be ready for query
SWSS_LOG_ERROR("Failed to get fabric port (%d) status, rv:%d", lane, status);
return;
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
return;
}
}

if (m_portStatus.find(lane) != m_portStatus.end() &&
Expand All @@ -215,15 +231,23 @@ void FabricPortsOrch::updateFabricPortState()
status = sai_port_api->get_port_attribute(port, 1, &attr);
if (status != SAI_STATUS_SUCCESS)
{
throw runtime_error("FabricPortsOrch get remote id failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("FabricPortsOrch get remote id failure");
}
}
remote_peer = attr.value.u32;

attr.id = SAI_PORT_ATTR_FABRIC_ATTACHED_PORT_INDEX;
status = sai_port_api->get_port_attribute(port, 1, &attr);
if (status != SAI_STATUS_SUCCESS)
{
throw runtime_error("FabricPortsOrch get remote port index failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("FabricPortsOrch get remote port index failure");
}
}
remote_port = attr.value.u32;
}
Expand Down
6 changes: 5 additions & 1 deletion orchagent/fdborch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,11 @@ bool FdbOrch::getPort(const MacAddress& mac, uint16_t vlan, Port& port)
{
SWSS_LOG_ERROR("Failed to get bridge port ID for FDB entry %s, rv:%d",
mac.to_string().c_str(), status);
return false;
task_process_status handle_status = handleSaiGetStatus(SAI_API_FDB, status);
if (handle_status != task_process_status::task_success)
{
return false;
}
}

if (!m_portsOrch->getPortByBridgePortId(attr.value.oid, port))
Expand Down
10 changes: 7 additions & 3 deletions orchagent/fgnhgorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,11 +294,15 @@ bool FgNhgOrch::createFineGrainedNextHopGroup(FGNextHopGroupEntry &syncd_fg_rout
{
SWSS_LOG_ERROR("Failed to query next hop group %s SAI_NEXT_HOP_GROUP_ATTR_REAL_SIZE, rv:%d",
nextHops.to_string().c_str(), status);
if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry))
task_process_status handle_status = handleSaiGetStatus(SAI_API_NEXT_HOP_GROUP, status);
if (handle_status != task_process_status::task_success)
{
SWSS_LOG_ERROR("Failed to clean-up after next hop group real_size query failure");
if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry))
{
SWSS_LOG_ERROR("Failed to clean-up after next hop group real_size query failure");
}
return false;
}
return false;
}
fgNhgEntry->real_bucket_size = nhg_attr.value.u32;
}
Expand Down
15 changes: 10 additions & 5 deletions orchagent/macsecorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -854,15 +854,20 @@ bool MACsecOrch::initMACsecObject(sai_object_id_t switch_id)
attrs.clear();
attr.id = SAI_MACSEC_ATTR_SCI_IN_INGRESS_MACSEC_ACL;
attrs.push_back(attr);
if (sai_macsec_api->get_macsec_attribute(
macsec_obj.first->second.m_ingress_id,
static_cast<uint32_t>(attrs.size()),
attrs.data()) != SAI_STATUS_SUCCESS)
status = sai_macsec_api->get_macsec_attribute(
macsec_obj.first->second.m_ingress_id,
static_cast<uint32_t>(attrs.size()),
attrs.data());
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_WARN(
"Cannot get MACsec attribution SAI_MACSEC_ATTR_SCI_IN_INGRESS_MACSEC_ACL at the switch 0x%" PRIx64,
switch_id);
return false;
task_process_status handle_status = handleSaiGetStatus(SAI_API_MACSEC, status);
if (handle_status != task_process_status::task_success)
{
return false;
}
}
macsec_obj.first->second.m_sci_in_ingress_macsec_acl = attrs.front().value.booldata;

Expand Down
6 changes: 5 additions & 1 deletion orchagent/neighorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1547,7 +1547,11 @@ void NeighOrch::voqSyncAddNeigh(string &alias, IpAddress &ip_address, const MacA
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get neighbor attribute for %s on %s, rv:%d", ip_address.to_string().c_str(), alias.c_str(), status);
return;
task_process_status handle_status = handleSaiGetStatus(SAI_API_NEIGHBOR, status);
if (handle_status != task_process_status::task_success)
{
return;
}
}

if (!attr.value.u32)
Expand Down
29 changes: 29 additions & 0 deletions orchagent/orch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,35 @@ task_process_status Orch::handleSaiRemoveStatus(sai_api_t api, sai_status_t stat
return task_need_retry;
}

task_process_status Orch::handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context)
{
/*
* This function aims to provide coarse handling of failures in sairedis get
* operation (i.e., notify users by throwing excepions when failures happen).
* Return value: task_success - Handled the status successfully. No need to retry this SAI operation.
* task_need_retry - Cannot handle the status. Need to retry the SAI operation.
* task_failed - Failed to handle the status but another attempt is unlikely to resolve the failure.
* TODO: 1. Add general handling logic for specific statuses
* 2. Develop fine-grain failure handling mechanisms and replace this coarse handling
* in each orch.
* 3. Take the type of sai api into consideration.
*/
switch (status)
{
case SAI_STATUS_SUCCESS:
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiGetStatus");
return task_success;
case SAI_STATUS_NOT_IMPLEMENTED:
SWSS_LOG_ERROR("Encountered failure in get operation due to the function is not implemented, exiting orchagent, SAI API: %s",
sai_serialize_api(api).c_str());
throw std::logic_error("SAI get function not implemented");
default:
SWSS_LOG_ERROR("Encountered failure in get operation, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
}
return task_failed;
}

bool Orch::parseHandleSaiStatusFailure(task_process_status status)
{
/*
Expand Down
1 change: 1 addition & 0 deletions orchagent/orch.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ class Orch
virtual task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
virtual task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
virtual task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
virtual task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
bool parseHandleSaiStatusFailure(task_process_status status);
private:
void removeMeFromObjsReferencedByMe(type_map &type_maps, const std::string &table, const std::string &obj_name, const std::string &field, const std::string &old_referenced_obj_name);
Expand Down
Loading

0 comments on commit e99beb6

Please sign in to comment.