Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add failure handling for SAI get operations #1768

Merged
merged 12 commits into from
Jul 7, 2021
6 changes: 5 additions & 1 deletion orchagent/aclorch.cpp
Original file line number Diff line number Diff line change
@@ -2309,7 +2309,11 @@ void AclOrch::init(vector<TableConnector>& connectors, PortsOrch *portOrch, Mirr
else
{
SWSS_LOG_ERROR("Failed to get ACL entry priority min/max values, rv:%d", status);
throw "AclOrch initialization failure";
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
throw "AclOrch initialization failure";
}
}

queryAclActionCapability();
6 changes: 5 additions & 1 deletion orchagent/copporch.cpp
Original file line number Diff line number Diff line change
@@ -179,7 +179,11 @@ void CoppOrch::initDefaultTrapGroup()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get default trap group, rv:%d", status);
throw "CoppOrch initialization failure";
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
throw "CoppOrch initialization failure";
}
}

SWSS_LOG_INFO("Get default trap group");
12 changes: 10 additions & 2 deletions orchagent/crmorch.cpp
Original file line number Diff line number Diff line change
@@ -488,7 +488,11 @@ void CrmOrch::getResAvailableCounters()
break;
}
SWSS_LOG_ERROR("Failed to get switch attribute %u , rv:%d", attr.id, status);
break;
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
break;
}
}

res.second.countersMap[CRM_COUNTERS_TABLE_KEY].availableCounter = attr.value.u32;
@@ -517,7 +521,11 @@ void CrmOrch::getResAvailableCounters()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get switch attribute %u , rv:%d", attr.id, status);
break;
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
break;
}
}

for (uint32_t i = 0; i < attr.value.aclresource.count; i++)
36 changes: 30 additions & 6 deletions orchagent/fabricportsorch.cpp
Original file line number Diff line number Diff line change
@@ -88,7 +88,11 @@ int FabricPortsOrch::getFabricPortList()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get fabric port number, rv:%d", status);
return FABRIC_PORT_ERROR;
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
return FABRIC_PORT_ERROR;
}
}
m_fabricPortCount = attr.value.u32;
SWSS_LOG_NOTICE("Get %d fabric ports", m_fabricPortCount);
@@ -101,7 +105,11 @@ int FabricPortsOrch::getFabricPortList()
status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr);
if (status != SAI_STATUS_SUCCESS)
{
throw runtime_error("FabricPortsOrch get port list failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("FabricPortsOrch get port list failure");
}
}

for (i = 0; i < m_fabricPortCount; i++)
@@ -113,7 +121,11 @@ int FabricPortsOrch::getFabricPortList()
status = sai_port_api->get_port_attribute(fabric_port_list[i], 1, &attr);
if (status != SAI_STATUS_SUCCESS)
{
throw runtime_error("FabricPortsOrch get port lane failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("FabricPortsOrch get port lane failure");
}
}
int lane = attr.value.u32list.list[0];
m_fabricLanePortMap[lane] = fabric_port_list[i];
@@ -198,7 +210,11 @@ void FabricPortsOrch::updateFabricPortState()
{
// Port may not be ready for query
SWSS_LOG_ERROR("Failed to get fabric port (%d) status, rv:%d", lane, status);
return;
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
return;
}
}

if (m_portStatus.find(lane) != m_portStatus.end() &&
@@ -215,15 +231,23 @@ void FabricPortsOrch::updateFabricPortState()
status = sai_port_api->get_port_attribute(port, 1, &attr);
if (status != SAI_STATUS_SUCCESS)
{
throw runtime_error("FabricPortsOrch get remote id failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("FabricPortsOrch get remote id failure");
}
}
remote_peer = attr.value.u32;

attr.id = SAI_PORT_ATTR_FABRIC_ATTACHED_PORT_INDEX;
status = sai_port_api->get_port_attribute(port, 1, &attr);
if (status != SAI_STATUS_SUCCESS)
{
throw runtime_error("FabricPortsOrch get remote port index failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("FabricPortsOrch get remote port index failure");
}
}
remote_port = attr.value.u32;
}
6 changes: 5 additions & 1 deletion orchagent/fdborch.cpp
Original file line number Diff line number Diff line change
@@ -503,7 +503,11 @@ bool FdbOrch::getPort(const MacAddress& mac, uint16_t vlan, Port& port)
{
SWSS_LOG_ERROR("Failed to get bridge port ID for FDB entry %s, rv:%d",
mac.to_string().c_str(), status);
return false;
task_process_status handle_status = handleSaiGetStatus(SAI_API_FDB, status);
if (handle_status != task_process_status::task_success)
{
return false;
}
}

if (!m_portsOrch->getPortByBridgePortId(attr.value.oid, port))
10 changes: 7 additions & 3 deletions orchagent/fgnhgorch.cpp
Original file line number Diff line number Diff line change
@@ -294,11 +294,15 @@ bool FgNhgOrch::createFineGrainedNextHopGroup(FGNextHopGroupEntry &syncd_fg_rout
{
SWSS_LOG_ERROR("Failed to query next hop group %s SAI_NEXT_HOP_GROUP_ATTR_REAL_SIZE, rv:%d",
nextHops.to_string().c_str(), status);
if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry))
task_process_status handle_status = handleSaiGetStatus(SAI_API_NEXT_HOP_GROUP, status);
if (handle_status != task_process_status::task_success)
{
SWSS_LOG_ERROR("Failed to clean-up after next hop group real_size query failure");
if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry))
{
SWSS_LOG_ERROR("Failed to clean-up after next hop group real_size query failure");
}
return false;
}
return false;
}
fgNhgEntry->real_bucket_size = nhg_attr.value.u32;
}
15 changes: 10 additions & 5 deletions orchagent/macsecorch.cpp
Original file line number Diff line number Diff line change
@@ -854,15 +854,20 @@ bool MACsecOrch::initMACsecObject(sai_object_id_t switch_id)
attrs.clear();
attr.id = SAI_MACSEC_ATTR_SCI_IN_INGRESS_MACSEC_ACL;
attrs.push_back(attr);
if (sai_macsec_api->get_macsec_attribute(
macsec_obj.first->second.m_ingress_id,
static_cast<uint32_t>(attrs.size()),
attrs.data()) != SAI_STATUS_SUCCESS)
status = sai_macsec_api->get_macsec_attribute(
macsec_obj.first->second.m_ingress_id,
static_cast<uint32_t>(attrs.size()),
attrs.data());
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_WARN(
"Cannot get MACsec attribution SAI_MACSEC_ATTR_SCI_IN_INGRESS_MACSEC_ACL at the switch 0x%" PRIx64,
switch_id);
return false;
task_process_status handle_status = handleSaiGetStatus(SAI_API_MACSEC, status);
if (handle_status != task_process_status::task_success)
{
return false;
}
}
macsec_obj.first->second.m_sci_in_ingress_macsec_acl = attrs.front().value.booldata;

6 changes: 5 additions & 1 deletion orchagent/neighorch.cpp
Original file line number Diff line number Diff line change
@@ -1547,7 +1547,11 @@ void NeighOrch::voqSyncAddNeigh(string &alias, IpAddress &ip_address, const MacA
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get neighbor attribute for %s on %s, rv:%d", ip_address.to_string().c_str(), alias.c_str(), status);
return;
task_process_status handle_status = handleSaiGetStatus(SAI_API_NEIGHBOR, status);
if (handle_status != task_process_status::task_success)
{
return;
}
}

if (!attr.value.u32)
29 changes: 29 additions & 0 deletions orchagent/orch.cpp
Original file line number Diff line number Diff line change
@@ -765,6 +765,35 @@ task_process_status Orch::handleSaiRemoveStatus(sai_api_t api, sai_status_t stat
return task_need_retry;
}

task_process_status Orch::handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context)
{
/*
* This function aims to provide coarse handling of failures in sairedis get
* operation (i.e., notify users by throwing excepions when failures happen).
* Return value: task_success - Handled the status successfully. No need to retry this SAI operation.
* task_need_retry - Cannot handle the status. Need to retry the SAI operation.
* task_failed - Failed to handle the status but another attempt is unlikely to resolve the failure.
* TODO: 1. Add general handling logic for specific statuses
* 2. Develop fine-grain failure handling mechanisms and replace this coarse handling
* in each orch.
* 3. Take the type of sai api into consideration.
*/
switch (status)
{
case SAI_STATUS_SUCCESS:
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiGetStatus");
return task_success;
case SAI_STATUS_NOT_IMPLEMENTED:
SWSS_LOG_ERROR("Encountered failure in get operation due to the function is not implemented, exiting orchagent, SAI API: %s",
sai_serialize_api(api).c_str());
throw std::logic_error("SAI get function not implemented");
default:
SWSS_LOG_ERROR("Encountered failure in get operation, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
}
return task_failed;
}

bool Orch::parseHandleSaiStatusFailure(task_process_status status)
{
/*
1 change: 1 addition & 0 deletions orchagent/orch.h
Original file line number Diff line number Diff line change
@@ -240,6 +240,7 @@ class Orch
virtual task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
virtual task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
virtual task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
virtual task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
bool parseHandleSaiStatusFailure(task_process_status status);
private:
void removeMeFromObjsReferencedByMe(type_map &type_maps, const std::string &table, const std::string &obj_name, const std::string &field, const std::string &old_referenced_obj_name);
Loading