Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Voq chassis orchagent crash with 34K routes #3329

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion orchagent/neighorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1750,6 +1750,15 @@ void NeighOrch::doVoqSystemNeighTask(Consumer &consumer)
it++;
continue;
}
if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end())
{
NextHopKey nexthop = { ip_address, ibif.m_alias};
if (hasNextHop(nexthop))
{
it++;
continue;
}
}

if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end() ||
m_syncdNeighbors[neighbor_entry].mac != mac_address ||
Expand Down Expand Up @@ -2251,4 +2260,4 @@ bool NeighOrch::ifChangeInformRemoteNextHop(const string &alias, bool if_up)
}
}
return rc;
}
}
127 changes: 127 additions & 0 deletions tests/test_virtual_chassis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1122,6 +1122,133 @@ def test_remote_port_down(self, vct):

# Cleanup inband if configuration
self.del_inbandif_port(vct, inband_port)
self.configure_neighbor(local_lc_dvs, "del", test_neigh_ip_2, test_neigh_mac_2, test_neigh_dev_2)


def test_remote_neighbor_add(self, vct):
# Scenario exercised by this test: a neighbor configured on one port of the
# local line card is deleted and re-added on a different port while a route
# on the remote line card still points at that neighbor IP. The assertions
# further down require the remote next hop to keep its original router
# interface until the route is deleted, and to use a different one afterwards.
# test params
local_lc_switch_id = '0'
remote_lc_switch_id = '2'
test_prefix = "14.14.0.0/16"
inband_port = "Ethernet0"
test_neigh_ip_1 = "10.8.104.50"
test_neigh_dev_1 = "Ethernet4"
test_neigh_mac_1 = "00:09:03:04:05:06"
test_neigh_dev_2 = "Ethernet8"

local_lc_dvs = self.get_lc_dvs(vct, local_lc_switch_id)
remote_lc_dvs = self.get_lc_dvs(vct, remote_lc_switch_id)

# config inband port
self.config_inbandif_port(vct, inband_port)

# add neighbor
self.configure_neighbor(local_lc_dvs, "add", test_neigh_ip_1, test_neigh_mac_1, test_neigh_dev_1)

# Fixed delay before querying the remote line card's ASIC_DB
# (presumably to let the neighbor propagate across the chassis; TODO confirm).
time.sleep(10)

asic_db = remote_lc_dvs.get_asic_db()
asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY", 1)
neighkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY")
assert len(neighkeys), "No neigh entries in ASIC_DB"

# Check for presence of the remote neighbor in ASIC_DB
remote_neigh = ""
for nkey in neighkeys:
# Each key is parsed as a Python literal and matched on its 'ip' field.
ne = ast.literal_eval(nkey)
if ne['ip'] == test_neigh_ip_1:
remote_neigh = nkey
break

assert remote_neigh != "", "Remote neigh not found in ASIC_DB"

# Preserve remote neigh asic db neigh key for delete verification later
# NOTE(review): this variable is not referenced again anywhere in this test.
test_remote_neigh_asic_db_key = remote_neigh

asic_db = remote_lc_dvs.get_asic_db()
nexthop_keys = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", 1)
assert len(nexthop_keys), "No Nexthop entries in ASIC_DB"

nexthop_entry = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", nexthop_keys[0])
ip = nexthop_entry.get("SAI_NEXT_HOP_ATTR_IP")
# NOTE(review): if .get() yields None for a missing attribute, this comparison
# against "" still passes; confirm the return type of get_entry().
assert ip != "", "Ip address not found for nexthop entry in asic db"
# Baseline router interface id; compared against rif2 and rif3 later in the test.
rif1 = nexthop_entry.get("SAI_NEXT_HOP_ATTR_ROUTER_INTERFACE_ID")


# add route of LC1(pretend learnt via bgp)
# The route is installed on the remote line card; empty command output is
# treated as success.
_, res = remote_lc_dvs.runcmd(['sh', '-c', f"ip route add {test_prefix} nexthop via {test_neigh_ip_1}"])
assert res == "", "Error configuring route"
time.sleep(5)

# del neighbor on first port and add it on second port
self.configure_neighbor(local_lc_dvs, "del", test_neigh_ip_1, test_neigh_mac_1, test_neigh_dev_1)
time.sleep(5)
self.configure_neighbor(local_lc_dvs, "add", test_neigh_ip_1, test_neigh_mac_1, test_neigh_dev_2)

time.sleep(10)

# Re-read the neighbor table on the remote line card after the move.
asic_db = remote_lc_dvs.get_asic_db()
asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY", 1)
neighkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY")
assert len(neighkeys), "No neigh entries in ASIC_DB"

# Check for presence of the remote neighbor in ASIC_DB
remote_neigh = ""
for nkey in neighkeys:
ne = ast.literal_eval(nkey)
if ne['ip'] == test_neigh_ip_1:
remote_neigh = nkey
break

assert remote_neigh != "", "Remote neigh not found in ASIC_DB"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The remote neigh entry will be present. Should we check whether the remote neighbor entry is still the old neighbor? Also, should we add a check to confirm that the nexthop is not updated because of addneighbor?


# Fetch the single next hop entry again after the neighbor was moved to the
# second port while the route is still installed.
nexthop_keys = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", 1)
assert len(nexthop_keys), "No Nexthop entries in ASIC_DB"
nexthop_entry = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", nexthop_keys[0])
# Debug output of the fetched next hop entry.
print("2:nexthop_entrty:",nexthop_entry)
rif2 = nexthop_entry.get("SAI_NEXT_HOP_ATTR_ROUTER_INTERFACE_ID")
# While the route exists, the next hop must still reference the router
# interface recorded in rif1 before the move.
assert rif1 == rif2, "Neighbor is already replaced with new rif"

#del the route
_, res = remote_lc_dvs.runcmd(['sh', '-c', f"ip route del {test_prefix} nexthop via {test_neigh_ip_1} "])
# NOTE(review): the failure message says "configuring" although this step deletes the route.
assert res == "", "Error configuring route"

time.sleep(10)

# Re-read the neighbor table on the remote line card after the route removal.
asic_db = remote_lc_dvs.get_asic_db()
asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY", 1)
neighkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY")
assert len(neighkeys), "No neigh entries in ASIC_DB"

# Check for presence of the remote neighbor in ASIC_DB
remote_neigh = ""
for nkey in neighkeys:
ne = ast.literal_eval(nkey)
if ne['ip'] == test_neigh_ip_1:
remote_neigh = nkey
break
assert remote_neigh != "", "Remote neigh not found in ASIC_DB"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a check to verify that we have a new nexthop for the new neighbor?


# Fetch the single next hop entry once more, now that the route has been deleted.
nexthop_keys = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", 1)
assert len(nexthop_keys), "No Nexthop entries in ASIC_DB"
nexthop_entry = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", nexthop_keys[0])
# Debug output of the fetched next hop entry.
print("3:nexthop_entrty:",nexthop_entry)
rif3 = nexthop_entry.get("SAI_NEXT_HOP_ATTR_ROUTER_INTERFACE_ID")
# With the route gone, the next hop is expected to reference a router
# interface different from the one recorded in rif1.
assert rif1 != rif3, "Neighbor is not replaced with new rif"

#del the neighbor
self.configure_neighbor(local_lc_dvs, "del", test_neigh_ip_1, test_neigh_mac_1, test_neigh_dev_2)
time.sleep(10)
# After the delete, the remote line card must hold no neighbor entries...
asic_db = remote_lc_dvs.get_asic_db()
asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY", 0)
neighkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY")
assert len(neighkeys) == 0, "Neigh entries still in ASIC_DB"

# ...and no next hop entries either.
nexthop_keys = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", 0)
assert len(nexthop_keys) == 0, "Nexthop entries in still ASIC_DB"

# Cleanup inband if configuration
self.del_inbandif_port(vct, inband_port)

def test_voq_drop_counters(self, vct):
"""Test VOQ switch drop counters.
Expand Down
Loading