From ab299201be9d37654e3ba88f9e7e180023896722 Mon Sep 17 00:00:00 2001 From: Mariusz Stachura Date: Tue, 18 Oct 2022 19:33:00 +0200 Subject: [PATCH] [QoS] Support dynamic headroom calculation for Barefoot platforms (#2412) Signed-off-by: Mariusz Stachura What I did Adding the dynamic headroom calculation support for Barefoot platforms. Why I did it Enabling dynamic mode for barefoot case. How I verified it The community tests are adjusted and pass. --- cfgmgr/Makefile.am | 5 +- cfgmgr/buffer_check_headroom_barefoot.lua | 6 + cfgmgr/buffer_headroom_barefoot.lua | 147 ++++++++++++++++++++++ cfgmgr/buffer_pool_barefoot.lua | 30 +++++ cfgmgr/buffermgr.cpp | 2 +- 5 files changed, 188 insertions(+), 2 deletions(-) create mode 100644 cfgmgr/buffer_check_headroom_barefoot.lua create mode 100644 cfgmgr/buffer_headroom_barefoot.lua create mode 100644 cfgmgr/buffer_pool_barefoot.lua diff --git a/cfgmgr/Makefile.am b/cfgmgr/Makefile.am index 64a57a6e583c..69cefc8052c2 100644 --- a/cfgmgr/Makefile.am +++ b/cfgmgr/Makefile.am @@ -15,7 +15,10 @@ dist_cfgmgr_DATA = \ buffer_pool_mellanox.lua \ buffer_check_headroom_vs.lua \ buffer_headroom_vs.lua \ - buffer_pool_vs.lua + buffer_pool_vs.lua \ + buffer_check_headroom_barefoot.lua \ + buffer_headroom_barefoot.lua \ + buffer_pool_barefoot.lua if DEBUG DBGFLAGS = -ggdb -DDEBUG diff --git a/cfgmgr/buffer_check_headroom_barefoot.lua b/cfgmgr/buffer_check_headroom_barefoot.lua new file mode 100644 index 000000000000..74551b1a426f --- /dev/null +++ b/cfgmgr/buffer_check_headroom_barefoot.lua @@ -0,0 +1,6 @@ +local ret = {} + +table.insert(ret, "result:true") +table.insert(ret, "debug:No need to check port headroom limit as shared headroom pool model is supported.") + +return ret diff --git a/cfgmgr/buffer_headroom_barefoot.lua b/cfgmgr/buffer_headroom_barefoot.lua new file mode 100644 index 000000000000..f5e61013b314 --- /dev/null +++ b/cfgmgr/buffer_headroom_barefoot.lua @@ -0,0 +1,147 @@ +-- KEYS - profile name +-- ARGV[1] - port speed in Mb/s 
+-- ARGV[2] - cable length; the last character is stripped before conversion to a number (presumably a unit suffix such as "m" - confirm against caller) +-- ARGV[3] - port mtu +-- ARGV[4] - gearbox delay (treated as 0 when it cannot be converted to a number) + +-- Parameters retrieved from databases: +-- From CONFIG_DB.LOSSLESS_TRAFFIC_PATTERN: +-- small_packet_percentage: the parameter used to control the worst case of cell utilization +-- mtu: the mtu of lossless packet +-- From STATE_DB.ASIC_TABLE: +-- cell_size: cell size of the ASIC +-- pipeline_latency: the latency (XON); multiplied by 1024 and used as the xon value +-- mac_phy_delay: the bytes held in the switch chip's egress pipeline and PHY when XOFF has been generated +-- peer_response_time: the bytes that are held in the peer switch's pipeline and will be sent out when the XOFF packet is received + +local lossless_mtu +local small_packet_percentage +local cell_size +local pipeline_latency +local mac_phy_delay +local peer_response_time + +local port_speed = tonumber(ARGV[1]) +local cable_length = tonumber(string.sub(ARGV[2], 1, -2)) +local port_mtu = tonumber(ARGV[3]) +local gearbox_delay = tonumber(ARGV[4]) + +local config_db = "4" +local state_db = "6" + +local ret = {} + +-- The pause quanta to be used for each operating speed are defined in IEEE 802.3 31B.3.7. +-- The key of table pause_quanta_per_speed is the operating speed in Mb/s. +-- The value of table pause_quanta_per_speed is the number of pause_quanta. 
+local pause_quanta_per_speed = {} +pause_quanta_per_speed[400000] = 905 +pause_quanta_per_speed[200000] = 453 +pause_quanta_per_speed[100000] = 394 +pause_quanta_per_speed[50000] = 147 +pause_quanta_per_speed[40000] = 118 +pause_quanta_per_speed[25000] = 80 +pause_quanta_per_speed[10000] = 67 +pause_quanta_per_speed[1000] = 2 +pause_quanta_per_speed[100] = 1 + +-- Look up pause_quanta for this port speed; it is nil when the speed is not in the table +local pause_quanta = pause_quanta_per_speed[port_speed] + +if gearbox_delay == nil then + gearbox_delay = 0 +end + +-- Fetch ASIC info from ASIC table in STATE_DB +redis.call("SELECT", state_db) +local asic_keys = redis.call("KEYS", "ASIC_TABLE*") + +-- Only one key should exist; only the first match is read +local asic_table_content = redis.call("HGETALL", asic_keys[1]) + +for i = 1, #asic_table_content, 2 do + if asic_table_content[i] == "cell_size" then + cell_size = tonumber(asic_table_content[i+1]) + end + if asic_table_content[i] == "pipeline_latency" then + pipeline_latency = tonumber(asic_table_content[i+1]) * 1024 + end + if asic_table_content[i] == "mac_phy_delay" then + mac_phy_delay = tonumber(asic_table_content[i+1]) * 1024 + end + -- If pause_quanta was not found in the table, fall back to the default peer_response_time stored in state_db (scaled by 1024) + if asic_table_content[i] == "peer_response_time" and pause_quanta == nil then + peer_response_time = tonumber(asic_table_content[i+1]) * 1024 + end +end + +-- Fetch lossless traffic info from CONFIG_DB +redis.call("SELECT", config_db) +local lossless_traffic_keys = redis.call("KEYS", "LOSSLESS_TRAFFIC_PATTERN*") + +-- Only one key should exist; only the first match is read +local lossless_traffic_table_content = redis.call("HGETALL", lossless_traffic_keys[1]) +for i = 1, #lossless_traffic_table_content, 2 do + if lossless_traffic_table_content[i] == "mtu" then + lossless_mtu = tonumber(lossless_traffic_table_content[i+1]) + end + if lossless_traffic_table_content[i] == "small_packet_percentage" then + small_packet_percentage = 
tonumber(lossless_traffic_table_content[i+1]) + end +end + +-- Fetch the shared headroom pool size (NOTE: shp_size is not used by the calculation below) +local shp_size = tonumber(redis.call("HGET", "BUFFER_POOL|ingress_lossless_pool", "xoff")) + +-- Calculate the headroom information +local speed_of_light = 198000000 +local minimal_packet_size = 64 +local cell_occupancy +local worst_case_factor +local propagation_delay +local bytes_on_cable +local bytes_on_gearbox +local xoff_value +local xon_value +local headroom_size + +if cell_size > 2 * minimal_packet_size then + worst_case_factor = cell_size / minimal_packet_size +else + worst_case_factor = (2 * cell_size) / (1 + cell_size) +end + +cell_occupancy = (100 - small_packet_percentage + small_packet_percentage * worst_case_factor) / 100 + +if (gearbox_delay == 0) then + bytes_on_gearbox = 0 +else + bytes_on_gearbox = port_speed * gearbox_delay / (8 * 1024) +end + +-- If pause_quanta was found in the table, calculate peer_response_time from it +if pause_quanta ~= nil then + peer_response_time = (pause_quanta) * 512 / 8 +end + +if port_speed == 400000 then + peer_response_time = 2 * peer_response_time +end + +bytes_on_cable = 2 * cable_length * port_speed * 1000000000 / speed_of_light / (8 * 1024) +propagation_delay = port_mtu + bytes_on_cable + 2 * bytes_on_gearbox + mac_phy_delay + peer_response_time + +-- Calculate xoff and xon, then round each up to a multiple of 1024 bytes +xoff_value = lossless_mtu + propagation_delay * cell_occupancy +xoff_value = math.ceil(xoff_value / 1024) * 1024 +xon_value = pipeline_latency +xon_value = math.ceil(xon_value / 1024) * 1024 + +headroom_size = xon_value +headroom_size = math.ceil(headroom_size / 1024) * 1024 + +table.insert(ret, "xon" .. ":" .. math.ceil(xon_value)) +table.insert(ret, "xoff" .. ":" .. math.ceil(xoff_value)) +table.insert(ret, "size" .. ":" .. 
math.ceil(headroom_size)) + +return ret diff --git a/cfgmgr/buffer_pool_barefoot.lua b/cfgmgr/buffer_pool_barefoot.lua new file mode 100644 index 000000000000..49c3a961f7f2 --- /dev/null +++ b/cfgmgr/buffer_pool_barefoot.lua @@ -0,0 +1,30 @@ +-- KEYS - None +-- ARGV - None + +local result = {} +local config_db = "4" +local state_db = "6" + +redis.call("SELECT", state_db) +local asic_keys = redis.call("KEYS", "ASIC_TABLE*") +local cell_size = tonumber(redis.call("HGET", asic_keys[1], "cell_size")) + +-- Based on cell_size, calculate the headroom of a single PPG (400 cells) +local ppg_headroom = 400 * cell_size + +redis.call("SELECT", config_db) +local ports = redis.call("KEYS", "PORT|*") +local ports_num = #ports + +-- Shared headroom pool size: 2 PPGs per port, 70% of the possible maximum value. +local shp_size = math.ceil(ports_num * 2 * ppg_headroom * 0.7) + +local ingress_lossless_pool_size_fixed = tonumber(redis.call('HGET', 'BUFFER_POOL|ingress_lossless_pool', 'size')) +local ingress_lossy_pool_size_fixed = tonumber(redis.call('HGET', 'BUFFER_POOL|ingress_lossy_pool', 'size')) +local egress_lossy_pool_size_fixed = tonumber(redis.call('HGET', 'BUFFER_POOL|egress_lossy_pool', 'size')) + +table.insert(result, "ingress_lossless_pool" .. ":" .. ingress_lossless_pool_size_fixed .. ":" .. shp_size) +table.insert(result, "ingress_lossy_pool" .. ":" .. ingress_lossy_pool_size_fixed) +table.insert(result, "egress_lossy_pool" .. ":" .. 
egress_lossy_pool_size_fixed) + +return result diff --git a/cfgmgr/buffermgr.cpp b/cfgmgr/buffermgr.cpp index d8faa1033bb3..ba247197c196 100644 --- a/cfgmgr/buffermgr.cpp +++ b/cfgmgr/buffermgr.cpp @@ -196,7 +196,7 @@ task_process_status BufferMgr::doSpeedUpdateTask(string port) // Although we have up to 8 PGs for now, the range to check is expanded to 32 support more PGs set lossless_pg_combinations = generateIdListFromMap(lossless_pg_id, sizeof(lossless_pg_id)); - if (m_portStatusLookup[port] == "down" && m_platform == "mellanox") + if (m_portStatusLookup[port] == "down" && (m_platform == "mellanox" || m_platform == "barefoot")) { for (auto lossless_pg : lossless_pg_combinations) {