Skip to content

Commit

Permalink
Reliable TSA implementation (sonic-net#18928)
Browse files Browse the repository at this point in the history
* Reliable TSA implementation

---------

Signed-off-by: fountzou <[email protected]>
  • Loading branch information
fountzou authored and pull[bot] committed Nov 14, 2024
1 parent 422611e commit b696b4c
Show file tree
Hide file tree
Showing 11 changed files with 328 additions and 16 deletions.
14 changes: 12 additions & 2 deletions dockers/docker-fpm-frr/base_image_files/TS
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,23 @@
PLATFORM=${PLATFORM:-`sonic-cfggen -H -v DEVICE_METADATA.localhost.platform`}

if [[ $1 == "TSA" ]]; then
TSA_CHASSIS_STATE="$(sonic-db-cli CHASSIS_APP_DB HGET "BGP_DEVICE_GLOBAL|STATE" tsa_enabled)"
TSA_STATE_UPDATE='{"BGP_DEVICE_GLOBAL":{"STATE":{"tsa_enabled": "true"}}}'
log_msg='System Mode: Normal -> Maintenance'
if [[ $TSA_CHASSIS_STATE == true ]]; then
log_msg='System Mode: Maintenance -> Maintenance'
else
log_msg='System Mode: Normal -> Maintenance'
fi
err_msg='System is already in Maintenance'
desired_tsa_state=true
elif [[ $1 == "TSB" ]]; then
TSA_CHASSIS_STATE="$(sonic-db-cli CHASSIS_APP_DB HGET "BGP_DEVICE_GLOBAL|STATE" tsa_enabled)"
TSA_STATE_UPDATE='{"BGP_DEVICE_GLOBAL":{"STATE":{"tsa_enabled": "false"}}}'
log_msg='System Mode: Maintenance -> Normal'
if [[ $TSA_CHASSIS_STATE == true ]]; then
log_msg='System Mode: Maintenance -> Maintenance'
else
log_msg='System Mode: Maintenance -> Normal'
fi
err_msg='System is already in Normal mode'
desired_tsa_state=false
fi
Expand Down
13 changes: 12 additions & 1 deletion dockers/docker-fpm-frr/base_image_files/TSA
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,18 @@ if [ "$EUID" -ne 0 ] ; then
fi

if [ -f /etc/sonic/chassisdb.conf ]; then
rexec all -c "sudo TSA chassis"
CHASSIS_TSA_STATE_UPDATE="CHASSIS_APP_DB HMSET "BGP_DEVICE_GLOBAL\|STATE" tsa_enabled "true""
CONFIG_DB_TSA_STATE_UPDATE='{"BGP_DEVICE_GLOBAL":{"STATE":{"tsa_enabled": "true"}}}'
current_tsa_state="$(sonic-cfggen -d -v BGP_DEVICE_GLOBAL.STATE.tsa_enabled)"
if [[ $current_tsa_state == true ]]; then
echo "Chassis is already in Maintenance"
logger -t TSA -p user.info "Chassis is already in Maintenance"
else
sonic-db-cli $CHASSIS_TSA_STATE_UPDATE
sonic-cfggen -a "$CONFIG_DB_TSA_STATE_UPDATE" -w
echo "Chassis Mode: Normal -> Maintenance"
logger -t TSA -p user.info "Chassis Mode: Normal -> Maintenance"
fi
echo "Please execute \"rexec all -c 'sudo config save -y'\" to preserve System mode in Maintenance after reboot\
or config reload on all linecards"
exit 0
Expand Down
16 changes: 13 additions & 3 deletions dockers/docker-fpm-frr/base_image_files/TSB
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,21 @@ if [ "$EUID" -ne 0 ] ; then
exit 1
fi

# If run on supervisor of chassis, trigger remote execution of TSB on all linecards
if [ -f /etc/sonic/chassisdb.conf ]; then
rexec all -c "sudo TSB chassis"
CHASSIS_TSA_STATE_UPDATE="CHASSIS_APP_DB HMSET "BGP_DEVICE_GLOBAL\|STATE" tsa_enabled "false""
CONFIG_DB_TSA_STATE_UPDATE='{"BGP_DEVICE_GLOBAL":{"STATE":{"tsa_enabled": "false"}}}'
current_tsa_state="$(sonic-cfggen -d -v BGP_DEVICE_GLOBAL.STATE.tsa_enabled)"
if [[ $current_tsa_state == false ]]; then
echo "Chassis is already in Normal mode"
logger -t TSB -p user.info "Chassis is already in Normal mode"
else
sonic-db-cli $CHASSIS_TSA_STATE_UPDATE
sonic-cfggen -a "$CONFIG_DB_TSA_STATE_UPDATE" -w
echo "Chassis Mode: Maintenance -> Normal"
logger -t TSB -p user.info "Chassis Mode: Maintenance -> Normal"
fi
echo "Please execute \"rexec all -c 'sudo config save -y'\" to preserve System mode in Normal state after reboot\
or config reload on all linecards"
or config reload on all linecards"
exit 0
fi

Expand Down
8 changes: 8 additions & 0 deletions files/build_templates/docker_image_ctl.j2
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,14 @@ function postStartAction()
$SONIC_DB_CLI CONFIG_DB SET "CONFIG_DB_INITIALIZED" "1"
fi

# In SUP, enforce CHASSIS_APP_DB.tsa_enabled to be in sync with BGP_DEVICE_GLOBAL.STATE.tsa_enabled
if [[ -z "$DEV" ]] && [[ -f /etc/sonic/chassisdb.conf ]]; then
tsa_cfg="$($SONIC_DB_CLI CONFIG_DB HGET "BGP_DEVICE_GLOBAL|STATE" "tsa_enabled")"
if [[ -n "$tsa_cfg" ]]; then
docker exec -i ${DOCKERNAME} $SONIC_DB_CLI CHASSIS_APP_DB HMSET "BGP_DEVICE_GLOBAL|STATE" tsa_enabled ${tsa_cfg}
fi
fi

# Add redis UDS to the redis group and give read/write access to the group
REDIS_SOCK="/var/run/redis${DEV}/redis.sock"
else
Expand Down
6 changes: 6 additions & 0 deletions files/image_config/config-setup/config-setup
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,12 @@ do_db_migration()
/usr/local/bin/db_migrator.py -o migrate
fi
sonic-db-cli CONFIG_DB SET "CONFIG_DB_INITIALIZED" "1"
#Enforce CHASSIS_APP_DB.tsa_enabled to be in sync with BGP_DEVICE_GLOBAL.STATE.tsa_enabled

if [[ -f /etc/sonic/chassisdb.conf ]]; then
tsa_cfg="$(sonic-db-cli CONFIG_DB HGET "BGP_DEVICE_GLOBAL|STATE" "tsa_enabled")"
sonic-db-cli CHASSIS_APP_DB HMSET "BGP_DEVICE_GLOBAL|STATE" tsa_enabled ${tsa_cfg}
fi
}

# Perform configuration migration from backup copy.
Expand Down
2 changes: 2 additions & 0 deletions src/sonic-bgpcfgd/bgpcfgd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .managers_static_rt import StaticRouteMgr
from .managers_rm import RouteMapMgr
from .managers_device_global import DeviceGlobalCfgMgr
from .managers_chassis_app_db import ChassisAppDbMgr
from .static_rt_timer import StaticRouteTimer
from .runner import Runner, signal_handler
from .template import TemplateFabric
Expand Down Expand Up @@ -73,6 +74,7 @@ def do_work():
RouteMapMgr(common_objs, "APPL_DB", swsscommon.APP_BGP_PROFILE_TABLE_NAME),
# Device Global Manager
DeviceGlobalCfgMgr(common_objs, "CONFIG_DB", swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME),
ChassisAppDbMgr(common_objs, "CHASSIS_APP_DB", "BGP_DEVICE_GLOBAL"),
]
runner = Runner(common_objs['cfg_mgr'])
for mgr in managers:
Expand Down
50 changes: 50 additions & 0 deletions src/sonic-bgpcfgd/bgpcfgd/managers_chassis_app_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from .manager import Manager
from .managers_device_global import DeviceGlobalCfgMgr
from .log import log_err, log_debug, log_notice
import re
from swsscommon import swsscommon

class ChassisAppDbMgr(Manager):
    """Handle tsa_enabled changes of the supervisor seen in the chassis app DB table."""

    def __init__(self, common_objs, db, table):
        """
        Build the manager and hook it up to the line-card tsa_enabled slot.

        :param common_objs: common object dictionary
        :param db: name of the db
        :param table: name of the table in the db
        """
        cfg_table = swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME

        # Last known line-card (CONFIG_DB) tsa_enabled value; empty until the
        # directory callback fires.
        self.lc_tsa = ""
        self.directory = common_objs['directory']
        # Private DeviceGlobalCfgMgr instance used to apply isolate/unisolate.
        self.dev_cfg_mgr = DeviceGlobalCfgMgr(common_objs, "CONFIG_DB", cfg_table)

        watched_slots = [("CONFIG_DB", cfg_table, "tsa_enabled")]
        self.directory.subscribe(watched_slots, self.on_lc_tsa_status_change)

        super().__init__(common_objs, [], db, table)

    def on_lc_tsa_status_change(self):
        """Refresh self.lc_tsa from the directory when the CONFIG_DB slot is present."""
        cfg_table = swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME
        if self.directory.path_exist("CONFIG_DB", cfg_table, "tsa_enabled"):
            slot = self.directory.get_slot("CONFIG_DB", cfg_table)
            self.lc_tsa = slot["tsa_enabled"]
        # NOTE(review): indentation was lost in the reviewed source; this log is
        # kept unconditional - confirm against the original file.
        log_debug("ChassisAppDbMgr:: LC TSA update handler status %s" % self.lc_tsa)

    def set_handler(self, key, data):
        """
        Process a set notification for the watched table.

        :param key: key of the updated entry (not inspected here)
        :param data: field/value mapping of the entry
        :return: True when data carries tsa_enabled, False otherwise
        """
        log_debug("ChassisAppDbMgr:: set handler")

        if not data:
            log_err("ChassisAppDbMgr:: data is None")
            return False

        if "tsa_enabled" not in data:
            return False

        # The chassis-level value is only applied while the line card's own
        # tsa_enabled is "false"; otherwise the update is acknowledged as-is.
        if self.lc_tsa == "false":
            frr_cfg = self.dev_cfg_mgr.cfg_mgr
            frr_cfg.commit()
            frr_cfg.update()
            self.dev_cfg_mgr.isolate_unisolate_device(data["tsa_enabled"])
        return True

    def del_handler(self, key):
        """Acknowledge a delete notification; nothing is undone. Always returns True."""
        log_debug("ChassisAppDbMgr:: del handler")
        return True
23 changes: 19 additions & 4 deletions src/sonic-bgpcfgd/bgpcfgd/managers_device_global.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def __init__(self, common_objs, db, table):
:param table: name of the table in the db
"""
self.switch_type = ""
self.chassis_tsa = ""
self.directory = common_objs['directory']
self.cfg_mgr = common_objs['cfg_mgr']
self.constants = common_objs['constants']
Expand Down Expand Up @@ -54,12 +55,13 @@ def set_handler(self, key, data):

if "tsa_enabled" in data:
self.directory.put(self.db_name, self.table_name, "tsa_enabled", data["tsa_enabled"])
if tsa_status != data["tsa_enabled"]:

self.chassis_tsa = self.get_chassis_tsa_status()
if self.chassis_tsa == "false" and tsa_status != data["tsa_enabled"]:
self.cfg_mgr.commit()
self.cfg_mgr.update()
self.isolate_unisolate_device(data["tsa_enabled"])


if "idf_isolation_state" in data:
self.directory.put(self.db_name, self.table_name, "idf_isolation_state", data["idf_isolation_state"])
if idf_isolation_state != data["idf_isolation_state"]:
Expand All @@ -79,7 +81,9 @@ def check_state_and_get_tsa_routemaps(self, cfg):
cmd = ""
if self.directory.path_exist("CONFIG_DB", swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME, "tsa_enabled"):
tsa_status = self.directory.get_slot("CONFIG_DB", swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME)["tsa_enabled"]
if tsa_status == "true":
chassis_tsa = self.get_chassis_tsa_status()

if tsa_status == "true" or chassis_tsa == "true":
cmds = cfg.replace("#012", "\n").split("\n")
log_notice("DeviceGlobalCfgMgr:: Device is isolated. Applying TSA route-maps")
cmd = self.get_ts_routemaps(cmds, self.tsa_template)
Expand Down Expand Up @@ -133,6 +137,17 @@ def __extract_out_route_map_names(self, cmds):
route_map_names.add(result.group(1))
return route_map_names

def get_chassis_tsa_status(self):
    """
    Read tsa_enabled from the "BGP_DEVICE_GLOBAL|STATE" entry of CHASSIS_APP_DB.

    :return: the stored value, or "false" when the database cannot be
             queried or the field holds no value
    """
    chassis_tsa_status = "false"
    try:
        ch = swsscommon.SonicV2Connector(use_unix_socket_path=False)
        ch.connect(ch.CHASSIS_APP_DB, False)
        value = ch.get(ch.CHASSIS_APP_DB, "BGP_DEVICE_GLOBAL|STATE", 'tsa_enabled')
    except Exception:
        # Deliberate best-effort: any failure to query CHASSIS_APP_DB falls
        # back to the "false" default instead of propagating.
        return chassis_tsa_status

    # A missing field comes back without a value; callers compare the result
    # against the strings "true"/"false", so normalise that case to "false".
    if value:
        chassis_tsa_status = value
    return chassis_tsa_status

def downstream_isolate_unisolate(self, idf_isolation_state):
cmd = "\n"
if idf_isolation_state == "unisolated":
Expand All @@ -153,4 +168,4 @@ def check_state_and_get_idf_isolation_routemaps(self):
if idf_isolation_state != "unisolated":
log_notice("DeviceGlobalCfgMgr:: IDF is isolated. Applying required route-maps")
cmd = self.idf_isolate_template.render(isolation_status=idf_isolation_state, constants=self.constants)
return cmd
return cmd
5 changes: 4 additions & 1 deletion src/sonic-bgpcfgd/bgpcfgd/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ def add_manager(self, manager):
table_name = manager.get_table_name()
db = swsscommon.SonicDBConfig.getDbId(db_name)
if db not in self.db_connectors:
self.db_connectors[db] = swsscommon.DBConnector(db_name, 0)
if db_name == "CHASSIS_APP_DB":
self.db_connectors[db] = swsscommon.DBConnector(db_name, 0, True, '')
else:
self.db_connectors[db] = swsscommon.DBConnector(db_name, 0)

if table_name not in self.callbacks[db]:
conn = self.db_connectors[db]
Expand Down
142 changes: 142 additions & 0 deletions src/sonic-bgpcfgd/tests/test_chassis_app_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
from unittest.mock import MagicMock, patch

import os
from bgpcfgd.directory import Directory
from bgpcfgd.template import TemplateFabric
from . import swsscommon_test
from .util import load_constants
import bgpcfgd.managers_chassis_app_db
import bgpcfgd.managers_device_global
from swsscommon import swsscommon
from copy import deepcopy

# Fixture locations. The paths are relative, so os.path.abspath resolves them
# against the current working directory of the test run.
# TEMPLATE_PATH is handed to TemplateFabric in constructor().
TEMPLATE_PATH = os.path.abspath('../../dockers/docker-fpm-frr/frr')
# Default base directory used by get_string_from_file().
BASE_PATH = os.path.abspath('../sonic-bgpcfgd/tests/data/general/peer-group.conf/')
# Base directory for the fixtures read when check_internal=True.
INTERNAL_BASE_PATH = os.path.abspath('../sonic-bgpcfgd/tests/data/internal/peer-group.conf/')
# Constants dictionary; constructor() deep-copies it into common_objs['constants'].
global_constants = {
"bgp": {
"traffic_shift_community" :"12345:12345",
"internal_community_match_tag" : "1001"
}
}

def constructor(check_internal=False):
    """
    Create a ChassisAppDbMgr backed by a MagicMock config manager.

    The mock keeps the running configuration text in ``cfg_mgr.changes``;
    ``update()`` reloads it from a fixture file, chosen by ``check_internal``.

    :param check_internal: when True, seed from the INTERNAL_BASE_PATH fixture
    :return: the manager, with the mock's configuration already loaded
    """
    cfg_mgr = MagicMock()

    def get_config():
        return cfg_mgr.changes

    def push(cfg):
        cfg_mgr.changes += cfg + "\n"

    def update():
        if check_internal:
            cfg_mgr.changes = get_string_from_file("/result_chasiss_packet.conf", INTERNAL_BASE_PATH)
        else:
            cfg_mgr.changes = get_string_from_file("/result_all.conf")

    def get_text():
        # Drop lines whose first non-blank character is '!', then append one
        # trailing blank entry.
        kept = [
            entry
            for entry in cfg_mgr.changes.split('\n')
            if not entry.lstrip().startswith('!')
        ]
        kept += [" "]
        return kept

    cfg_mgr.get_text = get_text
    cfg_mgr.update = update
    cfg_mgr.push = push
    cfg_mgr.get_config = get_config

    common_objs = {
        'directory': Directory(),
        'cfg_mgr': cfg_mgr,
        'tf': TemplateFabric(TEMPLATE_PATH),
        'constants': deepcopy(global_constants),
    }
    mgr = bgpcfgd.managers_chassis_app_db.ChassisAppDbMgr(common_objs, "CHASSIS_APP_DB", "BGP_DEVICE_GLOBAL")
    cfg_mgr.update()
    return mgr


@patch('bgpcfgd.managers_device_global.log_debug')
def test_isolate_device(mocked_log_info):
    """tsa_enabled=true is applied while lc_tsa is "false" and is a no-op while it is "true"."""
    mgr = constructor()
    payload = {"tsa_enabled": "true"}

    # Line card not isolated on its own: the update must change the config.
    mgr.lc_tsa = "false"
    handled = mgr.set_handler("STATE", payload)
    assert handled, "Expect True return value for set_handler"
    mocked_log_info.assert_called_with("DeviceGlobalCfgMgr::Done")
    assert mgr.cfg_mgr.get_config() == get_string_from_file("/result_all_isolate.conf")

    # Line card already isolated: the handler succeeds but leaves the config alone.
    snapshot = mgr.cfg_mgr.get_config()
    mgr.lc_tsa = "true"
    handled = mgr.set_handler("STATE", payload)
    assert handled, "Expect True return value for set_handler"
    assert mgr.cfg_mgr.get_config() == snapshot

@patch('bgpcfgd.managers_device_global.log_debug')
def test_isolate_device_internal_session(mocked_log_info):
    """Same as test_isolate_device, but seeded from the internal-session fixtures."""
    mgr = constructor(check_internal=True)
    payload = {"tsa_enabled": "true"}

    # Line card not isolated on its own: the update must change the config.
    mgr.lc_tsa = "false"
    handled = mgr.set_handler("STATE", payload)
    assert handled, "Expect True return value for set_handler"
    mocked_log_info.assert_called_with("DeviceGlobalCfgMgr::Done")
    assert mgr.cfg_mgr.get_config() == get_string_from_file("/result_chassis_packet_isolate.conf", INTERNAL_BASE_PATH)

    # Line card already isolated: the handler succeeds but leaves the config alone.
    snapshot = mgr.cfg_mgr.get_config()
    mgr.lc_tsa = "true"
    handled = mgr.set_handler("STATE", payload)
    assert handled, "Expect True return value for set_handler"
    assert mgr.cfg_mgr.get_config() == snapshot


@patch('bgpcfgd.managers_device_global.log_debug')
def test_unisolate_device(mocked_log_info):
    """tsa_enabled=false is applied while lc_tsa is "false" and is a no-op while it is "true"."""
    mgr = constructor()
    payload = {"tsa_enabled": "false"}

    # Line card not isolated on its own: the update must change the config.
    mgr.lc_tsa = "false"
    handled = mgr.set_handler("STATE", payload)
    assert handled, "Expect True return value for set_handler"
    mocked_log_info.assert_called_with("DeviceGlobalCfgMgr::Done")
    assert mgr.cfg_mgr.get_config() == get_string_from_file("/result_all_unisolate.conf")

    # Line card isolated on its own: the handler succeeds but leaves the config alone.
    snapshot = mgr.cfg_mgr.get_config()
    mgr.lc_tsa = "true"
    handled = mgr.set_handler("STATE", payload)
    assert handled, "Expect True return value for set_handler"
    assert mgr.cfg_mgr.get_config() == snapshot

@patch('bgpcfgd.managers_device_global.log_debug')
def test_unisolate_device_internal_session(mocked_log_info):
    """Same as test_unisolate_device, but seeded from the internal-session fixtures."""
    mgr = constructor(check_internal=True)
    payload = {"tsa_enabled": "false"}

    # Line card not isolated on its own: the update must change the config.
    mgr.lc_tsa = "false"
    handled = mgr.set_handler("STATE", payload)
    assert handled, "Expect True return value for set_handler"
    mocked_log_info.assert_called_with("DeviceGlobalCfgMgr::Done")
    assert mgr.cfg_mgr.get_config() == get_string_from_file("/result_chassis_packet_unisolate.conf", INTERNAL_BASE_PATH)

    # Line card isolated on its own: the handler succeeds but leaves the config alone.
    snapshot = mgr.cfg_mgr.get_config()
    mgr.lc_tsa = "true"
    handled = mgr.set_handler("STATE", payload)
    assert handled, "Expect True return value for set_handler"
    assert mgr.cfg_mgr.get_config() == snapshot


def get_string_from_file(filename, base_path=BASE_PATH):
    """
    Return the entire contents of a fixture file.

    :param filename: file name appended verbatim to base_path (callers pass a
                     leading "/")
    :param base_path: directory prefix; defaults to BASE_PATH
    :return: the file's text
    """
    # The context manager closes the handle even if read() raises.
    with open(base_path + filename, "r") as fp:
        return fp.read()

@patch('bgpcfgd.managers_chassis_app_db.log_err')
def test_set_handler_failure_case(mocked_log_info):
    """An empty data mapping is rejected with False and reported through log_err."""
    mgr = constructor()
    outcome = mgr.set_handler("STATE", {})
    assert outcome == False, "Expect False return value for invalid data passed to set_handler"
    mocked_log_info.assert_called_with("ChassisAppDbMgr:: data is None")

def test_del_handler():
    """del_handler reports success for a deleted key."""
    mgr = constructor()
    outcome = mgr.del_handler("STATE")
    assert outcome, "Expect True return value for del_handler"
Loading

0 comments on commit b696b4c

Please sign in to comment.