From 8a654821fc92f749757297e3ce2ee60ddf681fd8 Mon Sep 17 00:00:00 2001 From: shilongliu Date: Fri, 30 Aug 2024 13:08:08 +0800 Subject: [PATCH] Reapply "Event and alarm management (#17949)" (#20052) This reverts commit 93f3cad6faaf0be67548c4e99a66ffedad98ca52. --- .../docker-database/database_config.json.j2 | 6 + dockers/docker-eventd/supervisord.conf | 10 + files/build_templates/docker_image_ctl.j2 | 13 +- src/sonic-eventd/Makefile | 17 +- src/sonic-eventd/debian/sonic-eventd.install | 3 + src/sonic-eventd/etc/eventd.json | 5 + src/sonic-eventd/src/eventconsume.cpp | 662 ++++++++++++++++++ src/sonic-eventd/src/eventconsume.h | 54 ++ src/sonic-eventd/src/eventdb.cpp | 30 + src/sonic-eventd/src/eventutils.cpp | 76 ++ src/sonic-eventd/src/eventutils.h | 43 ++ src/sonic-eventd/src/loghandler.cpp | 37 + src/sonic-eventd/src/loghandler.h | 5 + src/sonic-eventd/src/subdir.mk | 6 +- src/sonic-eventd/tests/default.json | 24 + src/sonic-eventd/tests/eventd.json | 5 + .../tests/eventdb_database_config.json | 107 +++ .../tests/eventdb_database_config_global.json | 8 + src/sonic-eventd/tests/eventdb_ut.cpp | 434 ++++++++++++ src/sonic-eventd/tests/subdir.mk | 7 +- src/sonic-eventd/var/evprofile/default.json | 7 + .../yang-models/sonic-alarm.yang | 142 ++++ .../yang-models/sonic-event.yang | 134 ++++ .../yang-models/sonic-events-common.yang | 41 ++ 24 files changed, 1866 insertions(+), 10 deletions(-) create mode 100644 src/sonic-eventd/etc/eventd.json create mode 100644 src/sonic-eventd/src/eventconsume.cpp create mode 100644 src/sonic-eventd/src/eventconsume.h create mode 100644 src/sonic-eventd/src/eventdb.cpp create mode 100644 src/sonic-eventd/src/eventutils.cpp create mode 100644 src/sonic-eventd/src/eventutils.h create mode 100755 src/sonic-eventd/src/loghandler.cpp create mode 100644 src/sonic-eventd/src/loghandler.h create mode 100755 src/sonic-eventd/tests/default.json create mode 100755 src/sonic-eventd/tests/eventd.json create mode 100644 src/sonic-eventd/tests/eventdb_database_config.json create mode 100644 src/sonic-eventd/tests/eventdb_database_config_global.json create mode 100644 src/sonic-eventd/tests/eventdb_ut.cpp create mode 100644 src/sonic-eventd/var/evprofile/default.json create mode 100644 src/sonic-yang-models/yang-models/sonic-alarm.yang create mode 100644 src/sonic-yang-models/yang-models/sonic-event.yang diff --git a/dockers/docker-database/database_config.json.j2 b/dockers/docker-database/database_config.json.j2 index 65db6cb72641..0e772cc0661f 100644 --- a/dockers/docker-database/database_config.json.j2 +++ b/dockers/docker-database/database_config.json.j2 @@ -130,6 +130,12 @@ "instance" : {% if include_remote_db %} "remote_redis" {% else %} "redis" {% endif %} } {% endif %} + , + "EVENT_DB" : { + "id" : 19, + "separator": "|", + "instance" : "redis" + } }, "VERSION" : "1.0" } diff --git a/dockers/docker-eventd/supervisord.conf b/dockers/docker-eventd/supervisord.conf index be51f922c120..ef8411545513 100644 --- a/dockers/docker-eventd/supervisord.conf +++ b/dockers/docker-eventd/supervisord.conf @@ -50,3 +50,13 @@ stderr_logfile=syslog dependent_startup=true dependent_startup_wait_for=start:exited +[program:eventdb] +command=/usr/bin/eventdb +priority=3 +autostart=false +autorestart=false +stdout_logfile=syslog +stderr_logfile=syslog +dependent_startup=true +dependent_startup_wait_for=start:exited + diff --git a/files/build_templates/docker_image_ctl.j2 b/files/build_templates/docker_image_ctl.j2 index 7e64845c8097..56e5dc913c2e 100644 --- a/files/build_templates/docker_image_ctl.j2 +++ b/files/build_templates/docker_image_ctl.j2 @@ -91,9 +91,16 @@ function preStartAction() # Load redis content from /host/warmboot/dump.rdb docker cp $WARM_DIR/dump.rdb database$DEV:/var/lib/redis/dump.rdb else - # Create an emtpy file and overwrite any RDB if already there - echo -n > /tmp/dump.rdb - docker cp /tmp/dump.rdb database$DEV:/var/lib/redis/ + COLD_DIR=/host/coldboot + #In case of cold reboot, load redis content from /host/coldboot/dump.rdb + if [[ -f $COLD_DIR/dump.rdb ]]; then + #Load redis content from /host/coldboot/dump.rdb + docker cp $COLD_DIR/dump.rdb database$DEV:/var/lib/redis/dump.rdb + else + # Create an emtpy file and overwrite any RDB if already there + echo -n > /tmp/dump.rdb + docker cp /tmp/dump.rdb database$DEV:/var/lib/redis/ + fi fi fi {%- elif docker_container_name == "pde" %} diff --git a/src/sonic-eventd/Makefile b/src/sonic-eventd/Makefile index 835d0732a0cd..73adf10ca5f3 100644 --- a/src/sonic-eventd/Makefile +++ b/src/sonic-eventd/Makefile @@ -1,12 +1,16 @@ RM := rm -rf EVENTD_TARGET := eventd EVENTD_TEST := tests/tests +EVENTDB_TEST := tests/eventdb EVENTD_TOOL := tools/events_tool EVENTD_PUBLISH_TOOL := tools/events_publish_tool.py RSYSLOG-PLUGIN_TARGET := rsyslog_plugin/rsyslog_plugin RSYSLOG-PLUGIN_TEST := rsyslog_plugin_tests/tests EVENTD_MONIT := tools/events_monit_test.py EVENTD_MONIT_CONF := tools/monit_events +EVENTDB_TARGET := eventdb +EVENTDB_DEFAULT_PROFILE := var/evprofile/default.json +EVENTDB_PROF := etc/eventd.json CP := cp MKDIR := mkdir @@ -19,7 +23,7 @@ PWD := $(shell pwd) ifneq ($(MAKECMDGOALS),clean) ifneq ($(strip $(C_DEPS)),) --include $(C_DEPS) $(OBJS) +-include $(C_DEPS) $(OBJS) $(EVENTDB_OBJS) endif endif @@ -31,10 +35,11 @@ endif all: sonic-eventd eventd-tests eventd-tool rsyslog-plugin rsyslog-plugin-tests -sonic-eventd: $(OBJS) +sonic-eventd: $(OBJS) $(EVENTDB_OBJS) @echo 'Building target: $@' @echo 'Invoking: G++ Linker' $(CC) $(LDFLAGS) -o $(EVENTD_TARGET) $(OBJS) $(LIBS) + $(CC) $(LDFLAGS) -o $(EVENTDB_TARGET) $(EVENTDB_OBJS) $(LIBS) @echo 'Finished building target: $@' @echo ' ' @@ -52,12 +57,14 @@ rsyslog-plugin: $(RSYSLOG-PLUGIN_OBJS) @echo 'Finished building target: $@' @echo ' ' -eventd-tests: $(TEST_OBJS) +eventd-tests: $(TEST_OBJS) $(EVENTDB_TEST_OBJS) @echo 'Building target: $@' @echo 'Invoking: G++ Linker' $(CC) $(LDFLAGS) -o $(EVENTD_TEST) $(TEST_OBJS) $(LIBS) $(TEST_LIBS) + $(CC) $(LDFLAGS) -o $(EVENTDB_TEST) $(EVENTDB_TEST_OBJS) $(LIBS) $(TEST_LIBS) @echo 'Finished building target: $@' $(EVENTD_TEST) + $(EVENTDB_TEST) @echo 'Finished running tests' @echo ' ' @@ -73,12 +80,16 @@ rsyslog-plugin-tests: $(RSYSLOG-PLUGIN-TEST_OBJS) install: $(MKDIR) -p $(DESTDIR)/usr/bin $(MKDIR) -p $(DESTDIR)/etc/monit/conf.d + $(MKDIR) -p $(DESTDIR)/etc/evprofile $(CP) $(EVENTD_TARGET) $(DESTDIR)/usr/bin $(CP) $(EVENTD_TOOL) $(DESTDIR)/usr/bin $(CP) $(EVENTD_PUBLISH_TOOL) $(DESTDIR)/usr/bin $(CP) $(RSYSLOG-PLUGIN_TARGET) $(DESTDIR)/usr/bin $(CP) $(EVENTD_MONIT) $(DESTDIR)/usr/bin $(CP) $(EVENTD_MONIT_CONF) $(DESTDIR)/etc/monit/conf.d + $(CP) $(EVENTDB_TARGET) $(DESTDIR)/usr/bin + $(CP) $(EVENTDB_PROF) $(DESTDIR)/etc/eventd.json + $(CP) $(EVENTDB_DEFAULT_PROFILE) $(DESTDIR)/etc/evprofile/default.json deinstall: $(RM) -rf $(DESTDIR)/usr diff --git a/src/sonic-eventd/debian/sonic-eventd.install b/src/sonic-eventd/debian/sonic-eventd.install index bdba566c77b3..273b07adf186 100644 --- a/src/sonic-eventd/debian/sonic-eventd.install +++ b/src/sonic-eventd/debian/sonic-eventd.install @@ -1,3 +1,6 @@ usr/bin/eventd +usr/bin/eventdb usr/bin/events_tool usr/bin/events_publish_tool.py +etc/evprofile/default.json +etc/eventd.json diff --git a/src/sonic-eventd/etc/eventd.json b/src/sonic-eventd/etc/eventd.json new file mode 100644 index 000000000000..ee441eb329f8 --- /dev/null +++ b/src/sonic-eventd/etc/eventd.json @@ -0,0 +1,5 @@ +{ + "__README__": "Specify size of event history table. Whichever limit is hit first, eventd wraps event history table around and deletes older records.", + "max-records": 40000, + "max-days": 30 +} diff --git a/src/sonic-eventd/src/eventconsume.cpp b/src/sonic-eventd/src/eventconsume.cpp new file mode 100644 index 000000000000..5ef249e27619 --- /dev/null +++ b/src/sonic-eventd/src/eventconsume.cpp @@ -0,0 +1,662 @@ + +#include +#include +#include +#include +#include +#include "eventconsume.h" +#include "loghandler.h" +#include "eventutils.h" +#include "events_common.h" + + +using namespace std::chrono; + +// map to store sequence-id for alarms +unordered_map cal_lookup_map; + +// temporary map to hold merge of default map of events and any event profile +EventMap static_event_table; + +bool g_run = true; +uint64_t seq_id = 0; +uint64_t PURGE_SECONDS = 86400; + +typedef pair pi; +priority_queue, greater > event_history_list; + +map SYSLOG_SEVERITY = { + {EVENT_SEVERITY_CRITICAL_STR, LOG_ALERT}, + {EVENT_SEVERITY_MAJOR_STR, LOG_CRIT}, + {EVENT_SEVERITY_MINOR_STR, LOG_ERR}, + {EVENT_SEVERITY_WARNING_STR, LOG_WARNING}, + {EVENT_SEVERITY_INFORMATIONAL_STR, LOG_NOTICE} +}; + +map SYSLOG_SEVERITY_STR = { + {LOG_ALERT , EVENT_SEVERITY_CRITICAL_STR}, + {LOG_CRIT , EVENT_SEVERITY_MAJOR_STR}, + {LOG_ERR , EVENT_SEVERITY_MINOR_STR}, + {LOG_WARNING , EVENT_SEVERITY_WARNING_STR}, + {LOG_NOTICE , EVENT_SEVERITY_INFORMATIONAL_STR} +}; + +static string flood_ev_id; +static string flood_ev_action; +static string flood_ev_resource; +static string flood_ev_msg; + +EventConsume::EventConsume(DBConnector* dbConn, + string evProfile, + string dbProfile): + m_eventTable(dbConn, EVENT_HISTORY_TABLE_NAME), + m_alarmTable(dbConn, EVENT_CURRENT_ALARM_TABLE_NAME), + m_eventStatsTable(dbConn, EVENT_STATS_TABLE_NAME), + m_alarmStatsTable(dbConn, EVENT_ALARM_STATS_TABLE_NAME), + m_evProfile(evProfile), + m_dbProfile(dbProfile) { + + // open syslog connection + openSyslog(); + + // init stats + initStats(); + + // populate local queue of event histor table + read_events(); + + // read and apply eventd configuration files + // read eventd.json and apply it on history table. + // read default and custom profiles, build static_event_table + read_eventd_config(); + + SWSS_LOG_NOTICE("DONE WITH EventConsume constructor"); +} + +EventConsume::~EventConsume() { +} + +void EventConsume::run() +{ + + SWSS_LOG_ENTER(); + event_handle_t hsub; + hsub = events_init_subscriber(); + + while (g_run) { + event_receive_op_t evt; + map_str_str_t evtOp; + int rc = event_receive(hsub, evt); + if (rc != 0) { + SWSS_LOG_ERROR("Failed to receive rc=%d", rc); + continue; + } + handle_notification(evt); + } + events_deinit_subscriber(hsub); +} + +void EventConsume::read_eventd_config(bool read_all) { + // read manifest file for config options + if (read_all) { + read_config_and_purge(); + } + + // read from default map + static_event_table.clear(); + if (!parse(m_evProfile.c_str(), static_event_table)) { + SWSS_LOG_ERROR("Can not initialize event map"); + closeSyslog(); + exit(0); + } + + SWSS_LOG_NOTICE("Event map is built as follows:"); + for (auto& x: static_event_table) { + SWSS_LOG_NOTICE(" %s (%s %s %s)", x.first.c_str(), x.second.severity.c_str(), x.second.enable.c_str(), x.second.static_event_msg.c_str()); + } +} + + +void EventConsume::handle_notification(const event_receive_op_t& evt) +{ + SWSS_LOG_ENTER(); + + string ev_id, ev_msg, ev_src, ev_act, ev_timestamp, ev_type("EVENT"), + ev_static_msg(""), ev_reckey; + string ev_sev = string(EVENT_SEVERITY_INFORMATIONAL_STR); + bool is_raise = false; + bool is_clear = false; + bool is_ack = false; + vector vec; + + fetchFieldValues(evt, vec, ev_id, ev_msg, ev_src, ev_act, ev_timestamp); + + // flood protection. If a rogue application sends same event repeatedly, throttle repeated instances of that event + if (isFloodedEvent(ev_src, ev_act, ev_id, ev_msg)) { + return; + } + + // get static info + if (!staticInfoExists(ev_id, ev_act, ev_sev, ev_static_msg, vec)) { + return; + } + + // increment save seq-id for the newly received event + uint64_t new_seq_id = seq_id + 1; + + FieldValueTuple seqfv("id", to_string(new_seq_id)); + vec.push_back(seqfv); + + if (ev_act.length() > 0) { + SWSS_LOG_DEBUG("ev_act %s", ev_act.c_str()); + ev_type = "ALARM"; + string almkey = ev_id; + if (!ev_src.empty()) { + almkey += "|" + ev_src; + } + + if (ev_act.compare(EVENT_ACTION_RAISE_STR) == 0) { + is_raise = true; + // add entry to the lookup map + cal_lookup_map.insert(make_pair(almkey, new_seq_id)); + + // add acknowledged field intializing it to false + FieldValueTuple seqfv1("acknowledged", "false"); + vec.push_back(seqfv1); + m_alarmTable.set(to_string(new_seq_id), vec); + + // update alarm counters + updateAlarmStatistics(ev_sev, ev_act); + } else if (ev_act.compare(EVENT_ACTION_CLEAR_STR) == 0) { + is_clear = true; + SWSS_LOG_DEBUG(" Received clear alarm for %s", almkey.c_str()); + + bool ack_flag = false; + // remove entry from local cache, alarm table + if (!udpateLocalCacheAndAlarmTable(almkey, ack_flag)) { + SWSS_LOG_ERROR("Received clear for non-existent alarm %s", almkey.c_str()); + return; + } + + // update alarm counters ONLY if it has not been ack'd before. + // This is because when alarm is ack'd, alarms/severity counter is reduced already. + if (!ack_flag) { + updateAlarmStatistics(ev_sev, ev_act); + } else { + // if it has been ack'd before, ack counter would have been incremented for this alrm. + // Now is the time reduce it. + clearAckAlarmStatistic(); + } + } else { + // ack/unack events comes with seq-id of raised alarm as resource field. + // fetch details of "raised" alarm record + string raise_act; + string raise_ack_flag; + string raise_ts; + + // fetch information from raised event record + if (!fetchRaiseInfo(vec, ev_src, ev_id, ev_sev, raise_act, raise_ack_flag, raise_ts)) + { + SWSS_LOG_ERROR("Action %s on a non-existent Alarm id %s", ev_act.c_str(), ev_src.c_str()); + return; + } + + if (ev_act.compare(EVENT_ACTION_ACK_STR) == 0) { + if (raise_ack_flag.compare("true") == 0) { + SWSS_LOG_INFO("%s/%s is already acknowledged", ev_id.c_str(), ev_src.c_str()); + return; + } + if (raise_act.compare(EVENT_ACTION_RAISE_STR) == 0) { + is_ack = true; + SWSS_LOG_DEBUG("Received acknowledge event - %s/%s", ev_id.c_str(), ev_src.c_str()); + + // update the record with ack flag and ack-time and stats + updateAckInfo(is_ack, ev_timestamp, ev_sev, ev_act, ev_src); + } else { + SWSS_LOG_ERROR("Alarm %s/%s not in RAISE state", ev_id.c_str(), ev_src.c_str()); + return; + } + } else if (ev_act.compare(EVENT_ACTION_UNACK_STR) == 0) { + if (raise_ack_flag.compare("true") == 0) { + SWSS_LOG_DEBUG(" received un-ACKnowledge event - %s/%s", ev_id.c_str(), ev_src.c_str()); + + // update the record with ack flag and ack-time and stats + updateAckInfo(is_ack, ev_timestamp, ev_sev, ev_act, ev_src); + } else { + SWSS_LOG_INFO(" %s/%s is already un-acknowledged", ev_id.c_str(), ev_src.c_str()); + return; + } + } + } + } + // verify the size of history table; delete older entry; add new entry + seq_id = new_seq_id; + update_events(to_string(seq_id), ev_timestamp, vec); + + updateEventStatistics(true, is_raise, is_ack, is_clear); + + // raise a syslog message + writeToSyslog(ev_id, (int)(SYSLOG_SEVERITY.find(ev_sev)->second), ev_type, ev_act, ev_msg, ev_static_msg); + + return; +} + +void EventConsume::read_events() { + vector tuples; + m_eventTable.getContent(tuples); + + SWSS_LOG_ENTER(); + // find out last sequence-id; build local history list + for (auto tuple: tuples) { + for (auto fv: kfvFieldsValues(tuple)) { + if (fvField(fv) == "time-created") { + char* end; + uint64_t seq = strtoull(kfvKey(tuple).c_str(), &end,10); + if (seq > seq_id) { + seq_id = seq; + } + uint64_t val = strtoull(fvValue(fv).c_str(), &end,10); + event_history_list.push(make_pair( seq, val )); + } + } + } + SWSS_LOG_NOTICE("eventd sequence-id intialized to %lu", seq_id); +} + +void EventConsume::updateAlarmStatistics(string ev_sev, string ev_act) { + vector vec; + vector temp; + + // severity counter names are of lower case + transform(ev_sev.begin(), ev_sev.end(), ev_sev.begin(), ::tolower); + + if (m_alarmStatsTable.get("state", vec)) { + for (auto fv: vec) { + if (!fv.first.compare("alarms")) { + if ((ev_act.compare(EVENT_ACTION_RAISE_STR) == 0) || (ev_act.compare(EVENT_ACTION_UNACK_STR) == 0)) { + fv.second = to_string(stoi(fv.second.c_str())+1); + } else { + fv.second = to_string(stoi(fv.second.c_str())-1); + } + temp.push_back(fv); + } else if (!fv.first.compare(ev_sev)) { + if ((ev_act.compare(EVENT_ACTION_RAISE_STR) == 0) || (ev_act.compare(EVENT_ACTION_UNACK_STR) == 0)) { + fv.second = to_string(stoi(fv.second.c_str())+1); + } else { + fv.second = to_string(stoi(fv.second.c_str())-1); + } + temp.push_back(fv); + } else if (!fv.first.compare("acknowledged")) { + if (ev_act.compare(EVENT_ACTION_ACK_STR) == 0) { + fv.second = to_string(stoi(fv.second.c_str())+1); + } else if (ev_act.compare(EVENT_ACTION_UNACK_STR) == 0) { + fv.second = to_string(stoi(fv.second.c_str())-1); + } + temp.push_back(fv); + } + } + m_alarmStatsTable.set("state", temp); + } else { + SWSS_LOG_ERROR("Can not update alarm statistics (table does not exist)"); + } +} + +void EventConsume::updateEventStatistics(bool is_add, bool is_raise, bool is_ack, bool is_clear) { + vector vec; + vector temp; + + if (m_eventStatsTable.get("state", vec)) { + for (auto fv: vec) { + if (!fv.first.compare("events")) { + if (is_add) { + fv.second = to_string(stoi(fv.second.c_str())+1); + } else { + fv.second = to_string(stoi(fv.second.c_str())-1); + } + temp.push_back(fv); + } else if (!fv.first.compare("raised")) { + if (is_raise) { + if (is_add) { + fv.second = to_string(stoi(fv.second.c_str())+1); + } else { + fv.second = to_string(stoi(fv.second.c_str())-1); + } + temp.push_back(fv); + } + } else if (!fv.first.compare("cleared")) { + if (is_clear) { + if (is_add) { + fv.second = to_string(stoi(fv.second.c_str())+1); + } else { + fv.second = to_string(stoi(fv.second.c_str())-1); + } + temp.push_back(fv); + } + } else if (!fv.first.compare("acked")) { + if (is_ack) { + if (is_add) { + fv.second = to_string(stoi(fv.second.c_str())+1); + } else { + fv.second = to_string(stoi(fv.second.c_str())-1); + } + temp.push_back(fv); + } + } + } + + m_eventStatsTable.set("state", temp); + } else { + SWSS_LOG_ERROR("Can not update event statistics (table does not exist)"); + } +} + +void EventConsume::modifyEventStats(string seq_id) { + vector rec; + m_eventTable.get(seq_id, rec); + bool is_raise = false; + bool is_clear = false; + bool is_ack = false; + for (auto fvr: rec) { + if (!fvr.first.compare("action")) { + if (!fvr.second.compare(EVENT_ACTION_RAISE_STR)) { + is_raise = true; + } else if (!fvr.second.compare(EVENT_ACTION_CLEAR_STR)) { + is_clear = true; + } + } + if (!fvr.first.compare("acknowledged")) { + if (!fvr.second.compare("true")) { + is_ack = true; + } + } + } + updateEventStatistics(false, is_raise, is_ack, is_clear); +} + +void EventConsume::purge_events() { + SWSS_LOG_ENTER(); + uint32_t size = event_history_list.size(); + + while (size >= m_count) { + pair oldest_entry = event_history_list.top(); + SWSS_LOG_NOTICE("Rollover based on count(%d/%d). Deleting %lu", size, m_count, oldest_entry.first); + m_eventTable.del(to_string(oldest_entry.first)); + modifyEventStats(to_string(oldest_entry.first)); + event_history_list.pop(); + --size; + } + + const auto p1 = system_clock::now(); + unsigned tnow_seconds = duration_cast(p1.time_since_epoch()).count(); + + while (!event_history_list.empty()) { + pair oldest_entry = event_history_list.top(); + unsigned old_seconds = oldest_entry.second / 1000000000ULL; + + if ((tnow_seconds - old_seconds) > PURGE_SECONDS) { + SWSS_LOG_NOTICE("Rollover based on time (%lu days). Deleting %lu.. now %u old %u", (PURGE_SECONDS/m_days), oldest_entry.second, tnow_seconds, old_seconds); + m_eventTable.del(to_string(oldest_entry.first)); + modifyEventStats(to_string(oldest_entry.first)); + event_history_list.pop(); + } else { + return; + } + } + return; +} + +void EventConsume::read_config_and_purge() { + m_days = 0; + m_count = 0; + // read from the manifest file + parse_config(m_dbProfile.c_str(), m_days, m_count); + SWSS_LOG_NOTICE("max-days %d max-records %d", m_days, m_count); + + // update the nanosecond limit + PURGE_SECONDS *= m_days; + + // purge events based on # of days + purge_events(); +} + +void EventConsume::update_events(string seq_id, string ts, vector vec) { + // purge events based on # of days + purge_events(); + + // now add the event to the event table + m_eventTable.set(seq_id, vec); + + // store it into the event history list + char* end; + uint64_t seq = strtoull(seq_id.c_str(), &end, 10); + uint64_t val = strtoull(ts.c_str(), &end, 10); + event_history_list.push(make_pair( seq, val )); +} + +void EventConsume::resetAlarmStats(int alarms, int critical, int major, int minor, int warning, int acknowledged) { + vector temp; + FieldValueTuple fv; + map::iterator it; + for (it = SYSLOG_SEVERITY_STR.begin(); it != SYSLOG_SEVERITY_STR.end(); it++) { + // there wont be any informational alarms + if (it->second.compare(EVENT_SEVERITY_CRITICAL_STR) == 0) { + fv = FieldValueTuple("critical", to_string(critical)); + temp.push_back(fv); + } else if (it->second.compare(EVENT_SEVERITY_MAJOR_STR) == 0) { + fv = FieldValueTuple("major", to_string(major)); + temp.push_back(fv); + } else if (it->second.compare(EVENT_SEVERITY_MINOR_STR) == 0) { + fv = FieldValueTuple("minor", to_string(minor)); + temp.push_back(fv); + } else if (it->second.compare(EVENT_SEVERITY_WARNING_STR) == 0) { + fv = FieldValueTuple("warning", to_string(warning)); + temp.push_back(fv); + } + } + fv = FieldValueTuple("alarms", to_string(alarms)); + temp.push_back(fv); + fv = FieldValueTuple("acknowledged", to_string(acknowledged)); + temp.push_back(fv); + m_alarmStatsTable.set("state", temp); +} + +void EventConsume::clearAckAlarmStatistic() { + vector vec; + vector temp; + + if (m_alarmStatsTable.get("state", vec)) { + for (auto fv: vec) { + if (!fv.first.compare("acknowledged")) { + fv.second = to_string(stoi(fv.second.c_str())-1); + temp.push_back(fv); + m_alarmStatsTable.set("state", temp); + return; + } + } + } +} + + +void EventConsume::fetchFieldValues(const event_receive_op_t& evt, + vector& vec, + string& ev_id, + string& ev_msg, + string& ev_src, + string& ev_act, + string &ev_timestamp) { + + ev_timestamp = to_string(evt.publish_epoch_ms); + vec.push_back(FieldValueTuple("time-created", ev_timestamp)); + for (const auto& idx : evt.params) { + if (idx.first == "type-id") { + ev_id = idx.second; + vec.push_back(FieldValueTuple("type-id", ev_id)); + SWSS_LOG_DEBUG("type-id: <%s> ", ev_id.c_str()); + } else if (idx.first == "text") { + ev_msg = idx.second; + vec.push_back(FieldValueTuple("text", ev_msg)); + SWSS_LOG_DEBUG("text: <%s> ", ev_msg.c_str()); + } else if (idx.first == "resource") { + ev_src = idx.second; + vec.push_back(idx); + SWSS_LOG_DEBUG("resource: <%s> ", ev_src.c_str()); + } else if (idx.first == "action") { + ev_act = idx.second; + // for events, action is empty + if (!ev_act.empty()) { + vec.push_back(FieldValueTuple("action", ev_act)); + } + } + } +} + +bool EventConsume::isFloodedEvent(string ev_src, string ev_act, string ev_id, string ev_msg) { + // flood protection. If a rogue application sends same event repeatedly, throttle repeated instances of that event + if (!flood_ev_resource.compare(ev_src) && + !flood_ev_action.compare(ev_act) && + !flood_ev_id.compare(ev_id) && + !(flood_ev_msg.compare(ev_msg))) { + SWSS_LOG_INFO("Ignoring the event %s from %s action %s msg %s as it is repeated", ev_id.c_str(), ev_src.c_str(), ev_act.c_str(), ev_msg.c_str()); + return true; + } + + flood_ev_resource = ev_src; + flood_ev_action = ev_act; + flood_ev_id = ev_id; + flood_ev_msg = ev_msg; + return false; +} + +bool EventConsume::staticInfoExists(string &ev_id, string &ev_act, string &ev_sev, string &ev_static_msg, vector &vec) { + auto it = static_event_table.find(ev_id); + if (it != static_event_table.end()) { + EventInfo tmp = (EventInfo) (it->second); + // discard the event as event_static_map shows enable is false for this event + if (tmp.enable == EVENT_ENABLE_FALSE_STR) { + SWSS_LOG_NOTICE("Discarding event <%s> as it is set to disabled", ev_id.c_str()); + return false;; + } + + // get severity in the map and store it in the db + ev_sev = tmp.severity; + ev_static_msg = tmp.static_event_msg; + SWSS_LOG_DEBUG("static info: <%s> <%s> ", tmp.severity.c_str(), tmp.static_event_msg.c_str()); + + FieldValueTuple seqfv1("severity", tmp.severity); + vec.push_back(seqfv1); + return true; + } else { + // dont process the incoming alarms if action is neither raise nor clear + // for ack/unack, no need for this check + if ((ev_act.compare(EVENT_ACTION_ACK_STR) && ev_act.compare(EVENT_ACTION_UNACK_STR))) { + // TODO currently, applications may raise events but default evprofile doesnt contain + // ID info. This is planned for later. + // Change it back to SWSS_LOG_ERROR once event profile contains event-ids + SWSS_LOG_DEBUG("static info NOT FOUND for <%s> ", ev_id.c_str()); + return false; + } + } + return true; +} + +bool EventConsume::udpateLocalCacheAndAlarmTable(string almkey, bool &ack_flag) { + // find and remove the raised alarm + uint64_t lookup_seq_id = 0; + auto it = cal_lookup_map.find(almkey); + if (it != cal_lookup_map.end()) { + lookup_seq_id = (uint64_t) (it->second); + cal_lookup_map.erase(almkey); + + // get status of is_aknowledged flag so that we dont decrement counters twice + vector alm_rec; + m_alarmTable.get(to_string(lookup_seq_id), alm_rec); + for (auto fvr: alm_rec) { + if (!fvr.first.compare("acknowledged")) { + ack_flag = (fvr.second.compare("true") == 0) ? true : false; + break; + } + } + + // delete the record from alarm table + m_alarmTable.del(to_string(lookup_seq_id)); + } else { + // possible - when event profile removes alarms for which enable is false and application cleared them later. + // ignore by logging a debug message.. + SWSS_LOG_INFO("Received alarm-clear for non-existing alarm %s", almkey.c_str()); + return false; + } + return true; +} + +void EventConsume::initStats() { + vector vec; + // possible after a cold-boot or very first time + if (! m_eventStatsTable.get("state", vec)) { + FieldValueTuple fv; + vector temp; + + SWSS_LOG_DEBUG("resetting Event Statistics table"); + fv = FieldValueTuple("events", to_string(0)); + temp.push_back(fv); + fv = FieldValueTuple("raised", to_string(0)); + temp.push_back(fv); + fv = FieldValueTuple("cleared", to_string(0)); + temp.push_back(fv); + fv = FieldValueTuple("acked", to_string(0)); + temp.push_back(fv); + m_eventStatsTable.set("state", temp); + } + if (! m_alarmStatsTable.get("state", vec)) { + SWSS_LOG_DEBUG("resetting Alarm Statistics table"); + resetAlarmStats(0, 0, 0, 0, 0, 0); + } +} + +void EventConsume::updateAckInfo(bool is_ack, string ev_timestamp, string ev_sev, string ev_act, string ev_src) { + vector ack_vec; + + FieldValueTuple seqfv1("acknowledged", (is_ack ? "true" : "false")); + ack_vec.push_back(seqfv1); + + FieldValueTuple seqfv2("acknowledge-time", ev_timestamp); + ack_vec.push_back(seqfv2); + + // update alarm stats + updateAlarmStatistics(ev_sev, ev_act); + + // update alarm/event tables for the "raise" record with ack flag and ack timestamp + // for ack/unack, ev_src contains the "seq-id" + m_alarmTable.set(ev_src, ack_vec); + m_eventTable.set(ev_src, ack_vec); +} + + +bool EventConsume::fetchRaiseInfo(vector &vec, string ev_src, string &ev_id, string &ev_sev, string &raise_act, + string &raise_ack_flag, string &raise_ts) { + vector raise_vec; + if (!m_alarmTable.get(ev_src, raise_vec)) { + return false; + } + for (auto fv: raise_vec) { + if (!fv.first.compare("type-id")) { + ev_id = fv.second; + vec.push_back(fv); + } + if (!fv.first.compare("severity")) { + ev_sev = fv.second; + vec.push_back(fv); + } + if (!fv.first.compare("action")) { + raise_act = fv.second; + } + if (!fv.first.compare("acknowledged")) { + raise_ack_flag = fv.second; + } + if (!fv.first.compare("time-created")) { + raise_ts = fv.second; + } + } + return true; +} + + diff --git a/src/sonic-eventd/src/eventconsume.h b/src/sonic-eventd/src/eventconsume.h new file mode 100644 index 000000000000..36e75f7dac98 --- /dev/null +++ b/src/sonic-eventd/src/eventconsume.h @@ -0,0 +1,54 @@ +#ifndef __EVENTCONSUME_H__ +#define __EVENTCONSUME_H__ + +#include +#include +#include "events.h" +#include "eventutils.h" +#include "dbconnector.h" +#include "subscriberstatetable.h" + +using namespace swss; +using namespace std; + +class EventConsume +{ +public: + EventConsume(DBConnector *dbConn, + string evProfile =EVENTD_DEFAULT_MAP_FILE, + string dbProfile =EVENTD_CONF_FILE); + ~EventConsume(); + void read_eventd_config(bool read_all=true); + void run(); + +private: + Table m_eventTable; + Table m_alarmTable; + Table m_eventStatsTable; + Table m_alarmStatsTable; + u_int32_t m_days, m_count; + string m_evProfile; + string m_dbProfile; + + void handle_notification(const event_receive_op_t& evt); + void read_events(); + void updateAlarmStatistics(string ev_sev, string ev_act); + void updateEventStatistics(bool is_add, bool is_alarm, bool is_ack, bool is_clear); + void read_config_and_purge(); + void update_events(string seq_id, string ts, vector vec); + void purge_events(); + void modifyEventStats(string seq_id); + void clearAckAlarmStatistic(); + void resetAlarmStats(int, int, int, int, int, int); + void fetchFieldValues(const event_receive_op_t& evt , vector &, string &, string &, string &, string &, string &); + bool isFloodedEvent(string, string, string, string); + bool staticInfoExists(string &, string &, string &, string &, vector &); + bool udpateLocalCacheAndAlarmTable(string, bool &); + void initStats(); + void updateAckInfo(bool, string, string, string, string); + bool fetchRaiseInfo(vector &, string, string &, string &, string &, string &, string &); +}; + + +#endif /* __EVENTCONSUME_H__ */ + diff --git a/src/sonic-eventd/src/eventdb.cpp b/src/sonic-eventd/src/eventdb.cpp new file mode 100644 index 000000000000..9b162416bf31 --- /dev/null +++ b/src/sonic-eventd/src/eventdb.cpp @@ -0,0 +1,30 @@ +#include "eventconsume.h" +#include + +static EventConsume *evtd_instance = NULL; + +void signalHandler(const int signal) { + SWSS_LOG_NOTICE("in signalHandler"); + + if (signal == SIGINT) { + evtd_instance->read_eventd_config(); + } +} + +int main() +{ + swss::Logger::getInstance().setMinPrio(swss::Logger::SWSS_DEBUG); + + swss::DBConnector eventDb("EVENT_DB", 0); + + // register signal SIGINT and signal handler + signal(SIGINT, signalHandler); + + EventConsume evtd(&eventDb); + evtd_instance = &evtd; + + evtd.run(); + + return 0; +} + diff --git a/src/sonic-eventd/src/eventutils.cpp b/src/sonic-eventd/src/eventutils.cpp new file mode 100644 index 000000000000..0a977b4475af --- /dev/null +++ b/src/sonic-eventd/src/eventutils.cpp @@ -0,0 +1,76 @@ +#include "eventutils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +using namespace swss; +using json = nlohmann::json; + +bool isValidSeverity(string severityStr) { + transform(severityStr.begin(), severityStr.end(), severityStr.begin(), ::toupper); + if (severityStr == EVENT_SEVERITY_MAJOR_STR) return true; + if (severityStr == EVENT_SEVERITY_CRITICAL_STR) return true; + if (severityStr == EVENT_SEVERITY_MINOR_STR) return true; + if (severityStr == EVENT_SEVERITY_WARNING_STR) return true; + if (severityStr == EVENT_SEVERITY_INFORMATIONAL_STR) return true; + return false; +} + +bool isValidEnable(string enableStr) { + if (enableStr == EVENT_ENABLE_TRUE_STR) return true; + if (enableStr == EVENT_ENABLE_FALSE_STR) return true; + return false; +} + +bool parse_config(const char *filename, unsigned int& days, unsigned int& count) { + days = EHT_MAX_DAYS; + count = EHT_MAX_ELEMS; + std::ifstream ifs(filename); + json j = json::parse(ifs); + for (json::iterator it = j.begin(); it != j.end(); ++it) { + if(it.key() == "max-days") { + days = it.value(); + } + if(it.key() == "max-records") { + count = it.value(); + } + } + return true; +} + +bool parse(const char *filename, EventMap& tmp_event_table) { + ifstream fs(filename); + if (!fs.is_open()) { + return false; + } + + fstream file(filename, fstream::in); + json j; + file >> j; + + if (j["events"].size() == 0) { + SWSS_LOG_ERROR("No entries in 'events' field in %s", filename); + return false; + } + + for (size_t i = 0; i < j["events"].size(); i++) { + auto elem = j["events"][i]; + struct EventInfo_t ev_info; + string ev_name = elem["name"]; + ev_info.severity = elem["severity"]; + ev_info.enable = elem["enable"]; + ev_info.static_event_msg = elem["message"]; + tmp_event_table.emplace(ev_name, ev_info); + } + + return true; +} + diff --git a/src/sonic-eventd/src/eventutils.h b/src/sonic-eventd/src/eventutils.h new file mode 100644 index 000000000000..8ec6d39558b9 --- /dev/null +++ b/src/sonic-eventd/src/eventutils.h @@ -0,0 +1,43 @@ +#ifndef __EVENTUTILS_H__ +#define __EVENTUTILS_H__ + +#include +#include + +using namespace std; + +const string EVENT_SEVERITY_CRITICAL_STR = "CRITICAL"; +const string EVENT_SEVERITY_MAJOR_STR = "MAJOR"; +const string EVENT_SEVERITY_MINOR_STR = "MINOR"; +const string EVENT_SEVERITY_WARNING_STR = "WARNING"; +const string EVENT_SEVERITY_INFORMATIONAL_STR = "INFORMATIONAL"; + +const string EVENT_ENABLE_TRUE_STR = "true"; +const string EVENT_ENABLE_FALSE_STR = "false"; + +const string EVENT_ACTION_RAISE_STR = "RAISE"; +const string EVENT_ACTION_CLEAR_STR = "CLEAR"; +const string EVENT_ACTION_ACK_STR = "ACKNOWLEDGE"; +const string EVENT_ACTION_UNACK_STR = "UNACKNOWLEDGE"; + +constexpr char EVENTD_DEFAULT_MAP_FILE[] = "/etc/evprofile/default.json"; + +constexpr size_t EHT_MAX_ELEMS = 40000; +constexpr size_t EHT_MAX_DAYS = 30; +constexpr char EVENTD_CONF_FILE[] = "/etc/eventd.json"; + +typedef struct EventInfo_t { + string severity; + string enable; + string static_event_msg; +} EventInfo; + +//unordered_map static_event_table; +typedef unordered_map EventMap; + +bool isValidSeverity(string severityStr); +bool isValidEnable(string enableStr); +bool parse_config(const char *filename, unsigned int& days, unsigned int& count); +bool parse(const char *filename, EventMap& tmp_event_table); + +#endif diff --git a/src/sonic-eventd/src/loghandler.cpp b/src/sonic-eventd/src/loghandler.cpp new file mode 100755 index 000000000000..d560ccc2e0e3 --- /dev/null +++ b/src/sonic-eventd/src/loghandler.cpp @@ -0,0 +1,37 @@ +#include +#include + +extern "C" void openSyslog() { + openlog (NULL, LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL4); +} + +extern "C" void writeToSyslog(std::string ev_id, int ev_sev, std::string ev_type, std::string ev_act, std::string ev_msg, std::string ev_static_msg) { + int SYSLOG_FACILITY = LOG_LOCAL4; + if (ev_act.empty()) { + const char LOG_FORMAT[] = "[%s], %%%s: %s %s"; + // event Type + // Event Name + // Static Desc + // Dynamic Desc + + // raise a syslog message + syslog(LOG_MAKEPRI(ev_sev, SYSLOG_FACILITY), LOG_FORMAT, + ev_type.c_str(), + ev_id.c_str(), ev_static_msg.c_str(), ev_msg.c_str()); + } else { + const char LOG_FORMAT[] = "[%s] (%s), %%%s: %s %s"; + // event Type + // event action + // Event Name + // Static Desc + // Dynamic Desc + // raise a syslog message + syslog(LOG_MAKEPRI(ev_sev, SYSLOG_FACILITY), LOG_FORMAT, + ev_type.c_str(), ev_act.c_str(), + ev_id.c_str(), ev_static_msg.c_str(), ev_msg.c_str()); + } +} + +extern "C" void closeSyslog() { + closelog (); +} diff --git a/src/sonic-eventd/src/loghandler.h b/src/sonic-eventd/src/loghandler.h new file mode 100644 index 000000000000..1e98a940152f --- /dev/null +++ b/src/sonic-eventd/src/loghandler.h @@ -0,0 +1,5 @@ +#include +extern "C" void openSyslog(); +extern "C" void writeToSyslog(std::string ev_id, int ev_sev, std::string ev_type, std::string ev_act, std::string ev_msg, std::string ev_static_msg); +extern "C" void closeSyslog(); + diff --git a/src/sonic-eventd/src/subdir.mk b/src/sonic-eventd/src/subdir.mk index eb067fd6e86c..1b03f3d4b040 100644 --- a/src/sonic-eventd/src/subdir.mk +++ b/src/sonic-eventd/src/subdir.mk @@ -1,9 +1,11 @@ CC := g++ -TEST_OBJS += ./src/eventd.o +TEST_OBJS += ./src/eventd.o ./src/eventconsume.o ./src/eventutils.o ./src/loghandler.o +EVENTDB_TEST_OBJS += ./src/eventd.o ./src/eventconsume.o ./src/eventutils.o ./src/loghandler.o OBJS += ./src/eventd.o ./src/main.o +EVENTDB_OBJS += ./src/eventdb.o ./src/eventconsume.o ./src/loghandler.o ./src/eventutils.o -C_DEPS += ./src/eventd.d ./src/main.d +C_DEPS += ./src/eventd.d ./src/main.d ./src/eventdb.d ./src/eventconsume.d ./src/loghandler.d ./src/eventutils.d src/%.o: src/%.cpp @echo 'Building file: $<' diff --git a/src/sonic-eventd/tests/default.json b/src/sonic-eventd/tests/default.json new file mode 100755 index 000000000000..9a1511188513 --- /dev/null +++ b/src/sonic-eventd/tests/default.json @@ -0,0 +1,24 @@ +{ + "__README__" : "This is default map of events that eventd uses. Developer can modify this file and send SIGINT to eventd to make it read and use the updated file. Alternatively developer can test the new event by adding it to a custom event profile and use 'event profile ' command to apply that profile without having to send SIGINT to eventd. Developer need to commit default.json file with the new event after testing it out. Supported severities are: CRITICAL, MAJOR, MINOR, WARNING and INFORMATIONAL. Supported enable flag values are: true and false.", + "events":[ + { + "name": "SYSTEM_STATE", + "severity": "INFORMATIONAL", + "enable": "true", + "message" : "" + }, + { + "name": "SENSOR_TEMP_HIGH", + "severity": "WARNING", + "enable": "true", + "message" : "" + }, + { + "name": "USER_LOGIN", + "severity": "INFORMATIONAL", + "enable": "true", + "message" : "" + } + ] +} + diff --git a/src/sonic-eventd/tests/eventd.json b/src/sonic-eventd/tests/eventd.json new file mode 100755 index 000000000000..2236e0c6a9e6 --- /dev/null +++ b/src/sonic-eventd/tests/eventd.json @@ -0,0 +1,5 @@ +{ + "__README__": "Specify size of event history table. Whichever limit is hit first, eventd wraps event history table around and deletes older records.", + "max-records": 200, + "max-days": 30 +} diff --git a/src/sonic-eventd/tests/eventdb_database_config.json b/src/sonic-eventd/tests/eventdb_database_config.json new file mode 100644 index 000000000000..3fb6d6339af1 --- /dev/null +++ b/src/sonic-eventd/tests/eventdb_database_config.json @@ -0,0 +1,107 @@ +{ + "INSTANCES": { + "redis":{ + "hostname" : "127.0.0.1", + "port": 6379, + "unix_socket_path": "/var/run/redis/redis.sock" + }, + "redis1":{ + "hostname" : "127.0.0.1", + "port": 6380, + "unix_socket_path": "/var/run/redis/redis1.sock" + }, + "redis2":{ + "hostname" : "127.0.0.1", + "port": 6381, + "unix_socket_path": "/var/run/redis/redis2.sock" + }, + "redis3":{ + "hostname" : "127.0.0.1", + "port": 6382, + "unix_socket_path": "/var/run/redis/redis3.sock" + }, + "redis4":{ + "hostname" : "127.0.0.1", + "port": 6383, + "unix_socket_path": "/var/run/redis/redis4.sock" + } + }, + "DATABASES" : { + "APPL_DB" : { + "id" : 0, + "separator": ":", + "instance" : "redis" + }, + "ASIC_DB" : { + "id" : 1, + "separator": ":", + "instance" : "redis" + }, + "COUNTERS_DB" : { + "id" : 2, + "separator": ":", + "instance" : "redis" + }, + "CONFIG_DB" : { + "id" : 4, + "separator": "|", + "instance" : "redis" + }, + "PFC_WD_DB" : { + "id" : 5, + "separator": ":", + "instance" : "redis" + }, + "FLEX_COUNTER_DB" : { + "id" : 5, + "separator": ":", + "instance" : "redis" + }, + "STATE_DB" : { + "id" : 6, + "separator": "|", + "instance" : "redis" + }, + "SNMP_OVERLAY_DB" : { + "id" : 7, + "separator": "|", + "instance" : "redis" + }, + "RESTAPI_DB": { + "id": 8, + "separator": "|", + "instance": "redis" + }, + "GB_ASIC_DB": { + "id": 9, + "separator": ":", + "instance": "redis" + }, + "GB_COUNTERS_DB": { + "id": 10, + "separator": ":", + "instance": "redis" + }, + "GB_FLEX_COUNTER_DB": { + "id": 11, + "separator": ":", + "instance": "redis" + }, + "STATE_DB2" : { + "id" : 13, + "separator": "|", + "instance" : "redis" + }, + "APPL_STATE_DB" : { + "id" : 14, + "separator": ":", + "instance" : "redis" + }, + "EVENT_DB" : { + "id" : 15, + "separator": ":", + "instance" : "redis" + } + }, + "VERSION" : "1.0" +} diff --git a/src/sonic-eventd/tests/eventdb_database_config_global.json b/src/sonic-eventd/tests/eventdb_database_config_global.json new file mode 100644 index 000000000000..e55e1ab1b181 --- /dev/null +++ b/src/sonic-eventd/tests/eventdb_database_config_global.json @@ -0,0 +1,8 @@ +{ + "INCLUDES" : [ + { + "include" : "eventdb_database_config.json" + } + ], + "VERSION" : "1.0" +} diff --git a/src/sonic-eventd/tests/eventdb_ut.cpp b/src/sonic-eventd/tests/eventdb_ut.cpp new file mode 100644 index 000000000000..a3e9e4aba8f6 --- /dev/null +++ b/src/sonic-eventd/tests/eventdb_ut.cpp @@ -0,0 +1,434 @@ +#include +#include "gtest/gtest.h" +#include "events_common.h" +#include "events.h" +#include +#include "dbconnector.h" +#include + +#include +#include "../src/eventd.h" +#include "../src/eventconsume.h" + +using namespace std; +using namespace swss; + +extern volatile bool g_run; +extern uint64_t seq_id; +extern uint64_t PURGE_SECONDS; +extern unordered_map cal_lookup_map; +typedef pair pi; +extern priority_queue, greater > event_history_list; +extern EventMap static_event_table; + + +#define TEST_DB "APPL_DB" +#define TEST_NAMESPACE "asic0" +#define INVALID_NAMESPACE "invalid" + +//extern void run_pub(void *mock_pub, const string wr_source, internal_events_lst_t &lst); + +string existing_file = "./tests//eventdb_database_config.json"; +string nonexisting_file = "./tests//database_config_nonexisting.json"; +string global_existing_file = "./tests//eventdb_database_global.json"; + + + +typedef struct { + map ev_data; + string severity; +} ev_data_struct; +map, ev_data_struct> event_data_t = { + {{"SYSTEM_STATE","NOTIFY"}, + {{{"type-id","SYSTEM_STATE"}, {"resource", "system-state"}, {"text", "System Ready"}}, + "INFORMATIONAL"}}, + {{"INTERFACE_OPER_STATE", "NOTIFY"}, + {{{"type-id", "INTERFACE_OPER_STATE"}, {"resource", "Ethernet1"}, {"text", "Operational Down"}, {"state", "up"}}, + "INFORMATIONAL"}}, + {{"SENSOR_TEMP_HIGH", "RAISE"}, + {{{"type-id", "SENSOR_TEMP_HIGH"}, {"resource", "cpu_sensor"}, {"action", "RAISE"}, {"text", "sensor temp 55C, threshold temp 52C"}}, + "WARNING"}}, + {{"SENSOR_TEMP_HIGH", "CLEAR"}, + {{{"type-id", "SENSOR_TEMP_HIGH"}, {"resource", "cpu_sensor"}, {"action", "CLEAR"}, {"text", "sensor temp 50C, threshold temp 52C"}}, + "WARNING"}} +}; + + +typedef struct { + int id; + string source; + string tag; + string rid; + string seq; +} test_data_t; + +const string event_profile = "tests/default.json"; +const string event_db_profile = "tests/eventd.json"; + +void delete_evdb(DBConnector& dbConn) +{ + auto keys = dbConn.keys("*"); + for (const auto& key : keys) + { + dbConn.del(key); + } +} + +void clear_eventdb_data() +{ + g_run = true; + seq_id =0; + cal_lookup_map.clear(); + PURGE_SECONDS = 86400; + event_history_list = priority_queue, greater >(); + static_event_table.clear(); +} + +void run_pub(void *mock_pub, const string wr_source, internal_events_lst_t &lst) +{ + for(internal_events_lst_t::const_iterator itc = lst.begin(); itc != lst.end(); ++itc) { + EXPECT_EQ(0, zmq_message_send(mock_pub, wr_source, *itc)); + } +} + +class EventDbFixture : public ::testing::Test { + protected: + void SetUp() override { + zctx = zmq_ctx_new(); + EXPECT_TRUE(NULL != zctx); + + /* Run proxy to enable receive as capture test needs to receive */ + pxy = new eventd_proxy(zctx); + EXPECT_TRUE(NULL != pxy); + + /* Starting proxy */ + EXPECT_EQ(0, pxy->init()); + DBConnector eventDb("EVENT_DB", 0, true); + //delete any entries in the EVENT_DB + delete_evdb(eventDb); + + try + { + /* Start Eventdb in a separate thread*/ + evtConsume= new EventConsume(&eventDb, event_profile, event_db_profile); + thread thr(&EventConsume::run, evtConsume); + thr.detach(); + } + catch (exception &e) + { + printf("EventDbFixture::SetUP: Unable to get DB Connector, e=(%s)\n", e.what()); + } + } + + void TearDown() override { + delete evtConsume; + evtConsume = NULL; + delete pxy; + pxy= NULL; + zmq_ctx_term(zctx); + zctx = NULL; + clear_eventdb_data(); + } + EventConsume *evtConsume; + void *zctx; + eventd_proxy *pxy; +}; + + +void *init_publish(void *zctx) +{ + void *mock_pub = zmq_socket (zctx, ZMQ_PUB); + EXPECT_TRUE(NULL != mock_pub); + EXPECT_EQ(0, zmq_connect(mock_pub, get_config(XSUB_END_KEY).c_str())); + + /* Provide time for async connect to complete */ + this_thread::sleep_for(chrono::milliseconds(200)); + + return mock_pub; +} + +internal_event_t create_ev(const int id, const int ev_id, const string& event, + const string& action, + map> &verify_data) +{ + internal_event_t event_data; + stringstream ss; + + test_data_t data; + data.id = id; + data.source = "source" + to_string(id); + data.tag = "tag" + to_string(id); + data.rid = "guid-" + to_string(id); + data.seq = to_string(id); + + event_data[EVENT_STR_DATA] = convert_to_json( + data.source + ":" + data.tag, map(event_data_t[make_pair(event, action)].ev_data)); + event_data[EVENT_RUNTIME_ID] = data.rid; + event_data[EVENT_SEQUENCE] = data.seq; + auto timepoint = system_clock::now(); + ss << duration_cast(timepoint.time_since_epoch()).count(); + + event_data[EVENT_EPOCH] = ss.str(); + unordered_map ev_val(event_data_t[make_pair(event, action)].ev_data.begin(), + event_data_t[make_pair(event, action)].ev_data.end()); + ev_val.insert({{"id", to_string(ev_id)}}); + ev_val.insert({{"time-created", ss.str()}}); + ev_val.insert({{"severity", event_data_t[make_pair(event, action)].severity}}); + + if (action == "RAISE") { + ev_val.insert({{"acknowledged", "false"}}); + ev_val.insert({{"action", action}}); + } + verify_data.insert({to_string(ev_id), ev_val}); + + return event_data; +} + + +void verify_events(map> verifyData) +{ + DBConnector eventDb("EVENT_DB", 0, true); + auto dbKeys = eventDb.keys("EVENT:*"); + EXPECT_EQ(verifyData.size(), dbKeys.size()); + + for (const auto& vKey : verifyData) + { + string evtKey = "EVENT:" + vKey.first; + EXPECT_TRUE(count(dbKeys.begin(), dbKeys.end(), evtKey) == 1); + auto ev = eventDb.hgetall(evtKey); + EXPECT_TRUE(ev == verifyData[vKey.first]); + } +} + + +void verify_alarms_clear(map> verifyData) +{ + DBConnector eventDb("EVENT_DB", 0, true); + auto dbKeys = eventDb.keys("ALARM:*"); + EXPECT_EQ(0, dbKeys.size()); +} + +void verify_alarms_raise(map> verifyData) +{ + DBConnector eventDb("EVENT_DB", 0, true); + auto dbKeys = eventDb.keys("ALARM:*"); + EXPECT_EQ(verifyData.size(), dbKeys.size()); + + for (const auto& vKey : verifyData) + { + string almKey = "ALARM:" + vKey.first; + EXPECT_TRUE(count(dbKeys.begin(), dbKeys.end(), almKey) == 1); + auto ev = eventDb.hgetall(almKey); + EXPECT_TRUE(ev == verifyData[vKey.first]); + } +} + +TEST_F(EventDbFixture, validate_events) +{ + printf("Validate events TEST started\n"); + + internal_events_lst_t wr_evts; + string wr_source("eventd-test"); + + void *mock_pub = init_publish(zctx); + + map> verify_data; + + wr_evts.push_back(create_ev(1, 1, "SENSOR_TEMP_HIGH", "RAISE", verify_data)); + wr_evts.push_back(create_ev(2, 2, "SYSTEM_STATE", "NOTIFY", verify_data)); + + run_pub(mock_pub, wr_source, wr_evts); + + this_thread::sleep_for(chrono::milliseconds(2000)); + + // verify events logged in DB. + verify_events(verify_data); + + //send events to close eventdb task + g_run = false; + wr_evts.clear(); + wr_evts.push_back(create_ev(301, 3, "SYSTEM_STATE", "NOTIFY", verify_data)); + run_pub(mock_pub, wr_source, wr_evts); + this_thread::sleep_for(chrono::milliseconds(2000)); + zmq_close(mock_pub); + + printf("Validate events TEST completed\n"); + +} + + +TEST_F(EventDbFixture, validate_alarms) +{ + printf("Validate alarms TEST started\n"); + internal_events_lst_t wr_evts; + string wr_source("eventd-test"); + void *mock_pub = init_publish(zctx); + + map> verify_data; + wr_evts.push_back(create_ev(3, 1, "SENSOR_TEMP_HIGH", "RAISE", verify_data)); + + run_pub(mock_pub, wr_source, wr_evts); + + this_thread::sleep_for(chrono::milliseconds(2000)); + + // verify events logged in DB. + verify_events(verify_data); + verify_alarms_raise(verify_data); + + wr_evts.clear(); + wr_evts.push_back(create_ev(4, 2, "SENSOR_TEMP_HIGH", "CLEAR", verify_data)); + + run_pub(mock_pub, wr_source, wr_evts); + this_thread::sleep_for(chrono::milliseconds(2000)); + verify_events(verify_data); + verify_alarms_clear(verify_data); + g_run = false; + wr_evts.clear(); + wr_evts.push_back(create_ev(302, 3, "SYSTEM_STATE", "NOTIFY", verify_data)); + run_pub(mock_pub, wr_source, wr_evts); + this_thread::sleep_for(chrono::milliseconds(2000)); + zmq_close(mock_pub); + printf("Validate alarms TEST completed\n"); +} + + +TEST_F(EventDbFixture, expiry_purge) +{ + printf("Expiry purge TEST started\n"); + internal_events_lst_t wr_evts; + string wr_source("eventd-test"); + void *mock_pub = init_publish(zctx); + map> verify_data; + //set epoch time back to 31 days + auto timepoint = system_clock::now(); + auto epochTimeNs = duration_cast(timepoint.time_since_epoch()).count(); + epochTimeNs = epochTimeNs - (32UL * 24 * 60 * 60 * 1000 * 1000 * 1000); + auto ev_data = create_ev(5, 1, "SENSOR_TEMP_HIGH", "RAISE", verify_data); + + ev_data[EVENT_EPOCH] = to_string(epochTimeNs); + verify_data["1"]["time-created"] = ev_data[EVENT_EPOCH]; + wr_evts.push_back(ev_data); + + run_pub(mock_pub, wr_source, wr_evts); + this_thread::sleep_for(chrono::milliseconds(2000)); + + // verify events logged in DB. + verify_events(verify_data); + verify_alarms_raise(verify_data); + + wr_evts.clear(); + verify_data.clear(); + wr_evts.push_back(create_ev(6, 2, "SENSOR_TEMP_HIGH", "CLEAR", verify_data)); + run_pub(mock_pub, wr_source, wr_evts); + this_thread::sleep_for(chrono::milliseconds(2000)); + + verify_events(verify_data); + verify_alarms_clear(verify_data); + wr_evts.clear(); + g_run = false; + wr_evts.push_back(create_ev(303, 3, "SYSTEM_STATE", "NOTIFY", verify_data)); + run_pub(mock_pub, wr_source, wr_evts); + this_thread::sleep_for(chrono::milliseconds(2000)); + zmq_close(mock_pub); + printf("Expiry purge TEST completed\n"); +} + + +TEST_F(EventDbFixture, rollover_purge) +{ + printf("Rollover purge TEST started\n"); + internal_events_lst_t wr_evts; + string wr_source("eventd-test"); + void *mock_pub = init_publish(zctx); + map> verify_data; + int i=0,j=6; + + for (; i <= 198; i+=2, j+=2) + { + wr_evts.push_back(create_ev(j+1, i+1, "SENSOR_TEMP_HIGH", "RAISE", verify_data)); + wr_evts.push_back(create_ev(j+2, i+2, "SENSOR_TEMP_HIGH", "CLEAR", verify_data)); + } + + run_pub(mock_pub, wr_source, wr_evts); + + this_thread::sleep_for(chrono::milliseconds(5000)); + + // verify events logged in DB. + verify_events(verify_data); + // This will make it out of limit + wr_evts.push_back(create_ev(j+1, i+1, "SENSOR_TEMP_HIGH", "RAISE", verify_data)); + run_pub(mock_pub, wr_source, wr_evts); + + this_thread::sleep_for(chrono::milliseconds(2000)); + + DBConnector eventDb("EVENT_DB", 0, true); + auto dbKeys = eventDb.keys("EVENT:*"); + EXPECT_EQ(200, dbKeys.size()); + EXPECT_TRUE(count(dbKeys.begin(), dbKeys.end(), "EVENT:1") == 0); + EXPECT_TRUE(count(dbKeys.begin(), dbKeys.end(), "EVENT:2") == 1); + EXPECT_TRUE(count(dbKeys.begin(), dbKeys.end(), "EVENT:" + to_string(i+1)) == 1); + g_run = false; + wr_evts.push_back(create_ev(303, 3, "SYSTEM_STATE", "NOTIFY", verify_data)); + run_pub(mock_pub, wr_source, wr_evts); + this_thread::sleep_for(chrono::milliseconds(2000)); + zmq_close(mock_pub); + printf("Rollover purge TEST completed\n"); +} + +class SwsscommonEnvironment : public ::testing::Environment { +public: + // Override this to define how to set up the environment + void SetUp() override { + // by default , init should be false + cout << "Default : isInit = " << SonicDBConfig::isInit() << endl; + EXPECT_FALSE(SonicDBConfig::isInit()); + EXPECT_THROW(SonicDBConfig::initialize(nonexisting_file), runtime_error); + + EXPECT_FALSE(SonicDBConfig::isInit()); + + // load local config file, init should be true + SonicDBConfig::initialize(existing_file); + cout << "INIT: load local db config file, isInit = " << SonicDBConfig::isInit() << endl; + EXPECT_TRUE(SonicDBConfig::isInit()); + + // Test the database_global.json file + // by default , global_init should be false + cout << "Default : isGlobalInit = " << SonicDBConfig::isGlobalInit() << endl; + EXPECT_FALSE(SonicDBConfig::isGlobalInit()); + + // Call an API which actually needs the data populated by SonicDBConfig::initializeGlobalConfig +// EXPECT_THROW(SonicDBConfig::getDbId(EVENT_DB, TEST_NAMESPACE), runtime_error); + + // load local global file, init should be true + SonicDBConfig::initializeGlobalConfig(global_existing_file); + cout<<"INIT: load global db config file, isInit = "<' command to apply that profile without having to send SIGINT to eventd. Developer need to commit default.json file with the new event after testing it out. Supported severities are: CRITICAL, MAJOR, MINOR, WARNING and INFORMATIONAL. Supported enable flag values are: true and false.", + "events":[ + + ] +} + \ No newline at end of file diff --git a/src/sonic-yang-models/yang-models/sonic-alarm.yang b/src/sonic-yang-models/yang-models/sonic-alarm.yang new file mode 100644 index 000000000000..e54ad69858dd --- /dev/null +++ b/src/sonic-yang-models/yang-models/sonic-alarm.yang @@ -0,0 +1,142 @@ + module sonic-alarm { + + namespace "http://github.com/sonic-net/sonic-alarm"; + prefix salarm; + yang-version 1.1; + + import sonic-event { + prefix event; + } + + // meta + organization + "SONiC"; + + contact + "SONiC"; + + description + "This module defines operational state data for SONiC alarms."; + + revision "2024-01-30" { + description + "Initial revision."; + } + + + grouping alarm-state { + + leaf id { + type string; + description "Sequence identifier for an alarm."; + } + + leaf resource { + type string; + description "The item that is under alarm within the device."; + } + + leaf text { + type string; + description "Dynamic message raised with the alarm."; + } + + leaf time-created { + type uint64; + description + "The time at which the alarm was raised by the system. + Expressed in nanoseconds since Unix epoch."; + } + + leaf type-id { + type string; + description "Type of the alarm raised"; + } + + leaf severity { + type event:severity-type; + description + "Severity of a raised condition."; + } + + leaf acknowledged { + type boolean; + description + "This denotes whether an alarm is acknowledged by the operator. + An acknowledged alarm is not considered in determining the + health of the system."; + } + + leaf acknowledge-time { + type uint64; + description + "The time at which alarm is acknowledged by the system. + This value is expressed as nanoseconds since the Unix Epoch."; + } + + } + + container sonic-alarm { + + container ALARM { + + list ALARM_LIST { + key "id"; + uses alarm-state; + } + } + + container ALARM_STATS { + + + list ALARM_STATS_LIST { + + key "id"; + leaf id { + type enumeration { + enum state; + } + description + "Table identifier value defined as state."; + } + + leaf alarms { + type uint64; + description + "Total alarms in the system."; + } + + leaf critical { + type uint64; + description + "Total critical alarms in the system."; + } + + leaf major { + type uint64; + description + "Total major alarms in the system."; + } + + leaf minor { + type uint64; + description + "Total minor alarms in the system."; + } + + leaf warning { + type uint64; + description + "Total warnings in the system."; + } + + leaf acknowledged { + type uint64; + description + "Total acknowledged alarms in the system."; + } + + } + } + } +} diff --git a/src/sonic-yang-models/yang-models/sonic-event.yang b/src/sonic-yang-models/yang-models/sonic-event.yang new file mode 100644 index 000000000000..f5bb27782ec0 --- /dev/null +++ b/src/sonic-yang-models/yang-models/sonic-event.yang @@ -0,0 +1,134 @@ + module sonic-event { + namespace "http://github.com/sonic-net/sonic-event"; + prefix sevents; + yang-version 1.1; + + organization + "SONiC"; + + contact + "SONiC"; + + description + "This module defines operational state data for SONiC events."; + + revision "2024-01-30" { + description + "Initial revision."; + } + + typedef severity-type { + type enumeration { + enum CRITICAL; + enum MAJOR; + enum MINOR; + enum WARNING; + enum INFORMATIONAL; + } + description + "Severity of a raised condition."; + } + + typedef action-type { + type enumeration { + enum RAISE; + enum CLEAR; + enum ACKNOWLEDGE; + enum UNACKNOWLEDGE; + } + description + "Action on a raised condition."; + } + + grouping event-state { + + leaf id { + type string; + description "Sequence identifier for events."; + } + + leaf resource { + type string; + description "The item in the device that raised the event."; + } + + leaf text { + type string; + description "Dynamic message raised with the event."; + } + + leaf time-created { + type uint64; + description + "The time at which the event was raised by the system. + Expressed in epoch time."; + } + + leaf type-id { + type string; + description "Type of event raised by the device."; + } + + leaf severity { + type severity-type; + description + "Severity of the event."; + } + + leaf action { + type action-type; + description "Action on the event."; + } + } + + container sonic-event { + + container EVENT { + + list EVENT_LIST { + key "id"; + uses event-state; + } + } + + container EVENT_STATS { + + + list EVENT_STATS_LIST { + + key "id"; + leaf id { + type enumeration { + enum state; + } + description + "Table identifier value defined as state."; + } + + leaf events { + type uint64; + description + "Total number of events in the system store."; + } + + leaf raised { + type uint64; + description + "Total number of events for raise operation in system store."; + } + + leaf acked { + type uint64; + description + "Total number of events for ack operation in system store."; + } + + leaf cleared { + type uint64; + description + "Total number of events for clear operation in system store."; + } + } + } + } + } diff --git a/src/sonic-yang-models/yang-models/sonic-events-common.yang b/src/sonic-yang-models/yang-models/sonic-events-common.yang index ed6c81a7b908..cac93a69c810 100644 --- a/src/sonic-yang-models/yang-models/sonic-events-common.yang +++ b/src/sonic-yang-models/yang-models/sonic-events-common.yang @@ -21,11 +21,52 @@ module sonic-events-common { "Common reusable definitions"; } + typedef action-type { + type enumeration { + enum RAISE { + description "Event with raise alarm action."; + } + enum CLEAR { + description "Event with clear alarm action."; + } + } + description + "This type defines the actions associated with an event notification."; + } + grouping sonic-events-cmn { leaf timestamp { type yang:date-and-time; description "time of the event"; } + leaf type-id { + type string; + description + "The abbreviated name of the event, for example FAN_SPEED_STATUS, + SYSTEM_STATUS, or PSU_FAULTY."; + } + + leaf resource { + type string; + description + "The item generating the event. for example eth1, cpu_sensor"; + } + + leaf text { + type string; + description + "The string used to inform operators about the event. This + MUST contain enough information for an operator to be able + to understand the problem. If this string contains structure, + this format should be clearly documented for programs to be + able to parse that information"; + } + + leaf action { + type action-type; + description + "This denotes the action associated with the event."; + } } grouping sonic-events-usage {