Skip to content

Commit

Permalink
Merge pull request #113 from networktocode/release-v2.0.5
Browse files Browse the repository at this point in the history
Release v2.0.5
  • Loading branch information
glennmatthews authored Nov 18, 2021
2 parents 8aeddef + 2802d43 commit a25fad5
Show file tree
Hide file tree
Showing 53 changed files with 4,194 additions and 112 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Changelog

## v2.0.5 - 2021-11-18

### Fixed

- #109 - Improve handling of Zayo notifications.
- #110 - Improve handling of Telstra notifications.
- #111 - Improve handling of EXA (GTT) notifications.
- #112 - Improve handling of Equinix notifications.

## v2.0.4 - 2021-11-04

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ By default, there is a `GenericProvider` that support a `SimpleProcessor` using
- Cogent
- Colt
- Equinix
- GTT
- EXA (formerly GTT)
- HGC
- Lumen
- Megaport
Expand Down
3 changes: 2 additions & 1 deletion circuit_maintenance_parser/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ def init_from_emailmessage(cls: Type["NotificationData"], email_message) -> Opti
# Adding extra headers that are interesting to be parsed
data_parts.add(DataPart(EMAIL_HEADER_SUBJECT, email_message["Subject"].encode()))
data_parts.add(DataPart(EMAIL_HEADER_DATE, email_message["Date"].encode()))
return cls(data_parts=list(data_parts))
# Ensure the data parts are processed in a consistent order
return cls(data_parts=sorted(data_parts, key=lambda part: part.type))
except Exception: # pylint: disable=broad-except
logger.exception("Error found initializing data from email message: %s", email_message)
return None
8 changes: 3 additions & 5 deletions circuit_maintenance_parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,9 @@ def parse_html(self, soup: ResultSet,) -> List[Dict]:
def clean_line(line):
"""Clean up of undesired characters from Html."""
try:
line = line.text.strip()
return line.text.strip()
except AttributeError:
line = line.strip()
# TODO: below may not be needed if we use `quopri.decodestring()` on the initial email file?
return line.replace("=C2", "").replace("=A0", "").replace("\r", "").replace("=", "").replace("\n", "")
return line.strip()


class EmailDateParser(Parser):
Expand All @@ -199,7 +197,7 @@ class EmailSubjectParser(Parser):
def parser_hook(self, raw: bytes):
"""Execute parsing."""
result = []
for data in self.parse_subject(self.bytes_to_string(raw)):
for data in self.parse_subject(self.bytes_to_string(raw).replace("\r", "").replace("\n", "")):
result.append(data)
return result

Expand Down
5 changes: 4 additions & 1 deletion circuit_maintenance_parser/parsers/equinix.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def _parse_b(self, b_elements, data):
Returns:
impact (Impact object or None): impact of the maintenance notification (used in the parse table function to assign an impact for each circuit); None when no impact statement is found.
"""
impact = None
for b_elem in b_elements:
if "UTC:" in b_elem:
raw_time = b_elem.next_sibling
Expand All @@ -72,6 +73,8 @@ def _parse_b(self, b_elements, data):
impact = Impact.NO_IMPACT
elif "There will be service interruptions" in impact_line.next_sibling.text:
impact = Impact.OUTAGE
elif "Loss of redundancy" in impact_line:
impact = Impact.REDUCED_REDUNDANCY
return impact

def _parse_table(self, theader_elements, data, impact): # pylint: disable=no-self-use
Expand Down Expand Up @@ -105,7 +108,7 @@ def parse_subject(self, subject: str) -> List[Dict]:
List[Dict]: Returns the data object with summary and status fields.
"""
data = {}
maintenance_id = re.search(r"\[(.*)\]$", subject)
maintenance_id = re.search(r"\[([^[]*)\]$", subject)
if maintenance_id:
data["maintenance_id"] = maintenance_id[1]
data["summary"] = subject.strip().replace("\n", "")
Expand Down
26 changes: 21 additions & 5 deletions circuit_maintenance_parser/parsers/gtt.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@


class HtmlParserGTT1(Html):
"""Notifications Parser for GTT notifications."""
"""Notifications Parser for EXA (formerly GTT) notifications."""

def parse_html(self, soup):
"""Execute parsing."""
Expand All @@ -33,21 +33,37 @@ def parse_tables(self, tables, data):
if groups:
data["maintenance_id"] = groups.groups()[0]
status = groups.groups()[1]
if status == "Reminder":
if status in ("New", "Reminder"):
data["status"] = Status["CONFIRMED"]
elif status == "Update":
elif status in ("Update", "Rescheduled"):
data["status"] = Status["RE_SCHEDULED"]
elif status == "Cancelled":
data["status"] = Status["CANCELLED"]
# When an email is cancelled there is no start or end time specified
# Setting this to 0 and 1 stops any errors from pydantic
data["start"] = 0
data["end"] = 1
elif status == "Completed":
data["status"] = Status["COMPLETED"]
elif "Start" in td_element.text:
start = parser.parse(td_element.next_sibling.next_sibling.text)
# In the case of a normal notification, we have:
# <td> <strong>TIME</strong></td>
# But in the case of a reschedule, we have:
# <td> <strong><strike>OLD TIME</strike><font>NEW TIME</font></strong></td>
next_td = td_element.next_sibling.next_sibling
strong = next_td.contents[1]
if strong.string:
start = parser.parse(strong.string)
else:
start = parser.parse(strong.contents[1].string)
data["start"] = self.dt2ts(start)
elif "End" in td_element.text:
end = parser.parse(td_element.next_sibling.next_sibling.text)
next_td = td_element.next_sibling.next_sibling
strong = next_td.contents[1]
if strong.string:
end = parser.parse(strong.string)
else:
end = parser.parse(strong.contents[1].string)
data["end"] = self.dt2ts(end)
num_columns = len(table.find_all("th"))
if num_columns:
Expand Down
4 changes: 2 additions & 2 deletions circuit_maintenance_parser/parsers/seaborn.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@
class SubjectParserSeaborn1(EmailSubjectParser):
"""Parser for Seaborn subject string, email type 1.
Subject: [{ACOUNT NAME}] {MAINTENACE ID} {DATE}
Subject: [{ACCOUNT NAME}] {MAINTENANCE ID} {DATE}
[Customer Direct] 1111 08/14
"""

def parse_subject(self, subject):
"""Parse subject of email file."""
data = {}
search = re.search(r".+\[(.+)\].([0-9]+).+", subject)
search = re.search(r".+\[([^#]+)\].([0-9]+).+", subject)
if search:
data["account"] = search.group(1)
data["maintenance_id"] = search.group(2)
Expand Down
10 changes: 8 additions & 2 deletions circuit_maintenance_parser/parsers/telstra.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,16 @@ def parse_tables(self, tables: ResultSet, data: Dict):
for table in tables:
for td_element in table.find_all("td"):
# TODO: We should find a more consistent way to parse the status of a maintenance note
if "Planned Maintenance has been scheduled" in td_element.text:
if "maintenance has been scheduled" in td_element.text.lower():
data["status"] = Status("CONFIRMED")
elif "This is a reminder notification to notify that a planned maintenance" in td_element.text:
elif "this is a reminder notification to notify that a planned maintenance" in td_element.text.lower():
data["status"] = Status("CONFIRMED")
elif "has been completed" in td_element.text.lower():
data["status"] = Status("COMPLETED")
elif "has been amended" in td_element.text.lower():
data["status"] = Status("RE-SCHEDULED")
elif "has been withdrawn" in td_element.text.lower():
data["status"] = Status("CANCELLED")
else:
continue
break
Expand Down
72 changes: 63 additions & 9 deletions circuit_maintenance_parser/parsers/zayo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,33 @@

from dateutil import parser

from circuit_maintenance_parser.parser import Html, Impact, CircuitImpact, Status
from circuit_maintenance_parser.parser import CircuitImpact, EmailSubjectParser, Html, Impact, Status

# pylint: disable=too-many-nested-blocks,no-member, too-many-branches


logger = logging.getLogger(__name__)


class SubjectParserZayo1(EmailSubjectParser):
    """Parser for Zayo subject string, email type 1.

    Subject: {MESSAGE TYPE}?***{ACCOUNT NAME}***ZAYO {MAINTENANCE_ID} {URGENCY}...***

    Examples:
        END OF WINDOW NOTIFICATION***Customer Inc.***ZAYO TTN-0000123456 Planned***
        ***Customer Inc***ZAYO TTN-0001234567 Emergency MAINTENANCE NOTIFICATION***
        RESCHEDULE NOTIFICATION***Customer Inc***ZAYO TTN-0005423873 Planned***
    """

    def parse_subject(self, subject):
        """Parse subject of email message.

        Args:
            subject (str): Subject line of the notification email.

        Returns:
            list: A single-element list holding a dict. The dict contains
                "account" when the subject splits into exactly four
                "***"-delimited fields, and additionally "maintenance_id"
                when the third field has at least two whitespace-separated
                words. It is empty when the subject does not match the
                expected layout.
        """
        data = {}
        tokens = subject.split("***")
        # A well-formed subject yields exactly four fields:
        # [message type (possibly empty), account, "ZAYO <id> <urgency>...", ""]
        if len(tokens) == 4:
            data["account"] = tokens[1]
            # split() without a separator ignores leading and repeated
            # whitespace, so the ID is always the second real word rather
            # than an empty string or the literal "ZAYO".
            words = tokens[2].split()
            if len(words) > 1:
                data["maintenance_id"] = words[1]
        return [data]


class HtmlParserZayo1(Html):
"""Notifications Parser for Zayo notifications."""

Expand All @@ -24,6 +43,14 @@ def parse_html(self, soup):
self.parse_bs(soup.find_all("b"), data)
self.parse_tables(soup.find_all("table"), data)

if data:
if "status" not in data:
text = soup.get_text()
if "will be commencing momentarily" in text:
data["status"] = Status("IN-PROCESS")
elif "has been completed" in text:
data["status"] = Status("COMPLETED")

return [data]

def parse_bs(self, btags: ResultSet, data: dict):
Expand All @@ -32,10 +59,29 @@ def parse_bs(self, btags: ResultSet, data: dict):
if isinstance(line, bs4.element.Tag):
if line.text.lower().strip().startswith("maintenance ticket #:"):
data["maintenance_id"] = self.clean_line(line.next_sibling)
elif line.text.lower().strip().startswith("urgency:"):
urgency = self.clean_line(line.next_sibling)
if urgency == "Planned":
elif "serves as official notification" in line.text.lower():
if "will be performing maintenance" in line.text.lower():
data["status"] = Status("CONFIRMED")
elif "has cancelled" in line.text.lower():
data["status"] = Status("CANCELLED")
# Some Zayo notifications may include multiple activity dates.
# For lack of a better way to handle this, we consolidate these into a single extended activity range.
#
# For example, given:
#
# 1st Activity Date
# 01-Nov-2021 00:01 to 01-Nov-2021 05:00 ( Mountain )
# 01-Nov-2021 06:01 to 01-Nov-2021 11:00 ( GMT )
#
# 2nd Activity Date
# 02-Nov-2021 00:01 to 02-Nov-2021 05:00 ( Mountain )
# 02-Nov-2021 06:01 to 02-Nov-2021 11:00 ( GMT )
#
# 3rd Activity Date
# 03-Nov-2021 00:01 to 03-Nov-2021 05:00 ( Mountain )
# 03-Nov-2021 06:01 to 03-Nov-2021 11:00 ( GMT )
#
# our end result would be (start: "01-Nov-2021 06:01", end: "03-Nov-2021 11:00")
elif "activity date" in line.text.lower():
logger.info("Found 'activity date': %s", line.text)
for sibling in line.next_siblings:
Expand All @@ -44,9 +90,15 @@ def parse_bs(self, btags: ResultSet, data: dict):
if "( GMT )" in text:
window = self.clean_line(sibling).strip("( GMT )").split(" to ")
start = parser.parse(window.pop(0))
data["start"] = self.dt2ts(start)
start_ts = self.dt2ts(start)
# Keep the earliest of any listed start times
if "start" not in data or data["start"] > start_ts:
data["start"] = start_ts
end = parser.parse(window.pop(0))
data["end"] = self.dt2ts(end)
end_ts = self.dt2ts(end)
# Keep the latest of any listed end times
if "end" not in data or data["end"] < end_ts:
data["end"] = end_ts
break
elif line.text.lower().strip().startswith("reason for maintenance:"):
data["summary"] = self.clean_line(line.next_sibling)
Expand Down Expand Up @@ -80,10 +132,12 @@ def parse_tables(self, tables: ResultSet, data: Dict):
number_of_circuits = int(len(data_rows) / 5)
for idx in range(number_of_circuits):
data_circuit = {}
data_circuit["circuit_id"] = self.clean_line(data_rows[0 + idx])
impact = self.clean_line(data_rows[1 + idx])
data_circuit["circuit_id"] = self.clean_line(data_rows[0 + 5 * idx])
impact = self.clean_line(data_rows[1 + 5 * idx])
if "hard down" in impact.lower():
data_circuit["impact"] = Impact("OUTAGE")
circuits.append(CircuitImpact(**data_circuit))
elif "no expected impact" in impact.lower():
data_circuit["impact"] = Impact("NO-IMPACT")
circuits.append(CircuitImpact(**data_circuit))
if circuits:
data["circuits"] = circuits
15 changes: 10 additions & 5 deletions circuit_maintenance_parser/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from circuit_maintenance_parser.parsers.telstra import HtmlParserTelstra1
from circuit_maintenance_parser.parsers.turkcell import HtmlParserTurkcell1
from circuit_maintenance_parser.parsers.verizon import HtmlParserVerizon1
from circuit_maintenance_parser.parsers.zayo import HtmlParserZayo1
from circuit_maintenance_parser.parsers.zayo import HtmlParserZayo1, SubjectParserZayo1


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -94,7 +94,7 @@ def filter_check(filter_dict: Dict, data: NotificationData, filter_type: str) ->
if filter_data_type not in filter_dict:
continue

data_part_content = data_part.content.decode()
data_part_content = data_part.content.decode().replace("\r", "").replace("\n", "")
if any(re.search(filter_re, data_part_content) for filter_re in filter_dict[filter_data_type]):
logger.debug("Matching %s filter expression for %s.", filter_type, data_part_content)
return True
Expand Down Expand Up @@ -201,6 +201,8 @@ class Colt(GenericProvider):
class Equinix(GenericProvider):
"""Equinix provider custom class."""

_include_filter = {EMAIL_HEADER_SUBJECT: ["Network Maintenance"]}

_processors: List[GenericProcessor] = [
CombinedProcessor(data_parsers=[HtmlParserEquinix, SubjectParserEquinix, EmailDateParser]),
]
Expand All @@ -214,12 +216,15 @@ class EUNetworks(GenericProvider):


class GTT(GenericProvider):
"""GTT provider custom class."""
"""EXA (formerly GTT) provider custom class."""

# "Planned Work Notification", "Emergency Work Notification"
_include_filter = {EMAIL_HEADER_SUBJECT: ["Work Notification"]}

_processors: List[GenericProcessor] = [
CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserGTT1]),
]
_default_organizer = "InfraCo.CM@gttcorp.org"
_default_organizer = "InfraCo.CM@exainfra.net"


class HGC(GenericProvider):
Expand Down Expand Up @@ -330,6 +335,6 @@ class Zayo(GenericProvider):
"""Zayo provider custom class."""

_processors: List[GenericProcessor] = [
SimpleProcessor(data_parsers=[HtmlParserZayo1]),
CombinedProcessor(data_parsers=[EmailDateParser, SubjectParserZayo1, HtmlParserZayo1]),
]
_default_organizer = "[email protected]"
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "circuit-maintenance-parser"
version = "2.0.4"
version = "2.0.5"
description = "Python library to parse Circuit Maintenance notifications and return a structured data back"
authors = ["Network to Code <[email protected]>"]
license = "Apache-2.0"
Expand Down
Loading

0 comments on commit a25fad5

Please sign in to comment.