Skip to content

Commit

Permalink
Merge pull request #76 from networktocode/release-v2.0.0
Browse files Browse the repository at this point in the history
Release v2.0.0
  • Loading branch information
chadell authored Sep 15, 2021
2 parents 9bee2dd + ec42d37 commit 1992402
Show file tree
Hide file tree
Showing 23 changed files with 956 additions and 96 deletions.
10 changes: 8 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
# Changelog

## v2.0.0 -
## v2.0.0 - 2021-09-15

### Added

- #68 - Added new provider `HGC` using `Html` and `EmailSubjectParser`
- #73 - Added new provider `Sparkle` using `Html` and `EmailSubjectParser`. Added support for multiple maintenances with `CombinedProcessor`.
- #75 - Added new provider `AquaComms` using `Html` and `EmailSubjectParser`

### Fixed

- #72 - Ensure `NotificationData` init methods for library client do not raise exceptions and just return `None`.

## v2.0.0-beta - 2021-09-07

Expand All @@ -23,6 +28,7 @@
- #60 - Added new provider `Seaborn` using `Html` and a new parser for Email Subject: `EmailSubjectParser`
- #61 - Added new provider `Colt` using `ICal` and `Csv`
- #66 - Added new provider `Momentum` using `Html` and `EmailSubjectParser`
- #68 - Added new provider `HGC` using `Html` and `EmailSubjectParser`

### Fixed

Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ By default, there is a `GenericProvider` that supports a `SimpleProcessor` using

#### Supported providers based on other parsers

- AquaComms
- Cogent
- Colt
- GTT
Expand All @@ -51,6 +52,7 @@ By default, there is a `GenericProvider` that supports a `SimpleProcessor` using
- Megaport
- Momentum
- Seaborn
- Sparkle
- Telstra
- Turkcell
- Verizon
Expand Down
28 changes: 8 additions & 20 deletions circuit_maintenance_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""Notifications parser init."""

"""Circuit-maintenance-parser init."""
from typing import Type, Optional

from .data import NotificationData
from .output import Maintenance
from .errors import NonexistentProviderError, ProviderError
from .provider import (
GenericProvider,
AquaComms,
Cogent,
Colt,
EUNetworks,
Expand All @@ -17,6 +18,7 @@
NTT,
PacketFabric,
Seaborn,
Sparkle,
Telia,
Telstra,
Turkcell,
Expand All @@ -26,6 +28,7 @@

SUPPORTED_PROVIDERS = (
GenericProvider,
AquaComms,
Cogent,
Colt,
EUNetworks,
Expand All @@ -37,6 +40,7 @@
NTT,
PacketFabric,
Seaborn,
Sparkle,
Telia,
Telstra,
Turkcell,
Expand All @@ -60,21 +64,6 @@ def init_provider(provider_type=None) -> Optional[GenericProvider]:
return None


def init_data_raw(data_type: str, data_content: bytes) -> NotificationData:
"""Returns an instance of NotificationData from one combination of data type and content."""
return NotificationData.init(data_type, data_content)


def init_data_email(raw_email_bytes: bytes) -> NotificationData:
"""Returns an instance of NotificationData from a raw email content."""
return NotificationData.init_from_email_bytes(raw_email_bytes)


def init_data_emailmessage(email_message) -> NotificationData:
"""Returns an instance of NotificationData from an email message."""
return NotificationData.init_from_emailmessage(email_message)


def get_provider_class(provider_name: str) -> Type[GenericProvider]:
"""Returns the Provider parser class for a specific provider_type."""
provider_name = provider_name.lower()
Expand Down Expand Up @@ -107,11 +96,10 @@ def get_provider_class_from_sender(email_sender: str) -> Type[GenericProvider]:

__all__ = [
"init_provider",
"init_data_raw",
"init_data_email",
"init_data_emailmessage",
"NotificationData",
"get_provider_class",
"get_provider_class_from_sender",
"ProviderError",
"NonexistentProviderError",
"Maintenance",
]
7 changes: 4 additions & 3 deletions circuit_maintenance_parser/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
import email
import click

from . import SUPPORTED_PROVIDERS, init_provider, init_data_raw, init_data_emailmessage
from . import SUPPORTED_PROVIDERS, init_provider
from .provider import ProviderError
from .data import NotificationData


@click.command()
Expand All @@ -32,15 +33,15 @@ def main(provider_type, data_file, data_type, verbose):
if str.lower(data_file[-3:]) == "eml":
with open(data_file) as email_file:
msg = email.message_from_file(email_file)
data = init_data_emailmessage(msg)
data = NotificationData.init_from_emailmessage(msg)
else:
click.echo("File format not supported, only *.eml", err=True)
sys.exit(1)

else:
with open(data_file, "rb") as raw_filename:
raw_bytes = raw_filename.read()
data = init_data_raw(data_type, raw_bytes)
data = NotificationData.init_from_raw(data_type, raw_bytes)

try:
parsed_notifications = provider.get_maintenances(data)
Expand Down
47 changes: 32 additions & 15 deletions circuit_maintenance_parser/data.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
"""Definition of Data classes."""
from typing import List, NamedTuple
import logging
from typing import List, NamedTuple, Optional, Type, Set

import email
from pydantic import BaseModel, Extra

logger = logging.getLogger(__name__)


class DataPart(NamedTuple):
"""Simplest data unit to be parsed."""
Expand All @@ -23,16 +26,26 @@ def add_data_part(self, data_type: str, data_content: bytes):
self.data_parts.append(DataPart(data_type, data_content))

@classmethod
def init(cls, data_type: str, data_content: bytes):
def init_from_raw(
cls: Type["NotificationData"], data_type: str, data_content: bytes
) -> Optional["NotificationData"]:
"""Initialize the data_parts with only one DataPart object."""
return cls(data_parts=[DataPart(data_type, data_content)])
try:
return cls(data_parts=[DataPart(data_type, data_content)])
except Exception: # pylint: disable=broad-except
logger.exception("Error found initializing data raw: %s, %s", data_type, data_content)
return None

@classmethod
def init_from_email_bytes(cls, raw_email_bytes: bytes):
def init_from_email_bytes(cls: Type["NotificationData"], raw_email_bytes: bytes) -> Optional["NotificationData"]:
"""Initialize the data_parts from an email defined as raw bytes."""
raw_email_string = raw_email_bytes.decode("utf-8")
email_message = email.message_from_string(raw_email_string)
return cls.init_from_emailmessage(email_message)
try:
raw_email_string = raw_email_bytes.decode("utf-8")
email_message = email.message_from_string(raw_email_string)
return cls.init_from_emailmessage(email_message)
except Exception: # pylint: disable=broad-except
logger.exception("Error found initializing data from email raw bytes: %s", raw_email_bytes)
return None

@classmethod
def walk_email(cls, email_message, data_parts):
Expand All @@ -53,13 +66,17 @@ def walk_email(cls, email_message, data_parts):
data_parts.add(DataPart(part.get_content_type(), part.get_payload(decode=True)))

@classmethod
def init_from_emailmessage(cls, email_message):
def init_from_emailmessage(cls: Type["NotificationData"], email_message) -> Optional["NotificationData"]:
"""Initialize the data_parts from an email.message.Email object."""
data_parts = set()
cls.walk_email(email_message, data_parts)
try:
data_parts: Set[DataPart] = set()
cls.walk_email(email_message, data_parts)

# Adding extra headers that are interesting to be parsed
data_parts.add(DataPart("email-header-subject", email_message["Subject"].encode()))
# TODO: Date could be used to extend the "Stamp" time of a notification when not available, but we need a parser
data_parts.add(DataPart("email-header-date", email_message["Date"].encode()))
return cls(data_parts=list(data_parts))
# Adding extra headers that are interesting to be parsed
data_parts.add(DataPart("email-header-subject", email_message["Subject"].encode()))
# TODO: Date could be used to extend the "Stamp" time of a notification when not available, but we need a parser
data_parts.add(DataPart("email-header-date", email_message["Date"].encode()))
return cls(data_parts=list(data_parts))
except Exception: # pylint: disable=broad-except
logger.exception("Error found initializing data from email message: %s", email_message)
return None
78 changes: 78 additions & 0 deletions circuit_maintenance_parser/parsers/aquacomms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""AquaComms parser."""
import logging
import re
from datetime import datetime

from circuit_maintenance_parser.parser import EmailSubjectParser, Html, Impact, CircuitImpact, Status

# pylint: disable=too-many-nested-blocks, too-many-branches

logger = logging.getLogger(__name__)


class SubjectParserAquaComms1(EmailSubjectParser):
    """Parser for AquaComms subject string, email type 1."""

    def parse_subject(self, subject):
        """Parse subject of email file.

        Example subject:
            Aqua Comms Planned Outage Work ISSUE=111111 PROJ=999

        Args:
            subject: Subject line of the notification email, as a string.

        Returns:
            A one-element list holding a dict. When the subject contains the
            ``ISSUE=<digits>`` / ``PROJ=<digits>`` pair, the dict carries
            ``maintenance_id`` (the ISSUE number) and ``account`` (the PROJ
            number), both as strings. Otherwise the dict is empty.
        """
        data = {}
        # The unescaped "." accepts any single separator character between
        # the ISSUE and PROJ tokens (a space in the example above).
        search = re.search(r"ISSUE=([0-9]+).PROJ=([0-9]+)", subject)
        if search:
            data["maintenance_id"] = search.group(1)
            data["account"] = search.group(2)
        return [data]


class HtmlParserAquaComms1(Html):
    """Notifications Parser for AquaComms notifications."""

    def parse_html(self, soup):
        """Execute parsing.

        Args:
            soup: Parsed HTML document; only ``find_all("table")`` is used here.

        Returns:
            A one-element list with the dict filled in by ``parse_tables``.
        """
        data = {}
        self.parse_tables(soup.find_all("table"), data)
        return [data]

    @staticmethod
    def get_tr_value(element):
        """Remove new lines and split key to value.

        The row text is expected to look like ``"<label>: <value>"``; the part
        after the first ``": "`` separator is returned, stripped.

        Raises:
            IndexError: If the row text contains no ``": "`` separator.
        """
        return element.text.replace("\n", "").split(": ")[1].strip()

    def parse_tables(self, tables, data):
        """Parse HTML tables.

        Fills ``data`` in place from the rows of every table; nothing is
        returned. A row is recognised by a case-insensitive substring match on
        its text, checked in the order used below.

        <table>
            <tbody>
                <tr>
                    <td><font>Ticket Number:</font></td>
                    <td><font>11111</font></td>
                </tr>
                <tr>
                    <td><font>Scheduled Start Date & Time:</font></td>
                    <td><font>22:00 12/10/2020 GMT</font></td>
                </tr>
                <tr>
                    <td><font>Scheduled End Date & Time:</font></td>
                    <td><font>22:00 12/10/2020 GMT</font></td>
                </tr>
                ...
            </tbody>
        </table>
        """
        # Timestamps look like "22:00 12/10/2020 GMT" (day/month/year).
        date_format = "%H:%M %d/%m/%Y %Z"

        for table in tables:
            for tr_element in table.find_all("tr"):
                # Lower-case once per row instead of once per branch.
                row_text = tr_element.text.lower()
                if "ticket number" in row_text:
                    data["maintenance_id"] = self.get_tr_value(tr_element)
                elif "update" in row_text:
                    # Summary is whatever follows the first " - " in the row.
                    data["summary"] = tr_element.text.replace("\n", "").split(" - ")[1]
                elif "scheduled start date" in row_text:
                    # dt2ts is not defined in this class; presumably inherited
                    # from Html and converts a datetime to a timestamp.
                    data["start"] = self.dt2ts(datetime.strptime(self.get_tr_value(tr_element), date_format))
                elif "scheduled end date" in row_text:
                    data["end"] = self.dt2ts(datetime.strptime(self.get_tr_value(tr_element), date_format))
                elif "service id" in row_text:
                    data["circuits"] = [
                        CircuitImpact(circuit_id=self.get_tr_value(tr_element), impact=Impact("OUTAGE"))
                    ]
        # The notification carries no explicit status field in the rows parsed
        # above, so every parsed notification is reported as confirmed.
        data["status"] = Status.CONFIRMED
94 changes: 94 additions & 0 deletions circuit_maintenance_parser/parsers/sparkle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""Sparkle parser."""
import logging
from dateutil import parser

from circuit_maintenance_parser.errors import ParserError
from circuit_maintenance_parser.parser import CircuitImpact, Html, Impact, Status

logger = logging.getLogger(__name__)


class HtmlParserSparkle1(Html):
    """Notifications HTML Parser 1 for Sparkle notifications.

    Example:
        <table>
            <tbody>
                <tr>
                    <td><p></p>Maintenance ID</td>
                    <td><p></p>1111 / 2222</td>
                </tr>
                <tr>
                    <td><p></p>Start Date/Time (UTC) Day 1</td>
                    <td><p></p>08/10/2021 03:00 UTC</td>
                </tr>
                <tr>
                    <td><p></p>End Date/Time (UTC) Day 1</td>
                    <td><p></p>08/10/2021 11:00 UTC</td>
                </tr>
                <tr>
                    <td><p></p>Start Date/Time (UTC) Day 2</td>
                    <td><p></p>08/11/2021 03:00 UTC</td>
                </tr>
                <tr>
                    <td><p></p>End Date/Time (UTC) Day 2</td>
                    <td><p></p>08/11/2021 11:00 UTC</td>
                </tr>
                ...
            </tbody>
        </table>

    NOTE(review): ``parse_tables`` keys on the first-cell labels
    "sparkle ticket number", "circuits involved" and "description of work";
    the "Maintenance ID" label in the example above may be a placeholder —
    confirm against a real notification.
    """

    def parse_html(self, soup):
        """Execute parsing.

        Args:
            soup: Parsed HTML document; only ``find_all("table")`` is used here.

        Returns:
            The list of ticket dicts built by ``parse_tables``.

        Raises:
            ParserError: On any failure while parsing. A ``ParserError`` raised
                by ``parse_tables`` propagates unchanged so its message is
                preserved; any other exception is chained as the cause.
        """
        try:
            return self.parse_tables(soup.find_all("table"), {})
        except ParserError:
            # Already carries a meaningful message; do not bury it under an
            # empty wrapper.
            raise
        except Exception as exc:
            raise ParserError from exc

    def clean_string(self, string):
        """Remove hex characters and new lines."""
        # remove_hex_characters is not defined in this class; presumably
        # inherited from Html.
        return self.remove_hex_characters(string.replace("\n", "")).strip()

    @staticmethod
    def set_all_tickets(tickets, attribute, value):
        """Set the same value for all notifications."""
        for ticket in tickets:
            ticket[attribute] = value

    def parse_tables(self, tables, data_base):
        """Parse HTML tables.

        A ticket row may list several ticket IDs separated by "/ ". Each ID
        becomes its own dict and consumes the next start/end row pair after
        the ticket row, in order (first ID -> first pair, second ID -> second
        pair, ...).

        Args:
            tables: Iterable of table elements exposing ``find_all``.
            data_base: Dict copied as the starting point of every ticket.

        Returns:
            List of ticket dicts, one per ticket ID found.

        Raises:
            ParserError: If the expected start or end row for a ticket does
                not follow the ticket row.
        """
        data = []
        for table in tables:
            tr_elements = table.find_all("tr")
            for row_idx, tr_element in enumerate(tr_elements):
                td_elements = tr_element.find_all("td")
                if not td_elements:
                    # Rows without <td> cells (e.g. header-only rows) hold no
                    # label to match; skip them instead of failing the parse.
                    continue
                label = td_elements[0].text.lower()
                if "sparkle ticket number" in label:
                    # Strip each piece: "1111 / 2222" split on "/ " would
                    # otherwise leave a trailing space on the first ID.
                    ticket_ids = [
                        piece.strip() for piece in self.clean_string(td_elements[1].text).split("/ ")
                    ]
                    # Explicit cursor over the start/end row pairs, rather
                    # than mutating the enumerate loop variable.
                    pair_start = row_idx + 1
                    for ticket_id in ticket_ids:
                        ticket = data_base.copy()
                        ticket["maintenance_id"] = ticket_id
                        if "start date/time" in tr_elements[pair_start].text.lower():
                            start = self.clean_string(tr_elements[pair_start].find_all("td")[1].text)
                            ticket["start"] = self.dt2ts(parser.parse(start))
                        else:
                            raise ParserError("Unable to find start time for ticket " + ticket_id)
                        if "end date/time" in tr_elements[pair_start + 1].text.lower():
                            end = self.clean_string(tr_elements[pair_start + 1].find_all("td")[1].text)
                            ticket["end"] = self.dt2ts(parser.parse(end))
                        else:
                            raise ParserError("Unable to find end time for ticket " + ticket_id)
                        pair_start += 2
                        data.append(ticket)
                elif "circuits involved" in label:
                    # Applies only to tickets collected so far.
                    # clean_line is not defined in this class; presumably
                    # inherited from Html.
                    self.set_all_tickets(
                        data,
                        "circuits",
                        [CircuitImpact(impact=Impact.OUTAGE, circuit_id=self.clean_line(td_elements[1].text))],
                    )
                elif "description of work" in label:
                    self.set_all_tickets(data, "summary", self.clean_string(td_elements[1].text))
        # These two values are hard-coded for every ticket rather than read
        # from the tables.
        self.set_all_tickets(data, "status", Status.CONFIRMED)
        self.set_all_tickets(data, "account", "Not Available")
        return data
Loading

0 comments on commit 1992402

Please sign in to comment.