Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HGC Parser #68

Merged
merged 9 commits into from
Sep 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
- #60 - Added new provider `Seaborn` using `Html` and a new parser for Email Subject: `EmailSubjectParser`
- #66 - Added new provider `Momentum` using `Html` and `EmailSubjectParser`
- #61 - Added new provider `Colt` using `ICal` and `Csv`
- #68 - Added new provider `HGC` using `Html` and `EmailSubjectParser`

### Fixed

Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ By default, there is a `GenericProvider` that support a `SimpleProcessor` using
- Cogent
- Colt
- GTT
- HGC
- Lumen
- Megaport
- Momentum
Expand Down
2 changes: 2 additions & 0 deletions circuit_maintenance_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
Colt,
EUNetworks,
GTT,
HGC,
Lumen,
Megaport,
Momentum,
Expand All @@ -29,6 +30,7 @@
Colt,
EUNetworks,
GTT,
HGC,
Lumen,
Megaport,
Momentum,
Expand Down
120 changes: 120 additions & 0 deletions circuit_maintenance_parser/parsers/hgc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""HGC parser."""
import logging
import re

from dateutil import parser

from circuit_maintenance_parser.parser import EmailSubjectParser, Html, Impact, CircuitImpact, Status

# pylint: disable=too-many-branches


# Module-level logger named after this module (``__name__``). It is not
# referenced by the parser classes visible in this file.
logger = logging.getLogger(__name__)


class SubjectParserHGC1(EmailSubjectParser):
    """HGC subject parser."""

    def parse_subject(self, subject):
        """Parse HGC subject string and extract the maintenance ID.

        Two subject layouts are recognised:

        * the ID is enclosed in parentheses at the end of the subject;
        * the subject is split in ``" | "``-separated fields and the ID is
          the third field.

        Examples:
            HGC Maintenance Work Notification - Network to Code _ CIR0000001 (TIC00000000000001)
            HGC Maintenance Work Notification - Network to Code | CIR0000001 | TIC00000000000001

        Args:
            subject: Raw email subject; may contain line breaks.

        Returns:
            A single-element list with a dict holding ``maintenance_id``.

        Raises:
            IndexError: If the subject matches neither layout.
        """
        # Line breaks are removed once, up front, so that BOTH layouts see the
        # same flattened text ("." in the regex does not match "\n", and a
        # line break inside the pipe layout would otherwise leak into the ID).
        flat_subject = subject.replace("\n", "")

        search = re.search(r"^.+\((.+)\)", flat_subject)
        if search:
            return [{"maintenance_id": search.group(1).strip()}]

        fields = flat_subject.split(" | ")
        if len(fields) < 3:
            # Same exception type as the unchecked ``split[2]`` access this
            # replaces, but with enough context to diagnose the bad subject.
            raise IndexError(f"Unable to find an HGC maintenance ID in subject: {subject!r}")
        return [{"maintenance_id": fields[2].strip()}]


class HtmlParserHGC1(Html):
    """HGC HTML 1 parser (notification body laid out as tables)."""

    def parse_html(self, soup):
        """Execute parsing.

        Collects the fields found in every ``<table>`` of ``soup`` and returns
        a single-element list with the resulting dict. ``status`` is always set
        to ``Status.CONFIRMED``.
        """
        data = {}
        self.parse_table(soup.find_all("table"), data)
        data["status"] = Status.CONFIRMED
        return [data]

    def parse_table(self, tables, data):
        """Parse HTML tables and store the extracted fields into ``data``.

        Each row is expected to hold three cells: label, ``:`` separator and
        value, so the value of a label found at index ``i`` is read from the
        cell at index ``i + 2``.

        <table>
          <tr>
            <td><p>Circuit ID</p></td>
            <td><p>:</p></td>
            <td><p>CIR00000001</p></td>
          </tr>
          <tr>
            <td><p>Customer</p></td>
            <td><p>:</p></td>
            <td><p>Network to Code</p></td>
          </tr>
          ...
        </table>

        Args:
            tables: Iterable of ``<table>`` elements.
            data: Dict updated in place with ``account``, ``start``, ``end``,
                ``summary`` and ``circuits`` when the matching rows are found.
        """
        circuit_id = None
        for table in tables:
            td_elements = table.find_all("td")
            for idx, td_element in enumerate(td_elements):
                value_idx = idx + 2
                if value_idx >= len(td_elements):
                    # The last two cells cannot be labels: there is no value
                    # cell two positions ahead. Without this guard a keyword
                    # appearing in a trailing value cell raised IndexError.
                    break

                label = td_element.text.lower()
                value = td_elements[value_idx].text.strip()

                if "circuit id" in label:
                    circuit_id = value
                elif "customer" in label:
                    data["account"] = value
                elif "maintenance window start date" in label:
                    data["start"] = self.dt2ts(parser.parse(value))
                elif "maintenance window end date" in label:
                    data["end"] = self.dt2ts(parser.parse(value))
                elif "description" in label:
                    data["summary"] = value
                elif "service impact" in label:
                    # NOTE(review): the previous code tested the value for
                    # "down throughout maintenance window" but assigned OUTAGE
                    # in both branches, so the impact is always OUTAGE. Confirm
                    # whether another Impact was intended for the other case.
                    data["circuits"] = [CircuitImpact(impact=Impact("OUTAGE"), circuit_id=circuit_id)]


class HtmlParserHGC2(Html):
    """HGC HTML 2 parser (notification body laid out as ``<span>`` lines)."""

    def parse_html(self, soup):
        """Execute parsing.

        Collects the fields found in every ``<span>`` of ``soup`` and returns a
        single-element list with the resulting dict. ``status`` is always set
        to ``Status.CONFIRMED``.
        """
        data = {}
        self.parse_body(soup.find_all("span"), data)
        data["status"] = Status.CONFIRMED
        return [data]

    def parse_body(self, span_elements, data):
        """Parse HTML body and store the extracted fields into ``data``.

        Every span is expected to read ``<label>:<value>``.

        <div>
          <span>Circuit ID:<wbr>CIR000001</span>
          <span>Customer:<wbr>Network to Code</span>
          ...
        </div>

        Args:
            span_elements: Iterable of ``<span>`` elements.
            data: Dict updated in place with ``account``, ``start``, ``end``,
                ``summary`` and ``circuits`` when the matching spans are found.
        """
        circuit_id = None
        for span_element in span_elements:
            text = span_element.text
            # Split on the FIRST colon only: values such as "10:30" or a
            # description with a colon in it must not be truncated
            # (``split(":")[1]`` kept only the text up to the second colon).
            _, separator, raw_value = text.partition(":")
            if not separator:
                # No "label:value" structure, nothing to extract.
                continue

            label = text.lower()
            value = raw_value.strip()

            if "circuit id:" in label:
                circuit_id = value
            elif "customer:" in label:
                data["account"] = value
            elif "maintenance window start date" in label:
                data["start"] = self.dt2ts(parser.parse(value))
            elif "maintenance window end date" in label:
                data["end"] = self.dt2ts(parser.parse(value))
            elif "description:" in label:
                data["summary"] = value
            elif "service impact:" in label:
                # NOTE(review): the previous code tested the value for
                # "down throughout maintenance window" but assigned OUTAGE in
                # both branches, so the impact is always OUTAGE. Confirm
                # whether another Impact was intended for the other case.
                data["circuits"] = [CircuitImpact(impact=Impact("OUTAGE"), circuit_id=circuit_id)]
11 changes: 11 additions & 0 deletions circuit_maintenance_parser/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from circuit_maintenance_parser.parsers.cogent import HtmlParserCogent1
from circuit_maintenance_parser.parsers.colt import ICalParserColt1, CsvParserColt1
from circuit_maintenance_parser.parsers.gtt import HtmlParserGTT1
from circuit_maintenance_parser.parsers.hgc import HtmlParserHGC1, HtmlParserHGC2, SubjectParserHGC1
from circuit_maintenance_parser.parsers.lumen import HtmlParserLumen1
from circuit_maintenance_parser.parsers.megaport import HtmlParserMegaport1
from circuit_maintenance_parser.parsers.momentum import HtmlParserMomentum1, SubjectParserMomentum1
Expand Down Expand Up @@ -137,6 +138,16 @@ class GTT(GenericProvider):
_default_organizer = "[email protected]"


class HGC(GenericProvider):
    """HGC provider custom class.

    Declares two combined processors, one per HGC HTML body parser
    (``HtmlParserHGC1`` and ``HtmlParserHGC2``). Both also use
    ``EmailDateParser`` and ``SubjectParserHGC1``.
    """

    # NOTE(review): how these processors are selected/tried is defined by
    # GenericProvider, which is not visible here - presumably in list order.
    _processors: List[GenericProcessor] = [
        CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserHGC1, SubjectParserHGC1]),
        CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserHGC2, SubjectParserHGC1]),
    ]
    # NOTE(review): this value looks like an e-mail obfuscation placeholder
    # rather than a real address - confirm the intended organizer e-mail.
    _default_organizer = "[email protected]"


class Lumen(GenericProvider):
"""Lumen provider custom class."""

Expand Down
Loading