Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sparkle Provider #73

Merged
merged 14 commits into from
Sep 14, 2021
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ By default, there is a `GenericProvider` that support a `SimpleProcessor` using
- Megaport
- Momentum
- Seaborn
- Sparkle
- Telstra
- Turkcell
- Verizon
Expand Down
2 changes: 2 additions & 0 deletions circuit_maintenance_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
NTT,
PacketFabric,
Seaborn,
Sparkle,
Telia,
Telstra,
Turkcell,
Expand All @@ -37,6 +38,7 @@
NTT,
PacketFabric,
Seaborn,
Sparkle,
Telia,
Telstra,
Turkcell,
Expand Down
94 changes: 94 additions & 0 deletions circuit_maintenance_parser/parsers/sparkle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""Sparkle parser."""
import logging
from dateutil import parser

from circuit_maintenance_parser.errors import ParserError
from circuit_maintenance_parser.parser import CircuitImpact, Html, Impact, Status

logger = logging.getLogger(__name__)


class HtmlParserSparkle1(Html):
"""Notifications HTML Parser 1 for Sparkle notifications.

The notification is read from label/value table rows: a ticket-number row
followed by start and end date/time rows (one pair per ticket), plus rows
naming the circuits involved and the description of work.

NOTE(review): ``parse_tables`` matches the ticket row on the label
"sparkle ticket number", while the example below shows "Maintenance ID" -
confirm which label real notifications use.

Example:
<table>
<tbody>
<tr>
<td><p></p>Maintenance ID</td>
<td><p></p>1111 / 2222</td>
</tr>
<tr>
<td><p></p>Start Date/Time (UTC) Day 1</td>
<td><p></p>08/10/2021 03:00 UTC</td>
</tr>
<tr>
<td><p></p>End Date/Time (UTC) Day 1</td>
<td><p></p>08/10/2021 11:00 UTC</td>
</tr>
<tr>
<td><p></p>Start Date/Time (UTC) Day 2</td>
<td><p></p>08/11/2021 03:00 UTC</td>
</tr>
<tr>
<td><p></p>End Date/Time (UTC) Day 2</td>
<td><p></p>08/11/2021 11:00 UTC</td>
</tr>
...
</tbody>
</table>
"""

def parse_html(self, soup):
    """Parse every ``<table>`` found in ``soup`` into maintenance data.

    Args:
        soup: Parsed HTML document exposing ``find_all``.

    Returns:
        Whatever ``self.parse_tables`` returns for the tables found, starting
        from an empty base dict.

    Raises:
        ParserError: If parsing fails. A ``ParserError`` raised while parsing
            is propagated unchanged so its message is kept; any other
            exception is wrapped, with the original attached as the cause.
    """
    try:
        return self.parse_tables(soup.find_all("table"), {})
    except ParserError:
        # Already carries a specific message (e.g. a missing start/end row);
        # re-wrapping it would bury that message behind an empty error.
        raise
    except Exception as exc:
        raise ParserError(str(exc)) from exc

def clean_string(self, string):
"""Return ``string`` without new lines, hex characters or outer whitespace.

New lines are dropped first, the result is passed to
``self.remove_hex_characters`` and the outcome is stripped of leading and
trailing whitespace.

NOTE(review): ``remove_hex_characters`` is not defined in this class, so
exactly which characters it drops cannot be confirmed here - TODO confirm
against its definition.
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For my own education, can you point out in the test files some examples of hex characters that are needing to be removed? I'm not sure I understand what is meant by that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inside the sparkle test you can see strange charecters being used. Maybe they arent HEX, i can change the name if needed.
1111111=C2=A0/ 22222=C2=A0/ 33333 is in the sparkle test where its really meant to be 1111111 / 22222 / 33333

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm. I thought those were supposed to be removed by our use of quopri in the base HTML parser class. Are you seeing cases where these characters are still present in the processed HTML being passed through to the Sparkle parser?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think I saw this in another parser but unsure which as it was a while ago.

return self.remove_hex_characters(string.replace("\n", "")).strip()

@staticmethod
def set_all_tickets(tickets, attribute, value):
    """Assign ``value`` under key ``attribute`` on each ticket, in place.

    Every ticket receives the very same ``value`` object; nothing is returned.
    """
    for entry in tickets:
        entry[attribute] = value

def parse_tables(self, tables, data_base):
    """Parse HTML tables into one data dict per maintenance ticket.

    Rows are recognised by the lower-cased text of their first cell:

    * "sparkle ticket number": the second cell holds one or more ticket IDs
      separated by "/ ". The N-th ID is paired with the N-th pair of rows
      that follows, which must be a "start date/time" row and then an
      "end date/time" row.
    * "circuits involved": the second cell is the circuit ID, recorded as an
      ``Impact.OUTAGE`` circuit on every ticket collected so far.
    * "description of work": the second cell becomes the summary of every
      ticket collected so far.

    Rows with fewer than two ``<td>`` cells are ignored.

    Args:
        tables: Iterable of table elements exposing ``find_all``.
        data_base: Dict copied as the starting point of every ticket.

    Returns:
        List with one dict per ticket ID found, in document order.

    Raises:
        ParserError: If a ticket's start or end row is missing or malformed.
    """
    data = []
    for table in tables:
        rows = table.find_all("tr")
        for idx, row in enumerate(rows):
            cells = row.find_all("td")
            if len(cells) < 2:
                # Not a label/value row, so there is nothing to read from it.
                continue
            label = cells[0].text.lower()
            if "sparkle ticket number" in label:
                # Each ticket consumes its own start/end row pair, so the
                # window moves two rows forward after every ticket.
                window = idx + 1
                for raw_id in self.clean_string(cells[1].text).split("/ "):
                    # IDs written as "1111 / 2222" keep a trailing blank after the split.
                    ticket_id = raw_id.strip()
                    if not ticket_id:
                        continue
                    ticket = data_base.copy()
                    ticket["maintenance_id"] = ticket_id
                    for field, position in (("start", window), ("end", window + 1)):
                        value_cells = rows[position].find_all("td") if position < len(rows) else []
                        if len(value_cells) < 2 or f"{field} date/time" not in rows[position].text.lower():
                            raise ParserError(f"Unable to find {field} time for ticket {ticket_id}")
                        ticket[field] = self.dt2ts(parser.parse(self.clean_string(value_cells[1].text)))
                    window += 2
                    data.append(ticket)
            elif "circuits involved" in label:
                self.set_all_tickets(
                    data,
                    "circuits",
                    [CircuitImpact(impact=Impact.OUTAGE, circuit_id=self.clean_line(cells[1].text))],
                )
            elif "description of work" in label:
                self.set_all_tickets(data, "summary", self.clean_string(cells[1].text))
    # Applied once after all rows so every collected ticket carries them.
    # COMPLETED would state that the maintenance is over; a notice that only
    # lists start/end windows is reported as CONFIRMED instead.
    self.set_all_tickets(data, "status", Status.CONFIRMED)
    self.set_all_tickets(data, "account", "Not Available")
    return data
25 changes: 18 additions & 7 deletions circuit_maintenance_parser/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,16 +117,27 @@ class CombinedProcessor(GenericProcessor):

def process_hook(self, maintenances_extracted_data, maintenances_data):
"""All the parsers contribute with a subset of data that is extended."""
# We only expect one data object from these parsers
if len(maintenances_extracted_data) == 1:
self.combined_maintenance_data.update(maintenances_extracted_data[0])
else:
raise ProcessorError(f"Unexpected data retrieved from parser: {maintenances_extracted_data}")
maintenances_data.extend(maintenances_extracted_data)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe it would be useful to document the new logic in the docstring.
To make it clear that when the parser returns one object we just accumulate it in combined_maintenance_data, but when it returns several we understand that there are multiple maintenances, so we save them in maintenances_data to all be extended at the end.
Maybe we could add a safety check if we try this maintenances_data.extend twice... this should only happen one time in this processor


def post_process_hook(self, maintenances_data):
"""After processing all the parsers, we try to combine all the data together."""
"""After processing all the parsers, we try to combine all the data together.

For some notifications there can be multiple maintenances in a single file. To handle this, maintenances are stored in a
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe move this explanation to process_hook, where this logic actually takes place?

list where each of them can be extended with the extra processors.
"""
self.extend_processor_data(self.combined_maintenance_data)
try:
maintenances_data.append(Maintenance(**self.combined_maintenance_data))
except ValidationError as exc:
raise ProcessorError("Not enough information available to create a Maintenance notification.") from exc
if not maintenances_data:
maintenances = [{}]
else:
maintenances = maintenances_data.copy()
maintenances_data.clear()

for maintenance in maintenances:
try:
combined_data = {**self.combined_maintenance_data, **maintenance}
maintenances_data.append(Maintenance(**combined_data))
except ValidationError as exc:
raise ProcessorError("Not enough information available to create a Maintenance notification.") from exc
10 changes: 10 additions & 0 deletions circuit_maintenance_parser/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
SubjectParserSeaborn1,
SubjectParserSeaborn2,
)
from circuit_maintenance_parser.parsers.sparkle import HtmlParserSparkle1
from circuit_maintenance_parser.parsers.telstra import HtmlParserTelstra1
from circuit_maintenance_parser.parsers.turkcell import HtmlParserTurkcell1
from circuit_maintenance_parser.parsers.verizon import HtmlParserVerizon1
Expand Down Expand Up @@ -197,6 +198,15 @@ class Seaborn(GenericProvider):
_default_organizer = "[email protected]"


class Sparkle(GenericProvider):
"""Sparkle provider custom class.

Notifications are handled by a single ``CombinedProcessor`` whose data
parsers are ``HtmlParserSparkle1`` and ``EmailDateParser``.
"""

# Processors tried for this provider: one CombinedProcessor over the HTML and e-mail date parsers.
_processors: List[GenericProcessor] = [
CombinedProcessor(data_parsers=[HtmlParserSparkle1, EmailDateParser]),
]
# NOTE(review): "[email protected]" looks like an e-mail obfuscation placeholder left by the page
# this listing was captured from, not a usable address - confirm the real organizer address.
_default_organizer = "[email protected]"


class Telia(GenericProvider):
"""Telia provider custom class."""

Expand Down
Loading