-
Notifications
You must be signed in to change notification settings - Fork 24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Sparkle Provider #73
Sparkle Provider #73
Changes from 9 commits
4f60c99
1112154
0e29aa8
85305d5
aabf5f8
ddd6a28
6eec81c
733eba7
f3b77dc
d8ec40d
c451332
6e1f4b9
03dfa06
e929959
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
"""Sparkle parser.""" | ||
import logging | ||
from dateutil import parser | ||
|
||
from circuit_maintenance_parser.errors import ParserError | ||
from circuit_maintenance_parser.parser import CircuitImpact, Html, Impact, Status | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class HtmlParserSparkle1(Html):
    """Notifications HTML Parser 1 for Sparkle notifications.

    A single notification may announce several tickets in one table: the ticket
    row lists the ids separated by "/", and is followed by one
    "Start Date/Time" / "End Date/Time" pair of rows per ticket ("Day 1",
    "Day 2", ...). The N-th ticket id is paired with the N-th pair of rows.

    NOTE(review): the parser matches the row label "Sparkle Ticket Number",
    while the example below shows "Maintenance ID" -- confirm which label the
    real notifications use.

    Example:
        <table>
            <tbody>
                <tr>
                    <td><p></p>Maintenance ID</td>
                    <td><p></p>1111 / 2222</td>
                </tr>
                <tr>
                    <td><p></p>Start Date/Time (UTC) Day 1</td>
                    <td><p></p>08/10/2021 03:00 UTC</td>
                </tr>
                <tr>
                    <td><p></p>End Date/Time (UTC) Day 1</td>
                    <td><p></p>08/10/2021 11:00 UTC</td>
                </tr>
                <tr>
                    <td><p></p>Start Date/Time (UTC) Day 2</td>
                    <td><p></p>08/11/2021 03:00 UTC</td>
                </tr>
                <tr>
                    <td><p></p>End Date/Time (UTC) Day 2</td>
                    <td><p></p>08/11/2021 11:00 UTC</td>
                </tr>
                ...
            </tbody>
        </table>
    """

    def parse_html(self, soup):
        """Execute parsing.

        Args:
            soup: Parsed HTML document exposing ``find_all``.

        Returns:
            list: One dict of maintenance data per ticket found in the tables.

        Raises:
            ParserError: If the tables cannot be parsed.
        """
        data = {}
        try:
            return self.parse_tables(soup.find_all("table"), data)
        except ParserError:
            # Already carries a descriptive message; wrapping it again would hide it.
            raise
        except Exception as exc:
            raise ParserError from exc

    def clean_string(self, string):
        """Remove hex characters and new lines."""
        return self.remove_hex_characters(string.replace("\n", "")).strip()

    @staticmethod
    def set_all_tickets(tickets, attribute, value):
        """Set the same value for all notifications."""
        for ticket in tickets:
            ticket[attribute] = value

    def _parse_time_row(self, tr_elements, row_idx, kind, ticket_id):
        """Return the timestamp stored in the "<kind> date/time" row at ``row_idx``.

        Args:
            tr_elements: All ``<tr>`` elements of the table being parsed.
            row_idx: Index of the row expected to hold the timestamp.
            kind: Either "start" or "end"; used to match the row label and in the error message.
            ticket_id: Ticket the timestamp belongs to (only used in the error message).

        Raises:
            ParserError: If the row is missing, has another label, or has no value cell.
        """
        if row_idx < len(tr_elements):
            row = tr_elements[row_idx]
            if f"{kind} date/time" in row.text.lower():
                cells = row.find_all("td")
                if len(cells) > 1:
                    return self.dt2ts(parser.parse(self.clean_string(cells[1].text)))
        raise ParserError(f"Unable to find {kind} time for ticket {ticket_id}")

    def parse_tables(self, tables, data_base):
        """Parse HTML tables.

        Args:
            tables: Iterable of ``<table>`` elements.
            data_base: Dict of values copied into every ticket before it is filled in.

        Returns:
            list: One dict per ticket id, in the order the ids appear.

        Raises:
            ParserError: If a ticket has no matching start or end row.
        """
        data = []
        for table in tables:
            tr_elements = table.find_all("tr")
            for idx, tr_element in enumerate(tr_elements):
                td_elements = tr_element.find_all("td")
                if len(td_elements) < 2:
                    # Header or spacer rows carry no "label / value" pair.
                    continue
                label = td_elements[0].text.lower()
                if "sparkle ticket number" in label:
                    raw_ids = self.clean_string(td_elements[1].text)
                    # Split on "/" and strip each id so "1111 / 2222" does not
                    # yield "1111 " with a trailing space.
                    ticket_ids = [item.strip() for item in raw_ids.split("/") if item.strip()]
                    for position, ticket_id in enumerate(ticket_ids):
                        # Each ticket owns one start/end pair of rows, laid out
                        # consecutively right after the ticket row.
                        start_row = idx + 1 + 2 * position
                        ticket = data_base.copy()
                        ticket["maintenance_id"] = ticket_id
                        ticket["start"] = self._parse_time_row(tr_elements, start_row, "start", ticket_id)
                        ticket["end"] = self._parse_time_row(tr_elements, start_row + 1, "end", ticket_id)
                        data.append(ticket)
                elif "circuits involved" in label:
                    self.set_all_tickets(
                        data,
                        "circuits",
                        [CircuitImpact(impact=Impact.OUTAGE, circuit_id=self.clean_line(td_elements[1].text))],
                    )
                elif "description of work" in label:
                    self.set_all_tickets(data, "summary", self.clean_string(td_elements[1].text))
                    self.set_all_tickets(data, "status", Status.COMPLETED)

        self.set_all_tickets(data, "account", "Not Available")
        return data
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -117,16 +117,27 @@ class CombinedProcessor(GenericProcessor): | |
|
||
def process_hook(self, maintenances_extracted_data, maintenances_data): | ||
"""All the parsers contribute with a subset of data that is extended.""" | ||
# We only expect one data object from these parsers | ||
if len(maintenances_extracted_data) == 1: | ||
self.combined_maintenance_data.update(maintenances_extracted_data[0]) | ||
else: | ||
raise ProcessorError(f"Unexpected data retrieved from parser: {maintenances_extracted_data}") | ||
maintenances_data.extend(maintenances_extracted_data) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe it would be useful to document the new logic in the docstring. |
||
|
||
def post_process_hook(self, maintenances_data): | ||
"""After processing all the parsers, we try to combine all the data together.""" | ||
"""After processing all the parsers, we try to combine all the data together. | ||
|
||
For some notifications there can be multiple maintenances in a single file. To handle this, maintenances are stored in a | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe move this explanation to |
||
list where each of them can be extended with the extra processors. | ||
""" | ||
self.extend_processor_data(self.combined_maintenance_data) | ||
try: | ||
maintenances_data.append(Maintenance(**self.combined_maintenance_data)) | ||
except ValidationError as exc: | ||
raise ProcessorError("Not enough information available to create a Maintenance notification.") from exc | ||
if not maintenances_data: | ||
maintenances = [{}] | ||
else: | ||
maintenances = maintenances_data.copy() | ||
maintenances_data.clear() | ||
|
||
for maintenance in maintenances: | ||
try: | ||
combined_data = {**self.combined_maintenance_data, **maintenance} | ||
maintenances_data.append(Maintenance(**combined_data)) | ||
except ValidationError as exc: | ||
raise ProcessorError("Not enough information available to create a Maintenance notification.") from exc |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ | |
SubjectParserSeaborn1, | ||
SubjectParserSeaborn2, | ||
) | ||
from circuit_maintenance_parser.parsers.sparkle import HtmlParserSparkle1 | ||
from circuit_maintenance_parser.parsers.telstra import HtmlParserTelstra1 | ||
from circuit_maintenance_parser.parsers.turkcell import HtmlParserTurkcell1 | ||
from circuit_maintenance_parser.parsers.verizon import HtmlParserVerizon1 | ||
|
@@ -197,6 +198,15 @@ class Seaborn(GenericProvider): | |
_default_organizer = "[email protected]" | ||
|
||
|
||
class Sparkle(GenericProvider):
    """Provider definition for Sparkle maintenance notifications."""

    _default_organizer = "[email protected]"
    _processors: List[GenericProcessor] = [
        CombinedProcessor(
            data_parsers=[
                HtmlParserSparkle1,
                EmailDateParser,
            ],
        ),
    ]
|
||
|
||
class Telia(GenericProvider): | ||
"""Telia provider custom class.""" | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For my own education, can you point out in the test files some examples of hex characters that need to be removed? I'm not sure I understand what is meant by that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Inside the Sparkle test you can see strange characters being used. Maybe they aren't hex; I can change the name if needed.
1111111=C2=A0/ 22222=C2=A0/ 33333
is in the Sparkle test, where it's really meant to be 1111111 / 22222 / 33333
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hm. I thought those were supposed to be removed by our use of
quopri
in the base HTML parser class. Are you seeing cases where these characters are still present in the processed HTML being passed through to the Sparkle parser?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I think I saw this in another parser but unsure which as it was a while ago.