From b3a2682ebf76acabb33cee676b099e654622c66d Mon Sep 17 00:00:00 2001 From: carbonarok Date: Tue, 31 Aug 2021 12:11:46 +0100 Subject: [PATCH 1/6] Turkcell parser. --- circuit_maintenance_parser/__init__.py | 2 + .../parsers/turkcell.py | 70 ++++++++++ circuit_maintenance_parser/providers.py | 8 ++ tests/unit/data/turkcell/turkcell1.html | 126 ++++++++++++++++++ .../unit/data/turkcell/turkcell1_result.json | 29 ++++ tests/unit/data/turkcell/turkcell2.html | 77 +++++++++++ .../unit/data/turkcell/turkcell2_result.json | 21 +++ tests/unit/test_providers.py | 12 ++ 8 files changed, 345 insertions(+) create mode 100644 circuit_maintenance_parser/parsers/turkcell.py create mode 100644 tests/unit/data/turkcell/turkcell1.html create mode 100644 tests/unit/data/turkcell/turkcell1_result.json create mode 100644 tests/unit/data/turkcell/turkcell2.html create mode 100644 tests/unit/data/turkcell/turkcell2_result.json diff --git a/circuit_maintenance_parser/__init__.py b/circuit_maintenance_parser/__init__.py index fa3f35ad..40dd22df 100644 --- a/circuit_maintenance_parser/__init__.py +++ b/circuit_maintenance_parser/__init__.py @@ -16,6 +16,7 @@ PacketFabric, Telia, Telstra, + Turkcell, Verizon, Zayo, ) @@ -31,6 +32,7 @@ PacketFabric, Telia, Telstra, + Turkcell, Verizon, Zayo, ) diff --git a/circuit_maintenance_parser/parsers/turkcell.py b/circuit_maintenance_parser/parsers/turkcell.py new file mode 100644 index 00000000..7dc3e2bf --- /dev/null +++ b/circuit_maintenance_parser/parsers/turkcell.py @@ -0,0 +1,70 @@ +"""Turkcell parser.""" +import logging +import re +from typing import Dict + +from dateutil import parser +import bs4 # type: ignore +from bs4.element import ResultSet # type: ignore + +from circuit_maintenance_parser.errors import ParsingError +from circuit_maintenance_parser.parser import Html, Impact, CircuitImpact, Status + +# pylint: disable=too-many-nested-blocks, too-many-branches + + +logger = logging.getLogger(__name__) + + +class HtmlParserTurkcell1(Html): + """Notifications Parser for Turkcell notifications.""" + + def parse_html(self, soup, data_base): + """Execute parsing.""" + data = data_base.copy() + try: + self.parse_tables(soup.find_all("table"), data) + return [data] + + except Exception as exc: + raise ParsingError from exc + + def parse_tables(self, tables, data): + """Parse tables.""" + # Main table + td_elements = tables[0].find_all("td") + for idx, td in enumerate(td_elements): + if "Dear Customer" in td.text.strip(): + if "planned" in td.text.strip(): + data["status"] = Status["CONFIRMED"] + else: + data["status"] = Status["CONFIRMED"] + if "Maintenance Number" in td.text.strip(): + data["maintenance_id"] = td_elements[idx + 1].text.strip() + elif "Start" in td.text.strip(): + data["start"] = self.dt2ts(parser.parse(td_elements[idx + 1].text.strip())) + elif "End" in td.text.strip(): + data["end"] = self.dt2ts(parser.parse(td_elements[idx + 1].text.strip())) + elif "Impact of the maintenance" in td.text.strip(): + data["summary"] = td_elements[idx + 1].span.text.strip() + if len(tables) == 1: + data["circuits"] = [] + p_elements = td_elements[idx + 1].find_all("p") + for element in p_elements: + # Example match: + # Eth-Trunk1.1 up up 111111111111111|01-CUSTOMER|LOCATION|LINK + if re.match(r".+[ \t]([0-1]+\|.+\|.+\|.+)", element.text.strip()): + groups = re.search(r".+[ \t]([0-1]+\|.+\|.+\|.+)", element.text.strip()) + details = groups.group(1).split("|") + data["circuits"].append(CircuitImpact(impact=Impact("OUTAGE"), circuit_id=details[0])) + data["account"] = details[1] + + # Circuit table + # Possibility that there could be a table inside the first table. + if len(tables) == 2: + tr_elements = tables[1].find_all("tr") + data["circuits"] = [] + for tr in tr_elements: + line = tr.text.strip().split("\n\n\n") + data["circuits"].append(CircuitImpact(impact=Impact("OUTAGE"), circuit_id=line[0])) + data["account"] = line[1] diff --git a/circuit_maintenance_parser/providers.py b/circuit_maintenance_parser/providers.py index e8276e1e..0bb3a727 100644 --- a/circuit_maintenance_parser/providers.py +++ b/circuit_maintenance_parser/providers.py @@ -15,6 +15,7 @@ from circuit_maintenance_parser.parsers.lumen import HtmlParserLumen1 from circuit_maintenance_parser.parsers.megaport import HtmlParserMegaport1 from circuit_maintenance_parser.parsers.telstra import HtmlParserTelstra1 +from circuit_maintenance_parser.parsers.turkcell import HtmlParserTurkcell1 from circuit_maintenance_parser.parsers.verizon import HtmlParserVerizon1 from circuit_maintenance_parser.parsers.zayo import HtmlParserZayo1 @@ -161,6 +162,13 @@ class Telstra(GenericProvider): _default_organizer = "gpen@team.telstra.com" +class Turkcell(GenericProvider): + """Turkcell provider custom class.""" + + _parser_classes: Iterable[Type[Parser]] = [HtmlParserTurkcell1] + _default_organizer = "inoc@superonline.net" + + class Verizon(GenericProvider): """Verizon provider custom class.""" diff --git a/tests/unit/data/turkcell/turkcell1.html b/tests/unit/data/turkcell/turkcell1.html new file mode 100644 index 00000000..028578a1 --- /dev/null +++ b/tests/unit/data/turkcell/turkcell1.html @@ -0,0 +1,126 @@ +
+

 

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Description: Description: Description: cid:image001.jpg@01CE1344.23DEC930

+

+TURKCELL SUPERONLINE PLANNED MAINTENANCE EMERGENCY

+
+

 

+

Dear Customer,

+


+We have a planned maintenance on your transmission way.

+

 

+
+

Maintenance Number: +

+
+

 11111111111111

+
+

Start Tİme: +

+
+

 07.08.2021 00:00 GMT+3

+
+

End Tİme: +

+
+

 07.08.2021 07:00 GMT+3

+
+

Impact of the maintenance:

+
+

There is a fibercut risk during Sancaktepe routes Circuit transfer work will be carried out at Istanbul Sancaktepe service point. It is foreseen that your services will be interrupted + during the work.

+

 

+ + + + + + + + + + + + + + + + + + +
+

circuit1circuit1

+
+

CUSTOMER

+
+

Down

+
+

circuit2circuit2

+
+

CUSTOMER

+
+

Down

+
+

circuit3-circuit3

+
+

CUSTOMER

+
+

Down

+
+

+
+

 

+

Thank you for your cooperation.

+

Best regards,

+
+
+
+
+

+inoc@superonline.net – 0 850 222 46 62

+
+
+

 

+

 

+

 

+

 

+

 

+

 

+

 

+

 

+

 

+

 

+
\ No newline at end of file diff --git a/tests/unit/data/turkcell/turkcell1_result.json b/tests/unit/data/turkcell/turkcell1_result.json new file mode 100644 index 00000000..c4905869 --- /dev/null +++ b/tests/unit/data/turkcell/turkcell1_result.json @@ -0,0 +1,29 @@ +[ + { + "account": "CUSTOMER", + "circuits": [ + { + "circuit_id": "circuit1circuit1", + "impact": "OUTAGE" + }, + { + "circuit_id": "circuit2circuit2", + "impact": "OUTAGE" + }, + { + "circuit_id": "circuit3-circuit3", + "impact": "OUTAGE" + } + ], + "end": 1625738400, + "maintenance_id": "11111111111111", + "organizer": "inoc@superonline.net", + "provider": "turkcell", + "sequence": 1, + "stamp": null, + "start": 1625713200, + "status": "CONFIRMED", + "summary": "There is a fibercut risk during Sancaktepe routes Circuit transfer work will be carried out at Istanbul Sancaktepe service point. It is foreseen that your services will be interrupted\n during the work.", + "uid": "0" + } +] diff --git a/tests/unit/data/turkcell/turkcell2.html b/tests/unit/data/turkcell/turkcell2.html new file mode 100644 index 00000000..74637f83 --- /dev/null +++ b/tests/unit/data/turkcell/turkcell2.html @@ -0,0 +1,77 @@ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Description: Description: Description: cid:image001.jpg@01CE1344.23DEC930

+

TURKCELL SUPERONLINE PLANNED MAINTENANCE +

+
+

 

+

Dear Customer,
+We have a planned maintenance on your transmission way.

+
+

Maintenance Number: +

+
+

 10009081013

+
+

Start Time: +

+
+

18.08.2021 00:30 GMT+3

+
+

End Time: +

+
+

 18.08.2021 06:00 GMT+3

+
+

Impact of the maintenance:

+
+

SW upgrade work will be done on the OLTs mentioned below; +

+

 

+

Eth-Trunk1.1               up      up       111111111111111|01-CUSTOMER|LOCATION|LINK

+

Eth-Trunk1.2               up      up       111111111111112|01-CUSTOMER|LOCATION|LINK

+

Eth-Trunk1.3628               up      up       1111111111111113|01-CUSTOMER|LOCATION|LINK

+
+

Thank you for your cooperation.

+

Best regards, +

+
+
+
+
+

+inoc@superonline.net + – 0 850 222 46 62

+
+
\ No newline at end of file diff --git a/tests/unit/data/turkcell/turkcell2_result.json b/tests/unit/data/turkcell/turkcell2_result.json new file mode 100644 index 00000000..43147666 --- /dev/null +++ b/tests/unit/data/turkcell/turkcell2_result.json @@ -0,0 +1,21 @@ +[ + { + "account": "01-CUSTOMER", + "circuits": [ + { + "circuit_id": "111111111111111", + "impact": "OUTAGE" + } + ], + "end": 1629277200, + "maintenance_id": "10009081013", + "organizer": "inoc@superonline.net", + "provider": "turkcell", + "sequence": 1, + "stamp": null, + "start": 1629257400, + "status": "CONFIRMED", + "summary": "SW upgrade work will be done on the OLTs mentioned below;", + "uid": "0" + } +] diff --git a/tests/unit/test_providers.py b/tests/unit/test_providers.py index 1d227b1b..d7526ced 100644 --- a/tests/unit/test_providers.py +++ b/tests/unit/test_providers.py @@ -18,6 +18,7 @@ PacketFabric, Telia, Telstra, + Turkcell, Verizon, Zayo, ) @@ -80,6 +81,17 @@ Path(dir_path, "data", "telstra", "telstra2_result.json"), ), (Telstra, GENERIC_ICAL_DATA_PATH, GENERIC_ICAL_RESULT_PATH,), + # Turkcell + ( + Turkcell, + Path(dir_path, "data", "turkcell", "turkcell1.html"), + Path(dir_path, "data", "turkcell", "turkcell1_result.json"), + ), + ( + Turkcell, + Path(dir_path, "data", "turkcell", "turkcell2.html"), + Path(dir_path, "data", "turkcell", "turkcell2_result.json"), + ), # Verizon ( Verizon, From cb75f52a08fa8a9aca6816a2746f26b50ecc80a4 Mon Sep 17 00:00:00 2001 From: carbonarok Date: Tue, 31 Aug 2021 12:13:43 +0100 Subject: [PATCH 2/6] Modified readme. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5ece5f1b..9b268fed 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ Each provider could use the standard ICal format commented above or define its c - Lumen - Megaport - Telstra +- Turkcell - Verizon - Zayo From 71559332e6edd39bf7ef250cdc4ad984e37d56af Mon Sep 17 00:00:00 2001 From: carbonarok Date: Tue, 31 Aug 2021 12:18:33 +0100 Subject: [PATCH 3/6] Removed unused imports. Added type. --- .../parsers/turkcell.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/circuit_maintenance_parser/parsers/turkcell.py b/circuit_maintenance_parser/parsers/turkcell.py index 7dc3e2bf..efda2baf 100644 --- a/circuit_maintenance_parser/parsers/turkcell.py +++ b/circuit_maintenance_parser/parsers/turkcell.py @@ -3,9 +3,8 @@ import re from typing import Dict -from dateutil import parser -import bs4 # type: ignore from bs4.element import ResultSet # type: ignore +from dateutil import parser from circuit_maintenance_parser.errors import ParsingError from circuit_maintenance_parser.parser import Html, Impact, CircuitImpact, Status @@ -29,23 +28,23 @@ def parse_html(self, soup, data_base): except Exception as exc: raise ParsingError from exc - def parse_tables(self, tables, data): + def parse_tables(self, tables: ResultSet, data: Dict): """Parse tables.""" # Main table td_elements = tables[0].find_all("td") - for idx, td in enumerate(td_elements): - if "Dear Customer" in td.text.strip(): - if "planned" in td.text.strip(): + for idx, td_element in enumerate(td_elements): + if "Dear Customer" in td_element.text.strip(): + if "planned" in td_element.text.strip(): data["status"] = Status["CONFIRMED"] else: data["status"] = Status["CONFIRMED"] - if "Maintenance Number" in td.text.strip(): + if "Maintenance Number" in td_element.text.strip(): data["maintenance_id"] = td_elements[idx + 1].text.strip() - elif "Start" in td.text.strip(): + elif "Start" in td_element.text.strip(): data["start"] = self.dt2ts(parser.parse(td_elements[idx + 1].text.strip())) - elif "End" in td.text.strip(): + elif "End" in td_element.text.strip(): data["end"] = self.dt2ts(parser.parse(td_elements[idx + 1].text.strip())) - elif "Impact of the maintenance" in td.text.strip(): + elif "Impact of the maintenance" in td_element.text.strip(): data["summary"] = td_elements[idx + 1].span.text.strip() if len(tables) == 1: data["circuits"] = [] From 77b5c24893d7bc4b1cdf72351b7ca004d50279f9 Mon Sep 17 00:00:00 2001 From: carbonarok Date: Tue, 31 Aug 2021 12:56:02 +0100 Subject: [PATCH 4/6] Travis retry. --- circuit_maintenance_parser/parsers/turkcell.py | 1 - 1 file changed, 1 deletion(-) diff --git a/circuit_maintenance_parser/parsers/turkcell.py b/circuit_maintenance_parser/parsers/turkcell.py index efda2baf..ca52eace 100644 --- a/circuit_maintenance_parser/parsers/turkcell.py +++ b/circuit_maintenance_parser/parsers/turkcell.py @@ -11,7 +11,6 @@ # pylint: disable=too-many-nested-blocks, too-many-branches - logger = logging.getLogger(__name__) From 042f8ad10f5d8ad9af5149e2be792e57d62a3418 Mon Sep 17 00:00:00 2001 From: carbonarok Date: Tue, 31 Aug 2021 13:03:31 +0100 Subject: [PATCH 5/6] Fix mypy. --- circuit_maintenance_parser/parsers/turkcell.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/circuit_maintenance_parser/parsers/turkcell.py b/circuit_maintenance_parser/parsers/turkcell.py index ca52eace..aec3aabf 100644 --- a/circuit_maintenance_parser/parsers/turkcell.py +++ b/circuit_maintenance_parser/parsers/turkcell.py @@ -53,16 +53,17 @@ def parse_tables(self, tables: ResultSet, data: Dict): # Eth-Trunk1.1 up up 111111111111111|01-CUSTOMER|LOCATION|LINK if re.match(r".+[ \t]([0-1]+\|.+\|.+\|.+)", element.text.strip()): groups = re.search(r".+[ \t]([0-1]+\|.+\|.+\|.+)", element.text.strip()) - details = groups.group(1).split("|") - data["circuits"].append(CircuitImpact(impact=Impact("OUTAGE"), circuit_id=details[0])) - data["account"] = details[1] + if groups: + details = groups.group(1).split("|") + data["circuits"].append(CircuitImpact(impact=Impact("OUTAGE"), circuit_id=details[0])) + data["account"] = details[1] # Circuit table # Possibility that there could be a table inside the first table. if len(tables) == 2: tr_elements = tables[1].find_all("tr") data["circuits"] = [] - for tr in tr_elements: - line = tr.text.strip().split("\n\n\n") + for tr_element in tr_elements: + line = tr_element.text.strip().split("\n\n\n") data["circuits"].append(CircuitImpact(impact=Impact("OUTAGE"), circuit_id=line[0])) data["account"] = line[1] From 6ad133a77b6020397d7eb6d89984f8f4697ebace Mon Sep 17 00:00:00 2001 From: carbonarok Date: Wed, 1 Sep 2021 14:33:34 +0100 Subject: [PATCH 6/6] Moved second table parsing to elif. Added doc string. --- .../parsers/turkcell.py | 94 +++++++++++++++---- 1 file changed, 78 insertions(+), 16 deletions(-) diff --git a/circuit_maintenance_parser/parsers/turkcell.py b/circuit_maintenance_parser/parsers/turkcell.py index aec3aabf..c9f867c6 100644 --- a/circuit_maintenance_parser/parsers/turkcell.py +++ b/circuit_maintenance_parser/parsers/turkcell.py @@ -28,7 +28,70 @@ def parse_html(self, soup, data_base): raise ParsingError from exc def parse_tables(self, tables: ResultSet, data: Dict): - """Parse tables.""" + """Parse tables. + + Format 1 supported here (all information in a single table): + + + + + + + + + + + + + + + + + + + + + +
Dear Customer ... planned ......
Maintenance Number123456789
Start2021-08-31 01:02:03
End2021-08-31 01:02:04
Impact of the maintenance +

CIRCUIT1|01-CUSTOMER|LOCATION|LINK

+

CIRCUIT2|01-CUSTOMER|LOCATION|LINK

+
+ + Format #2 supported here (one table for general information, a separate table listing impacted circuits): + + + + + + + + + + + + + + + + + + + +
Dear Customer ... planned ......
Maintenance Number123456789
Start2021-08-31 01:02:03
End2021-08-31 01:02:04
Impact of the maintenance
+ + + + + + + + + + +
Circuit 1Custom NameDown
Circuit 2Custom NameDown
+ + + """ # Main table td_elements = tables[0].find_all("td") for idx, td_element in enumerate(td_elements): @@ -51,19 +114,18 @@ def parse_tables(self, tables: ResultSet, data: Dict): for element in p_elements: # Example match: # Eth-Trunk1.1 up up 111111111111111|01-CUSTOMER|LOCATION|LINK - if re.match(r".+[ \t]([0-1]+\|.+\|.+\|.+)", element.text.strip()): - groups = re.search(r".+[ \t]([0-1]+\|.+\|.+\|.+)", element.text.strip()) - if groups: - details = groups.group(1).split("|") - data["circuits"].append(CircuitImpact(impact=Impact("OUTAGE"), circuit_id=details[0])) - data["account"] = details[1] + groups = re.search(r".+[ \t]([0-1]+\|.+\|.+\|.+)", element.text.strip()) + if groups: + details = groups.group(1).split("|") + data["circuits"].append(CircuitImpact(impact=Impact("OUTAGE"), circuit_id=details[0])) + data["account"] = details[1] - # Circuit table - # Possibility that there could be a table inside the first table. - if len(tables) == 2: - tr_elements = tables[1].find_all("tr") - data["circuits"] = [] - for tr_element in tr_elements: - line = tr_element.text.strip().split("\n\n\n") - data["circuits"].append(CircuitImpact(impact=Impact("OUTAGE"), circuit_id=line[0])) - data["account"] = line[1] + # Circuit table + # Possibility that there could be a table inside the first table. + elif len(tables) == 2: + tr_elements = tables[1].find_all("tr") + data["circuits"] = [] + for tr_element in tr_elements: + line = tr_element.text.strip().split("\n\n\n") + data["circuits"].append(CircuitImpact(impact=Impact("OUTAGE"), circuit_id=line[0])) + data["account"] = line[1]