From 0c9d5872f47d6e8064735e5954391aef0e7644dd Mon Sep 17 00:00:00 2001
From: mediaminister <45148099+mediaminister@users.noreply.github.com>
Date: Fri, 18 Sep 2020 20:20:46 +0000
Subject: [PATCH] Get categories from online json (#814)

---
 .github/workflows/status.yml |  5 ++-
 resources/lib/apihelper.py   | 55 +++++++++++++++++++++++++--------
 resources/lib/data.py        | 10 +++---
 resources/lib/webscraper.py  | 59 ++----------------------------------
 tests/test_apihelper.py      | 15 ++++++++-
 tests/test_vrtplayer.py      |  3 +-
 tests/test_webscraper.py     | 12 +-------
 7 files changed, 67 insertions(+), 92 deletions(-)

diff --git a/.github/workflows/status.yml b/.github/workflows/status.yml
index 247f5571..1876ebb7 100644
--- a/.github/workflows/status.yml
+++ b/.github/workflows/status.yml
@@ -43,9 +43,8 @@ jobs:
     - name: TEST TV guide
       run: python -m unittest -v test_tvguide.TestTVGuide.test_livetv_description
       if: always()
-    # FIXME: Add a better test for the webscraper that prints the categories as well
-    - name: TEST Categories webscraper
-      run: python -m unittest -v test_webscraper.TestWebScraper.test_get_categories
+    - name: TEST Categories
+      run: python -m unittest -v test_apihelper.TestApiHelper.test_get_categories
       if: always()
     - name: TEST Video attributes webscraper
       run: python -m unittest -v test_webscraper.TestWebScraper.test_get_video_attributes
diff --git a/resources/lib/apihelper.py b/resources/lib/apihelper.py
index 01761131..d9fb1ef0 100644
--- a/resources/lib/apihelper.py
+++ b/resources/lib/apihelper.py
@@ -14,9 +14,9 @@
 from helperobjects import TitleItem
 from kodiutils import (delete_cached_thumbnail, get_cache, get_cached_url_json, get_global_setting,
                        get_setting_bool, get_setting_int, get_url_json, has_addon, localize,
-                       localize_from_data, log, ttl, url_for)
+                       localize_from_data, log, ttl, update_cache, url_for)
 from metadata import Metadata
-from utils import (html_to_kodi, find_entry, from_unicode, play_url_to_id,
+from utils import (add_https_proto, html_to_kodi, find_entry, from_unicode, play_url_to_id,
                    program_to_url, realpage, url_to_program, youtube_to_plugin_url)
 
 
@@ -776,21 +776,50 @@ def localize_features(featured):
 
         return sorted(features, key=lambda x: x.get('name'))
 
-    def list_categories(self):
-        """Construct a list of category ListItems"""
-        from webscraper import get_categories, valid_categories
-        categories = get_categories()
+    @staticmethod
+    def valid_categories(categories):
+        """Check if categories contain all necessary keys and values"""
+        return bool(categories) and all(item.get('id') and item.get('name') for item in categories)
+
+    @staticmethod
+    def get_online_categories():
+        """Return a list of categories from the VRT NU website"""
+        categories = []
+        categories_json = get_url_json('https://www.vrt.be/vrtnu/categorieen/jcr:content/par/categories.model.json')
+        if categories_json is not None:
+            categories = []
+            for category in categories_json.get('items'):
+                categories.append(dict(
+                    id=category.get('name'),
+                    thumbnail=add_https_proto(category.get('image').get('src')),
+                    name=category.get('title'),
+                ))
+        return categories
 
-        # Use the cache anyway (better than hard-coded)
-        if not valid_categories(categories):
-            categories = get_cache('categories.json', ttl=None)
+    def get_categories(self):
+        """Return a list of categories"""
+        cache_file = 'categories.json'
+
+        # Try the cache if it is fresh
+        categories = get_cache(cache_file, ttl=7 * 24 * 60 * 60)
+        if self.valid_categories(categories):
+            return categories
+
+        # Try online categories json
+        categories = self.get_online_categories()
+        if self.valid_categories(categories):
+            from json import dumps
+            update_cache(cache_file, dumps(categories))
+            return categories
 
         # Fall back to internal hard-coded categories
-        if not valid_categories(categories):
-            from data import CATEGORIES
-            log(2, 'Fall back to internal hard-coded categories')
-            categories = CATEGORIES
+        from data import CATEGORIES
+        log(2, 'Fall back to internal hard-coded categories')
+        return CATEGORIES
 
+    def list_categories(self):
+        """Construct a list of category ListItems"""
+        categories = self.get_categories()
         category_items = []
         from data import CATEGORIES
         for category in self.localize_categories(categories, CATEGORIES):
diff --git a/resources/lib/data.py b/resources/lib/data.py
index 1214ef83..68738f4b 100644
--- a/resources/lib/data.py
+++ b/resources/lib/data.py
@@ -12,22 +12,22 @@
 CATEGORIES = [
     dict(name='Audiodescriptie', id='met-audiodescriptie', msgctxt=30070),
     dict(name='Cultuur', id='cultuur', msgctxt=30071),
-    dict(name='Docu', id='docu', msgctxt=30072),
+    dict(name='Documentaire', id='docu', msgctxt=30072),
     dict(name='Entertainment', id='entertainment', msgctxt=30073),
-    dict(name='Films', id='films', msgctxt=30074),
+    dict(name='Film', id='films', msgctxt=30074),
     dict(name='Human interest', id='human-interest', msgctxt=30075),
     dict(name='Humor', id='humor', msgctxt=30076),
-    dict(name='Kinderen en jongeren', id='voor-kinderen', msgctxt=30077),
+    dict(name='Kinderen & jongeren', id='voor-kinderen', msgctxt=30077),
     dict(name='Koken', id='koken', msgctxt=30078),
     dict(name='Levensbeschouwing', id='levensbeschouwing', msgctxt=30087),
     dict(name='Lifestyle', id='lifestyle', msgctxt=30079),
     dict(name='Muziek', id='muziek', msgctxt=30080),
     dict(name='Nieuws en actua', id='nieuws-en-actua', msgctxt=30081),
-    dict(name='Series', id='series', msgctxt=30082),
+    dict(name='Serie', id='series', msgctxt=30082),
     dict(name='Sport', id='sport', msgctxt=30083),
     dict(name='Talkshows', id='talkshows', msgctxt=30084),
     dict(name='Vlaamse Gebarentaal', id='met-gebarentaal', msgctxt=30085),
-    dict(name='Wetenschap en natuur', id='wetenschap-en-natuur', msgctxt=30086),
+    dict(name='Wetenschap & natuur', id='wetenschap-en-natuur', msgctxt=30086),
 ]
 
 # TODO: Find a solution for the below VRT YouTube channels
diff --git a/resources/lib/webscraper.py b/resources/lib/webscraper.py
index bddfa4c3..c41c2fb4 100644
--- a/resources/lib/webscraper.py
+++ b/resources/lib/webscraper.py
@@ -9,63 +9,8 @@
 except ImportError:  # Python 2
     from urllib2 import HTTPError
 
-from kodiutils import get_cache, get_setting_bool, log_error, open_url, ttl, update_cache
-from utils import assetpath_to_id, add_https_proto, strip_newlines
-
-
-def valid_categories(categories):
-    """Check if categories contain all necessary keys and values"""
-    return bool(categories) and all(item.get('id') and item.get('name') for item in categories)
-
-
-def get_categories():
-    """Return a list of categories by scraping the VRT NU website"""
-
-    cache_file = 'categories.json'
-    categories = []
-
-    # Try the cache if it is fresh
-    categories = get_cache(cache_file, ttl=7 * 24 * 60 * 60)
-
-    # Try to scrape from the web
-    if not valid_categories(categories):
-        from bs4 import BeautifulSoup, SoupStrainer
-        response = open_url('https://www.vrt.be/vrtnu/categorieen/')
-        if response is None:
-            return categories
-        tiles = SoupStrainer('nui-list--content')
-        soup = BeautifulSoup(response.read(), 'html.parser', parse_only=tiles)
-
-        categories = []
-        for tile in soup.find_all('nui-tile'):
-            categories.append(dict(
-                id=tile.get('href').split('/')[-2],
-                thumbnail=get_category_thumbnail(tile),
-                name=get_category_title(tile),
-            ))
-        if categories:
-            from json import dumps
-            update_cache('categories.json', dumps(categories))
-
-    return categories
-
-
-def get_category_thumbnail(element):
-    """Return a category thumbnail, if available"""
-    if get_setting_bool('showfanart', default=True):
-        raw_thumbnail = element.find(class_='media').get('data-responsive-image', 'DefaultGenre.png')
-        return add_https_proto(raw_thumbnail)
-    return 'DefaultGenre.png'
-
-
-def get_category_title(element):
-    """Return a category title, if available"""
-    found_element = element.find('h3')
-    if found_element:
-        return strip_newlines(found_element.a.contents[0])
-    # FIXME: We should probably fall back to something sensible here, or raise an exception instead
-    return ''
-
+from kodiutils import get_cache, log_error, open_url, ttl, update_cache
+from utils import assetpath_to_id
 
 def get_video_attributes(vrtnu_url):
     """Return a dictionary with video attributes by scraping the VRT NU website"""
diff --git a/tests/test_apihelper.py b/tests/test_apihelper.py
index 022dcfba..396fdd89 100644
--- a/tests/test_apihelper.py
+++ b/tests/test_apihelper.py
@@ -7,7 +7,7 @@
 from __future__ import absolute_import, division, print_function, unicode_literals
 import unittest
 from apihelper import ApiHelper
-from data import CHANNELS
+from data import CATEGORIES, CHANNELS
 from favorites import Favorites
 from resumepoints import ResumePoints
 from xbmcextra import kodi_to_ansi
@@ -146,6 +146,19 @@ def test_upnext(self):
         next_episode = self._apihelper.get_upnext(info=current_episode)
         print(next_episode)
 
+    def test_get_categories(self):
+        """Test to ensure our local hardcoded categories conforms to online categories"""
+        # Remove thumbnails from scraped categories first
+        online_categories = [dict(id=c['id'], name=c['name']) for c in self._apihelper.get_online_categories()]
+        local_categories = [dict(id=c['id'], name=c['name']) for c in CATEGORIES]
+        print('Categories:')
+        for category in online_categories:
+            print('%s | %s' % (kodi_to_ansi(category.get('name')), kodi_to_ansi(category.get('id'))))
+
+        self.assertTrue(self._apihelper.valid_categories(online_categories))
+        self.assertTrue(self._apihelper.valid_categories(local_categories))
+        self.assertEqual(online_categories, local_categories)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/test_vrtplayer.py b/tests/test_vrtplayer.py
index a411b409..75a6d5e0 100644
--- a/tests/test_vrtplayer.py
+++ b/tests/test_vrtplayer.py
@@ -73,8 +73,7 @@ def test_show_videos_specific_seasons_shows_videos(self):
 
     def test_random_tvshow_episodes(self):
         """Test episode from a random tvshow in a random category"""
-        from webscraper import get_categories
-        categories = get_categories()
+        categories = self._apihelper.get_categories()
         self.assertTrue(categories)
 
         category = random.choice(categories)
diff --git a/tests/test_webscraper.py b/tests/test_webscraper.py
index b0d8f307..e85e502a 100644
--- a/tests/test_webscraper.py
+++ b/tests/test_webscraper.py
@@ -6,22 +6,12 @@
 
 from __future__ import absolute_import, division, print_function, unicode_literals
 import unittest
-from data import CATEGORIES
-from webscraper import get_categories, get_video_attributes, valid_categories
+from webscraper import get_video_attributes
 
 
 class TestWebScraper(unittest.TestCase):
     """TestCase class"""
 
-    def test_get_categories(self):
-        """Test to ensure our hardcoded categories conforms to scraped categories"""
-        # Remove thumbnails from scraped categories first
-        categories_scraped = [dict(id=c['id'], name=c['name']) for c in get_categories()]
-        categories_stored = [dict(id=c['id'], name=c['name']) for c in CATEGORIES]
-        self.assertTrue(valid_categories(categories_scraped))
-        self.assertTrue(valid_categories(categories_stored))
-        self.assertEqual(categories_scraped, categories_stored)
-
     def test_get_video_attributes(self):
         """Test getting video attributes"""
         vrtnu_urls = [