Skip to content

Commit

Permalink
Get categories from online json (#814)
Browse files Browse the repository at this point in the history
  • Loading branch information
mediaminister authored Sep 18, 2020
1 parent 42eff53 commit 0c9d587
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 92 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/status.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,8 @@ jobs:
- name: TEST TV guide
run: python -m unittest -v test_tvguide.TestTVGuide.test_livetv_description
if: always()
# FIXME: Add a better test for the webscraper that prints the categories as well
- name: TEST Categories webscraper
run: python -m unittest -v test_webscraper.TestWebScraper.test_get_categories
- name: TEST Categories
run: python -m unittest -v test_apihelper.TestApiHelper.test_get_categories
if: always()
- name: TEST Video attributes webscraper
run: python -m unittest -v test_webscraper.TestWebScraper.test_get_video_attributes
Expand Down
55 changes: 42 additions & 13 deletions resources/lib/apihelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
from helperobjects import TitleItem
from kodiutils import (delete_cached_thumbnail, get_cache, get_cached_url_json, get_global_setting,
get_setting_bool, get_setting_int, get_url_json, has_addon, localize,
localize_from_data, log, ttl, url_for)
localize_from_data, log, ttl, update_cache, url_for)
from metadata import Metadata
from utils import (html_to_kodi, find_entry, from_unicode, play_url_to_id,
from utils import (add_https_proto, html_to_kodi, find_entry, from_unicode, play_url_to_id,
program_to_url, realpage, url_to_program, youtube_to_plugin_url)


Expand Down Expand Up @@ -776,21 +776,50 @@ def localize_features(featured):

return sorted(features, key=lambda x: x.get('name'))

def list_categories(self):
"""Construct a list of category ListItems"""
from webscraper import get_categories, valid_categories
categories = get_categories()
@staticmethod
def valid_categories(categories):
"""Check if categories contain all necessary keys and values"""
return bool(categories) and all(item.get('id') and item.get('name') for item in categories)

@staticmethod
def get_online_categories():
    """Return a list of category dicts from the VRT NU website JSON.

    Each dict carries the keys 'id', 'thumbnail' and 'name'. Returns an
    empty list when the JSON could not be fetched or contains no items.
    """
    categories_json = get_url_json('https://www.vrt.be/vrtnu/categorieen/jcr:content/par/categories.model.json')
    if categories_json is None:
        return []
    categories = []
    # Guard against a missing/None 'items' key (original crashed iterating None)
    for category in categories_json.get('items') or []:
        # NOTE(review): assumes each item has 'name', 'title' and a nested
        # 'image' dict with an 'src' key -- guard the image lookup so one
        # malformed item cannot break the whole listing.
        image = category.get('image') or {}
        categories.append(dict(
            id=category.get('name'),
            thumbnail=add_https_proto(image.get('src')),
            name=category.get('title'),
        ))
    return categories

# Use the cache anyway (better than hard-coded)
if not valid_categories(categories):
categories = get_cache('categories.json', ttl=None)
def get_categories(self):
    """Return the category list: fresh cache first, then the website, then hard-coded data."""
    cache_file = 'categories.json'

    # A cache entry younger than one week is authoritative
    cached = get_cache(cache_file, ttl=7 * 24 * 60 * 60)
    if self.valid_categories(cached):
        return cached

    # Refresh from the VRT NU website and persist a valid result
    online = self.get_online_categories()
    if self.valid_categories(online):
        from json import dumps
        update_cache(cache_file, dumps(online))
        return online

    # Last resort: the add-on's built-in category table
    from data import CATEGORIES
    log(2, 'Fall back to internal hard-coded categories')
    return CATEGORIES

def list_categories(self):
"""Construct a list of category ListItems"""
categories = self.get_categories()
category_items = []
from data import CATEGORIES
for category in self.localize_categories(categories, CATEGORIES):
Expand Down
10 changes: 5 additions & 5 deletions resources/lib/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@
# Hard-coded fallback list of VRT NU categories, used when neither the cached
# nor the online categories JSON yields a valid list.
# NOTE(review): 'msgctxt' appears to be the Kodi localization string id for
# the category name -- confirm against the language files.
CATEGORIES = [
    dict(name='Audiodescriptie', id='met-audiodescriptie', msgctxt=30070),
    dict(name='Cultuur', id='cultuur', msgctxt=30071),
    dict(name='Documentaire', id='docu', msgctxt=30072),
    dict(name='Entertainment', id='entertainment', msgctxt=30073),
    dict(name='Film', id='films', msgctxt=30074),
    dict(name='Human interest', id='human-interest', msgctxt=30075),
    dict(name='Humor', id='humor', msgctxt=30076),
    dict(name='Kinderen & jongeren', id='voor-kinderen', msgctxt=30077),
    dict(name='Koken', id='koken', msgctxt=30078),
    dict(name='Levensbeschouwing', id='levensbeschouwing', msgctxt=30087),
    dict(name='Lifestyle', id='lifestyle', msgctxt=30079),
    dict(name='Muziek', id='muziek', msgctxt=30080),
    dict(name='Nieuws en actua', id='nieuws-en-actua', msgctxt=30081),
    dict(name='Serie', id='series', msgctxt=30082),
    dict(name='Sport', id='sport', msgctxt=30083),
    dict(name='Talkshows', id='talkshows', msgctxt=30084),
    dict(name='Vlaamse Gebarentaal', id='met-gebarentaal', msgctxt=30085),
    dict(name='Wetenschap & natuur', id='wetenschap-en-natuur', msgctxt=30086),
]

# TODO: Find a solution for the below VRT YouTube channels
Expand Down
59 changes: 2 additions & 57 deletions resources/lib/webscraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,63 +9,8 @@
except ImportError: # Python 2
from urllib2 import HTTPError

from kodiutils import get_cache, get_setting_bool, log_error, open_url, ttl, update_cache
from utils import assetpath_to_id, add_https_proto, strip_newlines


def valid_categories(categories):
    """Return True when the list is non-empty and every entry has a truthy 'id' and 'name'."""
    if not categories:
        return False
    return all(bool(entry.get('id')) and bool(entry.get('name')) for entry in categories)


def get_categories():
    """Return a list of categories by scraping the VRT NU website"""
    cache_file = 'categories.json'

    # A fresh (less than one week old) cache entry wins outright
    categories = get_cache(cache_file, ttl=7 * 24 * 60 * 60)
    if valid_categories(categories):
        return categories

    # Scrape the category tiles from the website
    from bs4 import BeautifulSoup, SoupStrainer
    response = open_url('https://www.vrt.be/vrtnu/categorieen/')
    if response is None:
        # Return whatever the cache gave us (possibly stale or empty)
        return categories
    soup = BeautifulSoup(response.read(), 'html.parser', parse_only=SoupStrainer('nui-list--content'))

    categories = [dict(
        id=tile.get('href').split('/')[-2],
        thumbnail=get_category_thumbnail(tile),
        name=get_category_title(tile),
    ) for tile in soup.find_all('nui-tile')]
    if categories:
        from json import dumps
        update_cache('categories.json', dumps(categories))

    return categories


def get_category_thumbnail(element):
    """Return a category thumbnail, if available"""
    if not get_setting_bool('showfanart', default=True):
        # Fanart disabled by the user: always the generic genre icon
        return 'DefaultGenre.png'
    raw_thumbnail = element.find(class_='media').get('data-responsive-image', 'DefaultGenre.png')
    return add_https_proto(raw_thumbnail)


def get_category_title(element):
    """Return a category title, if available"""
    heading = element.find('h3')
    if not heading:
        # FIXME: We should probably fall back to something sensible here, or raise an exception instead
        return ''
    return strip_newlines(heading.a.contents[0])

from kodiutils import get_cache, log_error, open_url, ttl, update_cache
from utils import assetpath_to_id

def get_video_attributes(vrtnu_url):
"""Return a dictionary with video attributes by scraping the VRT NU website"""
Expand Down
15 changes: 14 additions & 1 deletion tests/test_apihelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import unittest
from apihelper import ApiHelper
from data import CHANNELS
from data import CATEGORIES, CHANNELS
from favorites import Favorites
from resumepoints import ResumePoints
from xbmcextra import kodi_to_ansi
Expand Down Expand Up @@ -146,6 +146,19 @@ def test_upnext(self):
next_episode = self._apihelper.get_upnext(info=current_episode)
print(next_episode)

def test_get_categories(self):
    """Ensure the local hard-coded categories match the online categories."""

    def slim(entries):
        # Compare on id/name only; thumbnails differ between fetches
        return [dict(id=entry['id'], name=entry['name']) for entry in entries]

    online_categories = slim(self._apihelper.get_online_categories())
    local_categories = slim(CATEGORIES)

    print('Categories:')
    for category in online_categories:
        print('%s | %s' % (kodi_to_ansi(category.get('name')), kodi_to_ansi(category.get('id'))))

    self.assertTrue(self._apihelper.valid_categories(online_categories))
    self.assertTrue(self._apihelper.valid_categories(local_categories))
    self.assertEqual(online_categories, local_categories)


if __name__ == '__main__':
unittest.main()
3 changes: 1 addition & 2 deletions tests/test_vrtplayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,7 @@ def test_show_videos_specific_seasons_shows_videos(self):

def test_random_tvshow_episodes(self):
"""Test episode from a random tvshow in a random category"""
from webscraper import get_categories
categories = get_categories()
categories = self._apihelper.get_categories()
self.assertTrue(categories)

category = random.choice(categories)
Expand Down
12 changes: 1 addition & 11 deletions tests/test_webscraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,12 @@

from __future__ import absolute_import, division, print_function, unicode_literals
import unittest
from data import CATEGORIES
from webscraper import get_categories, get_video_attributes, valid_categories
from webscraper import get_video_attributes


class TestWebScraper(unittest.TestCase):
"""TestCase class"""

def test_get_categories(self):
    """Test to ensure our hardcoded categories conforms to scraped categories"""
    # Thumbnails vary between scrapes, so compare id/name pairs only
    scraped = [dict(id=entry['id'], name=entry['name']) for entry in get_categories()]
    stored = [dict(id=entry['id'], name=entry['name']) for entry in CATEGORIES]
    self.assertTrue(valid_categories(scraped))
    self.assertTrue(valid_categories(stored))
    self.assertEqual(scraped, stored)

def test_get_video_attributes(self):
"""Test getting video attributes"""
vrtnu_urls = [
Expand Down

0 comments on commit 0c9d587

Please sign in to comment.