Skip to content

Commit

Permalink
Get categories from online json (#814)
Browse files Browse the repository at this point in the history
  • Loading branch information
mediaminister authored Sep 18, 2020
1 parent 42eff53 commit 0c9d587
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 92 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/status.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,8 @@ jobs:
- name: TEST TV guide
run: python -m unittest -v test_tvguide.TestTVGuide.test_livetv_description
if: always()
# FIXME: Add a better test for the webscraper that prints the categories as well
- name: TEST Categories webscraper
run: python -m unittest -v test_webscraper.TestWebScraper.test_get_categories
- name: TEST Categories
run: python -m unittest -v test_apihelper.TestApiHelper.test_get_categories
if: always()
- name: TEST Video attributes webscraper
run: python -m unittest -v test_webscraper.TestWebScraper.test_get_video_attributes
Expand Down
55 changes: 42 additions & 13 deletions resources/lib/apihelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
from helperobjects import TitleItem
from kodiutils import (delete_cached_thumbnail, get_cache, get_cached_url_json, get_global_setting,
get_setting_bool, get_setting_int, get_url_json, has_addon, localize,
localize_from_data, log, ttl, url_for)
localize_from_data, log, ttl, update_cache, url_for)
from metadata import Metadata
from utils import (html_to_kodi, find_entry, from_unicode, play_url_to_id,
from utils import (add_https_proto, html_to_kodi, find_entry, from_unicode, play_url_to_id,
program_to_url, realpage, url_to_program, youtube_to_plugin_url)


Expand Down Expand Up @@ -776,21 +776,50 @@ def localize_features(featured):

return sorted(features, key=lambda x: x.get('name'))

def list_categories(self):
"""Construct a list of category ListItems"""
from webscraper import get_categories, valid_categories
categories = get_categories()
@staticmethod
def valid_categories(categories):
"""Check if categories contain all necessary keys and values"""
return bool(categories) and all(item.get('id') and item.get('name') for item in categories)

@staticmethod
def get_online_categories():
    """Return a list of category dicts from the VRT NU website JSON.

    Each dict carries the keys 'id', 'thumbnail' and 'name'. Returns an
    empty list when the JSON could not be fetched or contains no items.
    """
    categories_json = get_url_json('https://www.vrt.be/vrtnu/categorieen/jcr:content/par/categories.model.json')
    if categories_json is None:
        return []
    categories = []
    # Guard against a missing/None 'items' key (original crashed iterating None)
    for category in categories_json.get('items') or []:
        # NOTE(review): assumes each item has 'name', 'title' and a nested
        # 'image' dict with an 'src' key -- guard the image lookup so one
        # malformed item cannot break the whole listing.
        image = category.get('image') or {}
        categories.append(dict(
            id=category.get('name'),
            thumbnail=add_https_proto(image.get('src')),
            name=category.get('title'),
        ))
    return categories

# Use the cache anyway (better than hard-coded)
if not valid_categories(categories):
categories = get_cache('categories.json', ttl=None)
def get_categories(self):
    """Return the category list: fresh cache first, then the website, then hard-coded data."""
    cache_file = 'categories.json'

    # A cache entry younger than one week is authoritative
    cached = get_cache(cache_file, ttl=7 * 24 * 60 * 60)
    if self.valid_categories(cached):
        return cached

    # Refresh from the VRT NU website and persist a valid result
    online = self.get_online_categories()
    if self.valid_categories(online):
        from json import dumps
        update_cache(cache_file, dumps(online))
        return online

    # Last resort: the add-on's built-in category table
    from data import CATEGORIES
    log(2, 'Fall back to internal hard-coded categories')
    return CATEGORIES

def list_categories(self):
"""Construct a list of category ListItems"""
categories = self.get_categories()
category_items = []
from data import CATEGORIES
for category in self.localize_categories(categories, CATEGORIES):
Expand Down
10 changes: 5 additions & 5 deletions resources/lib/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@
# Hard-coded fallback list of VRT NU categories, used when neither the cached
# nor the online categories JSON yields a valid list.
# NOTE(review): 'msgctxt' appears to be the Kodi localization string id for
# the category name -- confirm against the language files.
CATEGORIES = [
    dict(name='Audiodescriptie', id='met-audiodescriptie', msgctxt=30070),
    dict(name='Cultuur', id='cultuur', msgctxt=30071),
    dict(name='Documentaire', id='docu', msgctxt=30072),
    dict(name='Entertainment', id='entertainment', msgctxt=30073),
    dict(name='Film', id='films', msgctxt=30074),
    dict(name='Human interest', id='human-interest', msgctxt=30075),
    dict(name='Humor', id='humor', msgctxt=30076),
    dict(name='Kinderen & jongeren', id='voor-kinderen', msgctxt=30077),
    dict(name='Koken', id='koken', msgctxt=30078),
    dict(name='Levensbeschouwing', id='levensbeschouwing', msgctxt=30087),
    dict(name='Lifestyle', id='lifestyle', msgctxt=30079),
    dict(name='Muziek', id='muziek', msgctxt=30080),
    dict(name='Nieuws en actua', id='nieuws-en-actua', msgctxt=30081),
    dict(name='Serie', id='series', msgctxt=30082),
    dict(name='Sport', id='sport', msgctxt=30083),
    dict(name='Talkshows', id='talkshows', msgctxt=30084),
    dict(name='Vlaamse Gebarentaal', id='met-gebarentaal', msgctxt=30085),
    dict(name='Wetenschap & natuur', id='wetenschap-en-natuur', msgctxt=30086),
]

# TODO: Find a solution for the below VRT YouTube channels
Expand Down
59 changes: 2 additions & 57 deletions resources/lib/webscraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,63 +9,8 @@
except ImportError: # Python 2
from urllib2 import HTTPError

from kodiutils import get_cache, get_setting_bool, log_error, open_url, ttl, update_cache
from utils import assetpath_to_id, add_https_proto, strip_newlines


def valid_categories(categories):
    """Return True when the list is non-empty and every entry has a truthy 'id' and 'name'."""
    if not categories:
        return False
    return all(bool(entry.get('id')) and bool(entry.get('name')) for entry in categories)


def get_categories():
    """Return a list of categories by scraping the VRT NU website"""
    cache_file = 'categories.json'

    # A fresh (less than one week old) cache entry wins outright
    categories = get_cache(cache_file, ttl=7 * 24 * 60 * 60)
    if valid_categories(categories):
        return categories

    # Scrape the category tiles from the website
    from bs4 import BeautifulSoup, SoupStrainer
    response = open_url('https://www.vrt.be/vrtnu/categorieen/')
    if response is None:
        # Return whatever the cache gave us (possibly stale or empty)
        return categories
    soup = BeautifulSoup(response.read(), 'html.parser', parse_only=SoupStrainer('nui-list--content'))

    categories = [dict(
        id=tile.get('href').split('/')[-2],
        thumbnail=get_category_thumbnail(tile),
        name=get_category_title(tile),
    ) for tile in soup.find_all('nui-tile')]
    if categories:
        from json import dumps
        update_cache('categories.json', dumps(categories))

    return categories


def get_category_thumbnail(element):
    """Return a category thumbnail, if available"""
    if not get_setting_bool('showfanart', default=True):
        # Fanart disabled by the user: always the generic genre icon
        return 'DefaultGenre.png'
    raw_thumbnail = element.find(class_='media').get('data-responsive-image', 'DefaultGenre.png')
    return add_https_proto(raw_thumbnail)


def get_category_title(element):
    """Return a category title, if available"""
    heading = element.find('h3')
    if not heading:
        # FIXME: We should probably fall back to something sensible here, or raise an exception instead
        return ''
    return strip_newlines(heading.a.contents[0])

from kodiutils import get_cache, log_error, open_url, ttl, update_cache
from utils import assetpath_to_id

def get_video_attributes(vrtnu_url):
"""Return a dictionary with video attributes by scraping the VRT NU website"""
Expand Down
15 changes: 14 additions & 1 deletion tests/test_apihelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import unittest
from apihelper import ApiHelper
from data import CHANNELS
from data import CATEGORIES, CHANNELS
from favorites import Favorites
from resumepoints import ResumePoints
from xbmcextra import kodi_to_ansi
Expand Down Expand Up @@ -146,6 +146,19 @@ def test_upnext(self):
next_episode = self._apihelper.get_upnext(info=current_episode)
print(next_episode)

def test_get_categories(self):
    """Ensure the local hard-coded categories match the online categories."""

    def slim(entries):
        # Compare on id/name only; thumbnails differ between fetches
        return [dict(id=entry['id'], name=entry['name']) for entry in entries]

    online_categories = slim(self._apihelper.get_online_categories())
    local_categories = slim(CATEGORIES)

    print('Categories:')
    for category in online_categories:
        print('%s | %s' % (kodi_to_ansi(category.get('name')), kodi_to_ansi(category.get('id'))))

    self.assertTrue(self._apihelper.valid_categories(online_categories))
    self.assertTrue(self._apihelper.valid_categories(local_categories))
    self.assertEqual(online_categories, local_categories)


if __name__ == '__main__':
unittest.main()
3 changes: 1 addition & 2 deletions tests/test_vrtplayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,7 @@ def test_show_videos_specific_seasons_shows_videos(self):

def test_random_tvshow_episodes(self):
"""Test episode from a random tvshow in a random category"""
from webscraper import get_categories
categories = get_categories()
categories = self._apihelper.get_categories()
self.assertTrue(categories)

category = random.choice(categories)
Expand Down
12 changes: 1 addition & 11 deletions tests/test_webscraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,12 @@

from __future__ import absolute_import, division, print_function, unicode_literals
import unittest
from data import CATEGORIES
from webscraper import get_categories, get_video_attributes, valid_categories
from webscraper import get_video_attributes


class TestWebScraper(unittest.TestCase):
"""TestCase class"""

def test_get_categories(self):
    """Test to ensure our hardcoded categories conforms to scraped categories"""
    # Thumbnails vary between scrapes, so compare id/name pairs only
    scraped = [dict(id=entry['id'], name=entry['name']) for entry in get_categories()]
    stored = [dict(id=entry['id'], name=entry['name']) for entry in CATEGORIES]
    self.assertTrue(valid_categories(scraped))
    self.assertTrue(valid_categories(stored))
    self.assertEqual(scraped, stored)

def test_get_video_attributes(self):
"""Test getting video attributes"""
vrtnu_urls = [
Expand Down

0 comments on commit 0c9d587

Please sign in to comment.