import html
import fuzzywuzzy.fuzz
import fuzzywuzzy.process
import googleapiclient
import googleapiclient.discovery
# Root URL of the YouTube web site; prefix for the video, playlist and
# channel links built by the code below.
BASE_URL = 'https://www.youtube.com'
class YoutubeError(Exception):
    """Error raised by this module when a YouTube API query fails."""
class Youtube(object):
    """Client for searching videos and playlists inside YouTube channels.

    Wraps a ``googleapiclient`` YouTube Data API v3 client. The public
    methods convert ``googleapiclient.errors.HttpError`` into
    :class:`YoutubeError`.
    """

    # Page size requested from ``search().list`` when at least this many
    # results are still wanted (or when no limit was given).
    _MAX_PAGE_SIZE = 50

    # Thumbnail variants, most preferred first.
    _THUMBNAIL_KEYS = ('high', 'medium', 'default')

    def __init__(self, api_key):
        """Build the API client.

        Args:
            api_key: developer key handed to the discovery client.
        """
        self.client = googleapiclient.discovery.build(
            'youtube', 'v3', developerKey=api_key)

    @classmethod
    def _thumbnail_url(cls, thumbnails):
        """Return the URL of the preferred thumbnail, or None if none match."""
        for key in cls._THUMBNAIL_KEYS:
            if key in thumbnails:
                return thumbnails[key]['url']
        return None

    def _search(self, channel_id, query, playlists, limit):
        """Search one channel and return a list of result dicts.

        Args:
            channel_id: ID of the channel to search in.
            query: search string passed as the ``q`` parameter.
            playlists: if true search for playlists, otherwise for videos.
            limit: number of results wanted, or None for no cap (pages are
                fetched until the API stops returning a next-page token).

        Returns:
            A list of dicts with the keys ``kind``, ``url``, ``title``,
            ``description``, ``thumbnail_url``, ``channel_title``,
            ``channel_url`` and ``channel_thumbnail_url``.

        Raises:
            YoutubeError: if the channel lookup returns no channel.
            googleapiclient.errors.HttpError: if an API request fails
                (callers in this class translate it to YoutubeError).
        """
        resp = self.client.channels().list(
            id=channel_id,
            maxResults=1,
            part='snippet').execute()
        channels = resp.get('items') or []
        if not channels:
            # Report an unknown channel as a domain error instead of leaking
            # a bare KeyError/IndexError to the caller.
            raise YoutubeError('Channel not found: {0}'.format(channel_id))
        channel_snippet = channels[0]['snippet']

        # The channel fields are identical for every result, so compute them
        # once instead of once per item.
        channel_title = html.unescape(channel_snippet['title'])
        channel_thumbnail_url = self._thumbnail_url(
            channel_snippet['thumbnails'])
        custom_url = channel_snippet.get('customUrl')
        if custom_url:
            channel_url = '{0}/c/{1}'.format(BASE_URL, custom_url)
        else:
            # Not every channel has a custom URL; link by channel ID instead.
            channel_url = '{0}/channel/{1}'.format(BASE_URL, channel_id)

        result = []
        remaining = limit  # None means "no cap"
        token = ''
        while True:
            if remaining is None:
                page_size = self._MAX_PAGE_SIZE
            else:
                page_size = min(remaining, self._MAX_PAGE_SIZE)
            resp = self.client.search().list(
                channelId=channel_id,
                q=query,
                safeSearch='none',
                type='playlist' if playlists else 'video',
                maxResults=page_size,
                part='id,snippet',
                pageToken=token).execute()

            for item in resp.get('items', []):
                # 'youtube#video' -> 'video', 'youtube#playlist' -> 'playlist'
                kind = item['id']['kind'].split('youtube#')[1]
                if kind == 'playlist':
                    url = '{0}/view_play_list?p={1}'.format(
                        BASE_URL, item['id']['playlistId'])
                else:
                    url = '{0}/watch?v={1}'.format(
                        BASE_URL, item['id']['videoId'])
                snippet = item['snippet']
                result.append({
                    'kind': kind,
                    'url': url,
                    'title': html.unescape(snippet['title']),
                    'description': html.unescape(snippet['description']),
                    'thumbnail_url': self._thumbnail_url(
                        snippet['thumbnails']),
                    'channel_title': channel_title,
                    'channel_url': channel_url,
                    'channel_thumbnail_url': channel_thumbnail_url,
                })

            if remaining is not None:
                remaining -= resp['pageInfo']['resultsPerPage']
                if remaining <= 0:
                    break
            token = resp.get('nextPageToken')
            if not token:
                break
        return result

    def search(self, channel_id, query, playlists=True, limit=None):
        """Search a channel for playlists (default) or videos.

        Args:
            channel_id: ID of the channel to search in.
            query: search string.
            playlists: if true search for playlists, otherwise for videos.
            limit: number of results wanted; None fetches every page the
                API offers.

        Returns:
            A list of result dicts (see ``_search``).

        Raises:
            YoutubeError: if the API request fails or the channel is unknown.
        """
        try:
            return self._search(channel_id, query, playlists, limit)
        except googleapiclient.errors.HttpError as e:
            raise YoutubeError(
                'Failed to query Youtube API: {0}'.format(e)) from e

    def find_best_match(self, channel_ids, query):
        """Return the single result that best matches ``query``.

        For each channel the top playlist hit and the top video hit are
        collected; the candidates are then ranked by fuzzy-matching the
        query against their titles and descriptions.

        Args:
            channel_ids: iterable of channel IDs to search.
            query: search string; it is split on ``|`` and parts starting
                with ``-`` are ignored when ranking.

        Returns:
            The best-scoring result dict, or None if nothing was found.

        Raises:
            YoutubeError: if the API request fails or a channel is unknown.
        """
        results = []
        try:
            for channel_id in channel_ids:
                results.extend(
                    self._search(channel_id, query, playlists=True, limit=1))
                results.extend(
                    self._search(channel_id, query, playlists=False, limit=1))
        except googleapiclient.errors.HttpError as e:
            raise YoutubeError(
                'Failed to query Youtube API: {0}'.format(e)) from e
        if not results:
            return None

        # Rank only against the alternatives the user asked for: split on
        # '|' and skip the parts that start with '-'. Fall back to a single
        # empty token so there is always something to score.
        tokens = [t for t in query.split('|')
                  if not t.strip().startswith('-')] or ['']

        # Keyed by result index so the winning match maps back to `results`.
        titles = {i: r['title'] for i, r in enumerate(results)}
        descriptions = {i: r['description'] for i, r in enumerate(results)}
        scorer = fuzzywuzzy.fuzz.token_sort_ratio

        matches = []
        for token in tokens:
            for choices in (titles, descriptions):
                matches.append(fuzzywuzzy.process.extractOne(
                    token, choices, scorer=scorer))

        # Each match is (text, score, index); max() keeps the first of any
        # tied scores, like a stable descending sort would.
        _, _, best_index = max(matches, key=lambda m: m[1])
        return results[best_index]