From e2ef6c55883b0f5b801bbb0e020cb7f5ff9c6344 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nikola=20Forr=C3=B3?=
Date: Mon, 18 Jun 2018 12:47:17 +0200
Subject: [PATCH] Improve youtube matching

---
 Dockerfile          |  3 +++
 commands.py         | 11 ++---------
 requirements.txt    |  1 +
 services/youtube.py | 24 ++++++++++++++++++++++++
 4 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 53a433e..1362002 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,6 +4,9 @@ WORKDIR /bot
 COPY . .
 RUN touch settings.cfg
 
+RUN apk add --no-cache gcc libc-dev && \
+    rm -rf /var/cache/apk/*
+
 RUN pip install --no-cache-dir --requirement requirements.txt
 
 RUN addgroup -g 9999 lilia
diff --git a/commands.py b/commands.py
index d5ded42..6f0177d 100644
--- a/commands.py
+++ b/commands.py
@@ -104,15 +104,8 @@ class Commands(object):
         channel_ids = self.config['Youtube'].get('channel_ids').split(',')
         yt = Youtube(api_key)
         try:
-            for channel_id in channel_ids:
-                results = yt.search(channel_id, query, playlists=True, limit=1)
-                if results:
-                    break
-                results = yt.search(channel_id, query, playlists=False, limit=1)
-                if results:
-                    break
-            result = results.pop(0)
-        except (YoutubeError, IndexError):
+            result = yt.find_best_match(channel_ids, query)
+        except YoutubeError:
             raise CommandError('couldn\'t find anything on Youtube')
         else:
             return result
diff --git a/requirements.txt b/requirements.txt
index 15d9aef..c9313fb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 discord.py
+fuzzywuzzy[speedup]
 google-api-python-client
 irc
 python-dateutil
diff --git a/services/youtube.py b/services/youtube.py
index 32c981d..2b0aa3f 100644
--- a/services/youtube.py
+++ b/services/youtube.py
@@ -1,3 +1,5 @@
+import fuzzywuzzy.fuzz
+import fuzzywuzzy.process
 import googleapiclient
 import googleapiclient.discovery
 
@@ -63,3 +65,25 @@ class Youtube(object):
             return self._search(channel_id, query, playlists, limit)
         except googleapiclient.errors.HttpError as e:
             raise YoutubeError('Failed to query Youtube API: {}'.format(e))
+
+    def find_best_match(self, channel_ids, query):
+        results = []
+        for channel_id in channel_ids:
+            try:
+                results.extend(self._search(channel_id, query, playlists=True, limit=1))
+                results.extend(self._search(channel_id, query, playlists=False, limit=1))
+            except googleapiclient.errors.HttpError as e:
+                raise YoutubeError('Failed to query Youtube API: {}'.format(e))
+        if not results:
+            return None
+        tokens = [t for t in query.split('|') if not t.strip().startswith('-')] or ['']
+        matches = []
+        for token in tokens:
+            titles = {i: r['title'] for i, r in enumerate(results)}
+            descriptions = {i: r['description'] for i, r in enumerate(results)}
+            matches.append(fuzzywuzzy.process.extractOne(token, titles,
+                                                         scorer=fuzzywuzzy.fuzz.token_sort_ratio))
+            matches.append(fuzzywuzzy.process.extractOne(token, descriptions,
+                                                         scorer=fuzzywuzzy.fuzz.token_sort_ratio))
+        _, _, i = sorted(matches, key=lambda m: m[1], reverse=True)[0]
+        return results[i]