From 5a2cd54cc887b770e2a7e297e1f5f72ccc0ddb8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nikola=20Forr=C3=B3?= Date: Thu, 14 Feb 2019 15:58:48 +0100 Subject: [PATCH] Decode HTML entities from cheese.com --- services/cheesecom.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/services/cheesecom.py b/services/cheesecom.py index dd2c33c..d1c2b4f 100644 --- a/services/cheesecom.py +++ b/services/cheesecom.py @@ -1,3 +1,5 @@ +import html + import requests from pyquery import PyQuery as pq @@ -19,11 +21,11 @@ class CheeseCom(object): d = pq(r.content) summary = [] for p in d('ul[class="summary-points"]').find('p').items(): - summary.append(p.html(method='text')) + summary.append(html.unescape(p.html(method='text'))) return dict( url=d('meta[name="twitter:url"]').attr('content'), - name=d('meta[name="twitter:title"]').attr('content'), - description=d('meta[name="twitter:description"]').attr('content'), + name=html.unescape(d('meta[name="twitter:title"]').attr('content')), + description=html.unescape(d('meta[name="twitter:description"]').attr('content')), image=d('meta[name="twitter:image"]').attr('content'), summary=summary) except Exception as e: