Decode HTML entities from cheese.com

master
Nikola Forró 6 years ago
parent fb316537c8
commit 5a2cd54cc8

@@ -1,3 +1,5 @@
import html
import requests import requests
from pyquery import PyQuery as pq from pyquery import PyQuery as pq
@@ -19,11 +21,11 @@ class CheeseCom(object):
d = pq(r.content) d = pq(r.content)
summary = [] summary = []
for p in d('ul[class="summary-points"]').find('p').items(): for p in d('ul[class="summary-points"]').find('p').items():
summary.append(p.html(method='text')) summary.append(html.unescape(p.html(method='text')))
return dict( return dict(
url=d('meta[name="twitter:url"]').attr('content'), url=d('meta[name="twitter:url"]').attr('content'),
name=d('meta[name="twitter:title"]').attr('content'), name=html.unescape(d('meta[name="twitter:title"]').attr('content')),
description=d('meta[name="twitter:description"]').attr('content'), description=html.unescape(d('meta[name="twitter:description"]').attr('content')),
image=d('meta[name="twitter:image"]').attr('content'), image=d('meta[name="twitter:image"]').attr('content'),
summary=summary) summary=summary)
except Exception as e: except Exception as e:

Loading…
Cancel
Save