You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

75 lines
2.4 KiB

import hashlib
import json
import random
import re
from requests_futures.sessions import FuturesSession
# Root of the site; profile pages and the GraphQL endpoint are built from it.
BASE_URL = 'https://www.instagram.com'
# Sent as the ``query_hash`` parameter of the GraphQL media query.
# NOTE(review): presumably identifies the timeline-media query server-side;
# the value is opaque here -- confirm it is still current.
QUERY_HASH = '42323d64886122307be10013ad2dcc44'
# Captures the JSON object assigned to ``window._sharedData`` in a page's
# inline script.  The ``.*`` is greedy, so the match runs to the last
# ``});</script>`` on the same line.
SHARED_DATA = re.compile(r'window\._sharedData = (\{.*\});</script>')
class Instagram(object):
    """Read a profile and its timeline media from the Instagram web site.

    On construction the profile page for ``username`` is downloaded and the
    user record embedded in its ``window._sharedData`` JSON is kept in
    ``self.user`` (every key starting with ``edge`` is dropped).  When the
    page cannot be fetched or does not have the expected structure,
    ``self.user`` is ``{}`` and ``self.rhx_gis`` is ``None``.
    """

    def __init__(self, username):
        """Load the profile for ``username``.

        Network errors raised by the HTTP request itself are not caught here
        and propagate to the caller.
        """
        self.username = username
        shared_data = self._get_shared_data()
        try:
            graphql = shared_data['entry_data']['ProfilePage'][0]['graphql']
            user = {
                key: value
                for key, value in graphql['user'].items()
                if not key.startswith('edge')
            }
        except (AttributeError, IndexError, KeyError, TypeError):
            # shared_data is None, or the page lacks the expected structure
            # (AttributeError covers a null ``user`` entry).
            self.user = {}
            self.rhx_gis = None
        else:
            self.user = user
            self.rhx_gis = self._get_rhx_gis()

    def _get_rhx_gis(self):
        """Return the hex MD5 digest of a JSON object holding a random id.

        The value is generated locally from ``random.randint``; it is not
        taken from the downloaded page.
        """
        s = json.dumps(dict(id=random.randint(10000000, 99999999)))
        return hashlib.md5(s.encode('UTF-8')).hexdigest()

    def _get_shared_data(self):
        """Download the profile page and return its ``_sharedData`` as a dict.

        Returns ``None`` when the response is not OK, when the page contains
        no ``window._sharedData`` assignment, or when the captured text is
        not valid JSON.
        """
        url = '{0}/{1}'.format(BASE_URL, self.username)
        # The context manager closes the session instead of leaking it.
        with FuturesSession() as session:
            r = session.get(url).result()
        if not r.ok:
            return None
        m = SHARED_DATA.search(r.text)
        if not m:
            return None
        try:
            return json.loads(m.group(1))
        except ValueError:
            # The greedy pattern can capture more than the JSON object;
            # treat unparsable text like a missing blob.
            return None

    def _sign(self, variables):
        """Return the ``X-Instagram-GIS`` value for a ``variables`` string.

        This is the hex MD5 digest of ``"<rhx_gis>:<variables>"``.
        """
        gis = '{0}:{1}'.format(self.rhx_gis, variables)
        return hashlib.md5(gis.encode('UTF-8')).hexdigest()

    def _parse_media_page(self, payload):
        """Extract the media nodes and next cursor from one GraphQL response.

        Each node's ``owner`` dict is updated in place with ``self.user``
        (an ``owner`` dict is created when the node has none).  Edges
        without a ``node`` are skipped.

        Returns a ``(nodes, end_cursor)`` tuple, or ``None`` when the payload
        does not contain ``data.user.edge_owner_to_timeline_media``.
        ``end_cursor`` is ``None`` when the page carries no cursor.
        """
        try:
            media = payload['data']['user']['edge_owner_to_timeline_media']
            edges = media.get('edges') or []
            page_info = media.get('page_info') or {}
        except (AttributeError, KeyError, TypeError):
            # TypeError/AttributeError cover null or non-dict levels,
            # e.g. ``"user": null``.
            return None

        nodes = []
        for edge in edges:
            node = edge.get('node')
            if not node:
                continue
            owner = node.get('owner')
            if not isinstance(owner, dict):
                owner = node['owner'] = {}
            owner.update(self.user)
            nodes.append(node)
        return nodes, page_info.get('end_cursor')

    def fetch_media(self):
        """Return every timeline media node of the loaded user.

        Pages of 50 items are requested from the GraphQL endpoint until the
        response carries no further cursor.  Returns ``[]`` when no user id
        is known, or as soon as any page fails (non-OK response, non-JSON
        body, unexpected structure); nodes gathered before the failure are
        discarded in that case.
        """
        if not self.user.get('id'):
            # The profile failed to load; there is nothing to query.
            return []

        result = []
        count = 50
        cursor = ''
        url = '{0}/graphql/query'.format(BASE_URL)
        with FuturesSession() as session:
            while True:
                variables = json.dumps(dict(
                    id=self.user.get('id'),
                    first=count,
                    after=cursor))
                params = dict(
                    query_hash=QUERY_HASH,
                    variables=variables)
                headers = {'X-Instagram-GIS': self._sign(variables)}
                r = session.get(url, params=params, headers=headers).result()
                if not r.ok:
                    return []
                try:
                    payload = r.json()
                except ValueError:
                    return []
                page = self._parse_media_page(payload)
                if page is None:
                    return []
                nodes, next_cursor = page
                result.extend(nodes)
                # An unchanged cursor would repeat the same request forever.
                if not next_cursor or next_cursor == cursor:
                    break
                cursor = next_cursor
        return result