import hashlib
import json
import re

from requests_futures.sessions import FuturesSession

BASE_URL = 'https://www.instagram.com'
QUERY_HASH = '42323d64886122307be10013ad2dcc44'
# Captures the JSON object assigned to ``window._sharedData`` in the page HTML.
SHARED_DATA = re.compile(r'window\._sharedData = (\{.*\});')


class Instagram(object):
    """Fetch profile data and timeline media for one Instagram username.

    On construction the profile page is downloaded and the embedded
    ``window._sharedData`` JSON is parsed.  When that fails for any reason
    ``self.user`` is an empty dict and ``self.rhx_gis`` is ``None``.
    """

    def __init__(self, username):
        """Load the profile for *username*.

        Sets ``self.user`` to the profile's ``graphql.user`` mapping without
        the keys that start with ``edge``, and ``self.rhx_gis`` to the
        ``rhx_gis`` value of the shared data.
        """
        self.username = username
        shared_data = self._get_shared_data()
        try:
            graphql = shared_data['entry_data']['ProfilePage'][0]['graphql']
            self.user = {
                key: value
                for key, value in graphql['user'].items()
                if not key.startswith('edge')
            }
            self.rhx_gis = shared_data['rhx_gis']
        except (AttributeError, IndexError, KeyError, TypeError):
            # Missing or malformed shared data (including a null ``user``):
            # fall back to an empty profile instead of raising.
            self.user = {}
            self.rhx_gis = None

    def _get_shared_data(self):
        """Return the parsed ``window._sharedData`` dict of the profile page.

        Returns ``None`` when the HTTP response is not OK, when the page does
        not contain the shared-data assignment, or when the captured text is
        not valid JSON.
        """
        # The context manager closes the session once the page is read.
        with FuturesSession() as session:
            url = '{0}/{1}'.format(BASE_URL, self.username)
            response = session.get(url).result()
            if not response.ok:
                return None
            match = SHARED_DATA.search(response.text)
        if not match:
            return None
        try:
            return json.loads(match.group(1))
        except ValueError:
            # The regex matched but the payload is not valid JSON.
            return None

    def fetch_media(self):
        """Return every timeline media node of the user as a list of dicts.

        Pages of 50 items are requested from the ``/graphql/query`` endpoint
        until the response carries no ``end_cursor``.  Each node's ``owner``
        mapping is updated with ``self.user``.

        Returns an empty list when no user id is known, when any page request
        is not OK, or when a response does not have the expected structure
        (results collected from earlier pages are discarded in that case).
        """
        user_id = self.user.get('id')
        if not user_id:
            # The profile could not be loaded; there is nothing to query.
            return []

        url = '{0}/graphql/query'.format(BASE_URL)

        def get_media(session, count, cursor):
            # Start the request for one page; returns a future.
            variables = json.dumps(dict(
                id=user_id,
                first=count,
                after=cursor))
            # The request is signed with md5("<rhx_gis>:<variables>") sent in
            # the X-Instagram-GIS header.
            gis = '{0}:{1}'.format(self.rhx_gis, variables)
            gis = hashlib.md5(gis.encode('UTF-8')).hexdigest()
            params = dict(
                query_hash=QUERY_HASH,
                variables=variables)
            return session.get(url, params=params,
                               headers={'X-Instagram-GIS': gis})

        result = []
        count = 50
        cursor = ''
        with FuturesSession() as session:
            while True:
                response = get_media(session, count, cursor).result()
                if not response.ok:
                    return []
                try:
                    data = response.json()
                    data = data['data']['user']['edge_owner_to_timeline_media']
                except (KeyError, TypeError, ValueError):
                    # Invalid JSON, a missing key, or a null ``user`` /
                    # ``data`` object in the response.
                    return []

                for edge in data.get('edges') or []:
                    node = edge.get('node')
                    if not node:
                        continue
                    # Create ``owner`` when absent rather than raising.
                    node.setdefault('owner', {}).update(self.user)
                    result.append(node)

                page_info = data.get('page_info') or {}
                cursor = page_info.get('end_cursor')
                if not cursor:
                    break
        return result