Add Instagram API

master
Nikola Forró 6 years ago
parent 3315fb1ff3
commit 45ed610553

@ -10,11 +10,25 @@ services:
ports:
- 127.0.0.1:8080:80
depends_on:
- instagram-api
- quotes-api
- twitch-cache-api
- twitch-subs-api
- cms
# Instagram API service with /data/instagram mounted as database storage
# INSTAGRAM_USERNAME is needed for synchronization
instagram-api:
  build:
    # Build context; the image is built from the files in ./instagram-api
    context: ./instagram-api
  volumes:
    # Host directory /data/instagram appears as /instagram in the container
    - /data/instagram:/instagram
  environment:
    # sqlite:// scheme followed by the absolute path /instagram/instagram.db,
    # i.e. the database file lives on the volume mounted above
    - SQLALCHEMY_DATABASE_URI=sqlite:////instagram/instagram.db
    # NOTE(review): looks like a placeholder to be substituted with the real
    # account name before deployment - confirm
    - INSTAGRAM_USERNAME=__INSTAGRAM_USERNAME__
  expose:
    # Exposed to other services only; no host port is published here
    - 5000
# Quotes API service with /data/quotes mounted as database storage
# SECRET_KEY is needed for API key validation
quotes-api:

@ -0,0 +1,96 @@
# ---> Python
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a Python script from a template
# before PyInstaller builds the exe, in order to inject the date and other
# information into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject

@ -0,0 +1,14 @@
# NOTE(review): the tag is unpinned, so every rebuild may pull a newer Python
# release - consider pinning a specific python:X.Y-alpine tag
FROM python:alpine
WORKDIR /app
# Copy only the dependency list first so that the pip layer below is reused
# from the build cache until requirements.txt itself changes
COPY requirements.txt .
RUN pip install --no-cache-dir --requirement requirements.txt
# Application sources change far more often than the dependencies
COPY . .
# Dedicated group with a fixed GID
# NOTE(review): presumably 9999 matches the group owning the mounted data
# directory on the host - confirm
RUN addgroup -g 9999 lilia
EXPOSE 5000
# Drop root privileges: run as user nobody with group lilia
USER nobody:lilia
ENTRYPOINT ["python", "app.py"]

@ -0,0 +1,116 @@
import logging
import os
import flask
import flask_apscheduler
import flask_restful
import flask_restful.fields
import flask_restful.reqparse
import sqlalchemy
import sqlalchemy.engine
from db import db, Medium
# Application object and configuration. The database URI comes from the
# environment; the scheduler runs Sync.sync_media every 300 seconds with at
# most one instance at a time.
app = flask.Flask(__name__)
app.logger.setLevel(logging.INFO)
app.config.update(
    ERROR_404_HELP=False,
    SQLALCHEMY_TRACK_MODIFICATIONS=False,
    SQLALCHEMY_DATABASE_URI=os.getenv('SQLALCHEMY_DATABASE_URI'),
    SCHEDULER_TIMEZONE='UTC',
    SCHEDULER_JOBS=[
        dict(id='sync_media',
             func='sync:Sync.sync_media',
             args=(app, db),
             max_instances=1,
             trigger='interval',
             seconds=300)])

# os.getenv() yields None when the variable is unset, and dict.get() only uses
# its default for *missing* keys, so the None has to be normalised explicitly
# before calling startswith() on it.
if (app.config.get('SQLALCHEMY_DATABASE_URI') or '').startswith('sqlite://'):
    @sqlalchemy.event.listens_for(sqlalchemy.engine.Engine, 'connect')
    def set_sqlite_pragma(dbapi_connection, connection_record):
        """Apply SQLite pragmas on every new database connection."""
        dbapi_connection.execute('PRAGMA journal_mode=WAL')
        dbapi_connection.execute('PRAGMA synchronous=NORMAL')

db.init_app(app)
# create_all() inside an application context works with both Flask-SQLAlchemy
# 2.x and 3.x; the former ``app=`` keyword was removed in 3.0.
with app.app_context():
    db.create_all()

scheduler = flask_apscheduler.APScheduler()
scheduler.init_app(app)

api = flask_restful.Api(app)
# Output schema applied by the marshal_with decorators of MediumResource and
# MediaResource. The keys match the column names of the Medium model;
# taken_at is rendered in ISO 8601 format.
medium_fields = {
    'id': flask_restful.fields.Integer(),
    'typename': flask_restful.fields.String(),
    'caption': flask_restful.fields.String(),
    'shortcode': flask_restful.fields.String(),
    'taken_at': flask_restful.fields.DateTime(dt_format='iso8601'),
    'width': flask_restful.fields.Integer(),
    'height': flask_restful.fields.Integer(),
    'display_url': flask_restful.fields.String(),
    'thumbnail_url': flask_restful.fields.String(),
    'likes': flask_restful.fields.Integer(),
    'owner_id': flask_restful.fields.Integer(),
    'owner_username': flask_restful.fields.String(),
    'owner_profile_pic_url': flask_restful.fields.String(),
}
# Request arguments accepted by MediaResource.get; all of them are optional.
filter_parser = flask_restful.reqparse.RequestParser()
# Substring matched case-insensitively against the caption
filter_parser.add_argument('filter', type=str)
# Exact match against the typename column
filter_parser.add_argument('type', type=str)
# Name of the Medium attribute to order by
filter_parser.add_argument('sort_by', type=str)
# 'random', or an ordering applied to the sort_by column such as 'asc'/'desc'
filter_parser.add_argument('sort_order', type=str)
# Page index; the offset is page_number * page_size, so the first page is 0
filter_parser.add_argument('page_number', type=int)
# Maximum number of media returned
filter_parser.add_argument('page_size', type=int)
class MediumResource(flask_restful.Resource):
    """Item endpoint that exposes a single medium by its primary key."""

    @flask_restful.marshal_with(medium_fields)
    def get(self, id):
        """Return the medium with the given id, or abort with HTTP 404.

        :param id: primary key taken from the URL
        :returns: ``(medium, 200)``, marshalled with ``medium_fields``
        """
        found = db.session.query(Medium).filter(Medium.id == id).first()
        if found:
            return found, 200
        # abort() raises, so nothing is returned on this path
        flask_restful.abort(404, message='Medium {0} does not exist'.format(id))
class MediaResource(flask_restful.Resource):
    """Collection endpoint: filtered, sorted and paginated list of media."""

    # Orderings a client may request by name. The value is used as a method
    # name on the column, so it must never be taken from the request verbatim.
    _SORT_ORDERS = ('asc', 'desc')

    @flask_restful.marshal_with(medium_fields)
    def get(self):
        """Return the media matching the request arguments.

        Arguments are parsed by ``filter_parser``. The total number of media
        matching the filters (before pagination) is sent in the
        ``X-Total-Count`` response header.

        :returns: ``(media, 200, headers)``, marshalled with ``medium_fields``
        """
        args = filter_parser.parse_args()
        q = db.session.query(Medium)
        if args['filter']:
            q = q.filter(Medium.caption.ilike('%{}%'.format(args['filter'])))
        if args['type']:
            q = q.filter(Medium.typename == args['type'])
        # counted before ordering/pagination so it reflects the whole result
        count = q.count()

        sort_by = args['sort_by']
        sort_order = args['sort_order']
        if sort_order == 'random':
            q = q.order_by(sqlalchemy.func.random())
        elif sort_by in medium_fields:
            # sort_by is restricted to the published field names, so arbitrary
            # attributes of the model class cannot be reached via getattr()
            col = getattr(Medium, sort_by)
            if not sort_order:
                q = q.order_by(col)
            elif sort_order in self._SORT_ORDERS:
                q = q.order_by(getattr(col, sort_order)())
            # any other sort_order is ignored and leaves the query unordered

        page_size = args['page_size']
        page_number = args['page_number']
        # non-positive values make no sense for LIMIT/OFFSET and are ignored
        if page_size and page_size > 0:
            q = q.limit(page_size)
            if page_number and page_number > 0:
                q = q.offset(page_number * page_size)
        media = q.all()
        return media, 200, {'X-Total-Count': count}
# URL routing: item endpoint (integer id in the path) and collection endpoint
api.add_resource(MediumResource, '/media/<int:id>')
api.add_resource(MediaResource, '/media')

if __name__ == '__main__':
    # The scheduler is started only when this module is run as a script,
    # so merely importing the module does not start the synchronization job
    scheduler.start()
    # Listen on all interfaces, handle requests in threads, debugger off
    app.run(host='0.0.0.0', threaded=True, debug=False)

@ -0,0 +1,22 @@
import flask_sqlalchemy
# Shared SQLAlchemy handle. Sessions are created with autoflush disabled, so
# pending objects are not flushed automatically before queries are issued.
db = flask_sqlalchemy.SQLAlchemy(session_options=dict(autoflush=False))


class Medium(db.Model):
    """One Instagram medium, stored as a row of the ``media`` table."""

    __tablename__ = 'media'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    typename = db.Column(db.String)
    caption = db.Column(db.String)
    shortcode = db.Column(db.String)
    taken_at = db.Column(db.DateTime)
    # Dimensions of the medium
    width = db.Column(db.Integer)
    height = db.Column(db.Integer)
    display_url = db.Column(db.String)
    thumbnail_url = db.Column(db.String)
    likes = db.Column(db.Integer)
    # Owner attributes are stored inline with each medium rather than in a
    # separate table
    owner_id = db.Column(db.Integer)
    owner_username = db.Column(db.String)
    owner_profile_pic_url = db.Column(db.String)

@ -0,0 +1,69 @@
import hashlib
import json
import re
from requests_futures.sessions import FuturesSession
# Root of all URLs requested by this module
BASE_URL = 'https://www.instagram.com'
# Sent as the query_hash parameter of the graphql/query request
# NOTE(review): presumably identifies the timeline-media query on Instagram's
# side and may change over time - confirm
QUERY_HASH = '42323d64886122307be10013ad2dcc44'
# Captures the JSON object assigned to window._sharedData in the profile page
SHARED_DATA = re.compile(r'window\._sharedData = (\{.*\});</script>')
class Instagram(object):
    """Reads the timeline media of a single Instagram profile.

    On construction the profile page is downloaded and the user record and
    the ``rhx_gis`` token are extracted from the embedded ``_sharedData``
    JSON. When that fails, ``user`` is an empty dict and ``rhx_gis`` is
    ``None``; ``fetch_media()`` then returns an empty list.
    """

    def __init__(self, username):
        """Load the profile metadata of ``username``.

        :param username: profile name, appended to ``BASE_URL``
        """
        self.username = username
        shared_data = self._get_shared_data()
        try:
            graphql = shared_data['entry_data']['ProfilePage'][0]['graphql']
            # keep the plain profile attributes, drop the 'edge*' entries
            self.user = {k: v for k, v in graphql['user'].items()
                         if not k.startswith('edge')}
            self.rhx_gis = shared_data['rhx_gis']
        except (AttributeError, IndexError, KeyError, TypeError):
            # page missing, layout changed or payload incomplete
            self.user = {}
            self.rhx_gis = None

    def _get_shared_data(self):
        """Download the profile page and decode its ``_sharedData`` JSON.

        :returns: the decoded object, or ``None`` when the request failed,
                  the marker was not found or the JSON was invalid
        """
        # the context manager closes the session once the page is downloaded
        with FuturesSession() as session:
            r = session.get('{0}/{1}'.format(BASE_URL, self.username)).result()
        if not r.ok:
            return None
        m = SHARED_DATA.search(r.text)
        if not m:
            return None
        try:
            return json.loads(m.group(1))
        except ValueError:
            return None

    def _request_media_page(self, session, count, cursor):
        """Start the request for one page of timeline media.

        :param session: session used to send the request
        :param count: number of media requested
        :param cursor: pagination cursor, ``''`` for the first page
        :returns: the future returned by ``session.get``
        """
        variables = json.dumps(dict(
            id=self.user.get('id'),
            first=count,
            after=cursor))
        # signature header: MD5 of '<rhx_gis>:<variables>'; it is computed
        # from exactly the string that is sent as the variables parameter
        gis = '{0}:{1}'.format(self.rhx_gis, variables)
        gis = hashlib.md5(gis.encode('UTF-8')).hexdigest()
        url = '{0}/graphql/query'.format(BASE_URL)
        params = dict(
            query_hash=QUERY_HASH,
            variables=variables)
        return session.get(url, params=params, headers={'X-Instagram-GIS': gis})

    def fetch_media(self):
        """Fetch all timeline media of the profile, 50 per request.

        Each returned node has the profile attributes from ``self.user``
        merged into its ``owner`` dict.

        :returns: list of media nodes; an empty list when the profile could
                  not be loaded or any page request fails
        """
        if not self.user.get('id'):
            # profile loading failed, there is nothing that could be queried
            return []
        result = []
        count = 50
        cursor = ''
        with FuturesSession() as session:
            while True:
                r = self._request_media_page(session, count, cursor).result()
                if not r.ok:
                    return []
                try:
                    data = r.json()['data']['user']['edge_owner_to_timeline_media']
                except (KeyError, TypeError, ValueError):
                    # invalid JSON, missing keys or a null user
                    return []
                for edge in data.get('edges', []):
                    node = edge.get('node', {})
                    # a node without an owner gets one instead of raising
                    node.setdefault('owner', {}).update(self.user)
                    result.append(node)
                # missing page_info ends the pagination instead of raising
                cursor = (data.get('page_info') or {}).get('end_cursor')
                if not cursor:
                    break
        return result

@ -0,0 +1,5 @@
Flask
Flask-APScheduler
Flask-RESTful
Flask-SQLAlchemy
requests-futures

@ -0,0 +1,58 @@
import datetime
import os
from db import Medium
from instagram import Instagram
class Sync(object):
    """Scheduled job that copies the media of an Instagram profile into the database."""

    @staticmethod
    def _get(d, *keys, default=None):
        """Walk a nested structure of dicts and lists along ``keys``.

        :param d: root object
        :param keys: dict keys or list indices, applied in order
        :param default: returned when the path cannot be followed
        :returns: the value found at the end of the path, otherwise ``default``
        """
        result = d
        for key in keys:
            if isinstance(result, list):
                try:
                    result = result[key]
                except (IndexError, TypeError):
                    return default
            elif isinstance(result, dict):
                if key not in result:
                    return default
                result = result[key]
            else:
                # None or a scalar in the middle of the path: stop here rather
                # than continuing the lookup from the root object
                return default
        return result

    @staticmethod
    def _to_datetime(val):
        """Convert a POSIX timestamp to a naive UTC ``datetime``.

        :param val: timestamp in seconds; falsy values yield ``None``
        """
        if not val:
            return None
        # same result as the deprecated utcfromtimestamp(): UTC without tzinfo
        moment = datetime.datetime.fromtimestamp(val, datetime.timezone.utc)
        return moment.replace(tzinfo=None)

    @classmethod
    def sync_media(cls, app, db):
        """Fetch all media of INSTAGRAM_USERNAME and insert or update them.

        :param app: Flask application providing the logger and the context
        :param db: database handle whose session is used
        """
        app.logger.info('Starting synchronization of media')
        username = os.getenv('INSTAGRAM_USERNAME')
        if not username:
            app.logger.error('INSTAGRAM_USERNAME is not set, skipping synchronization')
            return
        with app.app_context():
            instagram = Instagram(username)
            # Media created during this run. The session does not autoflush,
            # so a query would not find them; an id that occurs twice in the
            # fetched list must reuse the pending object.
            created = {}
            for med in instagram.fetch_media():
                media_id = cls._get(med, 'id')
                if not media_id:
                    continue
                medium = created.get(media_id)
                if not medium:
                    q = db.session.query(Medium).filter(Medium.id == media_id)
                    medium = q.first()
                if not medium:
                    medium = Medium(id=media_id)
                    created[media_id] = medium
                medium.typename = cls._get(med, '__typename')
                medium.caption = cls._get(med, 'edge_media_to_caption', 'edges', 0, 'node', 'text')
                medium.shortcode = cls._get(med, 'shortcode')
                medium.taken_at = cls._to_datetime(cls._get(med, 'taken_at_timestamp'))
                medium.width = cls._get(med, 'dimensions', 'width')
                medium.height = cls._get(med, 'dimensions', 'height')
                medium.display_url = cls._get(med, 'display_url')
                medium.thumbnail_url = cls._get(med, 'thumbnail_src')
                medium.likes = cls._get(med, 'edge_media_preview_like', 'count')
                medium.owner_id = cls._get(med, 'owner', 'id')
                medium.owner_username = cls._get(med, 'owner', 'username')
                medium.owner_profile_pic_url = cls._get(med, 'owner', 'profile_pic_url')
                db.session.add(medium)
            db.session.commit()
        app.logger.info('Synchronization of media completed')

@ -68,6 +68,15 @@ http {
root /twitch-logs;
}
# Reverse proxy for the Instagram API service. With ^~, a matching prefix
# stops nginx from checking regular-expression locations.
location ^~ /instagram/api/ {
    # Strip the /instagram/api prefix; "break" stops further rewrite
    # processing and the request stays in this location
    rewrite ^/instagram/api(/.*)$ $1 break;
    # Pass the requested host name and the client address on to the backend
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    # Talk HTTP/1.1 to the backend
    proxy_http_version 1.1;
    tcp_nodelay on;
    # instagram-api listens on port 5000
    proxy_pass http://instagram-api:5000/;
}
location ^~ /quotes/api/ {
rewrite ^/quotes/api(/.*)$ $1 break;
proxy_set_header Host $host;

Loading…
Cancel
Save