parent
3315fb1ff3
commit
45ed610553
@ -0,0 +1,96 @@
|
||||
# ---> Python
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
env/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*,cover
|
||||
.hypothesis/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# celery beat schedule file
|
||||
celerybeat-schedule
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# dotenv
|
||||
.env
|
||||
|
||||
# virtualenv
|
||||
.venv
|
||||
venv/
|
||||
ENV/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
@ -0,0 +1,14 @@
|
||||
# Image that runs app.py with the Python interpreter as an unprivileged user.
FROM python:alpine

WORKDIR /app

# Install dependencies before copying the sources so this layer is reused
# from the build cache until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --requirement requirements.txt

COPY . .

# Dedicated group with a fixed GID for the runtime user.
RUN addgroup -g 9999 lilia

EXPOSE 5000

# Drop root: run as user "nobody" with the group created above.
USER nobody:lilia

ENTRYPOINT ["python", "app.py"]
|
@ -0,0 +1,116 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
import flask
|
||||
import flask_apscheduler
|
||||
import flask_restful
|
||||
import flask_restful.fields
|
||||
import flask_restful.reqparse
|
||||
import sqlalchemy
|
||||
import sqlalchemy.engine
|
||||
|
||||
from db import db, Medium
|
||||
|
||||
|
||||
# Flask application shared by the REST resources and the background scheduler.
app = flask.Flask(__name__)
app.logger.setLevel(logging.INFO)

app.config.update({
    'ERROR_404_HELP': False,
    'SQLALCHEMY_TRACK_MODIFICATIONS': False,
    # Database location comes from the environment (None when unset).
    'SQLALCHEMY_DATABASE_URI': os.getenv('SQLALCHEMY_DATABASE_URI'),
    'SCHEDULER_TIMEZONE': 'UTC',
    # One interval job: call Sync.sync_media(app, db) every 300 seconds,
    # with at most one instance running at a time.
    'SCHEDULER_JOBS': [
        {
            'id': 'sync_media',
            'func': 'sync:Sync.sync_media',
            'args': (app, db),
            'max_instances': 1,
            'trigger': 'interval',
            'seconds': 300,
        },
    ],
})
|
||||
|
||||
# The config key is always present (it is set from os.getenv(), which yields
# None when the variable is unset), so a .get() default would never apply.
# Coalesce None to '' before testing the URI scheme.
if (app.config.get('SQLALCHEMY_DATABASE_URI') or '').startswith('sqlite://'):
    @sqlalchemy.event.listens_for(sqlalchemy.engine.Engine, 'connect')
    def set_sqlite_pragma(dbapi_connection, connection_record):
        """Set the journal_mode and synchronous pragmas on a new connection.

        Registered as a 'connect' listener on the SQLAlchemy Engine class;
        only installed when the configured database URI is a SQLite one.
        """
        dbapi_connection.execute('PRAGMA journal_mode=WAL')
        dbapi_connection.execute('PRAGMA synchronous=NORMAL')
|
||||
|
||||
# Bind the SQLAlchemy extension to the app and create any missing tables.
db.init_app(app)
# create_all() is run inside an explicit application context instead of
# passing app=...: that keyword was removed in Flask-SQLAlchemy 3.0, while
# the context form works on both older and newer releases.
with app.app_context():
    db.create_all()

# Background scheduler; it is only started in the __main__ guard.
scheduler = flask_apscheduler.APScheduler()
scheduler.init_app(app)

api = flask_restful.Api(app)
|
||||
|
||||
|
||||
# Marshalling schema used by both resources: maps each exposed attribute of a
# Medium row to the flask_restful field type used to serialise it.
medium_fields = dict(
    id=flask_restful.fields.Integer(),
    typename=flask_restful.fields.String(),
    caption=flask_restful.fields.String(),
    shortcode=flask_restful.fields.String(),
    taken_at=flask_restful.fields.DateTime(dt_format='iso8601'),
    width=flask_restful.fields.Integer(),
    height=flask_restful.fields.Integer(),
    display_url=flask_restful.fields.String(),
    thumbnail_url=flask_restful.fields.String(),
    likes=flask_restful.fields.Integer(),
    owner_id=flask_restful.fields.Integer(),
    owner_username=flask_restful.fields.String(),
    owner_profile_pic_url=flask_restful.fields.String(),
)
|
||||
|
||||
|
||||
# Parser for the optional query arguments accepted by the /media collection.
filter_parser = flask_restful.reqparse.RequestParser()
for _arg_name, _arg_type in (
        ('filter', str),
        ('type', str),
        ('sort_by', str),
        ('sort_order', str),
        ('page_number', int),
        ('page_size', int),
):
    filter_parser.add_argument(_arg_name, type=_arg_type)
# Do not leave the loop variables behind as module attributes.
del _arg_name, _arg_type
|
||||
|
||||
|
||||
class MediumResource(flask_restful.Resource):
    """Item endpoint: a single stored medium looked up by primary key."""

    @flask_restful.marshal_with(medium_fields)
    def get(self, id):
        """Return ``(medium, 200)`` for the row with this id, else abort 404."""
        medium = db.session.query(Medium).filter(Medium.id == id).first()
        if medium:
            return medium, 200
        # abort() raises, so nothing is returned on this path.
        flask_restful.abort(404, message='Medium {0} does not exist'.format(id))
|
||||
|
||||
|
||||
class MediaResource(flask_restful.Resource):
    """Collection endpoint: filtered, sorted and paginated list of media."""

    @flask_restful.marshal_with(medium_fields)
    def get(self):
        """Return the media matching the query arguments.

        Query arguments (all optional, parsed by ``filter_parser``):
            filter: case-insensitive substring match on the caption.
            type: exact match on ``typename``.
            sort_by: name of an exposed field (a key of ``medium_fields``).
            sort_order: ``asc``, ``desc`` or ``random``; ``random`` ignores
                ``sort_by``.  Any other value leaves the result unordered.
            page_size: maximum number of rows; ignored unless positive.
            page_number: zero-based page index; only used together with a
                positive ``page_size`` and ignored unless positive.

        Returns:
            ``(media, 200, headers)`` where the ``X-Total-Count`` header holds
            the number of matching rows before pagination.
        """
        args = filter_parser.parse_args()
        q = db.session.query(Medium)
        if args['filter']:
            q = q.filter(Medium.caption.ilike('%{}%'.format(args['filter'])))
        if args['type']:
            q = q.filter(Medium.typename == args['type'])
        # Count before ordering/pagination so the header covers every match.
        count = q.count()

        sort_by = args['sort_by']
        sort_order = args['sort_order']
        if sort_order == 'random':
            q = q.order_by(sqlalchemy.func.random())
        elif sort_by in medium_fields:
            # Both values are client-supplied: only exposed field names and
            # the two real ordering methods are accepted, so a request cannot
            # reach (and call) arbitrary attributes of the model or column.
            col = getattr(Medium, sort_by, None)
            if col is not None:
                if not sort_order:
                    q = q.order_by(col)
                elif sort_order in ('asc', 'desc'):
                    q = q.order_by(getattr(col, sort_order)())

        page_size = args['page_size']
        page_number = args['page_number']
        # Negative values are ignored rather than passed on to LIMIT/OFFSET.
        if page_size and page_size > 0:
            q = q.limit(page_size)
            if page_number and page_number > 0:
                q = q.offset(page_number * page_size)

        media = q.all()
        return media, 200, {'X-Total-Count': count}
|
||||
|
||||
|
||||
# URL routing: the collection endpoint and the single-item endpoint.
api.add_resource(MediaResource, '/media')
api.add_resource(MediumResource, '/media/<int:id>')


if __name__ == '__main__':
    # When run as a script: start the scheduled sync job, then serve requests.
    scheduler.start()
    app.run(debug=False, threaded=True, host='0.0.0.0')
|
@ -0,0 +1,22 @@
|
||||
import flask_sqlalchemy
|
||||
|
||||
|
||||
# Shared SQLAlchemy extension object; sessions are created with autoflush off.
db = flask_sqlalchemy.SQLAlchemy(session_options={'autoflush': False})
|
||||
|
||||
|
||||
class Medium(db.Model):
    """ORM model for one row of the ``media`` table.

    Rows are written by ``Sync.sync_media`` from the nodes returned by
    ``Instagram.fetch_media`` and read by the REST resources.
    """
    __tablename__ = 'media'

    # Primary key; set explicitly from the fetched node's 'id'.
    # NOTE(review): confirm db.Integer is wide enough for these ids on the
    # target database.
    id = db.Column(db.Integer, primary_key=True)
    # Copied from the node's '__typename'.
    typename = db.Column(db.String)
    # Text of the first caption edge, if any.
    caption = db.Column(db.String)
    shortcode = db.Column(db.String)
    # Converted from the node's 'taken_at_timestamp'.
    taken_at = db.Column(db.DateTime)
    # From the node's 'dimensions'.
    width = db.Column(db.Integer)
    height = db.Column(db.Integer)
    display_url = db.Column(db.String)
    # Copied from the node's 'thumbnail_src'.
    thumbnail_url = db.Column(db.String)
    # Copied from 'edge_media_preview_like' -> 'count'.
    likes = db.Column(db.Integer)
    # Owner details, copied from the node's 'owner' mapping.
    owner_id = db.Column(db.Integer)
    owner_username = db.Column(db.String)
    owner_profile_pic_url = db.Column(db.String)
|
@ -0,0 +1,69 @@
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
|
||||
from requests_futures.sessions import FuturesSession
|
||||
|
||||
|
||||
# Root of every URL requested by this module.
BASE_URL = 'https://www.instagram.com'
# Sent as the query_hash parameter of the GraphQL media request.
QUERY_HASH = '42323d64886122307be10013ad2dcc44'
# Captures the JSON object assigned to window._sharedData in the profile page.
# The match is non-greedy so it stops at the first '};</script>'; a greedy
# match would run on to the last '});</script>' on the same line and capture
# text that is not valid JSON.
SHARED_DATA = re.compile(r'window\._sharedData = (\{.*?\});</script>')
|
||||
|
||||
|
||||
class Instagram(object):
    """Reader for the timeline media of one Instagram profile.

    On construction the profile page is downloaded and the JSON assigned to
    ``window._sharedData`` is parsed.  ``user`` holds the profile's fields
    whose names do not start with ``edge``; ``rhx_gis`` is the value used to
    sign GraphQL requests.  If the page cannot be loaded or does not have the
    expected structure, ``user`` is ``{}`` and ``rhx_gis`` is ``None``.
    """

    def __init__(self, username):
        """Load the profile data for *username*."""
        self.username = username
        shared_data = self._get_shared_data()
        try:
            graphql = shared_data['entry_data']['ProfilePage'][0]['graphql']
            self.user = {k: v for k, v in graphql['user'].items() if not k.startswith('edge')}
            self.rhx_gis = shared_data['rhx_gis']
        except (AttributeError, IndexError, KeyError, TypeError):
            # Missing page, missing keys or a null 'user' all mean "no profile".
            self.user = {}
            self.rhx_gis = None

    def _get_shared_data(self):
        """Return the parsed ``window._sharedData`` object, or ``None``.

        ``None`` is returned when the response is not OK, the marker is not
        found in the page, or the captured text is not valid JSON.
        """
        # The context manager closes the session once the page is downloaded.
        with FuturesSession() as session:
            r = session.get('{0}/{1}'.format(BASE_URL, self.username)).result()
        if not r.ok:
            return None
        m = SHARED_DATA.search(r.text)
        if not m:
            return None
        try:
            return json.loads(m.group(1))
        except ValueError:
            return None

    def _request_media(self, session, count, cursor):
        """Start the GraphQL request for one page of media; return its future.

        The ``X-Instagram-GIS`` header is the MD5 hex digest of
        ``'<rhx_gis>:<variables JSON>'``.
        """
        variables = json.dumps(dict(
            id=self.user.get('id'),
            first=count,
            after=cursor))
        gis = '{0}:{1}'.format(self.rhx_gis, variables)
        gis = hashlib.md5(gis.encode('UTF-8')).hexdigest()
        url = '{0}/graphql/query'.format(BASE_URL)
        params = dict(
            query_hash=QUERY_HASH,
            variables=variables)
        return session.get(url, params=params, headers={'X-Instagram-GIS': gis})

    def fetch_media(self):
        """Return every timeline media node of the profile as a list of dicts.

        Pages of 50 nodes are requested until the response carries no
        ``end_cursor``.  Each node's ``owner`` mapping is extended with the
        profile fields in ``self.user``.  An empty list is returned when the
        profile could not be loaded, a request fails, or a response does not
        have the expected structure (pages fetched so far are discarded).
        """
        # Nothing can be queried without the user id from the profile page.
        if not self.user.get('id'):
            return []

        result = []
        count = 50
        cursor = ''
        with FuturesSession() as session:
            while True:
                r = self._request_media(session, count, cursor).result()
                if not r.ok:
                    return []
                try:
                    data = r.json()['data']['user']['edge_owner_to_timeline_media']
                except (KeyError, TypeError, ValueError):
                    # Invalid JSON, a missing key or a null 'data'/'user'.
                    return []
                data = data or {}
                for edge in data.get('edges') or []:
                    node = edge.get('node')
                    if not node:
                        continue
                    # Tolerate nodes whose 'owner' is missing or null.
                    owner = node.get('owner') or {}
                    owner.update(self.user)
                    node['owner'] = owner
                    result.append(node)
                cursor = (data.get('page_info') or {}).get('end_cursor')
                if not cursor:
                    break
        return result
|
@ -0,0 +1,5 @@
|
||||
Flask
|
||||
Flask-APScheduler
|
||||
Flask-RESTful
|
||||
Flask-SQLAlchemy
|
||||
requests-futures
|
@ -0,0 +1,58 @@
|
||||
import datetime
|
||||
import os
|
||||
|
||||
from db import Medium
|
||||
from instagram import Instagram
|
||||
|
||||
|
||||
class Sync(object):
    """Copies the media of the configured Instagram profile into the database."""

    @staticmethod
    def _get(d, *keys, default=None):
        """Follow *keys* through nested dicts/lists starting at *d*.

        Each key indexes the value reached so far (dict key or list index).
        *default* is returned as soon as a key or index is missing or the
        current value is neither a dict nor a list.  A value that is present
        is returned as is, even when it is falsy.  With no keys, *d* itself
        is returned.
        """
        current = d
        for key in keys:
            if not isinstance(current, (dict, list)):
                return default
            try:
                current = current[key]
            except (KeyError, IndexError, TypeError):
                return default
        return current

    @staticmethod
    def _to_datetime(val):
        """Convert a POSIX timestamp to a naive UTC ``datetime``.

        Returns ``None`` for a falsy *val* (``None``, ``0``, ...).
        """
        if not val:
            return None
        # Same result as the deprecated utcfromtimestamp(): UTC wall-clock
        # time without tzinfo.
        aware = datetime.datetime.fromtimestamp(val, datetime.timezone.utc)
        return aware.replace(tzinfo=None)

    @classmethod
    def sync_media(cls, app, db):
        """Fetch the profile's media and insert or update the matching rows.

        The profile name is read from the ``INSTAGRAM_USERNAME`` environment
        variable.  Nodes without an ``id`` are skipped; all changes are
        committed together after the last node has been processed.

        Args:
            app: Flask application; supplies the logger and the app context.
            db: Flask-SQLAlchemy object whose session is used.
        """
        app.logger.info('Starting synchronization of media')
        with app.app_context():
            instagram = Instagram(os.getenv('INSTAGRAM_USERNAME'))
            for med in instagram.fetch_media():
                medium_id = cls._get(med, 'id')
                if not medium_id:
                    continue
                medium = db.session.query(Medium).filter(Medium.id == medium_id).first()
                if not medium:
                    medium = Medium(id=medium_id)
                medium.typename = cls._get(med, '__typename')
                medium.caption = cls._get(med, 'edge_media_to_caption', 'edges', 0, 'node', 'text')
                medium.shortcode = cls._get(med, 'shortcode')
                medium.taken_at = cls._to_datetime(cls._get(med, 'taken_at_timestamp'))
                medium.width = cls._get(med, 'dimensions', 'width')
                medium.height = cls._get(med, 'dimensions', 'height')
                medium.display_url = cls._get(med, 'display_url')
                medium.thumbnail_url = cls._get(med, 'thumbnail_src')
                medium.likes = cls._get(med, 'edge_media_preview_like', 'count')
                medium.owner_id = cls._get(med, 'owner', 'id')
                medium.owner_username = cls._get(med, 'owner', 'username')
                medium.owner_profile_pic_url = cls._get(med, 'owner', 'profile_pic_url')
                db.session.add(medium)
            db.session.commit()
        app.logger.info('Synchronization of media completed')
|
Loading…
Reference in new issue