import json
from urllib.parse import parse_qs, urlparse

from pyquery import PyQuery as pq
from requests_futures.sessions import FuturesSession

BASE_URL = 'https://teespring.com'


def _first_query_value(query, name):
    """Return the first value of ``name`` in a query-style string, or None.

    ``query`` is parsed with ``parse_qs``; a missing or blank parameter
    yields None instead of raising.
    """
    values = parse_qs(query).get(name)
    return values[0] if values else None


class Teespring(object):
    """Fetch the products of one Teespring store and their colour variants."""

    def __init__(self, store_name):
        """Remember ``store_name``; it is interpolated into the store API URL."""
        self.store_name = store_name

    def _parse_variants(self, pid, html):
        """Extract the colour variants of product ``pid`` from a page's HTML.

        Returns a list of dicts with the keys ``color_id``, ``front_url``,
        ``back_url`` and ``color_value``. ``color_value`` is the text between
        the first and second ``:`` of the swatch's ``style`` attribute,
        returned verbatim (not stripped), or None when the swatch element or
        its style declaration is missing.
        """
        variants = []
        doc = pq(html)
        stack_selector = (
            'div.image_stack__container[data-product-id="{0}"]'.format(pid))
        for stack in doc(stack_selector).items():
            color_id = stack.attr('data-color-id')
            front_url = (stack.find('div[data-side="front"]')
                         .find('img').eq(0).attr('data-original'))
            back_url = (stack.find('div[data-side="back"]')
                        .find('img').eq(0).attr('data-original'))

            swatch_selector = (
                'li.product__color_list_item'
                '[data-product-id="{0}"][data-color-id="{1}"]'
                .format(pid, color_id))
            swatch = doc(swatch_selector).eq(0)
            style = swatch.find('div').eq(0).attr('style')
            # A missing swatch or a style without ':' used to raise;
            # report an unknown colour instead.
            parts = style.split(':') if style else []
            color_value = parts[1] if len(parts) > 1 else None

            variants.append(dict(
                color_id=color_id,
                front_url=front_url,
                back_url=back_url,
                color_value=color_value))
        return variants

    def fetch_products(self):
        """Return every product of the store, each with a ``variants`` list.

        Store pages are requested one after another by following the ``next``
        link of each JSON response. Each product's page is requested as soon
        as the product is seen, and the responses are parsed once paging has
        finished. Each product's ``url`` is rewritten to an absolute URL.

        Returns an empty list as soon as any store page request is not OK.
        A product whose URL fragment carries no ``pid`` gets an empty
        ``variants`` list.
        """
        api_url = '{0}/api/stores/{1}/store_products'.format(
            BASE_URL, self.store_name)
        headers = {'Accept': 'application/json'}

        products = []
        page_futures = []
        # The context manager closes the session on every exit path.
        with FuturesSession() as session:
            page = 1
            while True:
                response = session.get(
                    api_url, params=dict(page=page), headers=headers).result()
                if not response.ok:
                    return []

                data = response.json()
                for product in data.get('products', []):
                    product['url'] = BASE_URL + product['url']
                    page_futures.append(session.get(product['url']))
                    products.append(product)

                next_url = data.get('next')
                if not next_url:
                    break
                page = _first_query_value(urlparse(next_url).query, 'page')
                if page is None:
                    # The next link has no usable page number; stop paging
                    # rather than crash.
                    break

            for product, future in zip(products, page_futures):
                # The product id is read from the URL fragment
                # (the part after '#').
                pid = _first_query_value(
                    urlparse(product['url']).fragment, 'pid')
                if pid is None:
                    product['variants'] = []
                    continue
                product['variants'] = self._parse_variants(
                    pid, future.result().text)
        return products