import logging
import re
import urllib
import xml.etree.cElementTree as ET
import base64
import hashlib
import hmac
import time

from django.utils import simplejson as json
from BeautifulSoup import BeautifulSoup, NavigableString

from base import Provider
from utils import *
from secrets import *


class ImdbProvider(Provider):
    """Photo and some metadata for IMDb movie urls.

    Check the sample response to see what metadata beyond that specified by
    the oEmbed spec is returned. Note that sometimes a photo can't be found,
    in which case you will get a link type response."""

    title = 'IMDb'
    url = r'http://*.imdb.com/title/tt*/'
    url_re = r'imdb\.com/title/(?P<resource>tt\d{7,7})'
    example_url = 'http://www.imdb.com/title/tt0468569/'

    IMDB_NS = '{http://webservice.imdb.com/doc/2006-12-15/}'

    def set_value(self, elem, tag, d, key):
        """Look up `tag` under Element `elem`. If it exists and has text,
        set that text as the value of `key` in dictionary `d`.

        NOTE: `d` is modified for the caller."""
        e = elem.find('.//' + self.IMDB_NS + tag)
        if e is not None and e.text:
            d[key] = e.text
            return True
        else:
            return False

    def provide(self, query_url, extra_params=None):
        matches = self.url_regex.search(query_url)
        if not matches:
            raise UnsupportedUrlError()

        resource_id = matches.group('resource')
        params = urllib.urlencode({'ResourceId': resource_id})
        fetch_url = 'http://cc00.clearspring.com/imdb/LookupTitle?' + params
        result = get_url(fetch_url)

        response = {'type': u'photo', 'version': u'1.0',
                    'provider_name': self.title}

        tree = ET.fromstring(result)
        if not self.set_value(tree, 'Source', response, 'url'):
            # No photo found; fall back to a plain link response.
            response['type'] = 'link'
        else:
            self.set_value(tree, 'Width', response, 'width')
            self.set_value(tree, 'Height', response, 'height')

        self.set_value(tree, 'Title', response, 'title')
        self.set_value(tree, 'Year', response, 'year')

        e = tree.find('.//' + self.IMDB_NS + 'Director')
        if e is not None:
            self.set_value(e, 'Name', response, 'author_name')
            self.set_value(e, 'NameId', response, 'author_url')

        if self.set_value(tree, 'PlotSummary', response, 'html'):
            response['html'] = u'<p>' + response['html'] + u'</p>'

        self.set_value(tree, 'Average', response, 'rating')

        json_response = json.dumps(response, ensure_ascii=False, indent=1)
        return json_response


class AmazonProvider(Provider):
    """Product images (and author_name for books) for Amazon products.

    Will soon honour maxwidth/maxheight."""

    title = 'Amazon Product Image'
    url_re = r'amazon\.(?:com|co\.uk|de|ca|jp)/.*/?(?:gp/product|o/ASIN|obidos/ASIN|dp)/(?P<asin>\w{8,11})[/\?]?'
    url = 'http://*.amazon.(com|co.uk|de|ca|jp)/*/(gp/product|o/ASIN|obidos/ASIN|dp)/*'
    example_url = 'http://www.amazon.com/Myths-Innovation-Scott-Berkun/dp/0596527055'

    def provide(self, query_url, extra_params=None):
        matches = self.url_regex.search(query_url)
        if not matches:
            raise UnsupportedUrlError()

        params = {'Service': 'AWSECommerceService',
                  'AWSAccessKeyId': AWS_ACCESS_KEY_ID,  # Please don't abuse!
                  'AssociateTag': 'antrixnet-20',
                  'Operation': 'ItemLookup',
                  'ResponseGroup': 'Images,ItemAttributes',
                  'Style': 'http://oohembed.com/static/amazon_json.xsl',
                  'ContentType': 'text/javascript',
                  'IdType': 'ASIN',
                  'Timestamp': time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()),  # ISO 8601
                  'ItemId': matches.group('asin')}

        str_to_sign = "GET" + "\n" + "xml-us.amznxslt.com" + "\n" + "/onca/xml" + "\n"
        str_to_sign = str_to_sign + urllib.urlencode(sorted(params.items()))  # All query params, sorted
        signature = hmac.new(key=AWS_SECRET_ACCESS_KEY, msg=str_to_sign,
                             digestmod=hashlib.sha256).digest()
        signature = base64.encodestring(signature).strip("\n")  # base64.urlsafe_b64encode(signature)
        params['Signature'] = signature  # Add the Signature to the query params

        fetch_url = 'http://xml-us.amznxslt.com/onca/xml?' + urllib.urlencode(params)
        result = get_url(fetch_url)

        try:
            parsed = json.loads(result)
        except:
            logging.error("error decoding as json. String was\n%s" % result,
                          exc_info=True)
            raise OohEmbedError("Error decoding response from Amazon.")

        item = parsed['Item']

        # The returned item contains small, medium and large image details.
        # Each size is in a nested dict in `item` with keyname `img_<size>`.
        # We pick the one we want and move it up into the item dict.
        item.update(item['img_large'])

        # Now we create a response by selecting all needed key/value pairs
        # from `item`. This mostly means removing the `img_*` keys, since the
        # size we want is already at the top level of `item`. However,
        # sometimes we don't get image details, so 'url', 'thumbnail_url',
        # etc. will be empty strings; we prune those as well.
        selected = dict((k, v) for k, v in item.iteritems()
                        if not k.startswith('img_') and v)

        if 'url' not in selected:
            # Return a standard Amazon.com logo
            selected['url'] = \
                'http://images.amazon.com/images/G/01/x-locale/browse/upf/amzn-logo-5.gif'
            selected['width'] = 140
            selected['height'] = 66

        response = {'type': u'photo', 'version': u'1.0',
                    'provider_name': self.title}
        response.update(selected)

        # The returned author_url includes the Subscription ID etc., so
        # replace it with the original query URL.
        response['author_url'] = query_url

        json_response = json.dumps(response, ensure_ascii=False, indent=1)
        return json_response


class TwitPicProvider(Provider):
    """Photo and thumbnail for TwitPic.com photos."""

    title = 'TwitPic'
    url = r'http://*.twitpic.com/*'
    url_re = r'twitpic\.com/(?P<id>\w+)'
    example_url = 'http://www.twitpic.com/1pz6z'

    def provide(self, query_url, extra_params=None):
        matches = self.url_regex.search(query_url)
        if not matches:
            raise UnsupportedUrlError()

        photo_url = 'http://twitpic.com/show/full/' + matches.group('id')
        thumb_url = 'http://twitpic.com/show/thumb/' + matches.group('id')

        response = {'type': u'photo', 'version': u'1.0',
                    'provider_name': self.title,
                    'thumbnail_url': thumb_url,
                    'thumbnail_width': 150, 'thumbnail_height': 150,
                    'url': photo_url}

        json_response = json.dumps(response, ensure_ascii=False, indent=1)
        return json_response


class PhodroidProvider(Provider):
    """Provider for phodroid.com photos."""

    title = 'Phodroid Photos'
    url = r'http://*.phodroid.com/*/*/*'
    url_re = r'phodroid\.com/(?P<id>\d\d/\d\d/\w+)/?'
    example_url = 'http://phodroid.com/09/06/k3q6bd'

    def provide(self, query_url, extra_params=None):
        matches = self.url_regex.search(query_url)
        if not matches:
            raise UnsupportedUrlError()

        photo_url = 'http://s.phodroid.com/' + matches.group('id') + '.jpg'

        response = {'type': u'photo', 'version': u'1.0',
                    'provider_name': self.title,
                    'url': photo_url}

        json_response = json.dumps(response, ensure_ascii=False, indent=1)
        return json_response


class LJAvatarProvider(Provider):
    """Avatar image for a LiveJournal user. Uses http://ljpic.seacrow.com/"""

    title = 'LiveJournal UserPic'
    url = r'http://*.livejournal.com/'
    url_re = r'(?P<id>\w+)\.livejournal\.com/?$'
    example_url = 'http://jace.livejournal.com'

    def provide(self, query_url, extra_params=None):
        matches = self.url_regex.search(query_url)
        if not matches:
            raise UnsupportedUrlError()

        fetch_url = 'http://ljpic.seacrow.com/json/' + matches.group('id')
        result = get_url(fetch_url)

        try:
            parsed = json.loads(result)
        except:
            logging.error("error decoding as json. String was\n%s" % result,
                          exc_info=True)
            raise OohEmbedError("Error decoding response from LJPic.")

        response = {'type': u'photo', 'version': u'1.0',
                    'provider_name': self.title,
                    'url': parsed['image'],
                    'author_name': parsed['name']}

        json_response = json.dumps(response, ensure_ascii=False, indent=1)
        return json_response


class XKCDProvider(Provider):
    """Provides the comic image link for an xkcd.com comic page."""

    title = 'XKCD Comic'
    url = r'http://*.xkcd.com/*/'
    url_re = r'xkcd\.com/\d+/?$'
    example_url = 'http://xkcd.com/310/'

    def provide(self, query_url, extra_params=None):
        matches = self.url_regex.search(query_url)
        if not matches:
            raise UnsupportedUrlError()

        result = get_url(query_url)
        soup = BeautifulSoup(result)
        photo = soup.find('div', id='contentContainer').find('img')

        response = {'type': u'photo', 'version': u'1.0',
                    'provider_name': self.title,
                    'url': photo['src'],
                    'title': photo['alt'],
                    'author_name': 'Randall Munroe',
                    'author_url': 'http://xkcd.com/'}

        json_response = json.dumps(response, ensure_ascii=False, indent=1)
        return json_response
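

# A minimal usage sketch, not part of the original module. It assumes that
# the `Provider` base class imported from `base` compiles `url_re` into
# `self.url_regex` and that provider subclasses can be instantiated without
# arguments. TwitPicProvider is exercised here because its provide() only
# builds URLs from the match and needs no network access or secret keys.
if __name__ == '__main__':
    provider = TwitPicProvider()
    # Each provider advertises an example_url it claims to handle; feeding it
    # back into provide() should yield a JSON oEmbed photo response.
    print provider.provide(provider.example_url)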