oohEmbed/app/provider/photoprovider.py
2010-11-19 21:06:13 +08:00

245 lines
9.4 KiB
Python

import logging
import re
import urllib
import xml.etree.cElementTree as ET
import base64
import hashlib
import hmac
import time
from django.utils import simplejson as json
from BeautifulSoup import BeautifulSoup, NavigableString
from base import Provider
from utils import *
from secrets import *
class ImdbProvider(object):
    """Photo and some metadata for IMDb movie urls. Check sample response to see what metadata beyond that
    specified by the oEmbed spec is returned. Note that sometimes, a photo can't be found in which case
    you will get a link type response."""
    # NOTE(review): this class derives from `object`, not `Provider`, and
    # nothing in this class defines `self.url_regex`, which `provide` reads.
    # Presumably the provider was disabled on purpose -- confirm before
    # relying on it.
    title = 'IMDb'
    url = r'http://*.imdb.com/title/tt*/'
    # The dot is escaped so that only the literal host `imdb.com` matches
    # (an unescaped `.` would accept any character in that position).
    url_re = r'imdb\.com/title/(?P<resource>tt\d{7,7})'
    example_url = 'http://www.imdb.com/title/tt0468569/'
    # Namespace of the lookup response, in ElementTree's `{uri}` prefix form.
    IMDB_NS = '{http://webservice.imdb.com/doc/2006-12-15/}'

    def set_value(self, elem, tag, d, key):
        """Copy the text of the first descendant `tag` of `elem` into `d[key]`.

        `tag` is looked up inside the `IMDB_NS` namespace. NOTE: `d` is
        modified in place for the caller.

        Returns True when the tag exists and has non-empty text (and `d` was
        updated), False otherwise (and `d` is left untouched).
        """
        node = elem.find('.//' + self.IMDB_NS + tag)
        if node is None or not node.text:
            return False
        d[key] = node.text
        return True

    def provide(self, query_url, extra_params=None):
        """Build the JSON response string for an IMDb title url.

        The title id is extracted from `query_url`, looked up through the
        `LookupTitle` web service and the returned XML is mapped onto a
        response dictionary. `extra_params` is accepted but not used.

        Raises UnsupportedUrlError when `query_url` does not match.
        """
        matches = self.url_regex.search(query_url)
        if not matches:
            raise UnsupportedUrlError()

        resource_id = matches.group('resource')
        params = urllib.urlencode({'ResourceId': resource_id})
        fetch_url = 'http://cc00.clearspring.com/imdb/LookupTitle?' + params
        result = get_url(fetch_url)

        response = {'type': u'photo', 'version': u'1.0', 'provider_name': self.title}
        tree = ET.fromstring(result)

        if self.set_value(tree, 'Source', response, 'url'):
            self.set_value(tree, 'Width', response, 'width')
            self.set_value(tree, 'Height', response, 'height')
        else:
            # No photo available: degrade to a link type response.
            response['type'] = 'link'

        self.set_value(tree, 'Title', response, 'title')
        self.set_value(tree, 'Year', response, 'year')

        director = tree.find('.//' + self.IMDB_NS + 'Director')
        # Compare with None explicitly: the truth value of an Element reflects
        # whether it has children, not whether it was found.
        if director is not None:
            self.set_value(director, 'Name', response, 'author_name')
            self.set_value(director, 'NameId', response, 'author_url')

        if self.set_value(tree, 'PlotSummary', response, 'html'):
            response['html'] = u'<p>' + response['html'] + u'</p>'
        self.set_value(tree, 'Average', response, 'rating')

        json_response = json.dumps(response, ensure_ascii=False, indent=1)
        return json_response
class AmazonProvider(Provider):
    """Product images (and author_name for books) for Amazon products. Will soon honour maxwidth/maxheight"""
    title = 'Amazon Product Image'
    url_re = r'amazon\.(?:com|co\.uk|de|ca|jp)/.*/?(?:gp/product|o/ASIN|obidos/ASIN|dp)/(?P<asin>\w{8,11})[/\?]?'
    url = 'http://*.amazon.(com|co.uk|de|ca|jp)/*/(gp/product|o/ASIN|obidos/ASIN|dp)/*'
    example_url = 'http://www.amazon.com/Myths-Innovation-Scott-Berkun/dp/0596527055'

    def provide(self, query_url, extra_params=None):
        """Build the JSON response string for an Amazon product url.

        The ASIN is extracted from `query_url` and looked up with a signed
        `ItemLookup` request whose result is decoded as JSON. When the item
        carries no image url, a standard Amazon logo is returned instead.
        `extra_params` is accepted but not used.

        Raises UnsupportedUrlError when `query_url` does not match, and
        OohEmbedError when the lookup result cannot be decoded as JSON.
        """
        matches = self.url_regex.search(query_url)
        if not matches:
            raise UnsupportedUrlError()

        params = {'Service': 'AWSECommerceService',
                  'AWSAccessKeyId': AWS_ACCESS_KEY_ID,  # Please don't abuse!
                  'AssociateTag': 'antrixnet-20',
                  'Operation': 'ItemLookup',
                  'ResponseGroup': 'Images,ItemAttributes',
                  'Style': 'http://oohembed.com/static/amazon_json.xsl',
                  'ContentType': 'text/javascript',
                  'IdType': 'ASIN',
                  'Timestamp': time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()),  # ISO 8601
                  'ItemId': matches.group('asin')}

        # String to sign: HTTP verb, host and path on separate lines,
        # followed by all query params sorted by name.
        str_to_sign = "GET" + "\n" + "xml-us.amznxslt.com" + "\n" + "/onca/xml" + "\n"
        str_to_sign = str_to_sign + urllib.urlencode(sorted(params.items()))
        digest = hmac.new(key=AWS_SECRET_ACCESS_KEY, msg=str_to_sign,
                          digestmod=hashlib.sha256).digest()
        # b64encode yields the standard base64 text without the trailing
        # newline that would otherwise have to be stripped.
        params['Signature'] = base64.b64encode(digest)  # Add the Signature to the query params

        fetch_url = 'http://xml-us.amznxslt.com/onca/xml?' + urllib.urlencode(params)
        result = get_url(fetch_url)

        try:
            parsed = json.loads(result)
        except (TypeError, ValueError):
            # Only decoding failures are translated; anything else (for
            # example KeyboardInterrupt) propagates unchanged.
            logging.error("error decoding as json. String was\n%s", result, exc_info=True)
            raise OohEmbedError("Error decoding response from Amazon.")

        item = parsed['Item']
        # The returned item contains small, medium and large image details
        # Each size is in nested dict in `item` with keyname `img_<size>`.
        # We pick the one we want and move it up to the item dict.
        item.update(item['img_large'])

        # Now we create a response by selecting all needed key/value pairs from `item`.
        # This mostly means removing `img_*` keys since the size we want is already
        # in top-level of `item`.
        # However, sometimes we don't get image details so 'url', 'thumbnail_url', etc.,
        # attribute values will be empty strings. So we also prune those now.
        selected = dict((k, v) for k, v in item.iteritems()
                        if not k.startswith('img_') and v)

        if 'url' not in selected:
            # Return a standard Amazon.com logo
            selected['url'] = \
                'http://images.amazon.com/images/G/01/x-locale/browse/upf/amzn-logo-5.gif'
            selected['width'] = 140
            selected['height'] = 66

        response = {'type': u'photo', 'version': u'1.0', 'provider_name': self.title}
        response.update(selected)

        # The returned url includes Subscription ID, etc. Replace it.
        response['author_url'] = query_url

        json_response = json.dumps(response, ensure_ascii=False, indent=1)
        return json_response
class TwitPicProvider(Provider):
    """Photo and thumbnail for TwitPic.com photos."""
    title = 'TwitPic'
    url = r'http://*.twitpic.com/*'
    # The dot is escaped so that only the literal host `twitpic.com` matches
    # (an unescaped `.` would accept any character in that position).
    url_re = r'twitpic\.com/(?P<id>\w+)'
    example_url = 'http://www.twitpic.com/1pz6z'

    def provide(self, query_url, extra_params=None):
        """Build the JSON response string for a TwitPic photo url.

        The photo and thumbnail urls are derived from the photo id found in
        `query_url`; no remote request is made here. `extra_params` is
        accepted but not used.

        Raises UnsupportedUrlError when `query_url` does not match.
        """
        matches = self.url_regex.search(query_url)
        if not matches:
            raise UnsupportedUrlError()

        photo_id = matches.group('id')
        photo_url = 'http://twitpic.com/show/full/' + photo_id
        thumb_url = 'http://twitpic.com/show/thumb/' + photo_id

        response = {'type': u'photo', 'version': u'1.0', 'provider_name': self.title,
                    'thumbnail_url': thumb_url, 'thumbnail_width': 150, 'thumbnail_height': 150,
                    'url': photo_url}

        json_response = json.dumps(response, ensure_ascii=False, indent=1)
        return json_response
class PhodroidProvider(Provider):
    """Provider for phodroid.com photos."""
    title = 'Phodroid Photos'
    url = r'http://*.phodroid.com/*/*/*'
    # The dot is escaped so that only the literal host `phodroid.com` matches
    # (an unescaped `.` would accept any character in that position).
    url_re = r'phodroid\.com/(?P<id>\d\d/\d\d/\w+)/?'
    example_url = 'http://phodroid.com/09/06/k3q6bd'

    def provide(self, query_url, extra_params=None):
        """Build the JSON response string for a phodroid.com photo url.

        The image url is derived from the `NN/NN/name` path found in
        `query_url`; no remote request is made here. `extra_params` is
        accepted but not used.

        Raises UnsupportedUrlError when `query_url` does not match.
        """
        matches = self.url_regex.search(query_url)
        if not matches:
            raise UnsupportedUrlError()

        photo_url = 'http://s.phodroid.com/' + matches.group('id') + '.jpg'

        response = {'type': u'photo', 'version': u'1.0', 'provider_name': self.title,
                    'url': photo_url}

        json_response = json.dumps(response, ensure_ascii=False, indent=1)
        return json_response
class LJAvatarProvider(Provider):
    """Avatar image for LiveJournal user. Uses http://ljpic.seacrow.com/"""
    title = 'LiveJournal UserPic'
    url = r'http://*.livejournal.com/'
    # Dots are escaped so that only the literal host suffix
    # `.livejournal.com` matches (an unescaped `.` accepts any character).
    url_re = r'(?P<id>\w+)\.livejournal\.com/?$'
    example_url = 'http://jace.livejournal.com'

    def provide(self, query_url, extra_params=None):
        """Build the JSON response string for a LiveJournal user url.

        The user id is taken from the sub-domain part of `query_url` and
        looked up through the ljpic JSON service; its `image` and `name`
        values become the `url` and `author_name` of the response.
        `extra_params` is accepted but not used.

        Raises UnsupportedUrlError when `query_url` does not match, and
        OohEmbedError when the lookup result cannot be decoded as JSON.
        """
        matches = self.url_regex.search(query_url)
        if not matches:
            raise UnsupportedUrlError()

        fetch_url = 'http://ljpic.seacrow.com/json/' + matches.group('id')
        result = get_url(fetch_url)

        try:
            parsed = json.loads(result)
        except (TypeError, ValueError):
            # Only decoding failures are translated; anything else (for
            # example KeyboardInterrupt) propagates unchanged.
            logging.error("error decoding as json. String was\n%s", result, exc_info=True)
            raise OohEmbedError("Error decoding response from LJPic.")

        response = {'type': u'photo', 'version': u'1.0', 'provider_name': self.title,
                    'url': parsed['image'], 'author_name': parsed['name']}

        json_response = json.dumps(response, ensure_ascii=False, indent=1)
        return json_response
class XKCDProvider(Provider):
    """Provides the comic image link for an xkcd.com comic page"""
    title = 'XKCD Comic'
    url = r'http://*.xkcd.com/*/'
    url_re = r'xkcd\.com/\d+/?$'
    example_url = 'http://xkcd.com/310/'

    def provide(self, query_url, extra_params=None):
        """Build the JSON response string for an xkcd comic page url.

        The page at `query_url` is fetched and the first `<img>` inside the
        `contentContainer` div supplies the photo `url` (its `src`) and
        `title` (its `alt`). `extra_params` is accepted but not used.

        Raises UnsupportedUrlError when `query_url` does not match, and
        OohEmbedError when the comic image cannot be located in the page.
        """
        matches = self.url_regex.search(query_url)
        if not matches:
            raise UnsupportedUrlError()

        # NOTE(review): the caller-supplied url is fetched as-is; the pattern
        # is applied with `search`, so it only constrains the end of the url.
        result = get_url(query_url)
        soup = BeautifulSoup(result)

        # Compare with None explicitly: a BeautifulSoup tag's truth value
        # depends on whether it has children, and an <img> tag has none.
        container = soup.find('div', id='contentContainer')
        photo = None
        if container is not None:
            photo = container.find('img')
        if photo is None:
            # An unexpected page layout is reported like any other provider
            # failure instead of failing on a None lookup result.
            raise OohEmbedError("Error locating comic image in response from xkcd.")

        response = {'type': u'photo', 'version': u'1.0', 'provider_name': self.title,
                    'url': photo['src'], 'title': photo['alt'], 'author_name': 'Randall Munroe',
                    'author_url': 'http://xkcd.com/'}

        json_response = json.dumps(response, ensure_ascii=False, indent=1)
        return json_response