一.注册应用
网上有很多关于使用weibo API的帖子,但是很遗憾的是,大多没有很详细的介绍,还有的就是版本太老了。
关于注册的部分,我就不写了,按照新浪给的步骤走就OK了,需要注意的是,现在申请应用,需要给一个回调页面(callback url),因为新浪新版的API使用oauth2.0进行验证,传递的参数是需要使用callback url的。
但是,我想大多数人都是没有callback url的,我也没有。如果你有callback url,能正常申请APP的话,那么这篇文章就可以不看了(新浪定义的回调页面,是要求可以在公网访问的,localhost和局域网页面都是不行的)。
如果你没有url,或者像我一样,只是想用API来爬数据的话,那么你随便填写一个url,然后注册就可以了,我这篇文章里面使用的验证方式是oauth1.0的,不需要callback url。
二.SDK下载
我是使用python调用API爬数据的,所以下文的所有内容都是针对python的,其他语言的话,我不是很清楚了。
1.下载官网提供的SDK
http://michaelliao.github.com/sinaweibopy/
这是官方给的SDK下载地址,进入github,下载包michaelliao-sinaweibopy-XXXXX,其中有用的就是那个weibo.py文件了,其他的文件应该是用来生成sample的吧,但是很遗憾的是,里面的代码提供的接口需要使用oauth2.0进行验证,我们这种屌丝,没有callback页面,所以用不了,这条路pass掉。
2.使用老版本的SDK
老版本的新浪weibo SDK是在google code上面维护的,地址在http://code.google.com/p/sinaweibopy/wiki/Download ,下载OAuth 1源码,这个包里提供的API是使用oauth 1进行验证的,不需要使用callback页面。然后在下载页面的左边那排标签里面,有个使用文档,地址是http://code.google.com/p/sinaweibopy/wiki/OAuth1 ,进去之后,有很详细的使用教程。
三.使用API
虽然文档很详细,但是。。。我当时还是没有看明白,在网上找了个sample,然后改了改,大致明白了怎么使用oauth 1的SDK
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__version__ = '1.0'
__author__ = 'Liao Xuefeng ([email protected])'
'''
Python client SDK for sina weibo API using OAuth 1.0
'''
try:
import json
except ImportError:
import simplejson as json
import time
import hmac
import uuid
import base64
import urllib
import urllib2
import hashlib
import logging
# OAuth 1.0 protocol constants; APIClient sends them with every signed
# request as oauth_signature_method and oauth_version.
_OAUTH_SIGN_METHOD = 'HMAC-SHA1'
_OAUTH_VERSION = '1.0'
class OAuthToken(object):
    '''
    Value holder for an OAuth 1.0 token: the token string, its secret and an
    optional verifier. Any extra keyword argument is stored as an attribute.
    '''

    def __init__(self, oauth_token, oauth_token_secret, oauth_verifier=None, **kw):
        self.oauth_token = oauth_token
        self.oauth_token_secret = oauth_token_secret
        self.oauth_verifier = oauth_verifier
        # keep every additional field that was passed in
        for name in kw:
            setattr(self, name, kw[name])

    def __str__(self):
        # "name = value" for each non-dunder name reported by dir()
        return ', '.join(
            '%s = %s' % (name, getattr(self, name))
            for name in dir(self)
            if not name.startswith('__'))

    __repr__ = __str__
class APIClient(object):
    '''
    OAuth 1.0 client for the sina weibo API.

    API calls go through the ``get`` / ``post`` HttpObject attributes, e.g.
    ``client.get.statuses__user_timeline()``. Attributes that are not found on
    the client itself are forwarded to ``get``.
    '''

    def __init__(self, app_key, app_secret, token=None, callback=None, domain='api.t.sina.com.cn'):
        '''
        Args:
          app_key: application key (stored as str).
          app_secret: application secret (stored as str).
          token: optional OAuthToken; its non-empty fields are copied onto the client.
          callback: value sent as oauth_callback by get_request_token().
          domain: API host; requests are sent to 'http://<domain>'.
        Raises:
          TypeError: if token is given but is not an OAuthToken.
        '''
        self.app_key = str(app_key)
        self.app_secret = str(app_secret)
        # Always define the token fields. Without these defaults a missing field
        # falls through __getattr__ and comes back as an API-call wrapper
        # instead of a token value.
        self.oauth_token = None
        self.oauth_token_secret = None
        self.oauth_verifier = None
        if token:
            if not isinstance(token, OAuthToken):
                raise TypeError('token parameter must be instance of OAuthToken.')
            if token.oauth_token:
                self.oauth_token = token.oauth_token
            if token.oauth_token_secret:
                self.oauth_token_secret = token.oauth_token_secret
            if token.oauth_verifier:
                self.oauth_verifier = token.oauth_verifier
        self.callback = callback
        self.api_url = 'http://%s' % domain
        self.get = HttpObject(self, _HTTP_GET)
        self.post = HttpObject(self, _HTTP_POST)

    def _oauth_request(self, method, url, **kw):
        '''
        Sign and send one API request.

        Args:
          method: one of the _HTTP_* constants; anything but _HTTP_GET is signed as POST.
          url: full API url.
          **kw: API parameters; they take part in the signature and are sent with the request.
        Returns:
          whatever _http_call returns for the request.
        '''
        params = dict(
            oauth_consumer_key=self.app_key,
            oauth_nonce=_generate_nonce(),
            oauth_signature_method=_OAUTH_SIGN_METHOD,
            oauth_timestamp=str(int(time.time())),
            oauth_version=_OAUTH_VERSION,
            oauth_token=self.oauth_token)
        params.update(kw)
        m = 'GET' if method == _HTTP_GET else 'POST'
        bs = _generate_base_string(m, url, **params)
        key = '%s&%s' % (self.app_secret, self.oauth_token_secret)
        oauth_signature = _generate_signature(key, bs)
        # Debug output goes through logging; the signing key is deliberately
        # left out because it contains the app secret and the token secret.
        logging.debug('params: %s', params)
        logging.debug('base string: %s', bs)
        logging.debug('url: %s', url)
        authorization = self.__build_oauth_header(params, oauth_signature=oauth_signature)
        return _http_call(url, method, authorization, **kw)

    def get_request_token(self):
        '''
        Step 1: request oauth token.
        Returns:
          OAuthToken object contains oauth_token and oauth_token_secret
        '''
        params = dict(
            oauth_callback=self.callback,
            oauth_consumer_key=self.app_key,
            oauth_nonce=_generate_nonce(),
            oauth_signature_method=_OAUTH_SIGN_METHOD,
            oauth_timestamp=str(int(time.time())),
            oauth_version=_OAUTH_VERSION)
        url = '%s/oauth/request_token' % self.api_url
        bs = _generate_base_string('GET', url, **params)
        # No token secret exists yet, so the key is just "<app_secret>&".
        # The signature is left un-quoted here because it travels as a query
        # parameter and _http_call url-encodes those itself.
        params['oauth_signature'] = base64.b64encode(hmac.new('%s&' % self.app_secret, bs, hashlib.sha1).digest())
        r = _http_call(url, _HTTP_GET, return_json=False, **params)
        kw = _parse_params(r, False)
        return OAuthToken(**kw)

    def get_authorize_url(self, oauth_token):
        '''
        Step 2: get authorize url and redirect to it.
        Args:
          oauth_token: oauth_token str that returned from get_request_token():
                       oauth_token = client.get_request_token().oauth_token
        Returns:
          redirect url, e.g. "http://api.t.sina.com.cn/oauth/authorize?oauth_token=ABCD1234XYZ"
        '''
        return '%s/oauth/authorize?oauth_token=%s' % (self.api_url, oauth_token)

    def get_access_token(self):
        '''
        get access token from request token:
        request_token = OAuthToken(oauth_token, oauth_secret, oauth_verifier)
        client = APIClient(appkey, appsecret, request_token)
        access_token = client.get_access_token()
        '''
        params = {
            'oauth_consumer_key': self.app_key,
            'oauth_timestamp': str(int(time.time())),
            'oauth_nonce': _generate_nonce(),
            'oauth_version': _OAUTH_VERSION,
            'oauth_signature_method': _OAUTH_SIGN_METHOD,
            'oauth_token': self.oauth_token,
            'oauth_verifier': self.oauth_verifier,
        }
        url = '%s/oauth/access_token' % self.api_url
        bs = _generate_base_string('GET', url, **params)
        key = '%s&%s' % (self.app_secret, self.oauth_token_secret)
        oauth_signature = _generate_signature(key, bs)
        authorization = self.__build_oauth_header(params, oauth_signature=oauth_signature)
        r = _http_call(url, _HTTP_GET, authorization, return_json=False)
        kw = _parse_params(r, False)
        return OAuthToken(**kw)

    def __build_oauth_header(self, params, **kw):
        '''
        build oauth header like: Authorization: OAuth oauth_token="xxx", oauth_nonce="123"
        Args:
          params: parameter dict.
          **kw: any additional key-value parameters.
        Returns:
          header value str; only keys starting with 'oauth_' are included, and
          an entry in params wins over a same-named entry in **kw.
        '''
        d = dict(**kw)
        d.update(params)
        L = [r'%s="%s"' % (k, v) for k, v in d.items() if k.startswith('oauth_')]
        return 'OAuth %s' % ', '.join(L)

    def __getattr__(self, attr):
        ' a shortcut for client.get.funcall() to client.funcall() '
        if attr == 'get':
            # __getattr__ only runs when normal lookup failed, so 'get' itself
            # is missing (instance not initialised); looking it up again below
            # would recurse forever.
            raise AttributeError(attr)
        return getattr(self.get, attr)
def _obj_hook(pairs):
    '''
    json object_hook: copy a decoded json object into a JsonObject,
    converting every key with str().
    '''
    obj = JsonObject()
    obj.update((str(key), value) for key, value in pairs.items())
    return obj
class APIError(StandardError):
'''
raise APIError if got failed json message.
'''
def __init__(self, error_code, error, request):
self.error_code = error_code
self.error = error
self.request = request
StandardError.__init__(self, error)
def __str__(self):
return 'APIError: %s: %s, request: %s' % (self.error_code, self.error, self.request)
class JsonObject(dict):
    '''
    dict whose items can also be read and written as attributes.
    Reading a missing attribute raises KeyError, exactly like item access.
    '''

    def __getattr__(self, attr):
        # only reached when normal attribute lookup fails
        return self.__getitem__(attr)

    def __setattr__(self, attr, value):
        self.__setitem__(attr, value)
def _encode_multipart(**kw):
'''
Build a multipart/form-data body with generated random boundary.
'''
boundary = '----------%s' % hex(int(time.time() * 1000))
data = []
for k, v in kw.iteritems():
data.append('--%s' % boundary)
if hasattr(v, 'read'):
# file-like object:
ext = ''
filename = getattr(v, 'name', '')
n = filename.rfind('.')
if n != (-1):
ext = filename[n:].lower()
content = v.read()
data.append('Content-Disposition: form-data; name="%s"; filename="hidden"' % k)
data.append('Content-Length: %d' % len(content))
data.append('Content-Type: %s\r\n' % _guess_content_type(ext))
data.append(content)
else:
data.append('Content-Disposition: form-data; name="%s"\r\n' % k)
data.append(v.encode('utf-8') if isinstance(v, unicode) else v)
data.append('--%s--\r\n' % boundary)
return '\r\n'.join(data), boundary
_CONTENT_TYPES = { '.png': 'image/png', '.gif': 'image/gif', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.jpe': 'image/jpeg' }
def _guess_content_type(ext):
return _CONTENT_TYPES.get(ext, 'application/octet-stream')
# request kinds understood by _http_call
_HTTP_GET = 0
_HTTP_POST = 1
_HTTP_UPLOAD = 2


def _http_call(url, method, authorization=None, return_json=True, **kw):
    '''
    Send an http request and return the response body if no error.

    Args:
      url: request url without query string.
      method: _HTTP_GET (parameters go into the query string), _HTTP_POST
              (url-encoded body) or _HTTP_UPLOAD (multipart/form-data body).
      authorization: optional value for the Authorization header.
      return_json: parse the body as json if True, otherwise return it raw.
      **kw: request parameters.
    Returns:
      the decoded json value if return_json is True, else the body str.
    Raises:
      APIError: if the decoded json object has an 'error_code' field.
    '''
    boundary = None
    if method == _HTTP_UPLOAD:
        params, boundary = _encode_multipart(**kw)
    else:
        params = _encode_params(**kw)
    if method == _HTTP_GET:
        http_url = '%s?%s' % (url, params) if params else url
        http_body = None
    else:
        http_url = url
        http_body = params
    req = urllib2.Request(http_url, data=http_body)
    if authorization:
        req.add_header('Authorization', authorization)
    if boundary:
        req.add_header('Content-Type', 'multipart/form-data; boundary=%s' % boundary)
    # Only method and url are logged: the Authorization header and the body
    # may carry tokens, signatures or uploaded file content.
    logging.debug('%s %s', 'GET' if method == _HTTP_GET else 'POST', http_url)
    resp = urllib2.urlopen(req)
    try:
        body = resp.read()
    finally:
        # release the connection even if reading fails
        resp.close()
    if not return_json:
        return body
    r = json.loads(body, object_hook=_obj_hook)
    if isinstance(r, dict) and 'error_code' in r:
        # Use dict.get for the optional fields: attribute access on the decoded
        # object raises KeyError for a missing field, which getattr()'s default
        # does not absorb.
        raise APIError(r['error_code'], r.get('error', ''), r.get('request', ''))
    return r
class HttpObject(object):
    '''
    Turns attribute access into API calls: ``obj.a__b(**kw)`` asks the client
    to request ``<api_url>/a/b.json`` with this object's http method.
    '''

    def __init__(self, client, method):
        self.client, self.method = client, method

    def __getattr__(self, attr):
        def wrap(**kw):
            owner = self.client
            # a double underscore in the attribute name stands for '/'
            path = attr.replace('__', '/')
            endpoint = '%s/%s.json' % (owner.api_url, path)
            return owner._oauth_request(self.method, endpoint, **kw)
        return wrap
################################################################################
# utility functions
################################################################################
def _parse_params(params_str, unicode_value=True):
    '''
    Split a query string into key/value pairs and return them as JsonObject.
    Values are url-unquoted; keys are used as they appear. Pieces without '='
    or with an empty key are ignored.
    Args:
      params_str: query string as str.
      unicode_value: return unicode value if True, otherwise str value. default true.
    Returns:
      JsonObject (inherited from dict)
    >>> s = _parse_params('a=123&b=X%26Y&c=%E4%B8%AD%E6%96%87')
    >>> s.a
    u'123'
    >>> s.b
    u'X&Y'
    >>> s.c==u'\u4e2d\u6587'
    True
    >>> s = _parse_params('a=123&b=X%26Y&c=%E4%B8%AD%E6%96%87', False)
    >>> s.a
    '123'
    >>> s.b
    'X&Y'
    >>> s.c=='\xe4\xb8\xad\xe6\x96\x87'
    True
    >>> s.d #doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    KeyError:
    '''
    parsed = {}
    for piece in params_str.split('&'):
        key, sep, raw = piece.partition('=')
        if not (sep and key):
            # no '=' at all, or nothing in front of it
            continue
        text = urllib.unquote(raw)
        parsed[key] = text.decode('utf-8') if unicode_value else text
    return JsonObject(**parsed)
def _encode_params(**kw):
'''
Encode parameters.
'''
if kw:
args = []
for k, v in kw.iteritems():
qv = v.encode('utf-8') if isinstance(v, unicode) else str(v)
args.append('%s=%s' % (k, _quote(qv)))
return '&'.join(args)
return ''
def _quote(s):
'''
quote everything including /
>>> _quote(123)
'123'
>>> _quote(u'\u4e2d\u6587')
'%E4%B8%AD%E6%96%87'
>>> _quote('/?abc=def& _+%')
'%2F%3Fabc%3Ddef%26%20_%2B%25'
'''
if isinstance(s, unicode):
s = s.encode('utf-8')
return urllib.quote(str(s), safe='')
def _generate_nonce():
' generate random uuid as oauth_nonce '
return uuid.uuid4().hex
def _generate_signature(key, base_string):
    '''
    sign base_string with HMAC-SHA1 under key and return the
    base64 digest, url-encoded, for use as oauth_signature
    '''
    digest = hmac.new(key, base_string, hashlib.sha1).digest()
    encoded = base64.b64encode(digest)
    return _quote(encoded)
def _generate_base_string(method, url, **params):
    '''
    build the signature base string: method, quoted url and the quoted,
    sorted "k=v" parameter list, joined by '&'
    >>> method = 'GET'
    >>> url = 'http://www.sina.com.cn/news'
    >>> params = dict(a=1, b='A&B')
    >>> _generate_base_string(method, url, **params)
    'GET&http%3A%2F%2Fwww.sina.com.cn%2Fnews&a%3D1%26b%3DA%2526B'
    '''
    # quote first, then sort the quoted pairs
    encoded = sorted((_quote(name), _quote(value)) for name, value in params.items())
    query = '&'.join('%s=%s' % pair for pair in encoded)
    return '%s&%s&%s' % (method, _quote(url), _quote(query))
# run the doctests embedded in this module when it is executed as a script
if __name__=='__main__':
    import doctest
    doctest.testmod()
上面这段代码,就是你从http://code.google.com/p/sinaweibopy/wiki/Download下载的SDK,实际上就是个文件weibo1.py,如果你没法下载,可以直接从这里复制过去。
下面是我写的一个sample
#!/usr/bin/env python
# -*- coding: utf8 -*-
from weibo1 import APIClient, OAuthToken
import MySQLdb
import time
def GetBlogClient(uname, passw):
    '''
    Log in with the given weibo account and return an APIClient that
    carries an access token.

    Args:
      uname: weibo account name, posted as "userId" to the authorize page.
      passw: weibo password, posted as "passwd" to the authorize page.
    Returns:
      APIClient created with the access token.
    Raises:
      Exception: if the url reached after posting the form has no
                 oauth_verifier parameter.
    '''
    # These modules are not in the script's import block, so bring them
    # into scope here.
    import re
    import urllib
    import urllib2

    APP_KEY = u'XXXXXXXXXX'  # app key
    APP_SECRET = u'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'  # app secret
    # step 1: fetch the OAuth request token
    client = APIClient(app_key=APP_KEY, app_secret=APP_SECRET)
    reqToken = client.get_request_token()
    # step 2: user authorization url, sent as Referer below; it is built from
    # the token string, not from the OAuthToken object
    auth_url = client.get_authorize_url(reqToken.oauth_token)
    post_data = urllib.urlencode({
        "action": "submit",
        "forcelogin": "",
        "from": "",
        "oauth_callback": "http://api.weibo.com/oauth2/default.html",
        "oauth_token": reqToken.oauth_token,
        "passwd": passw,
        "regCallback": "",
        "ssoDoor": "",
        "userId": uname,
        "vdCheckflag": 1,
        "vsnval": ""
    })
    request = urllib2.Request(
        "http://api.t.sina.com.cn/oauth/authorize",
        post_data,
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1)',
            'Referer': auth_url
        })
    resp = urllib2.urlopen(request)
    try:
        # the verifier is read from the url of the response
        final_url = resp.url
    finally:
        resp.close()
    mat = re.search(r'&oauth_verifier=(.+)', final_url)
    if not mat:
        raise Exception('authorization failed: no oauth_verifier in the response url')
    # step 3: exchange request token + verifier for an access token
    client = APIClient(
        APP_KEY,
        APP_SECRET,
        OAuthToken(
            reqToken.oauth_token,
            reqToken.oauth_token_secret,
            mat.group(1)
        ))
    return APIClient(APP_KEY, APP_SECRET, client.get_access_token())
# script entry point: log in, then fetch the user's timeline
if __name__ == "__main__":
    client = GetBlogClient("XXXXXXXXXX","XXXXX")
    r = client.get.statuses__user_timeline() # calls the statuses/user_timeline API ('__' stands for '/')
上面代码中的这一行:
client = GetBlogClient("XXXXXXXXXX","XXXXX")
填入你的微博用户名和密码就可以了。
四.各种问题
1.为什么用我的APP_KEY和 APP_SECRET配合微博账号运行失败?
其实,我申请了APP,然后运行也是失败的,我去找了一下原因,有人说是APP没有通过审核,需要将自己的微博账号添加到测试账号里面(添加工作可以在新浪的网站上完成),我修改了还不行,然后就去网上找了个KEY,然后运行,就没有问题了,我个人觉得应该是APP审核的问题,如果你设置了测试账号还不行,就和我一样去网上找个key吧。
2.为什么有的微博API能用有的用不了
需要注意的是,这个oauth1 SDK代码是配合API V1(http://open.weibo.com/wiki/API%E6%96%87%E6%A1%A3 )使用的,而现在最新的API V2(http://open.weibo.com/wiki/API%E6%96%87%E6%A1%A3_V2 )不支持这套SDK。
如果想使用V2的话,需要将weibo1.py中第44行的
def __init__(self, app_key, app_secret, token=None, callback=None, domain='api.t.sina.com.cn'):
改成
def __init__(self, app_key, app_secret, token=None, callback=None, domain='api.weibo.com'):
改完之后,V2的部分API也可以用了(部分的API可能还是用不了),但是API V1就不支持了。
3.就是不能用
如果这段代码怎么都不行的话,那么我觉得,可能是你使用这段代码的时间和我写这篇文章的时间相距太远了, 新浪又更新了API,所以,一切以新浪的官方文档为准http://open.weibo.com/wiki/%E9%A6%96%E9%A1%B5,当然,如果有什么问题的话,也可以给我留言。