python调用微信公众号搜索

该案例调用搜狗微信公众号搜索接口:输入关键字进行搜索,返回匹配公众号的名称、微信号以及公众号描述。

# coding:utf-8
import requests
import urllib
from bs4 import BeautifulSoup

# 爬取的网址url:http://weixin.sogou.com/weixin?type=1&s_from=input&query=%E4%BA%A7%E5%93%81&ie=utf8&_sug_=n&_sug_type_=

# Build the list of paginated search-result URLs for a keyword
def get_url(keyword, page):
    """Build the Sogou WeChat account-search URLs for pages 1..page.

    Args:
        keyword: Search keyword; it is percent-encoded before being placed
            in the ``query`` parameter.
        page: Number of result pages to generate URLs for.

    Returns:
        A list of URL strings, one per page, in ascending page order.
        The list is empty when ``page`` is 0 or negative.
    """
    # ``quote`` lives in different modules on Python 2 and Python 3.
    try:
        from urllib.parse import quote  # Python 3
    except ImportError:
        from urllib import quote  # Python 2

    # On Python 2 a ``unicode`` keyword is neither ``str`` nor ``bytes`` and
    # ``quote`` raises KeyError on its non-ASCII characters, so encode first.
    if not isinstance(keyword, (str, bytes)):
        keyword = keyword.encode('utf-8')

    # The prefix and the encoded keyword do not depend on the page number,
    # so they are computed once instead of on every iteration.
    prefix = ('http://weixin.sogou.com/weixin?type=1&s_from=input&ie=utf8'
              '&_sug_=n&_sug_type_=&query=') + quote(keyword) + '&page='
    return [prefix + str(number) for number in range(1, page + 1)]


# 定义获取搜索结果的方法
def get_info(keyword, page):
    """Search Sogou for WeChat official accounts and print the matches.

    Fetches the URLs produced by ``get_url(keyword, page)`` one by one,
    extracts each account's name, account ID and description from the HTML,
    and prints every collected account once crawling has finished.

    Args:
        keyword: Search keyword, passed through to ``get_url``.
        page: Maximum number of result pages to crawl.

    Returns:
        None. The results are written to standard output.
    """
    # Original author's note: the number of results per page is inconsistent
    # (attributed to Sogou's anti-crawling mechanism), so 7 is used as the
    # threshold. A page with at least 7 results means the next page is worth
    # fetching; a shorter page ends the crawl. A better criterion may exist.
    min_results_to_continue = 7

    def native(text):
        # On Python 2 the native ``str`` is bytes, so text is UTF-8 encoded
        # before being interpolated into the byte-string templates below.
        # On Python 3 the text is used unchanged.
        return text.encode('utf-8') if str is bytes else text

    accounts = []
    for url in get_url(keyword, page):
        # A timeout keeps an unresponsive server from blocking forever.
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.content, 'html.parser')
        names = soup.find_all('p', attrs={'class': 'tit'})
        enames = soup.find_all('label', attrs={'name': 'em_weixinhao'})
        summaries = soup.select('.gzh-box2 + dl > dd')

        # The three lists are paired by position. zip() stops at the shortest
        # one, so a page missing some fields no longer raises IndexError.
        for name, ename, summary in zip(names, enames, summaries):
            accounts.append({
                'name': name.text.strip('\n'),
                'ename': ename.text,
                'summary': summary.text,
            })

        if len(names) < min_results_to_continue:
            break

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    for account in accounts:
        print('名字:%s' % native(account['name']))
        print('公众号:%s' % native(account['ename']))
        print('描述:%s' % native(account['summary']))
        print('\n')


if __name__ == '__main__':
    # Python 2's input() evaluates whatever the user types as Python code,
    # so every prompt reads plain text instead. raw_input is the Python 2
    # name for the text-reading prompt; Python 3 calls it input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    keyword = read_line('请输入关键字:')
    # Convert the page count explicitly; non-numeric input raises ValueError.
    page = int(read_line('请输入搜索结果的页数:'))
    get_info(keyword, page)

你可能感兴趣的:(python调用微信公众号搜索)