python获取熊猫tv专区的人气数据

# coding:utf-8
from urllib import request
import re


class Spider():
    """Scrape and rank anchor popularity data from a panda.tv category page.

    NOTE(review): the three regex patterns below were garbled in the source
    (the HTML tags inside them were stripped during extraction). They are
    reconstructed from the well-known panda.tv tutorial this code follows —
    confirm against the actual page markup (the site has since shut down,
    so they may need updating regardless).
    """

    # Switch category by replacing this with the category's URL.
    url = 'https://www.panda.tv/cate/kingglory'
    # Outermost per-anchor block. Non-greedy ([\s\S]*?) is required:
    # a greedy match would run through to the page's last closing tag.
    root_pattern = r'<div class="video-info">([\s\S]*?)</div>'
    # Anchor name sits between </i> and </span> inside the root block.
    name_pattern = r'</i>([\s\S]*?)</span>'
    # Popularity figure inside <span class="video-number">.
    number_pattern = r'<span class="video-number">([\s\S]*?)</span>'
    # Collected {'name': str, 'number': str-then-float} records.
    anchors = []

    # Fetch the category page and return it as a UTF-8 string.
    def __get_content(self):
        # Context manager closes the response instead of leaking the socket.
        with request.urlopen(Spider.url) as r:
            content = r.read()
        return str(content, encoding='utf-8')

    # Extract (name, number) pairs from the page HTML into Spider.anchors.
    def __handle_content(self, htmls):
        # re.S lets '.' span newlines inside the matched blocks.
        root_html = re.findall(Spider.root_pattern, htmls, re.S)
        for html in root_html:
            names = re.findall(Spider.name_pattern, html)
            numbers = re.findall(Spider.number_pattern, html)
            # Skip malformed blocks instead of raising IndexError on [0].
            if not names or not numbers:
                continue
            Spider.anchors.append({
                'name': names[0].strip(),
                'number': numbers[0].strip(),
            })

    # Return anchors sorted by popularity, highest first.
    def sort(self, anchors):
        anchors = self.keey_seed(anchors)
        return sorted(anchors, key=lambda a: a['number'], reverse=True)

    # Convert each anchor's 'number' string to a numeric value in place.
    # '3.5万' -> 35000.0; plain digit strings convert directly.
    def keey_seed(self, anchors):
        for a in anchors:
            if '万' in a['number']:
                # r'\d+\.?\d*' keeps the fractional part; the original
                # '\d*' pattern dropped it ('3.5万' became 30000.0).
                a['number'] = float(re.findall(r'\d+\.?\d*', a['number'])[0]) * 10000
            else:
                a['number'] = float(a['number'])
        return anchors

    # Print one "name : number" line per ranked anchor.
    def show(self, r):
        for a in r:
            print(a['name'], ': ', a['number'])

    # Full pipeline: fetch -> parse -> rank -> display.
    def run(self):
        contents = self.__get_content()
        self.__handle_content(contents)
        rank_list = self.sort(Spider.anchors)
        self.show(rank_list)


# Guard so importing this module does not trigger a network request.
if __name__ == '__main__':
    spider = Spider()
    spider.run()

输出:(示例输出为人气降序的主播列表,截图见原文)

你可能感兴趣的:(Python,python,爬虫)