突发奇想:是否可以从双色球的历史开奖数据中分析出出现次数最多的号码组合?那就先从"1+1"(1个红球+1个蓝球)的组合开始分析,嘿嘿,经常中这种组合也挺不错哦!现在开始获取数据,从中彩网爬取数据应该比较靠谱,下面是脚本。
# -*- coding:utf-8 -*-
import requests, bs4
import os, time
import operator
from itertools import combinations, permutations
class DoubleColorBall(object):
    """Scrape lottery draw rows from zhcw.com and store them in a text file.

    One results page is requested per year from ``baseUrl``. Every parsed row
    is written to ``dataFile`` as one line: the row label (presumably the
    draw/issue number) followed by the red balls and the blue ball, all
    separated by single spaces.
    """

    def __init__(self, dataFile='d:\\balls_data.txt'):
        """Create a scraper.

        :param dataFile: path of the text file the draws are written to.
        """
        self.balls = {}
        self.baseUrl = 'http://tubiao.zhcw.com/tubiao/ssqNew/ssqJsp/ssqZongHeFengBuTuAsc.jsp'
        self.dataFile = dataFile
        # Created on the first request and reused for all later ones.
        self.session = None

    def getHtml(self, url, timeout=10):
        """Download ``url`` and return the response body as text.

        :param url: page to fetch.
        :param timeout: seconds to wait for the server before requests raises
            a timeout error; without it a stalled connection blocks forever.
        :return: decoded response body.
        """
        headers = {
            'Referer':'http://tubiao.zhcw.com/tubiao/ssqNew/ssqInc/ssqZongHeFengBuTuAsckj_year=2016.html',
            'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
        }
        # One shared session keeps the connection alive between the per-year
        # requests instead of opening a fresh session for each of them.
        if getattr(self, 'session', None) is None:
            self.session = requests.Session()
        response = self.session.get(url, headers=headers, timeout=timeout)
        return response.text

    def getBall(self, startYear=2003, endYear=2018):
        """Fetch and store the draws of every year in ``[startYear, endYear)``.

        :param startYear: first year to request (inclusive).
        :param endYear: year at which to stop (exclusive).
        """
        # saveBall appends, so start from an empty file; otherwise running the
        # scraper a second time would store every draw twice and skew the
        # counts computed from the file.
        with open(self.dataFile, 'w'):
            pass
        for year in range(startYear, endYear):
            url = self.baseUrl + '?kj_year=%s' % (year, )
            print(url)
            html = self.getHtml(url)
            self.bs = bs4.BeautifulSoup(html, 'html.parser')
            if self.bs:
                data = self.bs.find_all(class_='hgt')
                self.parseBall(data)

    def parseBall(self, data):
        """Extract the balls from the table rows in ``data`` and save them.

        :param data: iterable of elements found with class ``hgt``; anything
            that is not a ``bs4`` tag is ignored.
        """
        self.balls = {}
        for row in data:
            if not isinstance(row, bs4.element.Tag):
                continue
            center = row.find(class_="qh7").string.strip()
            print(center)
            # Rows labelled "模拟..." end the real data (presumably the
            # site's simulated-pick rows -- confirm against the page).
            if center.startswith("模拟"):
                break
            redBalls = row.find_all(class_="redqiu")
            blueBall = row.find(class_="blueqiu3").string.strip()
            self.balls[center] = [r.string for r in redBalls] + [blueBall]
        self.saveBall(self.balls)

    def saveBall(self, data):
        """Append ``data`` to ``dataFile``, one line per key, keys descending.

        :param data: mapping of row label -> list of ball strings.
        """
        with open(self.dataFile, 'a') as f:
            for r in sorted(data, reverse=True):
                f.write(str(r) + ' ' + ' '.join(data[r]) + '\n')
class Analysis(object):
    """Count how often each (red ball, blue ball) pair occurs in the data file.

    Each line of the data file is expected to hold a label followed by the red
    balls and, as the last field, the blue ball, separated by whitespace.
    """

    def __init__(self, dataFile='d:\\balls_data.txt'):
        """Create an analyser.

        :param dataFile: path of the text file to read the draws from.
        """
        self.redrst = {}
        self.bluerst = {}
        # Maps "<red>-<blue>" to the number of lines containing that pair.
        self.redbluerst = {}
        self.dataFile = dataFile

    def run(self):
        """Tally every red/blue pair and print the three most frequent ones.

        The printed list is sorted by ascending count, so the most frequent
        pair comes last.
        """
        with open(self.dataFile, 'r') as f:
            for line in f:
                # split() without arguments also drops the trailing newline;
                # splitting on ' ' left it attached to the blue ball, so every
                # key ended in '\n'.
                fields = line.split()
                if len(fields) < 3:
                    # Blank or truncated line: no red/blue pair to count.
                    continue
                redBalls = fields[1:-1]
                blueBall = fields[-1]
                for combo in combinations(redBalls, 1):
                    key = '-'.join(combo) + '-' + blueBall
                    self.redbluerst[key] = self.redbluerst.get(key, 0) + 1
        sorted_redblue = sorted(self.redbluerst.items(), key=operator.itemgetter(1))
        print(sorted_redblue[-3:])
if __name__ == '__main__':
    # Scrape first: the analysis reads the file that the scraper writes.
    DoubleColorBall().getBall()
    # Then tally the red/blue pairs and print the three most frequent.
    Analysis().run()
输出结果:
[('20-9\n', 38), ('8-16\n', 39), ('1-12\n', 40)]
总共爬取了2184期的数据,结果1+1组合中出现次数最多的也只有40次,出现频率约为40/2184≈0.018。O(∩_∩)O哈哈~