可以采集的彩票类型包括:1快乐8,2双色球,3福彩3D,4七乐彩,5大乐透,6排列3,7排列5,8七星彩
本项目介绍了如何使用代理IP和多线程采集公开彩票数据,项目尚不具备使用条件,仅供学习参考
项目需要用Maven引入,打开后如果有报错,可以检查是否为JDK版本问题
运行Starter类启动爬虫
如需调整配置,请修改test.config包下面的Memory类,可以修改以下内容:
1. 是否使用代理IP
2. 图片保存路径
3. 代理IP的API接口
4. 线程池数量
5. 默认超时时间
如果提示
获取代理IP出错: 请到 http://www.data5u.com 获取最新的代理IP-API接口,或者修改Memory.useProxyIp=false
那么按照提示关闭代理IP服务即可。
项目代码已上传到GITHUB https://github.com/mcj8089/crawl-caipiao.git
代码分为BEAN和核心采集:
CaiPiaoWinner
package test.bean;
import java.io.Serializable;
/**
 * One prize tier of a lottery draw: how many bets won it and how much each pays.
 *
 * <p>Plain mutable bean; every property starts out {@code null} until its setter is called.
 */
public class CaiPiaoWinner implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Record ID: lottery type + issue + prize name. */
    private String idx;

    /** Lottery ID of the draw this prize tier belongs to. */
    private String cpId;

    /** Prize name. */
    private String remark;

    /** Base number of winning bets. */
    private Integer baseAwardNum;

    /** Base prize amount, in yuan. */
    private Float baseAwardMoney;

    /** @return the record ID, or {@code null} if not set */
    public String getIdx() {
        return idx;
    }

    /** @param idx the record ID to store */
    public void setIdx(String idx) {
        this.idx = idx;
    }

    /** @return the lottery ID, or {@code null} if not set */
    public String getCpId() {
        return cpId;
    }

    /** @param cpId the lottery ID to store */
    public void setCpId(String cpId) {
        this.cpId = cpId;
    }

    /** @return the prize name, or {@code null} if not set */
    public String getRemark() {
        return remark;
    }

    /** @param remark the prize name to store */
    public void setRemark(String remark) {
        this.remark = remark;
    }

    /** @return the base number of winning bets, or {@code null} if not set */
    public Integer getBaseAwardNum() {
        return baseAwardNum;
    }

    /** @param baseAwardNum the base number of winning bets to store */
    public void setBaseAwardNum(Integer baseAwardNum) {
        this.baseAwardNum = baseAwardNum;
    }

    /** @return the base prize amount in yuan, or {@code null} if not set */
    public Float getBaseAwardMoney() {
        return baseAwardMoney;
    }

    /** @param baseAwardMoney the base prize amount in yuan to store */
    public void setBaseAwardMoney(Float baseAwardMoney) {
        this.baseAwardMoney = baseAwardMoney;
    }
}
CaiPiaoIssue
package test.bean;
import java.io.Serializable;
import java.util.List;
/**
 * One lottery draw (issue): when it was drawn, the winning numbers, the money
 * figures and the list of prize tiers.
 *
 * <p>Plain mutable bean; every property starts out {@code null} until its setter is called.
 */
public class CaiPiaoIssue implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Lottery ID: lottery type + issue. */
    private String cpId;

    /**
     * Lottery type: 1 Kuaile 8, 2 Shuangseqiu (double colour ball), 3 Fucai 3D,
     * 4 Qilecai, 5 Daletou, 6 Pailie 3, 7 Pailie 5, 8 Qixingcai.
     */
    private Integer type;

    /** Issue number. */
    private String issue;

    /** Draw time. */
    private String openTime;

    /** Sales amount. */
    private Float saleMoney;

    /** Prize pool amount. */
    private Float prizePoolMoney;

    /** Deadline date for claiming prizes. */
    private String deadlineAwardDate;

    /** Winning numbers, front section. */
    private String frontWinningNum;

    /** Winning numbers, back section. */
    private String backWinningNum;

    /** Prize tiers of this draw. */
    private List<CaiPiaoWinner> winnerList;

    /** @return the lottery ID, or {@code null} if not set */
    public String getCpId() {
        return cpId;
    }

    /** @param cpId the lottery ID to store */
    public void setCpId(String cpId) {
        this.cpId = cpId;
    }

    /** @return the lottery type code, or {@code null} if not set */
    public Integer getType() {
        return type;
    }

    /** @param type the lottery type code to store */
    public void setType(Integer type) {
        this.type = type;
    }

    /** @return the issue number, or {@code null} if not set */
    public String getIssue() {
        return issue;
    }

    /** @param issue the issue number to store */
    public void setIssue(String issue) {
        this.issue = issue;
    }

    /** @return the draw time, or {@code null} if not set */
    public String getOpenTime() {
        return openTime;
    }

    /** @param openTime the draw time to store */
    public void setOpenTime(String openTime) {
        this.openTime = openTime;
    }

    /** @return the sales amount, or {@code null} if not set */
    public Float getSaleMoney() {
        return saleMoney;
    }

    /** @param saleMoney the sales amount to store */
    public void setSaleMoney(Float saleMoney) {
        this.saleMoney = saleMoney;
    }

    /** @return the prize pool amount, or {@code null} if not set */
    public Float getPrizePoolMoney() {
        return prizePoolMoney;
    }

    /** @param prizePoolMoney the prize pool amount to store */
    public void setPrizePoolMoney(Float prizePoolMoney) {
        this.prizePoolMoney = prizePoolMoney;
    }

    /** @return the prize-claim deadline date, or {@code null} if not set */
    public String getDeadlineAwardDate() {
        return deadlineAwardDate;
    }

    /** @param deadlineAwardDate the prize-claim deadline date to store */
    public void setDeadlineAwardDate(String deadlineAwardDate) {
        this.deadlineAwardDate = deadlineAwardDate;
    }

    /** @return the front-section winning numbers, or {@code null} if not set */
    public String getFrontWinningNum() {
        return frontWinningNum;
    }

    /** @param frontWinningNum the front-section winning numbers to store */
    public void setFrontWinningNum(String frontWinningNum) {
        this.frontWinningNum = frontWinningNum;
    }

    /** @return the back-section winning numbers, or {@code null} if not set */
    public String getBackWinningNum() {
        return backWinningNum;
    }

    /** @param backWinningNum the back-section winning numbers to store */
    public void setBackWinningNum(String backWinningNum) {
        this.backWinningNum = backWinningNum;
    }

    /** @return the prize tiers of this draw, or {@code null} if not set */
    public List<CaiPiaoWinner> getWinnerList() {
        return winnerList;
    }

    /** @param winnerList the prize tiers to store (kept by reference, not copied) */
    public void setWinnerList(List<CaiPiaoWinner> winnerList) {
        this.winnerList = winnerList;
    }
}
CaipiaoHistoryCrawler
package test.crawler;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import test.bean.CaiPiaoIssue;
import test.bean.CaiPiaoWinner;
import test.config.Memory;
import test.util.CrawlerUtil;
import test.util.LogUtil;
import test.util.StrUtil;
/**
 * Crawls historical lottery draw pages from www.ydniu.com.
 *
 * <p>For each lottery type the crawler first downloads the listing page to
 * collect the available issue numbers, then submits one task per issue to
 * {@code Memory.threadPool}. Each task downloads the issue's detail page,
 * parses the winning numbers and prize tiers, and hands the prize tiers to
 * {@code crawlToDB}.
 *
 * <p>Lottery type codes: 1 Kuaile 8, 2 Shuangseqiu, 3 Fucai 3D, 4 Qilecai,
 * 5 Daletou, 6 Pailie 3, 7 Pailie 5, 8 Qixingcai.
 */
public class CaipiaoHistoryCrawler extends Crawler {

    /** Body marker of the proxy's "bad gateway" error page. */
    private static final String BAD_GATEWAY_MARKER = "Bad Gateway: www.ydniu.com:443";

    /** Body marker of the proxy's "whitelist check failed" error page. */
    private static final String WHITELIST_FAILED_MARKER = "白名单校验失败";

    /**
     * Upper bound on extra attempts granted when a proxy error page is
     * returned, so a permanently broken proxy cannot make a fetch spin forever.
     */
    private static final int MAX_PROXY_ERROR_RETRIES = 10;

    /** Common URL prefix of every listing and detail page. */
    private static final String OPEN_BASE_URL = "https://www.ydniu.com/open/";

    /** URL path segment per lottery type; index is {@code type - 1}. */
    private static final String[] TYPE_SLUGS = {
        "kl8", "ssq", "sd", "qlc", "dlt", "pl3", "pl5", "qxc"
    };

    String TAG = "CaipiaoHistoryCrawler";
    Map<String, String> headerMap = new HashMap<String, String>();
    int retryTime = 3;
    AtomicInteger atoInt = new AtomicInteger(1);
    Set<String> uniqSet = new HashSet<String>();

    /**
     * Prepares the request headers and schedules the crawl of all eight
     * lottery types.
     *
     * <p>NOTE(review): issue pages are processed asynchronously on
     * {@code Memory.threadPool}, so the final log line is written once all
     * tasks have been submitted, not necessarily after they have finished.
     */
    public void crawl() {
        headerMap.put("Accept", "*/*");
        headerMap.put("Accept-Encoding", "gzip, deflate, br");
        headerMap.put("Accept-Language", "zh-CN,zh;q=0.9");
        headerMap.put("Connection", "keep-alive");
        headerMap.put("Cookie", "BAIDU_SSP_lcr=https://www.baidu.com/link?url=riNXkDsMHCOiaKifIQRKh0P3RuASJjDVfIvNZy0PFwS&wd=&eqid=8a03215500000b570000000360dbeecd; _ga=GA1.2.1911959757.1625027094; _gid=GA1.2.724130032.1625027094; PHPSESSID=45a4gkalmomcnbjabcvkmij3p3; Hm_lvt_12e4883fd1649d006e3ae22a39f97330=1625027094; Hm_lvt_692bd5f9c07d3ebd0063062fb0d7622f=1625027095; _gat_UA-66069030-3=1; Hm_lpvt_692bd5f9c07d3ebd0063062fb0d7622f=1625027400; Hm_lpvt_12e4883fd1649d006e3ae22a39f97330=1625027400; KLBRSID=13ce4968858adba085afff577d78760d|1625027411|1625027093");
        // Host and Referer are fixed once here, before any task is submitted,
        // so worker threads never observe the header map being modified.
        headerMap.put("Host", "www.ydniu.com");
        headerMap.put("Referer", "https://www.ydniu.com/open/ssq.aspx");
        headerMap.put("Sec-Fetch-Dest", "script");
        headerMap.put("Sec-Fetch-Mode", "no-cors");
        headerMap.put("Sec-Fetch-Site", "same-site");
        for (int type = 1; type <= TYPE_SLUGS.length; type++) {
            crawlZhongCai(type);
        }
        LogUtil.logInfo(TAG, "采集任务已完成");
    }

    /**
     * Collects the issue numbers of one lottery type and submits a crawl task
     * for every issue page that has not been scheduled before.
     *
     * @param type lottery type code, 1..8; any other value is ignored
     */
    private void crawlZhongCai(Integer type) {
        if (type == null || type < 1 || type > TYPE_SLUGS.length) {
            return;
        }
        final String slug = TYPE_SLUGS[type - 1];
        Set<String> issueSet = getIssueSet(OPEN_BASE_URL + slug + ".aspx");
        for (final String issue : issueSet) {
            final String url = OPEN_BASE_URL + slug + "/" + issue + ".html";
            // De-duplicate on the full URL: different lottery types reuse the
            // same issue numbers, so the bare issue is not a unique key.
            if (!markSeen(url)) {
                continue;
            }
            Memory.threadPool.execute(new Runnable() {
                @Override
                public void run() {
                    try {
                        crawlIssue(type, issue, url);
                    } catch (Exception e) {
                        LogUtil.logInfo(TAG, "解析开奖详情报错: " + url, e);
                    }
                }
            });
        }
    }

    /**
     * Records a key in {@code uniqSet}.
     *
     * @param key the key to record
     * @return {@code true} if the key was not present before this call
     */
    private boolean markSeen(String key) {
        // uniqSet is a plain HashSet, so guard every access with its monitor.
        synchronized (uniqSet) {
            return uniqSet.add(key);
        }
    }

    /**
     * Downloads and parses one issue detail page, then passes its prize tiers
     * to {@code crawlToDB}.
     *
     * @param type  lottery type code
     * @param issue issue number as shown on the listing page
     * @param url   detail page URL of this issue
     */
    private void crawlIssue(Integer type, String issue, String url) {
        String html = fetchHtml(url);
        if (!StrUtil.isNotEmpty(html)) {
            LogUtil.logInfo(TAG, "采集分期失败,已放弃: " + url);
            return;
        }
        Document startDoc = Jsoup.parse(html);

        // "#openDate" holds the draw date and the claim deadline separated by a comma.
        String[] dateParts = startDoc.select("#openDate").text().split(",");
        String openTime = dateParts[0].replace("开奖日期:", "");
        String deadlineAwardDate =
                dateParts.length > 1 ? dateParts[1].replace("兑奖截止日期:", "") : null;

        CaiPiaoIssue caiPiaoIssue = new CaiPiaoIssue();
        caiPiaoIssue.setIssue(issue);
        caiPiaoIssue.setType(type);
        caiPiaoIssue.setCpId(type + issue);
        caiPiaoIssue.setOpenTime(openTime);
        caiPiaoIssue.setDeadlineAwardDate(deadlineAwardDate);
        // Front numbers are the <i> elements, back numbers the <em> elements;
        // either list may be empty, which yields an empty string.
        caiPiaoIssue.setFrontWinningNum(joinTexts(startDoc.select("#openNumber i")));
        caiPiaoIssue.setBackWinningNum(joinTexts(startDoc.select("#openNumber em")));
        caiPiaoIssue.setSaleMoney(parseFloatOrNull(startDoc.select("#sumSales").text()));
        caiPiaoIssue.setPrizePoolMoney(parseFloatOrNull(startDoc.select("#prizePool").text()));

        List<CaiPiaoWinner> winnerList =
                parseWinners(startDoc, caiPiaoIssue.getCpId(), type, issue);
        caiPiaoIssue.setWinnerList(winnerList);
        // NOTE(review): only the prize tiers are handed to crawlToDB; the
        // issue-level record built above is not persisted here - confirm
        // whether that is intended.
        crawlToDB(winnerList);
    }

    /**
     * Parses the rows of the "#t_WinType" table into prize tiers.
     *
     * <p>Rows with fewer than three cells, or whose count/amount cells are not
     * numeric, are skipped instead of aborting the whole issue.
     *
     * @param doc   parsed detail page
     * @param cpId  lottery ID of the issue
     * @param type  lottery type code
     * @param issue issue number
     * @return the parsed prize tiers, possibly empty, never {@code null}
     */
    private List<CaiPiaoWinner> parseWinners(Document doc, String cpId, Integer type, String issue) {
        List<CaiPiaoWinner> winnerList = new ArrayList<CaiPiaoWinner>();
        for (Element tr : doc.select("#t_WinType tr")) {
            Elements cells = tr.select("td");
            if (cells.size() < 3) {
                continue;
            }
            String name = cells.get(0).text();
            String num = stripGrouping(cells.get(1).text());
            String money = stripGrouping(cells.get(2).text());
            try {
                CaiPiaoWinner winner = new CaiPiaoWinner();
                winner.setBaseAwardMoney(Float.valueOf(money));
                winner.setBaseAwardNum(Integer.valueOf(num));
                winner.setRemark(name);
                winner.setCpId(cpId);
                winner.setIdx(type + issue + name);
                winnerList.add(winner);
            } catch (NumberFormatException e) {
                LogUtil.logInfo(TAG, "跳过无法解析的中奖行: " + cpId + " " + name + " " + num + " " + money);
            }
        }
        return winnerList;
    }

    /**
     * Reads the issue numbers offered by a lottery listing page.
     *
     * @param url listing page URL
     * @return the issue numbers found; empty if the page could not be fetched
     */
    private Set<String> getIssueSet(String url) {
        Set<String> reSet = new HashSet<>();
        String html = fetchHtml(url);
        if (!StrUtil.isNotEmpty(html)) {
            LogUtil.logInfo(TAG, "获取期号列表失败: " + url);
            return reSet;
        }
        Document document = Jsoup.parse(html);
        for (Element el : document.select(".iSelectBox .iSelectList.listOverFlow a")) {
            reSet.add(el.text());
        }
        return reSet;
    }

    /**
     * Downloads a page, trying up to {@code retryTime} times.
     *
     * <p>When the proxy is enabled the last attempt bypasses it. A proxy error
     * page does not consume an attempt, but only up to
     * {@link #MAX_PROXY_ERROR_RETRIES} times per call.
     *
     * @param url page URL
     * @return the page body, or {@code null} if no usable body was obtained
     */
    private String fetchHtml(String url) {
        int proxyErrorRetries = 0;
        for (int attempt = 1; attempt <= retryTime; attempt++) {
            try {
                String html;
                if (attempt == retryTime && Memory.useProxyIp) {
                    html = CrawlerUtil.getHtml(url, false, false, Memory.DEFAULT_TIMEOUT, headerMap);
                } else {
                    html = CrawlerUtil.getHtml(url, Memory.useProxyIp, false, Memory.DEFAULT_TIMEOUT, headerMap);
                }
                if (!StrUtil.isNotEmpty(html)) {
                    continue;
                }
                if (!isProxyErrorPage(html)) {
                    return html;
                }
                if (proxyErrorRetries < MAX_PROXY_ERROR_RETRIES) {
                    proxyErrorRetries++;
                    attempt--; // give this attempt back; the budget above bounds it
                }
            } catch (Exception e) {
                LogUtil.logInfo(TAG, "采集分期报错", e);
            }
        }
        return null;
    }

    /**
     * Tells whether a non-null page body is one of the proxy's error pages.
     *
     * @param html page body, must not be {@code null}
     * @return {@code true} if the body contains a known proxy error marker
     */
    private static boolean isProxyErrorPage(String html) {
        return html.contains(BAD_GATEWAY_MARKER) || html.contains(WHITELIST_FAILED_MARKER);
    }

    /**
     * Joins the text of the given elements with commas.
     *
     * @param elements elements to join, may be empty
     * @return the comma-separated texts, or an empty string for no elements
     */
    private static String joinTexts(Elements elements) {
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < elements.size(); i++) {
            if (i > 0) {
                joined.append(",");
            }
            joined.append(elements.get(i).text());
        }
        return joined.toString();
    }

    /**
     * Removes thousands separators and surrounding whitespace from a number.
     *
     * @param raw raw cell text, must not be {@code null}
     * @return the text without commas, trimmed
     */
    private static String stripGrouping(String raw) {
        return raw.replace(",", "").trim();
    }

    /**
     * Parses an amount that may contain thousands separators.
     *
     * @param raw raw text, must not be {@code null}
     * @return the parsed value, or {@code null} if the text is blank or not numeric
     */
    private static Float parseFloatOrNull(String raw) {
        String cleaned = stripGrouping(raw);
        if (cleaned.isEmpty()) {
            return null;
        }
        try {
            return Float.valueOf(cleaned);
        } catch (NumberFormatException ignored) {
            // A missing or placeholder amount must not discard the rest of the issue.
            return null;
        }
    }
}