实时采集福利彩票的中奖信息和最新开奖信息-JAVA

用来采集福利彩票的中奖信息和最新开奖信息。

可以采集的彩票类型包括:1快乐8,2双色球,3福彩3D,4七乐彩,5大乐透,6排列3,7排列5,8七星彩

本项目介绍了如何使用代理IP和多线程采集公开彩票数据,项目尚不具备使用条件,仅供学习参考

项目需要用Maven引入,打开后如果有报错,可以检查是否为JDK版本问题

运行Starter类启动爬虫

需要修改test.config包下面的Memory类,可以修改以下配置项:

1. 是否使用代理IP
2. 图片保存路径
3. 代理IP的API接口
4. 线程池数量
5. 默认超时时间

如果提示
获取代理IP出错: 请到 http://www.data5u.com 获取最新的代理IP-API接口,或者修改Memory.useProxyIp=false
那么按照提示关闭代理IP服务即可。

项目代码已上传到GITHUB https://github.com/mcj8089/crawl-caipiao.git

代码分为BEAN和核心采集:

CaiPiaoWinner

package test.bean;

import java.io.Serializable;

/**
 * One row of a draw's prize table: a prize tier together with the number of
 * winning tickets and the amount paid per tier.
 *
 * <p>Plain mutable bean; every property is {@code null} until its setter is called.
 */
public class CaiPiaoWinner implements Serializable {

	private static final long serialVersionUID = 1L;

	/** Row id: lottery type + issue + prize name. */
	private String idx;

	/** Lottery id of the draw this row belongs to. */
	private String cpId;

	/** Prize name. */
	private String remark;

	/** Base number of winning tickets. */
	private Integer baseAwardNum;

	/**
	 * Base prize amount, in yuan.
	 * NOTE(review): Float keeps only about 7 significant digits, so large amounts may be rounded.
	 */
	private Float baseAwardMoney;

	/** @return the row id (lottery type + issue + prize name) */
	public String getIdx() {
		return this.idx;
	}

	/** @param idx the row id (lottery type + issue + prize name) */
	public void setIdx(String idx) {
		this.idx = idx;
	}

	/** @return the lottery id of the draw this row belongs to */
	public String getCpId() {
		return this.cpId;
	}

	/** @param cpId the lottery id of the draw this row belongs to */
	public void setCpId(String cpId) {
		this.cpId = cpId;
	}

	/** @return the prize name */
	public String getRemark() {
		return this.remark;
	}

	/** @param remark the prize name */
	public void setRemark(String remark) {
		this.remark = remark;
	}

	/** @return the base number of winning tickets */
	public Integer getBaseAwardNum() {
		return this.baseAwardNum;
	}

	/** @param baseAwardNum the base number of winning tickets */
	public void setBaseAwardNum(Integer baseAwardNum) {
		this.baseAwardNum = baseAwardNum;
	}

	/** @return the base prize amount in yuan */
	public Float getBaseAwardMoney() {
		return this.baseAwardMoney;
	}

	/** @param baseAwardMoney the base prize amount in yuan */
	public void setBaseAwardMoney(Float baseAwardMoney) {
		this.baseAwardMoney = baseAwardMoney;
	}

}

CaiPiaoIssue

package test.bean;

import java.io.Serializable;
import java.util.List;

/**
 * One lottery draw (a single issue of one lottery type): draw date, sales and
 * prize-pool figures, the winning numbers, and the prize table.
 *
 * <p>Plain mutable bean; every property is {@code null} until its setter is called.
 */
public class CaiPiaoIssue implements Serializable {

	private static final long serialVersionUID = 1L;

	/** Lottery id: lottery type + issue. */
	private String cpId;

	/**
	 * Lottery type: 1 = Kuaile 8 (KL8), 2 = Shuangseqiu (SSQ), 3 = Fucai 3D,
	 * 4 = Qilecai (QLC), 5 = Daletou (DLT), 6 = Pailie 3, 7 = Pailie 5, 8 = Qixingcai (QXC).
	 */
	private Integer type;

	/** Issue number. */
	private String issue;

	/** Draw time. */
	private String openTime;

	/**
	 * Sales amount.
	 * NOTE(review): Float keeps only about 7 significant digits, so large amounts may be rounded.
	 */
	private Float saleMoney;

	/**
	 * Prize pool amount.
	 * NOTE(review): Float keeps only about 7 significant digits, so large amounts may be rounded.
	 */
	private Float prizePoolMoney;

	/** Last date on which a prize can be claimed. */
	private String deadlineAwardDate;

	/** Winning numbers, front group. */
	private String frontWinningNum;

	/** Winning numbers, back group. */
	private String backWinningNum;

	/** Prize table of this draw. */
	private List<CaiPiaoWinner> winnerList;

	/** @return the lottery id (lottery type + issue) */
	public String getCpId() {
		return this.cpId;
	}

	/** @param cpId the lottery id (lottery type + issue) */
	public void setCpId(String cpId) {
		this.cpId = cpId;
	}

	/** @return the lottery type code (1-8, see the field description) */
	public Integer getType() {
		return this.type;
	}

	/** @param type the lottery type code (1-8, see the field description) */
	public void setType(Integer type) {
		this.type = type;
	}

	/** @return the issue number */
	public String getIssue() {
		return this.issue;
	}

	/** @param issue the issue number */
	public void setIssue(String issue) {
		this.issue = issue;
	}

	/** @return the draw time */
	public String getOpenTime() {
		return this.openTime;
	}

	/** @param openTime the draw time */
	public void setOpenTime(String openTime) {
		this.openTime = openTime;
	}

	/** @return the sales amount */
	public Float getSaleMoney() {
		return this.saleMoney;
	}

	/** @param saleMoney the sales amount */
	public void setSaleMoney(Float saleMoney) {
		this.saleMoney = saleMoney;
	}

	/** @return the prize pool amount */
	public Float getPrizePoolMoney() {
		return this.prizePoolMoney;
	}

	/** @param prizePoolMoney the prize pool amount */
	public void setPrizePoolMoney(Float prizePoolMoney) {
		this.prizePoolMoney = prizePoolMoney;
	}

	/** @return the last date on which a prize can be claimed */
	public String getDeadlineAwardDate() {
		return this.deadlineAwardDate;
	}

	/** @param deadlineAwardDate the last date on which a prize can be claimed */
	public void setDeadlineAwardDate(String deadlineAwardDate) {
		this.deadlineAwardDate = deadlineAwardDate;
	}

	/** @return the front group of winning numbers */
	public String getFrontWinningNum() {
		return this.frontWinningNum;
	}

	/** @param frontWinningNum the front group of winning numbers */
	public void setFrontWinningNum(String frontWinningNum) {
		this.frontWinningNum = frontWinningNum;
	}

	/** @return the back group of winning numbers */
	public String getBackWinningNum() {
		return this.backWinningNum;
	}

	/** @param backWinningNum the back group of winning numbers */
	public void setBackWinningNum(String backWinningNum) {
		this.backWinningNum = backWinningNum;
	}

	/** @return the prize table of this draw */
	public List<CaiPiaoWinner> getWinnerList() {
		return this.winnerList;
	}

	/** @param winnerList the prize table of this draw */
	public void setWinnerList(List<CaiPiaoWinner> winnerList) {
		this.winnerList = winnerList;
	}

}

CaipiaoHistoryCrawler

package test.crawler;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import test.bean.CaiPiaoIssue;
import test.bean.CaiPiaoWinner;
import test.config.Memory;
import test.util.CrawlerUtil;
import test.util.LogUtil;
import test.util.StrUtil;

/**
 * Crawls historical lottery draw results from www.ydniu.com.
 *
 * <p>For every supported lottery type the crawler downloads the list page to collect
 * the available issue numbers, then submits one task per issue to
 * {@code Memory.threadPool}. Each task downloads the issue detail page, parses the
 * draw data and the prize table, and hands the prize rows to {@code crawlToDB}.
 */
public class CaipiaoHistoryCrawler extends Crawler {

	/** Base URL of the draw pages; list page is {@code <base><slug>.aspx}, detail pages {@code <base><slug>/<issue>.html}. */
	private static final String OPEN_BASE_URL = "https://www.ydniu.com/open/";

	/** Text fragments identifying an error page that was returned instead of the requested page. */
	private static final String[] PROXY_ERROR_MARKERS = {
		"Bad Gateway: www.ydniu.com:443",
		"白名单校验失败"
	};

	String TAG = "CaipiaoHistoryCrawler";

	/** Request headers sent with every request. Filled once in {@link #crawl()} and only read afterwards. */
	Map<String, String> headerMap = new HashMap<String, String>();

	/** Maximum number of download attempts per URL. */
	int retryTime = 3;

	// Not used inside this class; kept because the field is visible to the rest of the package.
	AtomicInteger atoInt = new AtomicInteger(1);

	/** Lottery ids (type + issue) that were already scheduled; guarded by {@code synchronized (uniqSet)}. */
	Set<String> uniqSet = new HashSet<String>();

	/**
	 * Schedules the crawl of every supported lottery type (1 to 8).
	 *
	 * <p>The detail pages are processed asynchronously on {@code Memory.threadPool}, so the
	 * final log line is written once all tasks are submitted, not once they have finished.
	 */
	public void crawl() {

		// Every header is set here, before the first task is submitted, so worker threads
		// only ever read the map. Host/Referer carry the values that were effective for all
		// requests before (they used to be overwritten in getIssueSet ahead of the first request).
		headerMap.put("Accept", "*/*");
		headerMap.put("Accept-Encoding", "gzip, deflate, br");
		headerMap.put("Accept-Language", "zh-CN,zh;q=0.9");
		headerMap.put("Connection", "keep-alive");
		headerMap.put("Cookie", "BAIDU_SSP_lcr=https://www.baidu.com/link?url=riNXkDsMHCOiaKifIQRKh0P3RuASJjDVfIvNZy0PFwS&wd=&eqid=8a03215500000b570000000360dbeecd; _ga=GA1.2.1911959757.1625027094; _gid=GA1.2.724130032.1625027094; PHPSESSID=45a4gkalmomcnbjabcvkmij3p3; Hm_lvt_12e4883fd1649d006e3ae22a39f97330=1625027094; Hm_lvt_692bd5f9c07d3ebd0063062fb0d7622f=1625027095; _gat_UA-66069030-3=1; Hm_lpvt_692bd5f9c07d3ebd0063062fb0d7622f=1625027400; Hm_lpvt_12e4883fd1649d006e3ae22a39f97330=1625027400; KLBRSID=13ce4968858adba085afff577d78760d|1625027411|1625027093");
		headerMap.put("Host", "www.ydniu.com");
		headerMap.put("Referer", "https://www.ydniu.com/open/ssq.aspx");
		headerMap.put("Sec-Fetch-Dest", "script");
		headerMap.put("Sec-Fetch-Mode", "no-cors");
		headerMap.put("Sec-Fetch-Site", "same-site");

		for( int type = 1; type <= 8; type ++ ) {
			crawlZhongCai(type);
		}

		LogUtil.logInfo(TAG, "采集任务已完成");

	}

	/**
	 * Collects the issue numbers of one lottery type and submits one crawl task per issue.
	 *
	 * @param type lottery type: 1 KL8, 2 SSQ, 3 Fucai 3D, 4 QLC, 5 DLT, 6 Pailie 3, 7 Pailie 5, 8 QXC;
	 *             any other value (or {@code null}) is ignored
	 */
	private void crawlZhongCai(final Integer type) {

		String slug = slugOf(type);
		if( slug == null ) {
			return;
		}

		Set<String> issueSet = getIssueSet(OPEN_BASE_URL + slug + ".aspx");
		String prefix = OPEN_BASE_URL + slug + "/";
		String suffix = ".html";

		for( final String issue : issueSet ) {

			// Issue numbers repeat across lottery types, so the de-duplication key must
			// include the type; keying on the issue alone skipped other types' draws.
			String cpId = type + issue;
			boolean firstSeen;
			synchronized( uniqSet ) {
				firstSeen = uniqSet.add(cpId);
			}
			if( !firstSeen ) {
				continue;
			}

			final String url = prefix + issue + suffix;
			Memory.threadPool.execute(new Runnable() {

				@Override
				public void run() {
					crawlIssue(type, issue, url);
				}
			});

		}

	}

	/**
	 * Maps a lottery type code to the path segment used in the site's URLs.
	 *
	 * @param type lottery type code, may be {@code null}
	 * @return the path segment, or {@code null} when the type is not supported
	 */
	private static String slugOf(Integer type) {
		if( type == null ) {
			return null;
		}
		switch( type ) {
			case 1: return "kl8";
			case 2: return "ssq";
			case 3: return "sd";
			case 4: return "qlc";
			case 5: return "dlt";
			case 6: return "pl3";
			case 7: return "pl5";
			case 8: return "qxc";
			default: return null;
		}
	}

	/**
	 * Downloads and parses one issue detail page and passes its prize rows to {@code crawlToDB}.
	 * Failures are logged and never propagate into the thread pool.
	 *
	 * @param type  lottery type code
	 * @param issue issue number
	 * @param url   URL of the issue detail page
	 */
	private void crawlIssue(Integer type, String issue, String url) {
		try {
			String html = fetchHtml(url);
			if( html == null ) {
				LogUtil.logInfo(TAG, "采集开奖详情失败: " + url);
				return;
			}

			Document doc = Jsoup.parse(html);

			CaiPiaoIssue caiPiaoIssue = parseIssue(doc, type, issue);
			List<CaiPiaoWinner> winnerList = parseWinners(doc, type, issue, caiPiaoIssue.getCpId());
			caiPiaoIssue.setWinnerList(winnerList);

			// NOTE(review): as before, only the prize rows are handed over; the CaiPiaoIssue
			// itself is not persisted here. Confirm against crawlToDB whether that is intended.
			crawlToDB(winnerList);

		} catch (Exception e) {
			LogUtil.logInfo(TAG, "解析开奖详情报错: " + url, e);
		}
	}

	/**
	 * Builds the draw bean from an issue detail page. Values missing from the page are left
	 * {@code null} (or empty for the number groups) instead of aborting the whole issue.
	 *
	 * @param doc   parsed detail page
	 * @param type  lottery type code
	 * @param issue issue number
	 * @return the populated draw bean, without its winner list
	 */
	private CaiPiaoIssue parseIssue(Document doc, Integer type, String issue) {
		String[] dateParts = doc.select("#openDate").text().split(",");
		String openTime = dateParts.length > 0 ? dateParts[0].replace("开奖日期:", "") : "";
		String deadlineAwardDate = dateParts.length > 1 ? dateParts[1].replace("兑奖截止日期:", "") : null;

		CaiPiaoIssue caiPiaoIssue = new CaiPiaoIssue();
		caiPiaoIssue.setIssue(issue);
		caiPiaoIssue.setType(type);
		caiPiaoIssue.setCpId(type + issue);
		caiPiaoIssue.setOpenTime(openTime);
		caiPiaoIssue.setDeadlineAwardDate(deadlineAwardDate);
		// Games without a back group yield no <em> elements; joinTexts then returns "".
		caiPiaoIssue.setFrontWinningNum(joinTexts(doc.select("#openNumber i")));
		caiPiaoIssue.setBackWinningNum(joinTexts(doc.select("#openNumber em")));
		caiPiaoIssue.setSaleMoney(parseFloatOrNull(doc.select("#sumSales").text()));
		caiPiaoIssue.setPrizePoolMoney(parseFloatOrNull(doc.select("#prizePool").text()));
		return caiPiaoIssue;
	}

	/**
	 * Reads the prize table ({@code #t_WinType}) of an issue detail page.
	 *
	 * @param doc   parsed detail page
	 * @param type  lottery type code
	 * @param issue issue number
	 * @param cpId  lottery id stored on every row
	 * @return one entry per table row that has at least three {@code td} cells
	 */
	private List<CaiPiaoWinner> parseWinners(Document doc, Integer type, String issue, String cpId) {
		List<CaiPiaoWinner> winnerList = new ArrayList<CaiPiaoWinner>();

		for( Element tr : doc.select("#t_WinType tr") ) {
			Elements tds = tr.select("td");
			if( tds.size() < 3 ) {
				// Rows without data cells (e.g. a header row) carry no prize information.
				continue;
			}

			String name = tds.get(0).text();

			CaiPiaoWinner winner = new CaiPiaoWinner();
			winner.setRemark(name);
			winner.setBaseAwardNum(parseIntOrNull(tds.get(1).text()));
			winner.setBaseAwardMoney(parseFloatOrNull(tds.get(2).text()));
			winner.setCpId(cpId);
			winner.setIdx(type + issue + name);

			winnerList.add(winner);
		}

		return winnerList;
	}

	/**
	 * Collects the issue numbers offered by a lottery list page.
	 *
	 * @param url URL of the list page
	 * @return the issue numbers found; empty when the page could not be downloaded
	 */
	private Set<String> getIssueSet(String url) {
		Set<String> reSet = new HashSet<>();

		String html = fetchHtml(url);
		if( html == null ) {
			return reSet;
		}

		Document document = Jsoup.parse(html);
		for( Element el : document.select(".iSelectBox .iSelectList.listOverFlow a") ) {
			reSet.add(el.text());
		}

		return reSet;
	}

	/**
	 * Downloads a page with at most {@link #retryTime} attempts. When the proxy is enabled
	 * the final attempt bypasses it. An empty response or a proxy error page counts as a
	 * failed attempt, so the number of requests is always bounded.
	 *
	 * @param url URL to download
	 * @return the page content, or {@code null} when every attempt failed
	 */
	private String fetchHtml(String url) {
		for( int attempt = 1; attempt <= retryTime; attempt ++ ) {
			boolean viaProxy = Memory.useProxyIp && attempt < retryTime;
			try {
				String html = CrawlerUtil.getHtml(url, viaProxy, false, Memory.DEFAULT_TIMEOUT, headerMap);
				if( StrUtil.isNotEmpty(html) && !isProxyErrorPage(html) ) {
					return html;
				}
			} catch ( Exception e ) {
				LogUtil.logInfo(TAG, "采集分期报错", e);
			}
		}
		return null;
	}

	/**
	 * @param html non-null page content
	 * @return {@code true} when the content contains one of the known proxy error markers
	 */
	private static boolean isProxyErrorPage(String html) {
		for( String marker : PROXY_ERROR_MARKERS ) {
			if( html.contains(marker) ) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Joins the text of the given elements with commas.
	 *
	 * @param elements elements to read
	 * @return the comma-separated texts, or an empty string when there are no elements
	 */
	private static String joinTexts(Elements elements) {
		StringBuilder sb = new StringBuilder();
		for( Element el : elements ) {
			sb.append(el.text()).append(",");
		}
		if( sb.length() > 0 ) {
			sb.setLength(sb.length() - 1); // drop the trailing comma
		}
		return sb.toString();
	}

	/**
	 * Parses a decimal number that may contain thousands separators.
	 *
	 * @param text raw cell text, may be {@code null}
	 * @return the parsed value, or {@code null} when the text is empty or not a number
	 */
	private static Float parseFloatOrNull(String text) {
		String cleaned = text == null ? "" : text.replace(",", "").trim();
		if( cleaned.isEmpty() ) {
			return null;
		}
		try {
			return Float.valueOf(cleaned);
		} catch ( NumberFormatException ignored ) {
			// Non-numeric cell text: store "unknown" rather than drop the row.
			return null;
		}
	}

	/**
	 * Parses an integer that may contain thousands separators.
	 *
	 * @param text raw cell text, may be {@code null}
	 * @return the parsed value, or {@code null} when the text is empty or not an integer
	 */
	private static Integer parseIntOrNull(String text) {
		String cleaned = text == null ? "" : text.replace(",", "").trim();
		if( cleaned.isEmpty() ) {
			return null;
		}
		try {
			return Integer.valueOf(cleaned);
		} catch ( NumberFormatException ignored ) {
			// Non-numeric cell text: store "unknown" rather than drop the row.
			return null;
		}
	}

}

你可能感兴趣的:(爬虫系列,java,java采集彩票,代理ip)