直接上代码
1. 如何获取代理 IP
package com.aopa.test;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* 获取代理IP,需要
* com.alibaba.fastjson.JSONObject以及Jsoup
*/
public class ProxyCralwerUnusedVPN {
ThreadLocal localWantedNumber = new ThreadLocal();
ThreadLocal> localProxyInfos = new ThreadLocal>();
public static void main(String[] args) {
ProxyCralwerUnusedVPN proxyCrawler = new ProxyCralwerUnusedVPN();
/**
* 想要获取的代理IP个数,由需求方自行指定。(如果个数太多,将导致返回变慢)
*/
proxyCrawler.startCrawler(3);
}
/**
* 暴露给外部模块调用的入口
* @param wantedNumber 调用方期望获取到的代理IP个数
*/
public String startCrawler(int wantedNumber) {
localWantedNumber.set(wantedNumber);
kuaidailiCom("http://www.xicidaili.com/nn/", 15);
kuaidailiCom("http://www.xicidaili.com/nt/", 15);
kuaidailiCom("http://www.xicidaili.com/wt/", 15);
kuaidailiCom("http://www.kuaidaili.com/free/inha/", 15);
kuaidailiCom("http://www.kuaidaili.com/free/intr/", 15);
kuaidailiCom("http://www.kuaidaili.com/free/outtr/", 15);
//返回数据ip+端口号
List rslist = localProxyInfos.get();
String result = "";
if(rslist!=null && rslist.size()>0){
//获取第一个ip
result += rslist.get(0).getIp()+","+rslist.get(0).getPort();
}
System.out.println(result);
return result;
}
private void kuaidailiCom(String baseUrl, int totalPage) {
String ipReg = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3} \\d{1,6}";
Pattern ipPtn = Pattern.compile(ipReg);
for (int i = 1; i < totalPage; i++) {
if (getCurrentProxyNumber() >= localWantedNumber.get()) {
return;
}
try {
Document doc = Jsoup.connect(baseUrl + i + "/")
.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
.header("Accept-Encoding", "gzip, deflate, sdch")
.header("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6")
.header("Cache-Control", "max-age=0")
.header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36")
.header("Cookie", "Hm_lvt_7ed65b1cc4b810e9fd37959c9bb51b31=1462812244; _gat=1; _ga=GA1.2.1061361785.1462812244")
.header("Host", "www.kuaidaili.com")
.header("Referer", "http://www.kuaidaili.com/free/outha/")
.timeout(30 * 1000)
.get();
Matcher m = ipPtn.matcher(doc.text());
while (m.find()) {
if (getCurrentProxyNumber() >= localWantedNumber.get()) {
break;
}
String[] strs = m.group().split(" ");
if (checkProxy(strs[0], Integer.parseInt(strs[1]))) {
System.out.println("获取到可用代理IP\t" + strs[0] + "\t" + strs[1]);
addProxy(strs[0], strs[1], "http");
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
/**
* 判断ip和端口是否有效
* @param ip
* @param port
* @return
*/
private static boolean checkProxy(String ip, Integer port) {
try {
//http://1212.ip138.com/ic.asp 可以换成任何比较快的网页
Jsoup.connect("http://1212.ip138.com/ic.asp")
.timeout(2 * 1000)
.proxy(ip, port)
.get();
return true;
} catch (Exception e) {
return false;
}
}
private int getCurrentProxyNumber() {
List proxyInfos = localProxyInfos.get();
if (proxyInfos == null) {
proxyInfos = new ArrayList();
localProxyInfos.set(proxyInfos);
return 0;
}
else {
return proxyInfos.size();
}
}
private void addProxy(String ip, String port, String protocol){
List proxyInfos = localProxyInfos.get();
if (proxyInfos == null) {
proxyInfos = new ArrayList();
proxyInfos.add(new ProxyInfo(ip, port, protocol));
}
else {
proxyInfos.add(new ProxyInfo(ip, port, protocol));
}
}
}
/**
 * Mutable description of a single proxy endpoint: address, protocol type and
 * optional credentials.
 */
class ProxyInfo {

    // Endpoint coordinates supplied by the creator.
    private String ip;
    private String port;
    private String type;

    // Credentials; both start out as empty strings.
    private String userName;
    private String password;

    // Flag that starts out as 1; its meaning is not defined in this class.
    private int is_internet;

    /**
     * Creates a proxy entry with empty credentials and {@code is_internet} set to 1.
     *
     * @param ip   proxy host
     * @param port proxy port, kept as text
     * @param type proxy protocol/type label
     */
    public ProxyInfo(String ip, String port, String type) {
        this.userName = "";
        this.password = "";
        this.is_internet = 1;
        this.ip = ip;
        this.port = port;
        this.type = type;
    }

    /** @return the user name for the proxy */
    public String getUserName() {
        return this.userName;
    }

    /** @return the proxy host */
    public String getIp() {
        return this.ip;
    }

    /** @return the password for the proxy */
    public String getPassword() {
        return this.password;
    }

    /** @return the proxy protocol/type label */
    public String getType() {
        return this.type;
    }

    /** @return the proxy port as text */
    public String getPort() {
        return this.port;
    }

    /** @return the current value of the {@code is_internet} flag */
    public int getIs_internet() {
        return this.is_internet;
    }

    /** @param userName the user name for the proxy */
    public void setUserName(String userName) {
        this.userName = userName;
    }

    /** @param ip the proxy host */
    public void setIp(String ip) {
        this.ip = ip;
    }

    /** @param password the password for the proxy */
    public void setPassword(String password) {
        this.password = password;
    }

    /** @param type the proxy protocol/type label */
    public void setType(String type) {
        this.type = type;
    }

    /** @param port the proxy port as text */
    public void setPort(String port) {
        this.port = port;
    }

    /** @param is_internet the new value of the flag */
    public void setIs_internet(int is_internet) {
        this.is_internet = is_internet;
    }
}
运行之后可以获取相关ip:
2. 如何使用代理 IP
package com.aopa.test;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
/**
 * Demo: obtains a proxy through {@code ProxyCralwerUnusedVPN}, routes HTTP traffic
 * through it and downloads earthquake records from www.ceic.ac.cn.
 */
public class MyTest {

    /**
     * Sends a POST request to the given URL.
     *
     * @param url   URL to send the request to
     * @param param request body, expected in the form {@code name1=value1&name2=value2}
     * @return the response body with its lines concatenated (line breaks dropped),
     *         or whatever was read before a failure (possibly empty)
     */
    public static String sendPost(String url, String param) {
        StringBuilder result = new StringBuilder();
        try {
            URLConnection conn = new URL(url).openConnection();
            // Common request headers.
            conn.setRequestProperty("accept", "*/*");
            conn.setRequestProperty("connection", "Keep-Alive");
            conn.setRequestProperty("user-agent",
                    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
            // Both flags are required for sending a request body and reading the reply.
            conn.setDoOutput(true);
            conn.setDoInput(true);
            // Explicit UTF-8 so the body does not depend on the platform default charset.
            try (PrintWriter out = new PrintWriter(
                    new OutputStreamWriter(conn.getOutputStream(), StandardCharsets.UTF_8))) {
                out.print(param);
                out.flush();
            }
            readAll(conn, result);
        } catch (Exception e) {
            System.out.println("发送 POST 请求出现异常!" + e);
            e.printStackTrace();
        }
        return result.toString();
    }

    /**
     * Sends a GET request to the given URL and prints every response header.
     *
     * @param url full URL to request, including any query string
     * @return the response body with its lines concatenated (line breaks dropped),
     *         or whatever was read before a failure (possibly empty)
     */
    public static String sendGet(String url) {
        StringBuilder result = new StringBuilder();
        try {
            URLConnection connection = new URL(url).openConnection();
            // Common request headers.
            connection.setRequestProperty("accept", "*/*");
            connection.setRequestProperty("connection", "Keep-Alive");
            connection.setRequestProperty("user-agent",
                    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
            connection.connect();
            // Dump all response headers.
            Map<String, List<String>> headers = connection.getHeaderFields();
            for (Map.Entry<String, List<String>> header : headers.entrySet()) {
                System.out.println(header.getKey() + "--->" + header.getValue());
            }
            readAll(connection, result);
        } catch (Exception e) {
            System.out.println("发送GET请求出现异常!" + e);
            e.printStackTrace();
        }
        return result.toString();
    }

    /** Appends every line of the connection's response body to {@code sink}, decoded as UTF-8. */
    private static void readAll(URLConnection connection, StringBuilder sink) throws IOException {
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = in.readLine()) != null) {
                sink.append(line);
            }
        }
    }

    /**
     * Fetches one proxy, installs it through the {@code http.proxyHost} and
     * {@code http.proxyPort} system properties, then downloads one year of
     * earthquake records and prints those later than the start time.
     */
    public static void main(String[] args) throws IOException {
        try {
            ProxyCralwerUnusedVPN proxyCrawler = new ProxyCralwerUnusedVPN();
            // One proxy is enough here; asking for more makes the crawl slower.
            String ipresult = proxyCrawler.startCrawler(1);
            String[] iphost = (ipresult != null && ipresult.length() > 2)
                    ? ipresult.split(",")
                    : new String[0];
            if (iphost.length == 2) {
                System.out.println("获取IP------>" + iphost[0]);
                System.out.println("获取IP------>" + iphost[1]);
                System.setProperty("http.proxyHost", iphost[0]);
                System.setProperty("http.proxyPort", iphost[1]);
            } else {
                // No usable proxy was crawled: fall back to a hard-coded one.
                System.setProperty("http.proxyHost", "58.252.6.165");
                System.setProperty("http.proxyPort", "9000");
            }

            // Query window: from the start date to one year later (dates only).
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            String getbegin = "2015-04-25 01:00:00";
            String begin = getbegin.substring(0, 10);
            Calendar c = Calendar.getInstance();
            c.setTime(sdf.parse(getbegin));
            c.add(Calendar.YEAR, 1);
            String end = sdf.format(c.getTime()).substring(0, 10);

            // Historical earthquake data.
            String url = "http://www.ceic.ac.cn/ajax/search?page=1&&start="+begin+"&&end="+end+"&&jingdu1=&&jingdu2=&&weidu1=&&weidu2=&&height1=&&height2=&&zhenji1=&&zhenji2=";
            String result = sendGet(url);
            System.out.println(result);
            if (result == null || "".equals(result)) {
                return;
            }

            // The payload is wrapped as "(...)": drop the first character and
            // everything from the last ")" before parsing the JSON.
            String val = StringUtils.substringBeforeLast(result.substring(1), ")");
            JSONObject jsStr = JSONObject.parseObject(val);
            JSONArray shuju = (jsStr == null) ? null : jsStr.getJSONArray("shuju");
            if (shuju == null) {
                // Response carried no "shuju" array; nothing to report.
                return;
            }

            Date oldhappenTime = sdf.parse(getbegin); // latest quake time stored by the previous run
            Date newbeginTime = sdf.parse(getbegin);  // latest quake time seen in this run (to be persisted, e.g. in Redis)
            for (int i = 0; i < shuju.size(); i++) {
                JSONObject js = shuju.getJSONObject(i);
                String occurredAt = js.getString("O_TIME");
                if (occurredAt == null) {
                    // Record without a timestamp cannot be compared; skip it.
                    continue;
                }
                Date happenTime = sdf.parse(occurredAt);   // quake time
                String name = js.getString("LOCATION_C"); // location name
                if (happenTime.after(oldhappenTime)) {
                    System.out.println(sdf.format(happenTime) + name);
                    if (happenTime.after(newbeginTime)) {
                        newbeginTime = happenTime;
                    }
                }
            }
            System.out.println("最晚地震时刻:" + sdf.format(newbeginTime));
            System.out.println(shuju.size());
            System.out.println(end);
        } catch (java.text.ParseException e) {
            e.printStackTrace();
        }
    }
}
获取数据: