更多知识请访问 www.itkc8.com
使用线程池的代码:
package com.cowboy.service;
import java.util.concurrent.*;
/**
 * Shared thread pool exposed through static helper methods.
 *
 * <p>The underlying {@link ThreadPoolExecutor} keeps 50 core threads, may grow to
 * 100 threads once its 10000-slot queue is full, and uses
 * {@link ThreadPoolExecutor.CallerRunsPolicy}: when both the queue and the pool are
 * saturated the task is executed on the submitting thread instead of being rejected.
 *
 * <p>The worker threads are regular (non-daemon) threads, so {@link #shutdown()}
 * has to be called for the JVM to be able to exit.
 *
 * @author hux
 * @version 1.0
 */
public class CommonThreadPool {

    private static final ExecutorService exec = new ThreadPoolExecutor(50, 100, 0L,
            TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>(10000),
            new ThreadPoolExecutor.CallerRunsPolicy());

    /**
     * Runs the task asynchronously without giving the caller a handle on it.
     *
     * @param command task to run
     */
    public static void execute(Runnable command) {
        exec.execute(command);
    }

    /**
     * Submits a task that produces no value.
     *
     * <p>{@code future.get()} returns {@code null} once the task has finished and
     * blocks the calling thread while the task is still running.
     *
     * @param command task to run
     * @return a future that completes when the task is done
     */
    public static Future<?> submit(Runnable command) {
        return exec.submit(command);
    }

    /**
     * Submits a task that produces a value.
     *
     * <p>The value computed by the task is available through {@code future.get()}.
     *
     * @param command task to run
     * @param <T> type of the value produced by the task
     * @return a future holding the task's result
     */
    public static <T> Future<T> submit(Callable<T> command) {
        return exec.submit(command);
    }

    /**
     * Starts an orderly shutdown: already submitted tasks still run, new ones are
     * no longer accepted.
     */
    public static void shutdown() {
        exec.shutdown();
    }
}
package com.cowboy.service;
import com.cowboy.model.Article;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
/**
 * Multi-threaded variant of the blog crawler demo.
 *
 * <p>Reads the number of article-list pages from the blog's front page, fetches
 * every page as a separate task on {@link CommonThreadPool}, merges the results
 * and prints the elapsed time followed by every article.
 *
 * @author hux
 * @version 1.0
 */
public class ThreadTest {

    // Alternative blog to crawl: "https://blog.csdn.net/foruok"
    private static final String URL = "https://blog.csdn.net/valada";

    public static void main(String[] args) {
        int totalPage = getTotalPage();
        System.out.println("总页数:"+totalPage);
        long l1 = System.currentTimeMillis();

        // One task per list page; the futures keep page order.
        List<Future<List<Article>>> futureList = new ArrayList<>();
        for (int pageNow = 1; pageNow <= totalPage; pageNow++) {
            final int page = pageNow;
            Callable<List<Article>> task = () -> getArtitcleByPage(page);
            Future<List<Article>> submit = CommonThreadPool.submit(task);
            futureList.add(submit);
        }

        // The main thread is free to do other work here while the pages load.
        System.out.println("now waiting sub thread done.");

        // Block on each future in turn to gather the results.
        List<Article> articleList = new ArrayList<>();
        try {
            for (Future<List<Article>> future : futureList) {
                try {
                    articleList.addAll(future.get());
                } catch (ExecutionException e) {
                    // A single failed page must not discard the pages that did load.
                    e.printStackTrace();
                }
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can see it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } finally {
            // The pool threads are non-daemon: without this the JVM would not exit.
            CommonThreadPool.shutdown();
        }

        System.out.println(System.currentTimeMillis() - l1);
        System.out.println(articleList.size() + " "+articleList);

        // Print every article of the blog.
        for (Article article : articleList) {
            System.out.println("文章标题:" + article.getTitle().replaceFirst("原",""));
            System.out.println("文章绝对路劲地址:" + article.getAddress());
            System.out.println("文章简介:" + article.getDesption());
            System.out.println("发表时间:" + article.getTime());
            System.out.println("阅读数量:" + article.getReadNum());
            System.out.println("评论数量:" + article.getCommentNum());
        }
    }

    /**
     * Determines how many article-list pages the blog has.
     *
     * <p>The front page is searched for the inline script that declares
     * {@code pageSize} and {@code listTotal}; the result is
     * {@code ceil(listTotal / pageSize)}.
     *
     * @return the number of pages, or 1 when the front page cannot be loaded or
     *         no usable script is found
     */
    public static int getTotalPage() {
        Connection conn = Jsoup.connect(URL)
                .userAgent("Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0")
                .timeout(8000)
                .method(Connection.Method.GET);
        Document doc;
        try {
            doc = conn.get();
        } catch (IOException e) {
            e.printStackTrace();
            // Without a document there is nothing to parse; fall back to the default.
            return 1;
        }
        for (Element script : doc.body().getElementsByTag("script")) {
            String text = script.data();
            if (text.contains("pageSize") && text.contains("listTotal")) {
                int parsed = parseTotalPage(text);
                if (parsed >= 0) {
                    return parsed;
                }
            }
        }
        return 1;
    }

    /**
     * Extracts the page count from the text of the paging script.
     *
     * @param text script source expected to contain the {@code var pageSize = },
     *             {@code var listTotal = } and {@code var pageQueryStr =}
     *             declarations in that order
     * @return the page count, or -1 when the declarations are missing, out of
     *         order or not numeric
     */
    private static int parseTotalPage(String text) {
        String pageSizeMarker = "var pageSize = ";
        String listTotalMarker = "var listTotal = ";
        int sizeAt = text.indexOf(pageSizeMarker);
        int totalAt = text.indexOf(listTotalMarker);
        int queryAt = text.indexOf("var pageQueryStr =");
        int sizeStart = sizeAt + pageSizeMarker.length();
        int totalStart = totalAt + listTotalMarker.length();
        if (sizeAt < 0 || totalAt < sizeStart || queryAt < totalStart) {
            return -1;
        }
        try {
            double pageSize = Double.parseDouble(
                    text.substring(sizeStart, totalAt).replace(";", "").trim());
            double listTotal = Double.parseDouble(
                    text.substring(totalStart, queryAt).replace(";", "").trim());
            // A non-positive page size would otherwise yield a huge or meaningless count.
            if (!(pageSize > 0) || !(listTotal >= 0)) {
                return -1;
            }
            return (int) Math.ceil(listTotal / pageSize);
        } catch (NumberFormatException e) {
            return -1;
        }
    }

    /**
     * Loads one article-list page and converts each entry into an {@link Article}.
     *
     * @param pageNow 1-based page number appended to the list URL
     * @return the articles found on that page, in document order
     * @throws IOException if the page cannot be fetched
     */
    public static List<Article> getArtitcleByPage(int pageNow) throws IOException {
        Connection conn = Jsoup.connect(URL + "/article/list/" + pageNow)
                .userAgent("Mozilla/5.0 (Windows NT 6.1; rv:47.0) Gecko/20100101 Firefox/47.")
                .timeout(8000)
                .method(Connection.Method.GET);
        Document doc = conn.get();
        Element body = doc.body();
        List<Article> resultList = new ArrayList<>();
        Elements articleList = body.getElementsByClass("article-item-box");
        for (Element article : articleList) {
            Article articleEntity = new Article();
            Element linkNode = article.select("div h4 a").get(0);
            Element desptionNode = article.getElementsByClass("content").get(0);
            Element articleManageNode = article.getElementsByClass("info-box").get(0);
            // Both counters use the "read-num" class: index 0 is stored as the read
            // count and index 1 as the comment count.
            Elements counters = articleManageNode.getElementsByClass("read-num");
            articleEntity.setAddress(linkNode.attr("href"));
            articleEntity.setTitle(linkNode.text());
            articleEntity.setDesption(desptionNode.text());
            articleEntity.setTime(articleManageNode.getElementsByClass("date").text());
            articleEntity.setReadNum(counters.get(0).getElementsByClass("num").text());
            articleEntity.setCommentNum(counters.get(1).getElementsByClass("num").text());
            resultList.add(articleEntity);
        }
        return resultList;
    }
}
结果:
处理时间:2946 毫秒
处理数量:1989 篇文章
不使用线程的代码:
package com.cowboy.service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.*;
import com.cowboy.model.Article;
import org.jsoup.*;
import org.jsoup.nodes.*;
import org.jsoup.select.*;
/**
 * Single-threaded variant of the blog crawler demo.
 *
 * <p>Reads the number of article-list pages from the blog's front page, fetches
 * the pages one after another on the calling thread and prints the elapsed time
 * together with the collected articles.
 *
 * @author shizongger
 */
public class Main {

    private static final String URL = "https://blog.csdn.net/valada";

    public static void main(String[] args) throws IOException {
        Connection conn = Jsoup.connect(URL)
                .userAgent("Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0")
                .timeout(8000)
                .method(Connection.Method.GET);
        Document doc = conn.get();

        // Defaults to a single page when no usable paging script is found.
        int totalPage = 1;
        for (Element script : doc.body().getElementsByTag("script")) {
            String text = script.data();
            if (text.contains("pageSize") && text.contains("listTotal")) {
                int parsed = parseTotalPage(text);
                if (parsed >= 0) {
                    totalPage = parsed;
                    break;
                }
            }
        }
        System.out.println("总页数:"+totalPage);

        List<Article> articleList = new ArrayList<>();
        long l1 = System.currentTimeMillis();
        for (int pageNow = 1; pageNow <= totalPage; pageNow++) {
            articleList.addAll(getArtitcleByPage(pageNow));
        }
        System.out.println(System.currentTimeMillis() - l1);
        System.out.println(articleList.size() + " "+articleList);
    }

    /**
     * Extracts the page count from the text of the paging script.
     *
     * @param text script source expected to contain the {@code var pageSize = },
     *             {@code var listTotal = } and {@code var pageQueryStr =}
     *             declarations in that order
     * @return {@code ceil(listTotal / pageSize)}, or -1 when the declarations are
     *         missing, out of order or not numeric
     */
    private static int parseTotalPage(String text) {
        String pageSizeMarker = "var pageSize = ";
        String listTotalMarker = "var listTotal = ";
        int sizeAt = text.indexOf(pageSizeMarker);
        int totalAt = text.indexOf(listTotalMarker);
        int queryAt = text.indexOf("var pageQueryStr =");
        int sizeStart = sizeAt + pageSizeMarker.length();
        int totalStart = totalAt + listTotalMarker.length();
        if (sizeAt < 0 || totalAt < sizeStart || queryAt < totalStart) {
            return -1;
        }
        try {
            double pageSize = Double.parseDouble(
                    text.substring(sizeStart, totalAt).replace(";", "").trim());
            double listTotal = Double.parseDouble(
                    text.substring(totalStart, queryAt).replace(";", "").trim());
            // A non-positive page size would otherwise yield a huge or meaningless count.
            if (!(pageSize > 0) || !(listTotal >= 0)) {
                return -1;
            }
            return (int) Math.ceil(listTotal / pageSize);
        } catch (NumberFormatException e) {
            return -1;
        }
    }

    /**
     * Loads one article-list page and converts each entry into an {@link Article}.
     *
     * @param pageNow 1-based page number appended to the list URL
     * @return the articles found on that page, in document order
     * @throws IOException if the page cannot be fetched
     */
    public static List<Article> getArtitcleByPage(int pageNow) throws IOException {
        Connection conn = Jsoup.connect(URL + "/article/list/" + pageNow)
                .userAgent("Mozilla/5.0 (Windows NT 6.1; rv:47.0) Gecko/20100101 Firefox/47.")
                .timeout(8000)
                .method(Connection.Method.GET);
        Document doc = conn.get();
        Element body = doc.body();
        List<Article> resultList = new ArrayList<>();
        Elements articleList = body.getElementsByClass("article-item-box");
        for (Element article : articleList) {
            Article articleEntity = new Article();
            Element linkNode = article.select("div h4 a").get(0);
            Element desptionNode = article.getElementsByClass("content").get(0);
            Element articleManageNode = article.getElementsByClass("info-box").get(0);
            // Both counters use the "read-num" class: index 0 is stored as the read
            // count and index 1 as the comment count.
            Elements counters = articleManageNode.getElementsByClass("read-num");
            articleEntity.setAddress(linkNode.attr("href"));
            articleEntity.setTitle(linkNode.text());
            articleEntity.setDesption(desptionNode.text());
            articleEntity.setTime(articleManageNode.getElementsByClass("date").text());
            articleEntity.setReadNum(counters.get(0).getElementsByClass("num").text());
            articleEntity.setCommentNum(counters.get(1).getElementsByClass("num").text());
            resultList.add(articleEntity);
        }
        return resultList;
    }
}
结果:
处理时间:30073 毫秒
处理数量:1989 篇文章
30073 / 2946 ≈ 10.2,也就是说使用线程池后,抓取同样的 1989 篇文章速度提升了约 10 倍。