首先声明:刷访问量是不对的!OK?明确这一点之后,我们再从技术层面探讨如何实现刷 ****博客 访问量。
****博客 是具有防爬虫功能的,如果只是简单地刷新、抓取,并不会增加访客数量,那么……
package org.bood.tasks;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Scheduled task that periodically requests every article page of one blog on
 * blog.csdn.net.
 *
 * <p>Each run walks the blog's article-list pages, collects the article URLs
 * found in the returned HTML, and then issues one GET request per article.
 *
 * @author bood
 * @date 2020/9/6
 */
@Configuration
@EnableScheduling
public class IncreaseBlogViews {

    /**
     * User ID of the blog whose articles are visited.
     */
    private static String userId = "BUG_call110";

    /** Highest article-list page number that a single run will request. */
    private static final int MAX_PAGES = 10;

    /** Connect timeout for every request, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 10000;

    /** Read timeout for every request, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 30000;

    /**
     * Collects the article links from the list pages and requests each of them once.
     *
     * <p>Runs at a fixed rate of 3 minutes (180000 ms). Any I/O failure aborts the
     * current run by propagating the exception.
     *
     * @throws InterruptedException never thrown by this body; kept in the signature
     *                              so existing callers continue to compile
     * @throws IOException          if a list page or an article cannot be requested or read
     */
    @Scheduled(fixedRate = 180000)
    public void run() throws InterruptedException, IOException {
        Set<String> urls = new HashSet<>();

        // ---------------- walk every list page and collect the article links ----------------
        // The page number is appended to this prefix.
        final String homeUrl = "https://blog.csdn.net/" + userId + "/article/list/";
        // The user ID is quoted so that any regex metacharacter in it is matched literally.
        final String articleRegex = "(?<=href=\")https://blog.csdn.net/" + Pattern.quote(userId)
                + "/article/details/[0-9]{8,9}(?=\")";

        for (int page = 1; page <= MAX_PAGES; page++) {
            System.out.println("finding page " + page);
            String curUrl = homeUrl + page;
            System.out.println(curUrl);

            // HTML source of the whole list page; the stream is closed as soon as it is read.
            String pageStr;
            try (InputStream is = doGet(curUrl)) {
                pageStr = inputStreamToString(is, "UTF-8");
            }
            urls.addAll(getMatherSubstrs(pageStr, articleRegex));

            // The site renders this marker text on a list page that has no articles,
            // which means we have walked past the last page.
            if (pageStr.contains("空空如也")) {
                System.out.println("No This Page!");
                break;
            } else {
                System.out.println("~~~Success~~~");
            }
        }
        // The value printed is the number of distinct article links, not a page count,
        // so the message says "total article links".
        System.out.println("文章链接总数为: " + urls.size());

        // ---------------- print every link ----------------
        System.out.println("打印每个链接");
        for (String s : urls) {
            System.out.println(s);
        }
        System.out.println("打印每个链接完毕!");

        // ---------------- request every link ----------------
        System.out.println("开始访问每个链接!");
        int visited = 0;
        for (String s : urls) {
            // The response body is not needed; closing the stream immediately lets the
            // JDK free the network resources instead of leaking one connection per link.
            try (InputStream ignored = doGet(s)) {
                // nothing to read
            }
            System.out.println("成功访问第" + (++visited) + "个链接,共" + urls.size() + "个:" + s);
        }

        // ---------------- done ----------------
        System.out.println("运行完毕,成功增加访问数:" + urls.size());
    }

    /**
     * Sends a GET request with a desktop-browser User-Agent header and returns the
     * response body stream.
     *
     * <p>The caller owns the returned stream and must close it. Connect and read
     * timeouts are set so that an unresponsive server cannot block the calling
     * thread indefinitely.
     *
     * @param urlstr absolute URL to request
     * @return the response body stream
     * @throws IOException if the URL is malformed, the connection fails or times out,
     *                     or the server answers with an error status
     */
    public static InputStream doGet(String urlstr) throws IOException {
        URL url = new URL(urlstr);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
        conn.setReadTimeout(READ_TIMEOUT_MS);
        conn.setRequestProperty("User-Agent",
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36");
        return conn.getInputStream();
    }

    /**
     * Reads the stream to its end and decodes all bytes with the given charset.
     *
     * <p>The bytes are buffered first and decoded once at the end. Decoding each
     * read chunk on its own would corrupt any multi-byte character whose bytes
     * happen to straddle two chunks. The stream is not closed by this method.
     *
     * @param is      stream to drain
     * @param charset name of the charset used for decoding, e.g. {@code "UTF-8"}
     * @return the decoded content; empty if the stream has no data
     * @throws IOException if reading fails or the charset name is not supported
     */
    public static String inputStreamToString(InputStream is, String charset) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        byte[] chunk = new byte[1024];
        int read;
        while ((read = is.read(chunk)) != -1) {
            buffer.write(chunk, 0, read);
        }
        return buffer.toString(charset);
    }

    /**
     * Returns every non-overlapping match of a regular expression, in order of
     * appearance.
     *
     * @param str   text to search
     * @param regex regular expression to look for
     * @return the matched substrings; empty if nothing matches
     */
    public static List<String> getMatherSubstrs(String str, String regex) {
        List<String> list = new ArrayList<>();
        Matcher m = Pattern.compile(regex).matcher(str);
        while (m.find()) {
            list.add(m.group());
        }
        return list;
    }
}