前言:我发现自己的 CSDN 博客被机器人自动评论。这些机器人账号大多靠批量评论别人的文章来换取回访、点赞或关注,因此总积分往往非常高。为了把这些机器评论覆盖掉,本篇用 Java 爬虫实现对自己博客的所有文章进行自动评论。
工具:webmagic
材料:由文件加载的自动随机评论语
/**
 * Loads the pool of comment phrases from a classpath text file and caches it.
 *
 * <p>The cached list is re-read from disk the first time it is requested and
 * again whenever the refresh interval has elapsed since the last read.
 */
public class CommentLoad {
    /** Default refresh interval: 20 seconds, in milliseconds. */
    private static final long DEFAULT_REFRESH_TIME = 20000L;
    /** Classpath resource used when the caller does not supply a path. */
    private static final String DEFAULT_PATH = "comment.txt";
    /** Shared generator so that a new Random is not allocated for every draw. */
    private static final Random RANDOM = new Random();

    /** Set once the first load attempt has completed. */
    private final AtomicBoolean inited = new AtomicBoolean(false);
    /** Cached comment phrases; volatile because it is read outside the lock. */
    private volatile List<String> urls = new ArrayList<>();
    /** Time (ms) of the most recent load attempt. */
    private long beforeTime;
    /** Time (ms) after which the cache is considered stale; read outside the lock. */
    private volatile long endTime;
    /** Refresh interval in milliseconds. */
    private long refreshTime = DEFAULT_REFRESH_TIME;

    /** Creates a loader that uses the default refresh interval. */
    public CommentLoad() {
    }

    /**
     * Creates a loader with a custom refresh interval.
     *
     * @param refreshTime refresh interval in milliseconds
     */
    public CommentLoad(long refreshTime) {
        this.refreshTime = refreshTime;
    }

    /**
     * Demo entry point: reloads and prints the comment list once per second, forever.
     */
    public static void main(String[] args) throws InterruptedException, IOException {
        // Feature 1: load the comment list from the file
        String path = DEFAULT_PATH;
        CommentLoad commentLoad = new CommentLoad();
        int i = 0;
        while (true) {
            Thread.sleep(1000L);
            List<String> list = commentLoad.loadComments(path);
            System.out.println("计数时间:" + ++i);
            System.out.println(list.size());
            System.out.println(list);
        }
        // Feature 2: generate a batch of comment phrases into the file
        // path =
        // CommentLoad.class.getClassLoader().getResource(path).getPath();
        //
        // System.out.println(path);
        //
        // // write the generated comments into the comment file
        // PrintWriter printWriter = new PrintWriter(new FileWriter(path,
        // false));
        // String[] str = new String[] { "文章", "很好", "思路清晰,", "大佬", "66", "加油",
        // "学习了", "你真棒!" };
        // for (int i = 0; i < 50; i++) {
        // // System.out.println(flushArrToString(str));
        // printWriter.println(flushArrToString(str));
        // printWriter.flush();
        // }
        // printWriter.close();
    }

    /**
     * Shuffles the array in place and returns the concatenation of its elements.
     *
     * <p>The swap partner is drawn from {@code [0, index]} inclusive, so an element
     * may stay where it is and every ordering is possible.
     *
     * @param arr elements to shuffle; this array is reordered by the call
     * @return the {@code toString()} of every element, joined in the shuffled order
     */
    public static <T> String flushArrToString(T[] arr) {
        for (int index = arr.length - 1; index > 0; index--) {
            int num = createRandom(index + 1);
            T temp = arr[num];
            arr[num] = arr[index];
            arr[index] = temp;
        }
        StringBuilder builder = new StringBuilder();
        for (T t : arr) {
            builder.append(t.toString());
        }
        return builder.toString();
    }

    /**
     * Returns a random integer in {@code [0, end)}.
     *
     * @param end exclusive upper bound; must be positive
     * @throws IllegalArgumentException if {@code end} is not positive
     */
    public static int createRandom(int end) {
        return RANDOM.nextInt(end);
    }

    /**
     * Returns the cached comment phrases, reloading them first when the cache has
     * never been filled or the refresh interval has elapsed.
     *
     * @param path classpath resource to read; {@code null} selects the default file
     * @return the current comment list (empty if nothing could be loaded yet)
     */
    public List<String> loadComments(String path) {
        path = path == null ? DEFAULT_PATH : path;
        if (!inited.get() || System.currentTimeMillis() > this.endTime) {
            readComments(path);
        }
        return urls;
    }

    /**
     * Reloads the cache under the instance lock. The staleness check is repeated
     * here so that callers that queued up on the lock do not re-read the file.
     */
    private synchronized void readComments(String path) {
        if (inited.get() && System.currentTimeMillis() <= this.endTime) {
            return;
        }
        try {
            // Read the file the caller asked for, not a hard-coded name.
            urls = doReadComments(path);
        } catch (IOException e) {
            // Keep serving the previous list; the next refresh will try again.
            System.err.println("Failed to load comments from " + path + ": " + e);
        }
        this.beforeTime = System.currentTimeMillis();
        this.endTime = beforeTime + this.refreshTime;
        inited.set(true);
    }

    /**
     * Reads every non-blank line of the given classpath resource.
     *
     * @param path classpath resource name
     * @return the trimmed, non-empty lines in file order
     * @throws FileNotFoundException if the resource is not on the classpath
     * @throws IOException if reading fails
     */
    private List<String> doReadComments(String path) throws FileNotFoundException, IOException {
        java.net.URL resource = CommentLoad.class.getClassLoader().getResource(path);
        if (resource == null) {
            throw new FileNotFoundException("Comment file not found on classpath: " + path);
        }
        List<String> comments = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(resource.getPath()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String comment = line.trim();
                // A blank line would otherwise be posted as an empty comment.
                if (!comment.isEmpty()) {
                    comments.add(comment);
                }
            }
        }
        return comments;
    }
}
此类的主要功能是从指定的文件 path 加载评论语到 list 列表,并在超过刷新时间后重新加载。
经过测试,评论一篇文章只需要知道文章 id,并带上登录态(cookie)即可。
// Comment text to post
String content = "这个文章非常好啊";
// Id of the article to comment on
String articleId = "109261723";
// Comment-submit endpoint, called with POST
Request request = new Request("https://blog.csdn.net/phoenix/web/v1/comment/submit");
request.setMethod(HttpConstant.Method.POST);
// Form fields of the request; commentId is sent as an empty string
Map<String, Object> params = new HashMap<>();
params.put("commentId", "");
params.put("content", content);
params.put("articleId", articleId);
HttpRequestBody form = HttpRequestBody.form(params, "utf-8");
request.setRequestBody(form);
// The login cookie still has to be configured for this request to be accepted
Spider.create(new ComentTest()).addRequest(request).thread(1).run();
文章 id 可以通过多种方式获取,比如从最近发表博客的列表中采集。本次批量评论采用对单个博主的全部文章逐一评论的方式,文章列表从 https://blog.csdn.net/用户名/article/list/页码 开始按分页采集。
/**
 * Entry point: crawls one blogger's article-list pages and comments on every article found.
 */
public static void main(String[] args) {
    // CSDN user name whose articles are crawled
    final String blogger = "shuixiou1";
    // number of article-list pages this user has
    final int pageCount = 3;
    String[] seedUrls = createInitUrls(blogger, pageCount);
    Spider spider = Spider.create(new CsdnConmentSpider());
    spider.addUrl(seedUrls);
    spider.thread(1);
    spider.run();
}
/**
 * Builds the seed URLs: one article-list URL per page, numbered from 1.
 *
 * @param user CSDN user name substituted into the list-URL template
 * @param page number of list pages; a value below 1 yields an empty array
 * @return the list-page URLs for pages {@code 1..page}
 */
private static String[] createInitUrls(String user, int page) {
    // The template only depends on the user, so format it once.
    String prefix = String.format(listUrl, user);
    // A typed list is required: on a raw List, toArray(String[]) returns Object[].
    List<String> urls = new ArrayList<>();
    for (int i = 1; i <= page; i++) {
        urls.add(prefix + i);
    }
    return urls.toArray(new String[0]);
}
package com.pc.demos.csdn;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.jsoup.Jsoup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.pc.util.CookieUtil;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.model.HttpRequestBody;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.utils.HttpConstant;
/**
 * Posts an automatic comment on every article of a single CSDN blogger.
 *
 * <p>Three kinds of pages pass through {@link #process(Page)}: article-list pages
 * (article links are queued), article detail pages (a comment-submit POST is queued),
 * and everything else, which is treated as the response of a comment submission.
 */
public class CsdnConmentSpider implements PageProcessor {
    Logger logger = LoggerFactory.getLogger(getClass());
    // Article-list URL template; %s is the user name, the page number is appended
    private static final String listUrl = "https://blog.csdn.net/%s/article/list/";
    // Pattern identifying article-list pages
    private static final String listUrlRegex = "https://blog\\.csdn\\.net/(.+)/article/list/(.*)";
    // Pattern identifying article detail pages
    private static final String detailUrlRegex = "https://blog\\.csdn\\.net/(.+)/article/details/(.*)";
    // Endpoint that accepts the comment form
    private static final String submitUrl = "https://blog.csdn.net/phoenix/web/v1/comment/submit";
    // Source of the comment phrases
    private CommentLoad commentLoad = new CommentLoad();
    // Picks the phrase to post
    private final Random random = new Random();

    /**
     * Dispatches on the URL of the downloaded page.
     *
     * @param page the page that was just downloaded
     */
    @Override
    public void process(Page page) {
        String url = page.getRequest().getUrl();
        if (url.matches(listUrlRegex)) {
            // List page: queue every article link
            List<String> anchors = page.getHtml()
                    .xpath("//div[@class='article-item-box csdn-tracking-statistics']/h4/a").all();
            for (String anchor : anchors) {
                String link = Jsoup.parse(anchor).select("a").attr("href");
                page.addTargetRequest(link);
            }
        } else if (url.matches(detailUrlRegex)) {
            // Detail page: queue a comment submission for this article
            System.out.println("详情页面加载:" + url);
            String articleId = extractArticleId(url);
            List<String> comments = commentLoad.loadComments(null);
            if (comments == null || comments.isEmpty()) {
                // Random.nextInt(0) would throw; without phrases there is nothing to post.
                logger.warn("No comment phrases loaded, skipping article {}", articleId);
                return;
            }
            page.addTargetRequest(buildCommentRequest(articleId, comments.get(random.nextInt(comments.size()))));
        } else {
            // Anything else is the response of a comment submission
            String res = page.getJson().jsonPath("$..data").toString();
            System.out.println("评论成功:返回id是" + res);
        }
    }

    /**
     * Returns the last path segment of the URL, ignoring any query string or fragment.
     */
    private static String extractArticleId(String url) {
        int end = url.length();
        int query = url.indexOf('?');
        if (query >= 0) {
            end = query;
        }
        int fragment = url.indexOf('#');
        if (fragment >= 0 && fragment < end) {
            end = fragment;
        }
        String path = url.substring(0, end);
        return path.substring(path.lastIndexOf('/') + 1);
    }

    /**
     * Builds the POST request that submits {@code content} as a comment on the article.
     */
    private static Request buildCommentRequest(String articleId, Object content) {
        Request request = new Request(submitUrl);
        request.setMethod(HttpConstant.Method.POST);
        Map<String, Object> params = new HashMap<>();
        params.put("commentId", "");
        params.put("content", content);
        params.put("articleId", articleId);
        request.setRequestBody(HttpRequestBody.form(params, "utf-8"));
        Map<String, Object> extras = new HashMap<>();
        extras.put("articleId", articleId);
        request.setExtras(extras);
        return request;
    }

    /**
     * Builds the site configuration: retry count, delay between requests, request
     * headers and the login cookie.
     */
    @Override
    public Site getSite() {
        Site site = Site.me().setCycleRetryTimes(3).setSleepTime(2000);
        // ":authority", ":method", ":path" and ":scheme" are HTTP/2 pseudo-headers copied
        // from browser dev tools. They are not valid header field names, so they are not
        // added here; the HTTP client derives method, host and path from the request.
        site.addHeader("accept", "application/json, text/javascript, */*; q=0.01");
        // NOTE(review): "br" is advertised here; confirm the HTTP client in use can decode it.
        site.addHeader("accept-encoding", "gzip, deflate, br");
        site.addHeader("accept-language", "zh-CN,zh;q=0.9");
        site.addHeader("origin", "https://blog.csdn.net");
        site.addHeader("referer", "https://blog.csdn.net");
        // Cookie string of a logged-in session (placeholder; must be replaced)
        String cookieSpec = "################";
        CookieUtil.setSiteCookies(site, cookieSpec);
        return site;
    }

    /**
     * Entry point: comments on every article of a single blogger.
     */
    public static void main(String[] args) {
        String user = "shuixiou1"; // CSDN user name
        int page = 3; // number of article-list pages this user has
        String[] alls = createInitUrls(user, page);
        Spider.create(new CsdnConmentSpider()).addUrl(alls).thread(1).run();
    }

    /**
     * Builds the seed URLs: one article-list URL per page, numbered from 1.
     *
     * @param user CSDN user name substituted into the list-URL template
     * @param page number of list pages; a value below 1 yields an empty array
     */
    private static String[] createInitUrls(String user, int page) {
        String prefix = String.format(listUrl, user);
        List<String> urls = new ArrayList<>();
        for (int i = 1; i <= page; i++) {
            urls.add(prefix + i);
        }
        return urls.toArray(new String[0]);
    }
}
经过一轮测试,没有触发频率限制。使用时请注意:
1) 必须设置登录后的 cookie 字符串(代码中已用 # 号占位符替换)。
2) 拿去使用时,注意把代码中的 CSDN 博主名称改成自己的!