一文教你使用 Java 代码访问博客

首先声明:刷访问量是不对的!OK?明确这一点之后,我们从技术层面来探讨如何实现刷 ****博客 访问量。
****博客 具有防爬虫功能,如果只是简单地刷新、抓取,并不会增加访客数量,那么…

package org.bood.tasks;

import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Scheduled task that periodically requests every article page of one blog user.
 *
 * <p>Each run walks the user's paginated article list, collects the article URLs found in the
 * returned HTML, prints them, and then issues one GET request per article.
 *
 * @author bood
 * @date 2020/9/6
 */
@Configuration
@EnableScheduling
public class IncreaseBlogViews {

    /**
     * Blog user ID whose article list is scanned.
     */
    private static final String USER_ID = "BUG_call110";

    /**
     * Highest list-page number requested in a single run.
     */
    private static final int MAX_PAGES = 10;

    /**
     * Charset used to decode the list pages.
     */
    private static final String PAGE_CHARSET = "UTF-8";

    /**
     * Text whose presence in a list page stops the pagination loop.
     */
    private static final String EMPTY_PAGE_MARKER = "空空如也";

    /**
     * Timeouts so that a stalled server cannot block the scheduler thread forever.
     */
    private static final int CONNECT_TIMEOUT_MILLIS = 10_000;
    private static final int READ_TIMEOUT_MILLIS = 15_000;

    /**
     * Matches an article URL of {@link #USER_ID} that appears as the value of an {@code href}
     * attribute. The user ID is quoted so it is always matched literally, and the pattern is
     * compiled once instead of once per page.
     */
    private static final Pattern ARTICLE_LINK_PATTERN = Pattern.compile(
            "(?<=href=\")https://blog.csdn.net/" + Pattern.quote(USER_ID) + "/article/details/[0-9]{8,9}(?=\")");

    /**
     * Runs one pass: collect article links, print them, then request each of them once.
     * Triggered at a fixed rate of 180000 ms (3 minutes).
     *
     * @throws InterruptedException declared for backward compatibility; not thrown by this body
     * @throws IOException          if any HTTP request or page read fails; the pass is aborted
     */
    @Scheduled(fixedRate = 180000)
    public void run() throws InterruptedException, IOException {
        Set<String> urls = collectArticleUrls();
        // The value printed is the number of distinct article links, not the number of pages.
        System.out.println("总文章数为: " + urls.size());

        System.out.println("打印每个链接");
        for (String url : urls) {
            System.out.println(url);
        }
        System.out.println("打印每个链接完毕!");

        System.out.println("开始访问每个链接!");
        visitAll(urls);

        System.out.println("运行完毕,成功增加访问数:" + urls.size());
    }

    /**
     * Walks list pages 1..{@link #MAX_PAGES} and gathers the distinct article URLs found in them.
     * Stops early at the first page containing {@link #EMPTY_PAGE_MARKER}.
     */
    private static Set<String> collectArticleUrls() throws IOException {
        // The page number is appended to this prefix.
        final String homeUrl = "https://blog.csdn.net/" + USER_ID + "/article/list/";
        Set<String> urls = new HashSet<>();

        for (int page = 1; page <= MAX_PAGES; page++) {
            System.out.println("finding page " + page);
            String pageUrl = homeUrl + page;
            System.out.println(pageUrl);

            String html;
            // Close the response stream as soon as the page has been read.
            try (InputStream in = doGet(pageUrl)) {
                html = inputStreamToString(in, PAGE_CHARSET);
            }

            // Links are harvested before the empty-page check, as in the original flow.
            urls.addAll(findAll(ARTICLE_LINK_PATTERN, html));
            if (html.contains(EMPTY_PAGE_MARKER)) {
                System.out.println("No This Page!");
                break;
            }
            System.out.println("~~~Success~~~");
        }
        return urls;
    }

    /**
     * Issues one GET request per URL and logs progress after each one.
     */
    private static void visitAll(Set<String> urls) throws IOException {
        int visited = 0;
        for (String url : urls) {
            // Obtaining the stream performs the request; the body is not needed, so the
            // stream is closed immediately instead of being leaked.
            try (InputStream ignored = doGet(url)) {
                visited++;
            }
            System.out.println("成功访问第" + visited + "个链接,共" + urls.size() + "个:" + url);
        }
    }

    /**
     * Sends a GET request with a browser-like {@code User-Agent} header and returns the
     * response body stream.
     *
     * @param urlstr absolute URL to request
     * @return the response stream; the caller is responsible for closing it
     * @throws IOException if the URL is malformed, the connection fails, or a timeout elapses
     */
    public static InputStream doGet(String urlstr) throws IOException {
        HttpURLConnection conn = (HttpURLConnection) new URL(urlstr).openConnection();
        conn.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
        conn.setReadTimeout(READ_TIMEOUT_MILLIS);
        conn.setRequestProperty("User-Agent",
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36");
        return conn.getInputStream();
    }

    /**
     * Reads the stream to its end and decodes the bytes with the given charset.
     *
     * <p>All bytes are buffered first and decoded in one step, so a multi-byte character that
     * happens to straddle two read chunks is decoded correctly. The stream is not closed here.
     *
     * @param is      stream to read
     * @param charset name of the charset used for decoding, e.g. {@code "UTF-8"}
     * @return the decoded content; empty if the stream has no data
     * @throws IOException if reading fails or the charset name is not supported
     */
    public static String inputStreamToString(InputStream is, String charset) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] chunk = new byte[1024];
        int read;
        while ((read = is.read(chunk)) != -1) {
            collected.write(chunk, 0, read);
        }
        return collected.toString(charset);
    }

    /**
     * Returns every non-overlapping match of {@code regex} in {@code str}, in order of
     * appearance.
     *
     * @param str   text to search
     * @param regex regular expression to apply
     * @return the matched substrings; empty if there is no match
     */
    public static List<String> getMatherSubstrs(String str, String regex) {
        return findAll(Pattern.compile(regex), str);
    }

    /**
     * Collects all matches of an already compiled pattern in the given text.
     */
    private static List<String> findAll(Pattern pattern, String text) {
        List<String> matches = new ArrayList<>();
        Matcher matcher = pattern.matcher(text);
        while (matcher.find()) {
            matches.add(matcher.group());
        }
        return matches;
    }

}

你可能感兴趣的:(实践,java,正则表达式)