如何方便地下载CSDN博客正文

CSDN博客正文中的图片一般不能直接复制,需要先复制到微信等工具里再粘贴出来,比较麻烦。

因此我写了一个工具类来下载博客正文,可以方便地生成HTML文件和Word文件。

最关键的是,它基本可以原样保留正文的格式,只是在细节上可能有些小出入。

需要依赖jsoup和Apache POI(下面的示例代码还用到了commons-io)。

 

package test.test2019;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * Created by admin on 2019/1/15.
 */
public class JsoupTest {

   /**
    * 从csdn读取和加工正文
    * @param uriStr
    * @return
    */
   private static String readHtml(String uriStr){
      StringBuffer sb=new StringBuffer();
      sb.append("");
      try {
         URI uri=new URI(uriStr);
         Document doc= Jsoup.parse(uri.toURL(),10000);
         sb.append(doc.select("style").outerHtml());
         Elements elements=doc.select("link[rel=\"stylesheet\"]");
         String url=null;
         for(Element element:elements){
            url=element.attr("href");
            sb.append("");
            sb.append("\r\n");
         }
         sb.append("");
         sb.append("");
         sb.append("

下载地址:").append(uriStr).append("

"); doc.select("#article_content p").toggleClass("fontclass"); sb.append(doc.select(".blog-content-box").outerHtml()); sb.append(""); } catch (Exception e) { e.printStackTrace(); } return sb.toString(); } public static boolean writeDocFile( File file, String html,String encoding) { boolean w = false; File fileDir=file.getParentFile(); if (!fileDir.exists()) { fileDir.mkdirs(); } try { byte b[] = html.getBytes(encoding); ByteArrayInputStream bais = new ByteArrayInputStream(b); POIFSFileSystem poifs = new POIFSFileSystem(); DirectoryEntry directory = poifs.getRoot(); DocumentEntry documentEntry = directory.createDocument( "WordDocument", bais); FileOutputStream ostream = new FileOutputStream(file); poifs.writeFilesystem(ostream); bais.close(); ostream.close(); }catch(IOException e){ e.printStackTrace(); } return w; } /**从csdn截取正文 * @param uriStr * @throws IOException * @throws URISyntaxException */ public static void writeCSDNWordFile(String uriStr,File wordFile) { writeDocFile(wordFile,readHtml(uriStr),"UTF-8"); } /**从csdn截取正文 * @param uriStr * @throws IOException * @throws URISyntaxException */ public static void writeCSDNHtmlFile(String uriStr,File localFile) throws IOException { FileUtils.writeStringToFile(localFile,readHtml(uriStr),"UTF-8"); } public static void main(String[] args) throws IOException, URISyntaxException { String html="D:/test/word/jxl-excel.html"; String doc="D:/test/word/jxl-excel.doc"; String uri="https://blog.csdn.net/a1091662876/article/details/87722035"; writeCSDNHtmlFile(uri,new File(html)); writeCSDNWordFile(uri,new File(doc)); } }

你可能感兴趣的:(如何方便的下载csdn博客正文)