Java通过URL获取公众号文章生成HTML

说明:通过公众号URL获取的内容,文字可以正常显示,但图片存在跨域访问的问题:微信不允许跨域访问公众号图片。所以需要先将公众号图片下载到本地,再上传至OSS,最后把HTML中的图片地址全部替换为自己的OSS地址。

这里就需要在后台对HTML进行DOM解析,需要用到Jsoup


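		<dependency>
			<groupId>com.aliyun.oss</groupId>
			<artifactId>aliyun-sdk-oss</artifactId>
			<version>2.2.3</version>
		</dependency>
		<dependency>
		    <groupId>org.jsoup</groupId>
		    <artifactId>jsoup</artifactId>
		    <version>1.9.2</version>
		</dependency>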

controller

package com.iueang.controller;

import java.io.File;
import java.util.HashMap;
import java.util.Map;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;

import com.iueang.util.DownLoadImg;
import com.iueang.util.GetBody;
import com.iueang.util.OssUtil2;
import com.iueang.util.UrlUtil;
/**
 * Demo controller that fetches an article page by URL and rewrites its images so that they
 * are served from the project's own OSS bucket instead of the original host.
 */
@Controller
public class TestUrl {

	/** Public base URL of the uploaded images; its path matches {@link #OSS_KEY_PREFIX}. */
	private static final String OSS_IMG_HOST = "http://yueang2.oss-cn-qingdao.aliyuncs.com/testimg/";

	/** Object-key prefix (folder) inside the bucket that images are uploaded under. */
	private static final String OSS_KEY_PREFIX = "testimg/";

	/** Local directory that {@code DownLoadImg.downloadPicture} writes downloaded images to. */
	private static final String LOCAL_IMG_DIR = "D:\\m2\\";

	/**
	 * Returns the view name of the demo page.
	 *
	 * @return the static view path
	 */
	@RequestMapping("tohtml")
	public String tohtml() {
		return "html/index.html";
	}

	/**
	 * Fetches the page at {@code url}, extracts a fragment of it, and re-hosts every
	 * {@code <img data-src="...">} in that fragment: each image is downloaded locally,
	 * uploaded to OSS, and on success its {@code src} attribute is pointed at the OSS copy.
	 * Images whose upload fails are left untouched rather than being pointed at an object
	 * that does not exist.
	 *
	 * @param url address of the article to fetch
	 * @return a map with the single key {@code resultHtml} holding the rewritten HTML
	 */
	@RequestMapping("getHtml")
	@ResponseBody
	public Map<String, String> getHtml(String url) {
		String html = UrlUtil.getAccess(url);
		// NOTE(review): as written this lazy group matches the empty string at the start of
		// the input, so the extracted fragment is always empty. The delimiting tags around
		// the group appear to have been lost - TODO restore the intended pattern.
		String reg = "(.*?)";
		String newsBody = GetBody.getSubUtilSimple(html, reg);

		Document doc = Jsoup.parse(newsBody);
		Elements images = doc.select("img[data-src]");
		for (Element image : images) {
			String imgUrl = image.attr("data-src");
			// Stage the image on local disk, then push it to OSS.
			String filename = DownLoadImg.downloadPicture(imgUrl);
			File file = new File(LOCAL_IMG_DIR + filename);
			boolean uploaded = OssUtil2.uploadFileToOss(file, OSS_KEY_PREFIX + filename);
			if (uploaded) {
				// The local copy is only a staging file; drop it once OSS has the image.
				file.delete();
				image.attr("src", OSS_IMG_HOST + filename);
			}
		}

		Map<String, String> map = new HashMap<String, String>();
		map.put("resultHtml", doc.toString());
		return map;
	}
}

util工具类

GetBody类

package com.iueang.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex helper for pulling a single fragment out of a larger string such as an HTML page.
 */
public class GetBody {

	/**
	 * Returns the text captured by group 1 of the first match of {@code reg} in {@code html}.
	 *
	 * @param html the text to search; may be {@code null}
	 * @param reg  a regular expression that declares at least one capturing group; may be
	 *             {@code null}
	 * @return the content of group 1 for the first match, or the empty string when either
	 *         argument is {@code null}, when nothing matches, or when group 1 did not take
	 *         part in the match
	 * @throws IndexOutOfBoundsException if {@code reg} matches but declares no capturing group
	 * @throws java.util.regex.PatternSyntaxException if {@code reg} is not a valid pattern
	 */
	public static String getSubUtilSimple(String html, String reg) {
		if (html == null || reg == null) {
			return "";
		}
		Matcher m = Pattern.compile(reg).matcher(html);
		if (!m.find()) {
			return "";
		}
		// group(1) is null when the group is optional and was skipped by this match.
		String captured = m.group(1);
		return captured == null ? "" : captured;
	}

}

OssUtil2类

package com.iueang.util;

import java.io.File;
import java.util.HashMap;
import java.util.Map;

import com.aliyun.oss.OSSClient;
import com.aliyun.oss.model.ObjectMetadata;

/**
 * Minimal helper for uploading a local file to an Aliyun OSS bucket.
 */
public class OssUtil2 {
	// The following values are required; see the documentation referenced at the end of the article.
	static String endpoint = "http://oss-cn-qingdao.aliyuncs.com";
	static String accessKeyId = "oss获取";
	static String accessKeySecert = "oss获取";
	static String bucketName = "yueang2";

	/**
	 * Uploads a single file to OSS.
	 *
	 * @param file    the local file to upload
	 * @param objName the object key to store it under, including any folder prefix,
	 *                e.g. {@code game/game/test.txt}
	 * @return {@code true} if the upload call completed; {@code false} if {@code file} is
	 *         {@code null} or not an existing regular file, if {@code objName} is
	 *         {@code null}, or if creating the client or uploading threw an exception
	 */
	public static boolean uploadFileToOss(File file, String objName) {
		// Nothing to upload: fail fast instead of opening a client just to have it throw.
		if (file == null || !file.isFile() || objName == null) {
			return false;
		}
		OSSClient ossClient = null;
		try {
			ossClient = new OSSClient(endpoint, accessKeyId, accessKeySecert);
			ossClient.putObject(bucketName, objName, file, new ObjectMetadata());
			return true;
		} catch (Exception e) {
			e.printStackTrace();
			return false;
		} finally {
			// Release the client on the failure path as well as on success.
			if (ossClient != null) {
				ossClient.shutdown();
			}
		}
	}
}

DownLoadImg类

package com.iueang.util;

import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.UUID;

import sun.misc.BASE64Encoder;
/**
 * Downloads a remote image into a local staging directory.
 */
public class DownLoadImg {

	/** Local directory that downloaded images are written to. */
	private static final String LOCAL_DIR = "D:/m2/";

	/**
	 * Downloads the resource at the given URL into {@code D:/m2/} under a freshly generated
	 * name of the form {@code iueang<uuid>.png}. The {@code .png} suffix is applied
	 * regardless of the actual content.
	 *
	 * <p>Failures are printed and otherwise ignored; the generated name is returned either
	 * way. After a failed download no file with that name is left behind, so a caller can
	 * detect the failure by checking whether the file exists.
	 *
	 * @param urlList the URL of a single image (despite the name, not a list)
	 * @return the generated file name, without the directory part
	 */
	public static String downloadPicture(String urlList) {
		String filename = "iueang" + UUID.randomUUID().toString() + ".png";
		File target = new File(LOCAL_DIR + filename);
		try {
			URL url = new URL(urlList);
			// try-with-resources closes both streams even when the copy fails part-way.
			try (DataInputStream in = new DataInputStream(url.openStream());
					FileOutputStream out = new FileOutputStream(target)) {
				byte[] buffer = new byte[1024];
				int length;
				// Stream straight to disk instead of holding the whole image in memory.
				while ((length = in.read(buffer)) != -1) {
					out.write(buffer, 0, length);
				}
			}
		} catch (MalformedURLException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
			// Do not leave a truncated image behind for the caller to upload.
			target.delete();
		}
		System.out.println("Download返回的filname="+filename);
		return filename;
	}
}

 

你可能感兴趣的:(高级技术)