1、项目搭建:idea + gradle + springboot
build.gradle:引入selenium相关依赖
// Build script for the boom-selenium demo: a Spring Boot web application that pulls in
// Selenium (Chrome driver), JavaMail and Flying Saucer / iText for HTML-to-PDF conversion.
apply plugin: 'java'
apply plugin: 'maven'
apply plugin: 'idea'
apply plugin: 'org.springframework.boot'

sourceCompatibility = 1.8
group = 'com.boom.basement'
// NOTE(review): these two are declared with `def`, so they look like script-local variables
// rather than assignments to the project's version / artifact id -- confirm the intent.
def version = '1.0.0.RELEASE'
def artifactId = 'boom-selenium'

buildscript {
    ext {
        springBootVersion = '1.5.12.RELEASE'
    }
    repositories {
        // Resolve build plugins over HTTPS so artifacts cannot be tampered with in transit.
        maven { url 'https://maven.aliyun.com/nexus/content/groups/public/' }
    }
    dependencies {
        classpath("org.springframework.boot:spring-boot-gradle-plugin:${springBootVersion}")
        classpath("org.springframework.boot:spring-boot-maven-plugin:${springBootVersion}")
    }
}

repositories {
    // Resolve project dependencies over HTTPS as well.
    maven { url 'https://maven.aliyun.com/nexus/content/groups/public/' }
}

dependencies {
    // Spring Boot
    compile "org.springframework.boot:spring-boot-starter:$springBootVersion"
    compile "org.springframework.boot:spring-boot-starter-web:$springBootVersion"
    compile "org.springframework.boot:spring-boot-starter-thymeleaf:$springBootVersion"

    // Mail sending: plain javax.mail is used instead of spring-boot-starter-mail
    // (the starter is left disabled below).
    // compile "org.springframework.boot:spring-boot-starter-mail:$springBootVersion"
    compile group: 'javax.mail', name: 'mail', version: '1.4.7'

    // Selenium: selenium-api and selenium-remote-driver are declared explicitly at 3.141.59
    // and excluded from selenium-chrome-driver's transitive dependencies, so the versions
    // pinned here are the ones that end up on the classpath.
    compile group: 'org.seleniumhq.selenium', name: 'selenium-api', version: '3.141.59'
    compile group: 'org.seleniumhq.selenium', name: 'selenium-remote-driver', version: '3.141.59'
    compile('org.seleniumhq.selenium:selenium-chrome-driver:3.141.59') {
        exclude module: 'selenium-api'
        exclude module: 'selenium-remote-driver'
    }

    // PDF generation
    compile 'com.itextpdf:itextpdf:5.4.2'
    compile 'org.xhtmlrenderer:flying-saucer-pdf:9.0.8'
}
注意点:
①selenium-chrome-driver包自动依赖的selenium-api和selenium-remote-driver的版本并不是与其版本相同的3.141.59,需排除后手动引入正确版本
②需使用javax.mail,SpringBoot集成的spring-boot-starter-mail可能会导致邮箱服务器连接超时(具体不知道原因)
③flying-saucer-pdf的版本使用9.0.8
主启动类:
/**
 * Application entry point for the Selenium demo.
 */
@SpringBootApplication
public class BoomSeleniumApplication {

    /**
     * Starts the Spring application, using this class as the configuration source.
     *
     * @param args command-line arguments, passed through to Spring Boot unchanged
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(BoomSeleniumApplication.class);
        application.run(args);
    }
}
2、编写controller:完成源代码爬取和转为pdf并通过邮件发送等
import com.lowagie.text.DocumentException;
import com.sun.mail.util.MailSSLSocketFactory;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.security.GeneralSecurityException;
import java.util.Properties;
import javax.activation.DataHandler;
import javax.activation.FileDataSource;
import javax.mail.*;
import javax.mail.internet.*;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;
import org.xhtmlrenderer.pdf.ITextRenderer;
/**
 * Demo controller that loads a page in headless Chrome, converts the page source to a PDF
 * file and mails both the HTML (as the body) and the PDF (as an attachment).
 */
@Controller
@RequestMapping("/selenium")
public class SeleniumController {

    /** Location of the chromedriver binary; it must be downloaded and placed there beforehand. */
    private static final String CHROME_DRIVER_PATH = "d:\\chromedriver.exe";

    /** Page that is loaded in the headless browser. */
    private static final String PAGE_URL = "http://localhost:8080/selenium/index";

    /** File the generated PDF is written to and later attached from. */
    private static final String PDF_PATH = "d:\\index.pdf";

    /** SMTP server used for sending. */
    private static final String SMTP_HOST = "smtp.qq.com";

    // NOTE(review): the mailbox values below are placeholders as found in the original;
    // replace them with a real sender account, authorization code and recipient.
    private static final String SENDER = "[email protected]";
    private static final String AUTH_CODE = "xxxxxx";
    private static final String RECIPIENT = "[email protected]";

    /**
     * Serves the test page.
     *
     * @return the view name {@code "index"}
     */
    @RequestMapping("/index")
    public String index() {
        return "index";
    }

    /**
     * Fetches the test page through headless Chrome, writes it to a PDF file and sends it by mail.
     *
     * <p>{@code @ResponseBody} marks the request as fully handled; without it Spring MVC would
     * try to resolve a view for this {@code void} handler after the mail has been sent.
     *
     * @throws GeneralSecurityException if the SSL socket factory cannot be created
     * @throws MessagingException if the message cannot be built or sent
     * @throws IOException if the PDF file cannot be written or the attachment name cannot be encoded
     * @throws DocumentException if the PDF cannot be generated
     */
    @RequestMapping("/sendMail")
    @ResponseBody
    public void sendMail() throws GeneralSecurityException, MessagingException, IOException, DocumentException {
        String pageSource = fetchPageSource();

        // try-with-resources guarantees the file handle is released even if rendering fails.
        try (OutputStream pdfOut = new FileOutputStream(PDF_PATH)) {
            createPDF(pdfOut, pageSource);
        }

        Session session = Session.getInstance(buildMailProperties());
        Message msg = buildMessage(session, pageSource);

        Transport transport = session.getTransport();
        try {
            // Log in with the mailbox and its authorization code.
            transport.connect(SMTP_HOST, SENDER, AUTH_CODE);
            transport.sendMessage(msg, new Address[]{new InternetAddress(RECIPIENT)});
        } finally {
            // Always release the SMTP connection, including when connect/send throws.
            transport.close();
        }
    }

    /** Loads {@link #PAGE_URL} in headless Chrome and returns its page source. */
    private static String fetchPageSource() {
        System.setProperty("webdriver.chrome.driver", CHROME_DRIVER_PATH);

        ChromeOptions chromeOptions = new ChromeOptions();
        // Run without a visible window (the original author notes headless mode is required).
        chromeOptions.addArguments("--headless");
        chromeOptions.addArguments("--window-size=1920,1080");

        WebDriver driver = new ChromeDriver(chromeOptions);
        try {
            driver.get(PAGE_URL);
            return driver.getPageSource();
        } finally {
            // Without quit() every request leaves a Chrome/chromedriver process behind.
            driver.quit();
        }
    }

    /** Builds the JavaMail session properties for authenticated SMTP over SSL. */
    private static Properties buildMailProperties() throws GeneralSecurityException {
        Properties props = new Properties();
        props.setProperty("mail.debug", "true");
        props.setProperty("mail.smtp.auth", "true");
        props.setProperty("mail.host", SMTP_HOST);
        props.setProperty("mail.transport.protocol", "smtp");

        MailSSLSocketFactory sf = new MailSSLSocketFactory();
        // NOTE(review): trusting all hosts skips the server identity check; kept as in the
        // original, but consider restricting it to the SMTP host.
        sf.setTrustAllHosts(true);
        props.put("mail.smtp.ssl.enable", "true");
        props.put("mail.smtp.ssl.socketFactory", sf);
        return props;
    }

    /** Builds the multipart message: the page source as HTML body plus the PDF as attachment. */
    private static Message buildMessage(Session session, String pageSource)
            throws MessagingException, IOException {
        Message msg = new MimeMessage(session);
        msg.setSubject("JavaMail Test");

        // Body part: the crawled page source, sent as HTML.
        MimeBodyPart text = new MimeBodyPart();
        text.setContent(pageSource, "text/html;charset=UTF-8");

        // Attachment part: the PDF file generated from the page source.
        MimeBodyPart attachment = new MimeBodyPart();
        DataHandler pdfHandler = new DataHandler(new FileDataSource(PDF_PATH));
        attachment.setDataHandler(pdfHandler);
        // The attachment file name is passed through MimeUtility.encodeText before being set.
        attachment.setFileName(MimeUtility.encodeText(pdfHandler.getName()));

        MimeMultipart multipart = new MimeMultipart();
        multipart.addBodyPart(text);
        multipart.addBodyPart(attachment);
        msg.setContent(multipart);

        msg.setFrom(new InternetAddress(SENDER));
        return msg;
    }

    /**
     * Converts HTML to PDF.
     *
     * <p>NOTE(review): the original carried disabled code that registered fonts through
     * {@code renderer.getFontResolver().addFont(...)} "for Chinese support"; presumably CJK
     * text needs such a font to render -- confirm before relying on it.
     *
     * @param out stream the PDF is written to; the caller remains responsible for closing it
     * @param html markup to render
     * @throws IOException if writing the PDF fails
     * @throws DocumentException if the PDF document cannot be produced
     */
    public static void createPDF(OutputStream out, String html) throws IOException, DocumentException {
        ITextRenderer renderer = new ITextRenderer();
        renderer.setDocumentFromString(html);
        renderer.layout();
        renderer.createPDF(out);
    }
}
注意:需提前下载chromedriver.exe,且其版本须与本机安装的Chrome浏览器版本相匹配。下载地址:https://chromedriver.chromium.org/downloads
3、index.html:注意须在resources/templates/文件夹下,因为SpringBoot默认将thymeleaf的模板位置设置在了该位置下,否则需要手动配置thymeleaf相关参数
<html lang="en" xmlns:th="http://www.thymeleaf.org">
<head>
    <!--/* Test page served by /selenium/index; every element is explicitly closed so the markup stays well-formed. */-->
    <title>Title</title>
</head>
<body>
<a href="https://www.baidu.com">Hello</a>
</body>
</html>
注意:
①在html中尽量不要有<meta>、<link>等标签,会影响pdf的转换;且将html作为邮件的正文时,邮件厂商出于安全考虑是不支持外联(link)的。详情参考:HTML邮件兼容问题
②邮件厂商对于发送text/html有限制,有可能在发送几次之后就发送不过去了,建议将html转换为pdf后发送