需求:每月初下载http://www.safe.gov.cn/safe/gzhbdmyzslb/index.html中上个月的表格进行解析。
<dependencies>
    <!-- HTTP client used to fetch the index page, the detail page and the workbook. -->
    <!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
    <dependency>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpclient</artifactId>
        <version>4.5.5</version>
    </dependency>
    <!-- FileUtils.copyToFile is used to store the downloaded workbook. -->
    <dependency>
        <groupId>commons-io</groupId>
        <artifactId>commons-io</artifactId>
        <version>2.5</version>
    </dependency>
    <!-- HTML parsing. This dependency was previously declared twice; Maven warns about
         duplicate declarations, so only a single declaration is kept. -->
    <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.11.3</version>
    </dependency>
    <dependency>
        <groupId>org.seleniumhq.selenium</groupId>
        <artifactId>selenium-java</artifactId>
        <version>3.3.1</version>
    </dependency>
    <!-- Apache POI: HSSF (.xls) and XSSF (.xlsx) workbook readers. -->
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi</artifactId>
        <version>3.17</version>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml</artifactId>
        <version>3.17</version>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-examples</artifactId>
        <version>3.17</version>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-excelant</artifactId>
        <version>3.17</version>
    </dependency>
    <!-- fastjson converts the parsed row maps into ConversionRateToDollar beans. -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.46</version>
    </dependency>
    <!-- Spring Boot / Spring context: application bootstrap and @Scheduled support. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-autoconfigure</artifactId>
        <version>2.1.5.RELEASE</version>
    </dependency>
    <dependency>
        <groupId>org.springframework</groupId>
        <artifactId>spring-context</artifactId>
        <version>5.1.7.RELEASE</version>
    </dependency>
</dependencies>
// Excerpt: `document` is the parsed index page and `document1` the parsed detail page;
// both are created outside this fragment.
// Look up the elements whose class name is "list_conr".
Elements listConr = document.getElementsByClass("list_conr");
// The index page is a list with one link per month. We want the most recent month, i.e. the
// link inside the dt of the first li of the first ul; that link opens the page from which
// the table can be downloaded. Any missing element makes this chain throw.
Element ahref=listConr.select("ul").get(0).select("li").get(0).selectFirst("dt").selectFirst("a");
String hrefDetail=ahref.attr("href");
// Get the download link of the xlsx file: the first anchor inside the third p
// (index 2) of the first div found under the first "detail_content" element.
Element axlsxhref=document1.getElementsByClass("detail_content").first().select("div").first().select("p").get(2).select("a").first();
String axlsxhrefDetail=axlsxhref.attr("href");
下载表格:由于加了定时任务,为了区分每次下载的文件,在表格文件名中加入当前时间。
// Excerpt: `entity3` (the HTTP entity of the download response) and `fileType`
// (the file extension including the leading dot) are defined outside this fragment.
// Format the current time; the result becomes part of the target file name below.
SimpleDateFormat dateFormat=new SimpleDateFormat("yyyy年MM月dd日HH_mm_ss");
String date=dateFormat.format(new Date());
// Copy the response body stream into the target file.
InputStream inputStream = entity3.getContent();
FileUtils.copyToFile(inputStream,new File("E:\\rmbrate\\rate"+date+fileType));
/**
 * Mutable JavaBean describing one entry of the "conversion rate of various currencies
 * to the US dollar" table.
 *
 * Every property has a conventional getter/setter pair; object fields start out as
 * {@code null} and {@code rate} starts out as {@code 0.0}.
 */
public class ConversionRateToDollar {

    /** Month label the entry belongs to. */
    private String month;

    /** Currency name. */
    private String currency;

    /** Chinese currency name. */
    private String currencyCN;

    /** Unit the rate is quoted for. */
    private String unit;

    /** Conversion rate to the US dollar. */
    private double rate;

    // ---- setters ----

    public void setMonth(final String month) { this.month = month; }

    public void setCurrency(final String currency) { this.currency = currency; }

    public void setCurrencyCN(final String currencyCN) { this.currencyCN = currencyCN; }

    public void setUnit(final String unit) { this.unit = unit; }

    public void setRate(final double rate) { this.rate = rate; }

    // ---- getters ----

    public String getMonth() { return month; }

    public String getCurrency() { return currency; }

    public String getCurrencyCN() { return currencyCN; }

    public String getUnit() { return unit; }

    public double getRate() { return rate; }
}
我写了一个处理表格的ExcelUtil类,分别对.xls和.xlsx文件进行处理,返回JavaBean的集合对象
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.alibaba.fastjson.JSONObject;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import com.alibaba.fastjson.JSON;
/**
 * Reads the "conversion rates of various currencies to the US dollar" workbook and
 * converts its rows into {@link ConversionRateToDollar} beans.
 *
 * Sheet layout this reader relies on (zero-based indices): the month label sits in
 * row 2, column 1; data starts at row 6; the last three rows are footnotes; every data
 * row holds two records, one in columns 1-4 and one in columns 5-8, each ordered as
 * currency, Chinese currency name, unit, rate.
 *
 * NOTE(review): the end of the data area is derived from getPhysicalNumberOfRows(),
 * which only counts rows that are defined in the file. This presumably matches the
 * published sheets; confirm if a sheet with undefined rows ever shows up.
 */
public class ExcelUtil {

    /** Row holding the month label. */
    private static final int DATE_ROW_INDEX = 2;

    /** Column holding the month label. */
    private static final int DATE_COLUMN_INDEX = 1;

    /** First row that contains rate data. */
    private static final int FIRST_DATA_ROW_INDEX = 6;

    /** Number of footnote rows at the bottom of the sheet that must be skipped. */
    private static final int FOOTNOTE_ROW_COUNT = 3;

    /** First column of the left-hand and of the right-hand record group. */
    private static final int[] GROUP_START_COLUMNS = {1, 5};

    /**
     * Imports the first sheet of an .xlsx workbook.
     *
     * @param filePath path of the .xlsx file
     * @return all records of the left column group followed by all records of the right group
     * @throws IllegalStateException if the month cell is missing
     * @throws Exception if the file cannot be opened or read
     */
    public List<ConversionRateToDollar> importExcelActionForXLSX(String filePath) throws Exception {
        // try-with-resources: the original never closed the stream or the workbook.
        try (FileInputStream input = new FileInputStream(filePath);
                XSSFWorkbook workbook = new XSSFWorkbook(input)) {
            XSSFSheet sheet = workbook.getSheetAt(0);

            XSSFRow dateRow = sheet.getRow(DATE_ROW_INDEX);
            XSSFCell dateCell = dateRow == null ? null : dateRow.getCell(DATE_COLUMN_INDEX);
            if (dateCell == null) {
                throw new IllegalStateException("Month cell (row 3, column 2) not found in " + filePath);
            }
            String month = getValue(dateCell);

            int endRow = sheet.getPhysicalNumberOfRows() - FOOTNOTE_ROW_COUNT;
            List<Map<String, Object>> records = new ArrayList<Map<String, Object>>();
            // Left group first, then right group, so the result order matches the original.
            for (int startColumn : GROUP_START_COLUMNS) {
                for (int i = FIRST_DATA_ROW_INDEX; i < endRow; i++) {
                    XSSFRow row = sheet.getRow(i);
                    if (row != null) {
                        // Both groups use the month read from the sheet; the right group
                        // used to be stamped with the hard-coded value "2019-05".
                        records.add(toRecord(
                                month,
                                getValue(row.getCell(startColumn)),
                                getValue(row.getCell(startColumn + 1)),
                                getValue(row.getCell(startColumn + 2)),
                                getValue(row.getCell(startColumn + 3))));
                    }
                }
            }
            return toBeans(records);
        }
    }

    /**
     * Imports the first sheet of an .xls workbook.
     *
     * @param filePath path of the .xls file
     * @return all records of the left column group followed by all records of the right group
     * @throws IllegalStateException if the month cell is missing
     * @throws Exception if the file cannot be opened or read
     */
    public List<ConversionRateToDollar> importExcelActionForXLS(String filePath) throws Exception {
        try (FileInputStream input = new FileInputStream(filePath);
                HSSFWorkbook workbook = new HSSFWorkbook(input)) {
            HSSFSheet sheet = workbook.getSheetAt(0);

            HSSFRow dateRow = sheet.getRow(DATE_ROW_INDEX);
            HSSFCell dateCell = dateRow == null ? null : dateRow.getCell(DATE_COLUMN_INDEX);
            if (dateCell == null) {
                throw new IllegalStateException("Month cell (row 3, column 2) not found in " + filePath);
            }
            String month = getValueForXLS(dateCell);

            int endRow = sheet.getPhysicalNumberOfRows() - FOOTNOTE_ROW_COUNT;
            List<Map<String, Object>> records = new ArrayList<Map<String, Object>>();
            for (int startColumn : GROUP_START_COLUMNS) {
                for (int i = FIRST_DATA_ROW_INDEX; i < endRow; i++) {
                    HSSFRow row = sheet.getRow(i);
                    if (row != null) {
                        records.add(toRecord(
                                month,
                                getValueForXLS(row.getCell(startColumn)),
                                getValueForXLS(row.getCell(startColumn + 1)),
                                getValueForXLS(row.getCell(startColumn + 2)),
                                getValueForXLS(row.getCell(startColumn + 3))));
                    }
                }
            }
            return toBeans(records);
        }
    }

    /** Builds one record map whose keys are the bean property names. */
    private static Map<String, Object> toRecord(
            String month, String currency, String currencyCN, String unit, String rate) {
        Map<String, Object> record = new HashMap<String, Object>();
        record.put("month", month);
        record.put("currency", currency);
        record.put("currencyCN", currencyCN);
        record.put("unit", unit);
        record.put("rate", rate);
        return record;
    }

    /** Prints the records as JSON and lets fastjson map that JSON onto the bean type. */
    private static List<ConversionRateToDollar> toBeans(List<Map<String, Object>> records) {
        // Serialize once and reuse the text for both the log line and the conversion.
        String json = JSON.toJSONString(records);
        System.out.println("list = " + json);
        return JSONObject.parseArray(json, ConversionRateToDollar.class);
    }

    /**
     * Returns the text form of an .xlsx cell: "" for a missing cell, the boolean or
     * numeric value as text for those cell types, otherwise getStringCellValue().
     */
    private static String getValue(XSSFCell xSSFCell) {
        if (null == xSSFCell) {
            return "";
        }
        int cellType = xSSFCell.getCellType();
        if (cellType == XSSFCell.CELL_TYPE_BOOLEAN) {
            return String.valueOf(xSSFCell.getBooleanCellValue());
        }
        if (cellType == XSSFCell.CELL_TYPE_NUMERIC) {
            return String.valueOf(xSSFCell.getNumericCellValue());
        }
        return String.valueOf(xSSFCell.getStringCellValue());
    }

    /**
     * Returns the text form of an .xls cell: "" for a missing cell, the boolean or
     * numeric value as text for those cell types, otherwise getStringCellValue().
     */
    private static String getValueForXLS(HSSFCell hSSFCell) {
        if (null == hSSFCell) {
            return "";
        }
        int cellType = hSSFCell.getCellType();
        if (cellType == HSSFCell.CELL_TYPE_BOOLEAN) {
            return String.valueOf(hSSFCell.getBooleanCellValue());
        }
        if (cellType == HSSFCell.CELL_TYPE_NUMERIC) {
            return String.valueOf(hSSFCell.getNumericCellValue());
        }
        return String.valueOf(hSSFCell.getStringCellValue());
    }
}
在SpringBoot的启动类上添加@EnableScheduling注解,以开启定时任务功能。
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.scheduling.annotation.EnableScheduling;
/**
 * Application entry point. {@code @EnableScheduling} switches on processing of the
 * {@code @Scheduled} methods declared on the application's components.
 */
@SpringBootApplication
@EnableScheduling
public class GetExcelByHref {

    /**
     * Starts the Spring application with this class as its primary source.
     *
     * @param args command-line arguments, handed to Spring unchanged
     */
    public static void main(final String[] args) {
        final SpringApplication application = new SpringApplication(GetExcelByHref.class);
        application.run(args);
    }
}
配置组件:在组件中编写需要定时执行的任务方法,并通过cron表达式指定执行时间。表达式本应配置为每月初执行(例如"0 0 0 1 * *"),这里为了便于测试,配置为每五分钟执行一次。
// Skeleton of the scheduled component; the method body is elided in this excerpt
// and shown in full in the complete listing.
@Component
public class GetConversionRateToDollar {
// Test schedule: fires every five minutes. The job is meant to run at the
// beginning of each month once testing is done.
@Scheduled(cron = "0 0/5 * * * *")
public void getConversionRateToDollar() throws IOException {
.....
}
}
package com.crawler;
/**
 * Mutable JavaBean describing one entry of the "conversion rate of various currencies
 * to the US dollar" table.
 *
 * Every property has a conventional getter/setter pair; object fields start out as
 * {@code null} and {@code rate} starts out as {@code 0.0}.
 */
public class ConversionRateToDollar {

    /** Month label the entry belongs to. */
    private String month;

    /** Currency name. */
    private String currency;

    /** Chinese currency name. */
    private String currencyCN;

    /** Unit the rate is quoted for. */
    private String unit;

    /** Conversion rate to the US dollar. */
    private double rate;

    // ---- setters ----

    public void setMonth(final String month) { this.month = month; }

    public void setCurrency(final String currency) { this.currency = currency; }

    public void setCurrencyCN(final String currencyCN) { this.currencyCN = currencyCN; }

    public void setUnit(final String unit) { this.unit = unit; }

    public void setRate(final double rate) { this.rate = rate; }

    // ---- getters ----

    public String getMonth() { return month; }

    public String getCurrency() { return currency; }

    public String getCurrencyCN() { return currencyCN; }

    public String getUnit() { return unit; }

    public double getRate() { return rate; }
}
package com.crawler;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.alibaba.fastjson.JSONObject;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import com.alibaba.fastjson.JSON;
/**
 * Reads the "conversion rates of various currencies to the US dollar" workbook and
 * converts its rows into {@link ConversionRateToDollar} beans.
 *
 * Sheet layout this reader relies on (zero-based indices): the month label sits in
 * row 2, column 1; data starts at row 6; the last three rows are footnotes; every data
 * row holds two records, one in columns 1-4 and one in columns 5-8, each ordered as
 * currency, Chinese currency name, unit, rate.
 *
 * NOTE(review): the end of the data area is derived from getPhysicalNumberOfRows(),
 * which only counts rows that are defined in the file. This presumably matches the
 * published sheets; confirm if a sheet with undefined rows ever shows up.
 */
public class ExcelUtil {

    /** Row holding the month label. */
    private static final int DATE_ROW_INDEX = 2;

    /** Column holding the month label. */
    private static final int DATE_COLUMN_INDEX = 1;

    /** First row that contains rate data. */
    private static final int FIRST_DATA_ROW_INDEX = 6;

    /** Number of footnote rows at the bottom of the sheet that must be skipped. */
    private static final int FOOTNOTE_ROW_COUNT = 3;

    /** First column of the left-hand and of the right-hand record group. */
    private static final int[] GROUP_START_COLUMNS = {1, 5};

    /**
     * Imports the first sheet of an .xlsx workbook.
     *
     * @param filePath path of the .xlsx file
     * @return all records of the left column group followed by all records of the right group
     * @throws IllegalStateException if the month cell is missing
     * @throws Exception if the file cannot be opened or read
     */
    public List<ConversionRateToDollar> importExcelActionForXLSX(String filePath) throws Exception {
        // try-with-resources: the original never closed the stream or the workbook.
        try (FileInputStream input = new FileInputStream(filePath);
                XSSFWorkbook workbook = new XSSFWorkbook(input)) {
            XSSFSheet sheet = workbook.getSheetAt(0);

            XSSFRow dateRow = sheet.getRow(DATE_ROW_INDEX);
            XSSFCell dateCell = dateRow == null ? null : dateRow.getCell(DATE_COLUMN_INDEX);
            if (dateCell == null) {
                throw new IllegalStateException("Month cell (row 3, column 2) not found in " + filePath);
            }
            String month = getValue(dateCell);

            int endRow = sheet.getPhysicalNumberOfRows() - FOOTNOTE_ROW_COUNT;
            List<Map<String, Object>> records = new ArrayList<Map<String, Object>>();
            // Left group first, then right group, so the result order matches the original.
            for (int startColumn : GROUP_START_COLUMNS) {
                for (int i = FIRST_DATA_ROW_INDEX; i < endRow; i++) {
                    XSSFRow row = sheet.getRow(i);
                    if (row != null) {
                        // Both groups use the month read from the sheet; the right group
                        // used to be stamped with the hard-coded value "2019-05".
                        records.add(toRecord(
                                month,
                                getValue(row.getCell(startColumn)),
                                getValue(row.getCell(startColumn + 1)),
                                getValue(row.getCell(startColumn + 2)),
                                getValue(row.getCell(startColumn + 3))));
                    }
                }
            }
            return toBeans(records);
        }
    }

    /**
     * Imports the first sheet of an .xls workbook.
     *
     * @param filePath path of the .xls file
     * @return all records of the left column group followed by all records of the right group
     * @throws IllegalStateException if the month cell is missing
     * @throws Exception if the file cannot be opened or read
     */
    public List<ConversionRateToDollar> importExcelActionForXLS(String filePath) throws Exception {
        try (FileInputStream input = new FileInputStream(filePath);
                HSSFWorkbook workbook = new HSSFWorkbook(input)) {
            HSSFSheet sheet = workbook.getSheetAt(0);

            HSSFRow dateRow = sheet.getRow(DATE_ROW_INDEX);
            HSSFCell dateCell = dateRow == null ? null : dateRow.getCell(DATE_COLUMN_INDEX);
            if (dateCell == null) {
                throw new IllegalStateException("Month cell (row 3, column 2) not found in " + filePath);
            }
            String month = getValueForXLS(dateCell);

            int endRow = sheet.getPhysicalNumberOfRows() - FOOTNOTE_ROW_COUNT;
            List<Map<String, Object>> records = new ArrayList<Map<String, Object>>();
            for (int startColumn : GROUP_START_COLUMNS) {
                for (int i = FIRST_DATA_ROW_INDEX; i < endRow; i++) {
                    HSSFRow row = sheet.getRow(i);
                    if (row != null) {
                        records.add(toRecord(
                                month,
                                getValueForXLS(row.getCell(startColumn)),
                                getValueForXLS(row.getCell(startColumn + 1)),
                                getValueForXLS(row.getCell(startColumn + 2)),
                                getValueForXLS(row.getCell(startColumn + 3))));
                    }
                }
            }
            return toBeans(records);
        }
    }

    /** Builds one record map whose keys are the bean property names. */
    private static Map<String, Object> toRecord(
            String month, String currency, String currencyCN, String unit, String rate) {
        Map<String, Object> record = new HashMap<String, Object>();
        record.put("month", month);
        record.put("currency", currency);
        record.put("currencyCN", currencyCN);
        record.put("unit", unit);
        record.put("rate", rate);
        return record;
    }

    /** Prints the records as JSON and lets fastjson map that JSON onto the bean type. */
    private static List<ConversionRateToDollar> toBeans(List<Map<String, Object>> records) {
        // Serialize once and reuse the text for both the log line and the conversion.
        String json = JSON.toJSONString(records);
        System.out.println("list = " + json);
        return JSONObject.parseArray(json, ConversionRateToDollar.class);
    }

    /**
     * Returns the text form of an .xlsx cell: "" for a missing cell, the boolean or
     * numeric value as text for those cell types, otherwise getStringCellValue().
     */
    private static String getValue(XSSFCell xSSFCell) {
        if (null == xSSFCell) {
            return "";
        }
        int cellType = xSSFCell.getCellType();
        if (cellType == XSSFCell.CELL_TYPE_BOOLEAN) {
            return String.valueOf(xSSFCell.getBooleanCellValue());
        }
        if (cellType == XSSFCell.CELL_TYPE_NUMERIC) {
            return String.valueOf(xSSFCell.getNumericCellValue());
        }
        return String.valueOf(xSSFCell.getStringCellValue());
    }

    /**
     * Returns the text form of an .xls cell: "" for a missing cell, the boolean or
     * numeric value as text for those cell types, otherwise getStringCellValue().
     */
    private static String getValueForXLS(HSSFCell hSSFCell) {
        if (null == hSSFCell) {
            return "";
        }
        int cellType = hSSFCell.getCellType();
        if (cellType == HSSFCell.CELL_TYPE_BOOLEAN) {
            return String.valueOf(hSSFCell.getBooleanCellValue());
        }
        if (cellType == HSSFCell.CELL_TYPE_NUMERIC) {
            return String.valueOf(hSSFCell.getNumericCellValue());
        }
        return String.valueOf(hSSFCell.getStringCellValue());
    }
}
package com.crawler;
import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
/**
 * Scheduled job that downloads the most recent "conversion rate to the US dollar"
 * workbook from the SAFE web site and parses it with {@link ExcelUtil}.
 *
 * Steps: fetch the index page, follow the first month link to its detail page,
 * locate the workbook link there, download the file and parse it.
 */
@Component
public class GetConversionRateToDollar {

    /** Index page listing one detail-page link per month. */
    private static final String INDEX_URL = "http://www.safe.gov.cn/safe/gzhbdmyzslb/index.html";

    /** Target path prefix; a timestamp and the file extension are appended to it. */
    private static final String DOWNLOAD_PATH_PREFIX = "E:\\rmbrate\\rate";

    /** Timestamp pattern used inside the downloaded file's name. */
    private static final String TIMESTAMP_PATTERN = "yyyy年MM月dd日HH_mm_ss";

    private static final int HTTP_OK = 200;

    /**
     * Runs the download-and-parse job.
     *
     * The cron expression fires every five minutes, which is a test setting; the job
     * is intended to run at the start of each month. I/O failures and unexpected page
     * layouts are reported on stderr and end the current run without propagating.
     *
     * @throws IOException declared for interface compatibility; failures are handled internally
     */
    @Scheduled(cron = "0 0/5 * * * *")
    public void getConversionRateToDollar() throws IOException {
        // One client serves all three requests and is always closed.
        try (CloseableHttpClient client = HttpClients.createDefault()) {
            Document indexPage = fetchPage(client, INDEX_URL);
            String detailUrl = absoluteHref(findLatestMonthLink(indexPage), "monthly detail page");

            Document detailPage = fetchPage(client, detailUrl);
            String workbookUrl = absoluteHref(findWorkbookLink(detailPage), "workbook download");

            String fileType = fileExtensionOf(workbookUrl);
            String timestamp = new SimpleDateFormat(TIMESTAMP_PATTERN).format(new Date());
            String filePath = DOWNLOAD_PATH_PREFIX + timestamp + fileType;

            download(client, workbookUrl, new File(filePath));
            parseAndReport(filePath, fileType);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Fetches {@code url} and parses the body as HTML, using the URL as base for relative links. */
    private static Document fetchPage(CloseableHttpClient client, String url) throws IOException {
        HttpGet request = new HttpGet(url);
        System.out.println(request.getRequestLine());
        // Closing the response releases the connection even when parsing fails.
        try (CloseableHttpResponse response = client.execute(request)) {
            System.out.println("status:" + response.getStatusLine());
            requireOk(response, url);
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new IOException("Empty response body from " + url);
            }
            System.out.println("contentEncoding:" + entity.getContentEncoding());
            return Jsoup.parse(EntityUtils.toString(entity, "UTF-8"), url);
        }
    }

    /** Streams the body of {@code url} into {@code target}. */
    private static void download(CloseableHttpClient client, String url, File target) throws IOException {
        try (CloseableHttpResponse response = client.execute(new HttpGet(url))) {
            requireOk(response, url);
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new IOException("Empty response body from " + url);
            }
            try (InputStream body = entity.getContent()) {
                FileUtils.copyToFile(body, target);
            }
        }
    }

    /** Rejects every response whose status code is not 200. */
    private static void requireOk(CloseableHttpResponse response, String url) throws IOException {
        int status = response.getStatusLine().getStatusCode();
        if (status != HTTP_OK) {
            throw new IOException("Unexpected HTTP status " + status + " for " + url);
        }
    }

    /**
     * Finds the link of the first list entry on the index page: the first anchor in the
     * first dt of the first li of the first ul below the "list_conr" element(s).
     */
    private static Element findLatestMonthLink(Document indexPage) throws IOException {
        Elements lists = indexPage.getElementsByClass("list_conr").select("ul");
        Elements items = lists.isEmpty() ? lists : lists.get(0).select("li");
        Element title = items.isEmpty() ? null : items.get(0).selectFirst("dt");
        Element link = title == null ? null : title.selectFirst("a");
        if (link == null) {
            throw new IOException("Index page layout changed: no month link under 'list_conr'");
        }
        return link;
    }

    /**
     * Finds the workbook link on a detail page: the first anchor in the third paragraph
     * of the first div found under the first "detail_content" element.
     */
    private static Element findWorkbookLink(Document detailPage) throws IOException {
        Element content = detailPage.getElementsByClass("detail_content").first();
        Element container = content == null ? null : content.select("div").first();
        Elements paragraphs = container == null ? new Elements() : container.select("p");
        Element link = paragraphs.size() > 2 ? paragraphs.get(2).select("a").first() : null;
        if (link == null) {
            throw new IOException("Detail page layout changed: no workbook link under 'detail_content'");
        }
        return link;
    }

    /** Resolves the link's href against the page URL, so relative and absolute hrefs both work. */
    private static String absoluteHref(Element link, String description) throws IOException {
        String url = link.absUrl("href");
        if (url.isEmpty()) {
            throw new IOException("Cannot resolve " + description + " link: " + link.attr("href"));
        }
        return url;
    }

    /** Returns the extension of the URL's last path segment, including the leading dot. */
    private static String fileExtensionOf(String url) throws IOException {
        int dot = url.lastIndexOf('.');
        if (dot < 0 || dot < url.lastIndexOf('/')) {
            throw new IOException("Download URL has no file extension: " + url);
        }
        return url.substring(dot);
    }

    /**
     * Parses the downloaded workbook according to its extension and prints the number of
     * records. Parsing errors are printed and do not propagate; an extension other than
     * .xlsx or .xls yields a count of 0.
     */
    private static void parseAndReport(String filePath, String fileType) {
        List<ConversionRateToDollar> rates = new ArrayList<ConversionRateToDollar>();
        ExcelUtil excelUtil = new ExcelUtil();
        try {
            if (".xlsx".equalsIgnoreCase(fileType)) {
                rates = excelUtil.importExcelActionForXLSX(filePath);
            } else if (".xls".equalsIgnoreCase(fileType)) {
                rates = excelUtil.importExcelActionForXLS(filePath);
            }
            System.out.println(rates.size());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
package com.crawler;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.scheduling.annotation.EnableScheduling;
/**
 * Application entry point. {@code @EnableScheduling} switches on processing of the
 * {@code @Scheduled} methods declared on the application's components.
 */
@SpringBootApplication
@EnableScheduling
public class GetExcelByHref {

    /**
     * Starts the Spring application with this class as its primary source.
     *
     * @param args command-line arguments, handed to Spring unchanged
     */
    public static void main(final String[] args) {
        final SpringApplication application = new SpringApplication(GetExcelByHref.class);
        application.run(args);
    }
}