创建Spring Boot工程
选择依赖
配置pom.xml
4.0.0
com.landawang
operation-web
0.0.1-SNAPSHOT
jar
operation-web
Demo project for Spring Boot
org.springframework.boot
spring-boot-starter-parent
2.0.3.RELEASE
UTF-8
UTF-8
10
org.springframework.boot
spring-boot-starter-web
org.springframework.boot
spring-boot-devtools
runtime
org.springframework.boot
spring-boot-starter-test
test
org.springframework.boot
spring-boot-starter-thymeleaf
org.apache.poi
poi-ooxml
3.10-FINAL
org.apache.httpcomponents
httpclient
4.5.1
org.jsoup
jsoup
1.11.3
org.springframework.boot
spring-boot-maven-plugin
重命名application.properties为application.yml,修改内容
server:
port: 8080 # 配置端口
spring:
thymeleaf:
# 开发时关闭缓存,不然无法看到实时页面
cache: false
prefix: classpath:/templates/
suffix: .html
创建resources/templates/index.html
生成商品信息
请选择excel文件
创建PageController并访问页面
/**
* Created with IntelliJ IDEA.
* Project: operation-web
* Package: com.landawang.operationweb.controller
* User: Wangxin
* Date: 2018/7/12
* Time: 16:46
* Copyright © 2018年 黄旺鑫. All rights reserved.
*/
package com.landawang.operationweb.controller;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
@Controller
public class PageController {
@RequestMapping("/")
public String index() {
return "index";
}
}
ExcelController.java,编写实现逻辑
/**
* Created with IntelliJ IDEA.
* Project: operation-web
* Package: com.landawang.operationweb.controller
* User: Wangxin
* Date: 2018/7/12
* Time: 17:26
* Copyright © 2018年 黄旺鑫. All rights reserved.
*/
package com.landawang.operationweb.controller;
import org.apache.http.Consts;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.apache.poi.xssf.usermodel.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.STCellType;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import javax.servlet.http.HttpServletResponse;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
@RestController
@RequestMapping("/excel")
public class ExcelController {
@PostMapping("/upload")
public void uploadExcel(HttpServletResponse response, @RequestParam("file") MultipartFile file) {
/**
* 1. 获取上传的url列表
* 2. 遍历获取url对应页面的HTML源码
* 3. 提取对应的商品信息字段
* 4. 输出的excel
* */
try {
// 读取Excel文件
XSSFWorkbook workbook = new XSSFWorkbook(file.getInputStream());
// 读取Excel工作表
XSSFSheet sheet = workbook.getSheetAt(0);
// 创建输出Excel文件
XSSFWorkbook newWorkbook = new XSSFWorkbook();
// 创建Sheet
XSSFSheet newSheet = newWorkbook.createSheet();
// 创建标题行
XSSFRow titleRow = newSheet.createRow(0);
// 设置标题行
XSSFCell cell1 = titleRow.createCell(0, STCellType.INT_STR);
cell1.setCellValue("商品编码");
XSSFCell cell2 = titleRow.createCell(1, STCellType.INT_STR);
cell2.setCellValue("商品名称");
XSSFCell cell3 = titleRow.createCell(2, STCellType.INT_STR);
cell3.setCellValue("商品分类");
// 设置宽度
newSheet.setColumnWidth(0, 2560);
newSheet.setColumnWidth(1, 25600);
newSheet.setColumnWidth(2, 5120);
// 遍历获取HTML源码,提取信息
for (int i = 0; i < sheet.getLastRowNum(); i++) {
// 获取行
XSSFRow row = sheet.getRow(i);
// 获取列
XSSFCell cell = row.getCell(0);
// 获取url
String url = cell.getStringCellValue();
// 输出的Excel创建行
XSSFRow newRow = newSheet.createRow(i + 1);
// 判断url不为空并且包含http
if (!url.isEmpty() && url.contains("http")) {
// 获取商品信息集合
Map data = getProductInfo(url);
// 输出商品信息到Excel表
if (data != null) {
XSSFCell cellOne = newRow.createCell(0, STCellType.INT_STR);
cellOne.setCellValue(data.get("sku"));
XSSFCell cellTwo = newRow.createCell(1, STCellType.INT_STR);
cellTwo.setCellValue(data.get("name"));
XSSFCell cellThree = newRow.createCell(2, STCellType.INT_STR);
cellThree.setCellValue(data.get("cat"));
}
}
// 打印调试
System.out.println("\n内容是:" + url);
}
// 下载excel
response.setContentType("application/octet-stream");
// 以时间戳命名
String fileName = String.valueOf(new Date().getTime()) + ".xlsx";
response.setHeader("Content-disposition", "attachment;filename=" + fileName);
response.flushBuffer();
// 输出excel
newWorkbook.write(response.getOutputStream());
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* 提取商品信息
* */
private Map getProductInfo(String url) throws Exception {
CloseableHttpClient httpclient = HttpClients.createDefault();
HttpGet httpGet = new HttpGet(url);
// 模拟浏览器浏览
httpGet.setHeader("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0");
CloseableHttpResponse response1 = httpclient.execute(httpGet);
// The underlying HTTP connection is still held by the response object
// to allow the response content to be streamed directly from the network socket.
// In order to ensure correct deallocation of system resources
// the user MUST call CloseableHttpResponse#close() from a finally clause.
// Please note that if response content is not fully consumed the underlying
// connection cannot be safely re-used and will be shut down and discarded
// by the connection manager.
// 结果集合
Map reslut = null;
//获取响应状态码
int StatusCode = response1.getStatusLine().getStatusCode();
try {
HttpEntity entity1 = response1.getEntity();
//如果状态响应码为200,则获取html实体内容或者json文件
if(StatusCode == 200){
String html = EntityUtils.toString(entity1, Consts.UTF_8);
// 提取HTML得到商品信息结果
reslut = getData(html);
// 消耗掉实体
EntityUtils.consume(response1.getEntity());
}else {
//否则,消耗掉实体
EntityUtils.consume(response1.getEntity());
}
} finally {
response1.close();
}
return reslut;
}
private static Map getData (String html) throws Exception{
//获取的数据,存放在集合中
Map data = new HashMap();
//采用Jsoup解析
Document doc = Jsoup.parse(html);
//获取html标签中的内容
// 标题
String name = doc.select("div[class=sku-name]").text();
if (name != null) {
data.put("name", name);
}
// sku
String sku = "";
Elements elements = doc.select("a[data-sku]");
for (Element ele: elements) {
if (ele.hasAttr("data-sku")) {
sku = ele.attr("data-sku");
break;
}
}
if (sku != null) {
data.put("sku", sku);
}
String cat = doc.select("a[clstag=shangpin|keycount|product|mbNav-1]").text();
if (cat != null) {
data.put("cat", cat);
}
System.out.print(sku + "---------" + cat + "---------" + name);
//返回数据
return data;
}
}
解析HTML参考:https://jsoup.org
网络请求参考:http://hc.apache.org/httpcomponents-client-4.5.x/quickstart.html