前言:
文件上传用的已经很多,java web 大概用到如下
- Struts
- Spring MVC CommonsMultipartResolver
- Commons-fileupload
Struts 和 Spring MVC 的实现都基于 Commons-fileupload,但其背后的原理,估计大多数人没有关注过。最近阅读一些开源源码时也发现,基础才是最重要的,万变不离其宗;否则在 IT 领域很容易被漫天的新技术冲昏了头,不知所措。下面开始。
HTTP:
1. 表单 form 类似(method 为 post,enctype 为 multipart/form-data,其中同时包含普通 input 域和 file 域)
2. 用浏览器追踪表单提交,会发现如下
Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
Content-Type:multipart/form-data; boundary=----WebKitFormBoundary4PCP0w0H0qxg16VB
Origin:http://localhost:8080
Referer:http://localhost:8080/sys/template/tem/create
Upgrade-Insecure-Requests:1
User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36
------WebKitFormBoundary4PCP0w0H0qxg16VB
Content-Disposition: form-data; name="id"
------WebKitFormBoundary4PCP0w0H0qxg16VB
Content-Disposition: form-data; name="name"
测试
------WebKitFormBoundary4PCP0w0H0qxg16VB
Content-Disposition: form-data; name="type"
INDEX
------WebKitFormBoundary4PCP0w0H0qxg16VB
Content-Disposition: form-data; name="layoutFile"; filename="confirm-btn.png"
Content-Type: image/png
------WebKitFormBoundary4PCP0w0H0qxg16VB
Content-Disposition: form-data; name="temFile"; filename="login.html"
Content-Type: text/html
------WebKitFormBoundary4PCP0w0H0qxg16VB--
需要重点关注的是上面的 boundary 和 Content-Disposition(原文以红色标注):表单提交时,HTTP 头部的 Content-Type 会带有一个 boundary 分隔符,请求体中的每一项内容(也就是每个 input 域)都由该分隔符隔开。如果是文件域,Content-Disposition 中会多出一个 filename,同时带上 Content-Type 描述文件类型;普通表单域则没有这两项。大体的解析格式如下(为了便于观看,下面特意换行显示;实际数据中只有 \r\n 处才换行):
-----------分隔符\r\n
Content-Disposition: form-data; name="XX"\r\n
\r\n
具体内容
------------分隔符\r\n
Content-Disposition: form-data; name="XX"; filename="XX"\r\n
Content-Type: image/png\r\n
\r\n
具体内容
------------分隔符\r\n
Content-Disposition: form-data; name="XX"; filename="XX"\r\n
Content-Type: image/png\r\n
\r\n
具体内容
------------分隔符--\r\n
注:最后一个分隔符的末尾会多出 --,例如 ------------分隔符--\r\n;同时,请求体中的分隔符比 Content-Type 里 boundary= 后面的值在开头多两个 -,因此整体可以理解为以 "--" + boundary 作为分割线。
JAVA Servlet 实现:
/**
 * Demo servlet mapped to {@code /file/upload}.
 *
 * <p>GET forwards to the upload page; POST parses a {@code multipart/form-data}
 * body with {@link FileItemParse} and prints the parsed items to standard output.
 */
@WebServlet(urlPatterns="/file/upload")
public class FileServlet extends HttpServlet {

    private static final long serialVersionUID = 1L;

    /**
     * Forwards to the JSP that renders the upload form.
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        request.getRequestDispatcher("/WEB-INF/views/file/upload.jsp")
                .forward(request, response);
    }

    /**
     * Parses an uploaded form and prints the resulting items.
     *
     * <p>Requests whose Content-Type does not start with {@code multipart/form-data}
     * (for example {@code multipart/form-data; boundary=----WebKitFormBoundary...})
     * are ignored. Parse failures are printed to standard error and otherwise ignored.
     */
    @Override
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        String type = request.getContentType();
        boolean multipart = type != null && type.startsWith("multipart/form-data");
        if (!multipart) {
            return;
        }
        try {
            List<?> parsedItems = FileItemParse.parseForm(request);
            System.out.println(parsedItems);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
/**
 * Minimal parser for {@code multipart/form-data} request bodies.
 *
 * <p>The body is a sequence of parts separated by {@code CRLF "--" boundary}; the last
 * delimiter is followed by {@code "--"}. Each part consists of header lines, an empty
 * line, and the raw content. This class reads the parts one by one and turns each into
 * a {@link FileItem}.
 *
 * <p>Limitations: every part body is buffered in memory before it is stored, and no
 * size limits are enforced, so this is not suitable for large or hostile uploads.
 */
public class FileItemParse {

    /**
     * Directory uploaded files are written to. This is a test setup; a real deployment
     * should write to the system temp directory instead.
     */
    private static final String UPLOAD_DIR = "d:/temp/";

    /** Charset used to decode part headers and the values of simple fields. */
    private static final String TEXT_CHARSET = "UTF-8";

    private static final int CR = 13;
    private static final int LF = 10;

    /**
     * Returns the part delimiter for the request: the {@code boundary} parameter of the
     * Content-Type header prefixed with {@code "--"}, as it appears in the body.
     *
     * @param request the multipart request
     * @return {@code "--"} followed by the boundary value (quotes removed)
     * @throws IllegalArgumentException if the Content-Type is missing or has no boundary
     */
    public static String getBoundary(HttpServletRequest request) {
        String contentType = request.getContentType();
        String boundary = headerParam(contentType, "boundary");
        if (boundary == null || boundary.length() == 0) {
            throw new IllegalArgumentException(
                    "Content-Type has no boundary parameter: " + contentType);
        }
        return "--".concat(boundary);
    }

    /**
     * Parses every part of a {@code multipart/form-data} request body.
     *
     * <p>Simple fields get their value decoded as UTF-8. File fields are written to the
     * upload directory under the base name of the submitted file name; a file field
     * submitted without a selected file yields an item with no file path. The request
     * input stream is closed before returning.
     *
     * @param request the multipart request
     * @return the parsed items in body order; empty if the body contains no parts
     * @throws IllegalArgumentException if the Content-Type carries no boundary
     * @throws IOException if the body is truncated or malformed, or a file cannot be written
     */
    public static List<FileItem> parseForm(HttpServletRequest request) throws Exception {
        List<FileItem> fileItems = new ArrayList<FileItem>();
        String boundary = getBoundary(request);
        // Inside the body every delimiter after the first is preceded by CRLF, and that
        // CRLF belongs to the delimiter, not to the preceding part's content.
        byte[] delimiter = ("\r\n" + boundary).getBytes("ISO-8859-1");
        BufferedInputStream input = new BufferedInputStream(request.getInputStream());
        try {
            if (!skipToFirstPart(input, boundary)) {
                return fileItems;
            }
            boolean more = true;
            while (more) {
                FileItem item = readPartHeaders(input);
                ByteArrayOutputStream body = new ByteArrayOutputStream();
                more = readPartBody(input, delimiter, body);
                storeContent(item, body.toByteArray());
                fileItems.add(item);
            }
        } finally {
            try {
                input.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing the request stream fails.
            }
        }
        return fileItems;
    }

    /**
     * Consumes any preamble up to and including the first boundary line.
     *
     * @return true if a part follows; false if the stream ended or the first boundary
     *         is already the closing one
     */
    private static boolean skipToFirstPart(BufferedInputStream input, String boundary)
            throws IOException {
        String closing = boundary + "--";
        String line;
        while ((line = readLine(input)) != null) {
            String trimmed = line.trim();
            if (trimmed.equals(boundary)) {
                return true;
            }
            if (trimmed.equals(closing)) {
                return false;
            }
        }
        return false;
    }

    /**
     * Reads the header lines of one part, up to and including the empty line that
     * separates headers from content, into a new item.
     *
     * @throws IOException if the stream ends inside the headers or the part has no name
     */
    private static FileItem readPartHeaders(BufferedInputStream input) throws IOException {
        FileItem item = new FileItem();
        boolean named = false;
        String line;
        while ((line = readLine(input)) != null) {
            if (line.length() == 0) {
                if (!named) {
                    throw new IOException("Multipart part has no Content-Disposition name");
                }
                return item;
            }
            if (hasHeaderName(line, "Content-Disposition:")) {
                String name = headerParam(line, "name");
                if (name != null) {
                    item.setParamName(name);
                    named = true;
                }
                String fileName = headerParam(line, "filename");
                if (fileName != null) {
                    // Only the base name is kept: the value is client-controlled and
                    // must not be able to steer the write outside the upload directory.
                    item.setFileName(baseName(fileName));
                }
            } else if (hasHeaderName(line, "Content-Type:")) {
                item.setMimeType(line.substring(line.indexOf(':') + 1).trim());
            }
        }
        throw new IOException("Multipart stream ended inside part headers");
    }

    /**
     * Copies one part's content into {@code body}, stopping at the next delimiter.
     *
     * @return true if another part follows the delimiter, false if it was the closing one
     * @throws IOException if the stream ends before a delimiter is found
     */
    private static boolean readPartBody(BufferedInputStream input, byte[] delimiter,
            ByteArrayOutputStream body) throws IOException {
        int b;
        while ((b = input.read()) != -1) {
            if (b == CR) {
                // A delimiter can only start at a CR. Look ahead without consuming, so
                // that a failed match leaves every following byte to be scanned again.
                input.mark(delimiter.length);
                if (matchesRest(input, delimiter)) {
                    String tail = readLine(input);
                    return tail != null && !tail.startsWith("--");
                }
                input.reset();
            }
            body.write(b);
        }
        throw new IOException("Multipart stream ended before the closing boundary");
    }

    /** Checks whether the next bytes equal {@code delimiter[1..]}; consumes what it reads. */
    private static boolean matchesRest(BufferedInputStream input, byte[] delimiter)
            throws IOException {
        for (int i = 1; i < delimiter.length; i++) {
            if (input.read() != (delimiter[i] & 0xFF)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Reads one CRLF-terminated line and decodes it as UTF-8, without the CRLF.
     * A CR that is not followed by LF is kept as part of the line.
     *
     * @return the line, or null if the stream was already at its end
     */
    private static String readLine(BufferedInputStream input) throws IOException {
        ByteArrayOutputStream line = new ByteArrayOutputStream();
        boolean sawAny = false;
        int b;
        while ((b = input.read()) != -1) {
            sawAny = true;
            if (b == CR) {
                input.mark(1);
                if (input.read() == LF) {
                    return line.toString(TEXT_CHARSET);
                }
                input.reset();
            }
            line.write(b);
        }
        return sawAny ? line.toString(TEXT_CHARSET) : null;
    }

    /** Tells whether {@code line} starts with the given header name, ignoring case. */
    private static boolean hasHeaderName(String line, String name) {
        return line.regionMatches(true, 0, name, 0, name.length());
    }

    /**
     * Extracts a {@code key=value} parameter from a header such as
     * {@code form-data; name="a"; filename="b;c.txt"}. Semicolons inside double quotes
     * do not end a parameter, and surrounding quotes are removed from the value.
     *
     * @return the parameter value, or null if the header is null or lacks the parameter
     */
    private static String headerParam(String header, String key) {
        if (header == null) {
            return null;
        }
        int pos = header.indexOf(';');
        while (pos >= 0) {
            int start = pos + 1;
            int end = start;
            boolean quoted = false;
            while (end < header.length()) {
                char c = header.charAt(end);
                if (c == '"') {
                    quoted = !quoted;
                } else if (c == ';' && !quoted) {
                    break;
                }
                end++;
            }
            String param = header.substring(start, end).trim();
            int eq = param.indexOf('=');
            if (eq > 0 && param.substring(0, eq).trim().equalsIgnoreCase(key)) {
                String value = param.substring(eq + 1).trim();
                if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) {
                    value = value.substring(1, value.length() - 1);
                }
                return value;
            }
            pos = end < header.length() ? end : -1;
        }
        return null;
    }

    /**
     * Reduces a client-supplied file name to its last path segment. Returns an empty
     * string when nothing usable remains (empty name, {@code "."} or {@code ".."}).
     */
    private static String baseName(String fileName) {
        int cut = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
        cut = Math.max(cut, fileName.lastIndexOf(':'));
        String name = fileName.substring(cut + 1).trim();
        if (name.equals(".") || name.equals("..")) {
            return "";
        }
        return name;
    }

    /**
     * Stores a part's content on its item: simple fields keep the decoded text, file
     * fields are written to the upload directory.
     */
    private static void storeContent(FileItem item, byte[] content) throws IOException {
        String fileName = item.getFileName();
        if (fileName == null) {
            item.setParamValue(new String(content, TEXT_CHARSET));
            item.setSimpleField(true);
            return;
        }
        item.setSimpleField(false);
        if (fileName.length() == 0) {
            // File input submitted without a selected file: there is nothing to store.
            return;
        }
        String url = UPLOAD_DIR + fileName;
        File file = new File(url);
        File dir = file.getParentFile();
        if (!dir.exists() && !dir.mkdirs()) {
            throw new IOException("Cannot create upload directory " + dir);
        }
        FileOutputStream out = new FileOutputStream(file);
        try {
            out.write(content);
            out.flush();
        } finally {
            out.close();
        }
        item.setFilePath(url);
    }
}
/**
 * One parsed part of a {@code multipart/form-data} request: either a simple form field
 * (name and value) or an uploaded file (name, file name, MIME type and stored path).
 */
public class FileItem {

    // File parts only.
    private String mimeType;  // Content-Type declared for the part
    private String filePath;  // where the uploaded content was stored
    private String fileName;  // file name submitted by the client

    // true: non-file form field, false: file form field
    private boolean isSimpleField;

    private String paramName;
    private String paramValue;

    public String getMimeType() {
        return mimeType;
    }

    public void setMimeType(String mimeType) {
        this.mimeType = mimeType;
    }

    public String getFilePath() {
        return filePath;
    }

    public void setFilePath(String filePath) {
        this.filePath = filePath;
    }

    public String getFileName() {
        return fileName;
    }

    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    public boolean isSimpleField() {
        return isSimpleField;
    }

    public void setSimpleField(boolean simpleField) {
        this.isSimpleField = simpleField;
    }

    public String getParamName() {
        return paramName;
    }

    public void setParamName(String paramName) {
        this.paramName = paramName;
    }

    public String getParamValue() {
        return paramValue;
    }

    public void setParamValue(String paramValue) {
        this.paramValue = paramValue;
    }

    /** Lists every field, so that printing a list of items shows what was parsed. */
    @Override
    public String toString() {
        return "FileItem{paramName=" + paramName
                + ", paramValue=" + paramValue
                + ", simpleField=" + isSimpleField
                + ", fileName=" + fileName
                + ", mimeType=" + mimeType
                + ", filePath=" + filePath + "}";
    }
}
以上只是一个简单的不完全实现,主要是针对HTTP 文件上传数据协议的一个解析过程,更多的可以去看Commons-fileupload源码,里面有更进一步的数据封装(例如进度条)。
参考文献:
http://www.ietf.org:80/rfc/rfc1867.txt
http://www.ietf.org:80/rfc/rfc2045.txt
http://blog.csdn.net/ybygjy/article/details/5869158