java解决PDF中的XSS攻击

1、依赖


<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.26</version>
</dependency>


2、代码实现

/**
 * Stores an uploaded file, rejecting PDFs in which embedded JavaScript is detected.
 *
 * <p>When the declared content type is {@code application/pdf}, the upload is
 * copied to a temporary file and scanned with
 * {@code FileUtils.containsJavaScript}. The temporary copy is always removed
 * afterwards, whether or not the scan succeeds.
 *
 * <p>NOTE(review): the check is keyed on the content type sent by the client,
 * so a PDF uploaded under a different content type is not scanned.
 *
 * @param file the uploaded multipart file
 * @return the value returned by {@code FileUploadUtils.upload}
 * @throws CustomException if the PDF scan reports embedded JavaScript
 * @throws Exception if copying, scanning or storing the file fails
 */
public String uploadFile(MultipartFile file) throws Exception
    {
        // Constant-first equals: getContentType() may return null, which would
        // otherwise raise a NullPointerException before any check runs.
        if (MediaType.APPLICATION_PDF_VALUE.equals(file.getContentType())) {
            java.io.File pdfCopy = FileUtils.multipartFileToFile(file);
            try {
                if (FileUtils.containsJavaScript(pdfCopy)) {
                    throw new CustomException("所上传文件可能具有XSS攻击,请重新上传安全文件!");
                }
            } finally {
                // The copy exists only for scanning; remove it so uploads do not
                // accumulate in the temp directory.
                if (!pdfCopy.delete()) {
                    pdfCopy.deleteOnExit();
                }
            }
        }
        return FileUploadUtils.upload(localFilePath, file);
    }

   /**
     * Copies the content of a {@code MultipartFile} into a new temporary {@code File}.
     *
     * <p>The temporary file name is built from the original file name (without
     * its extension) plus a random UUID, and the suffix is a dot followed by the
     * value of {@link #getSuffixNameName(String)}. The caller is responsible for
     * deleting the returned file.
     *
     * @param mulFile the multipart file to copy
     * @return the temporary file holding a copy of the upload
     * @throws IOException if the temporary file cannot be created or the copy fails
     */
    public static File multipartFileToFile(MultipartFile mulFile) throws IOException {
        String fileName = mulFile.getOriginalFilename();
        String prefix = getFileNameNoSuffix(fileName) + UUID.randomUUID().toString();
        String suffix = "." + getSuffixNameName(fileName);
        File toFile = File.createTempFile(prefix, suffix);
        // try-with-resources closes both streams even when read/write throws.
        try (InputStream ins = mulFile.getInputStream();
             OutputStream os = new FileOutputStream(toFile)) {
            byte[] buffer = new byte[8192];
            int bytesRead;
            while ((bytesRead = ins.read(buffer, 0, buffer.length)) != -1) {
                os.write(buffer, 0, bytesRead);
            }
        } catch (IOException e) {
            // Best-effort cleanup: do not leave a partially written temp file behind.
            toFile.delete();
            throw e;
        }
        return toFile;
    }

   /**
     * Returns the file name with its last extension removed.
     *
     * <p>Everything from the last {@code '.'} onwards is dropped. A name without
     * any dot, an empty string, or {@code null} is returned unchanged.
     *
     * @param filename the file name, may be {@code null}
     * @return the name up to (not including) the last dot, or the input itself
     */
    public static String getFileNameNoSuffix(String filename) {
        if (filename == null || filename.isEmpty()) {
            return filename;
        }
        final int lastDot = filename.lastIndexOf('.');
        return lastDot < 0 ? filename : filename.substring(0, lastDot);
    }

    /**
     * Returns the extension of a file name (the text after the last dot).
     *
     * <p>When the name has no dot, or the last dot is the final character, the
     * whole input is returned unchanged; the same holds for an empty string and
     * for {@code null}.
     *
     * @param filename the file name, may be {@code null}
     * @return the text after the last dot, or the input itself when there is none
     */
    public static String getSuffixNameName(String filename) {
        if (filename == null || filename.isEmpty()) {
            return filename;
        }
        final int lastDot = filename.lastIndexOf('.');
        final boolean hasExtension = lastDot >= 0 && lastDot + 1 < filename.length();
        return hasExtension ? filename.substring(lastDot + 1) : filename;
    }

    /**
     * Checks whether a PDF file appears to contain a JavaScript entry.
     *
     * <p>The file is parsed with PDFBox and the string form of the document
     * trailer is searched for the {@code JS} name ({@code "COSName{JS}"}).
     *
     * <p>NOTE(review): any exception raised while parsing or inspecting the
     * document is printed and the method then returns {@code false} unless a
     * match was already found, so an unparseable file is reported as clean.
     * Confirm that this fail-open behaviour is intended for a security check.
     *
     * @param file the PDF file to inspect
     * @return {@code true} if the trailer's string form contains {@code COSName{JS}}
     * @throws IOException if closing the underlying random-access file fails
     */
    public static boolean containsJavaScript(File file) throws IOException {
        RandomAccessFile is = new RandomAccessFile(file, "r");
        boolean found = false;
        try {
            PDFParser parser = new PDFParser(is);
            parser.parse();
            PDDocument doc = parser.getPDDocument();
            try {
                String trailerText = doc.getDocument().getTrailer().toString();
                found = trailerText.contains("COSName{JS}");
            } finally {
                // Release the parsed document; the original never closed it.
                doc.close();
            }
        } catch (Exception e) {
            System.out.println("PDF效验异常:" + e.getMessage());
        } finally {
            is.close();
        }
        return found;
    }
    /**
     * Wraps a {@code File} in a {@code MultipartFile}.
     *
     * <p>A file item with field name {@code "file"} and the file's own name is
     * created, the file's bytes are copied into it, and the item is returned as
     * a {@code CommonsMultipartFile}.
     *
     * @param file the file to convert
     * @return a multipart file backed by a copy of the file's content
     * @throws IllegalArgumentException if the file cannot be read or copied
     */
    public static MultipartFile getMultipartFile(File file) {
        DiskFileItemFactory itemFactory = new DiskFileItemFactory();
        FileItem fileItem = itemFactory.createItem(
                "file", MediaType.MULTIPART_FORM_DATA_VALUE, true, file.getName());

        // Copy the file's bytes into the item's backing store.
        try (InputStream source = new FileInputStream(file);
             OutputStream sink = fileItem.getOutputStream()) {
            IOUtils.copy(source, sink);
        } catch (Exception ex) {
            throw new IllegalArgumentException("Invalid file: " + ex, ex);
        }

        return new CommonsMultipartFile(fileItem);
    }

你可能感兴趣的:(java,pdf,xss,开发语言,前端)