Java 获取 HTML 字符串中 img 标签的 src

/** Matches an {@code <img ...>} tag in any letter case; group 1 holds the raw attribute text. */
private static final Pattern IMG_TAG_PATTERN =
        Pattern.compile("<img\\b(.*?)/?>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

/**
 * Matches a quoted {@code src} attribute; group 2 holds the value. The look-behind rejects
 * attributes whose name merely ends in "src" (for example {@code data-src}), and the
 * back-reference requires the closing quote to be the same character as the opening one.
 */
private static final Pattern IMG_SRC_ATTR_PATTERN =
        Pattern.compile("(?<![\\w-])src\\s*=\\s*([\"'])(.*?)\\1",
                Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

/**
 * Returns the {@code src} value of the first {@code <img>} tag in {@code content} that is
 * neither a static image nor an editor emotion image.
 *
 * <p>Tag and attribute names are matched case-insensitively, a tag may span several lines,
 * and whitespace around {@code =} is tolerated. Only quoted attribute values are recognised.
 * An image is skipped when its {@code src} contains {@code "/static"} or {@code "/emotion"}.
 *
 * @param content HTML fragment to scan; may be {@code null}
 * @return the first qualifying {@code src} value, or {@code null} when {@code content} is
 *         {@code null} or contains no qualifying image
 */
public static String getImgSrc(String content){
    if (content == null) {
        return null;
    }
    Matcher tagMatcher = IMG_TAG_PATTERN.matcher(content);
    while (tagMatcher.find()) {
        // Search for src only inside this tag's attribute text, not the whole document.
        Matcher srcMatcher = IMG_SRC_ATTR_PATTERN.matcher(tagMatcher.group(1));
        if (!srcMatcher.find()) {
            continue;
        }
        String src = srcMatcher.group(2);
        // Static assets and editor emotion icons are not content images; keep looking.
        if (!src.contains("/static") && !src.contains("/emotion")) {
            return src;
        }
    }
    return null;
}

 

 

 

// NOTE(review): the leading "]*?" in both patterns is a no-op and looks like the residue of a
// stripped tag prefix; as written the first pattern matches ANY src attribute, not only those
// of <img> tags. The '|' inside the character classes is a literal pipe, so a pipe also ends
// the captured value. Both literals are kept unchanged here — confirm the intended patterns.
/** Matches a {@code src} attribute; group 1 captures its value. */
private static final Pattern IMAGE_SRC_ATTR_PATTERN =
        Pattern.compile("]*?\\bsrc[\\s]*=[\\s|\'|\"]*([^\\s|\'|\"]*)[\\s|\'|\"]*");

/** Matches an {@code imgsrc} attribute (video screenshot); group 1 captures its value. */
private static final Pattern VIDEO_IMGSRC_ATTR_PATTERN =
        Pattern.compile("]*?\\bimgsrc[\\s]*=[\\s|\'|\"]*([^\\s|\'|\"]*)[\\s|\'|\"]*");

/**
 * Collects image sources and video screenshot sources from {@code html} and returns them in
 * the order in which they appear in the document.
 *
 * <p>Values of {@code src} attributes are collected first: a value whose extension is
 * {@code gif} (ignoring case) or that starts with {@code "/static"} is kept as is, any other
 * value is passed through {@code getShortImage}. If {@code getShortImage} throws, that entry
 * is skipped. Values of {@code imgsrc} attributes are then added as
 * {@code "video_" + imageURL + value}, where {@code imageURL} comes from the site
 * configuration. Finally both groups are merged by their position in {@code html}.
 *
 * @param html    HTML to scan; {@code null} or empty yields an empty list
 * @param length  maximum number of entries to return; a value {@code <= 0} means no limit.
 *                {@code src} entries are taken before {@code imgsrc} entries when the limit
 *                is reached
 * @param request current request, used only to resolve the image base path
 * @return a new, mutable list of {@code String} entries in document order; never {@code null}
 */
private static List getImageSrcList(String html, int length,HttpServletRequest request) {
    String imageURL= ApplicationUtil.getSiteConfigBean().getNewImagePath(request);

    // Parallel lists: sources.get(i) was captured at offsets.get(i) in html. Entries
    // [0, imageCount) come from src attributes, the rest from imgsrc attributes.
    List<String> sources = new ArrayList<>();
    List<Integer> offsets = new ArrayList<>();
    int imageCount = 0;

    if (html != null && html.length() != 0) {
        // Extract images.
        Matcher m = IMAGE_SRC_ATTR_PATTERN.matcher(html);
        while ((length <= 0 || sources.size() < length) && m.find()) {
            String src = m.group(1);
            String entry;
            // GIFs and static resources are not thumbnailed.
            boolean keepOriginal =
                    "gif".equalsIgnoreCase(src.substring(src.lastIndexOf('.') + 1))
                            || src.startsWith("/static");
            if (keepOriginal) {
                entry = src;
            } else {
                try {
                    entry = getShortImage(src);
                } catch (Exception e) {
                    // Best effort: one image that cannot be thumbnailed must not abort the scan.
                    e.printStackTrace();
                    continue;
                }
            }
            sources.add(entry);
            // Use the match position, not html.indexOf(src): a repeated URL would otherwise
            // always map to its first occurrence and be ordered incorrectly.
            offsets.add(m.start(1));
        }
        imageCount = sources.size();

        // Extract video screenshots; the limit is checked before adding so the total never
        // exceeds length.
        m = VIDEO_IMGSRC_ATTR_PATTERN.matcher(html);
        while ((length <= 0 || sources.size() < length) && m.find()) {
            sources.add("video_"+imageURL+m.group(1));
            offsets.add(m.start(1));
        }
    }

    // Each run was found left to right and is therefore already sorted by offset, so a
    // single merge pass yields document order.
    List<String> result = new ArrayList<>(sources.size());
    int image = 0;
    int video = imageCount;
    int total = sources.size();
    while (image < imageCount && video < total) {
        if (offsets.get(image) <= offsets.get(video)) {
            result.add(sources.get(image++));
        } else {
            result.add(sources.get(video++));
        }
    }
    while (image < imageCount) {
        result.add(sources.get(image++));
    }
    while (video < total) {
        result.add(sources.get(video++));
    }
    return result;
}

你可能感兴趣的:(java 获取html字符串中的img标签src)