扫描本地页面文件.html或jsp,按照原目录结构,提取页面相关的js.css.img等

做项目的过程中经常会用到一些现有模板,比如Metronic模板,因为这些模板都是整套很全面的,有时候大小接近100M,

然而我们项目只需要用到其中的一两个页面效果,

传统方式一:不管大小,把模板的所有资源全部扔到项目中,这样会造成项目非常臃肿,无用文件太多。

传统方式二:人工查看页面中的引用,找出相应的资源并提取出来放入项目,这样工作量非常大,而且很繁琐。

 

所以需要一个工具,能在指定某个页面后,直接将这个页面引用的相关资源按照原目录结构(必须保持原结构)提取出来。

 

我在百度搜了半天没搜到.所以自己写了一个java版的工具

实现两个功能1.根据本地模板提取相应资源

                      2.指定项目webapp目录,自动扫描项目中所有的.jsp文件,提取它所有的资源.(因为项目可能经过多次或者长时间开发,改版等情况,会有很多资源已经废弃,但是未删除)

 

废话不说了,直接放代码,(因为是自己使用的临时工具,所以注释啥的,没写太清晰,代码中的处理大多也是根据我项目结构的情况做的处理)

 

扫描本地页面文件.html或jsp,按照原目录结构,提取页面相关的js.css.img等_第1张图片

 

 

1.提取模板文件(.html)所引用的资源

 

 

import com.steadystate.css.parser.CSSOMParser;

import org.jsoup.Jsoup;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import org.jsoup.select.Elements;

import org.w3c.dom.css.CSSImportRule;
import org.w3c.dom.css.CSSRule;
import org.w3c.dom.css.CSSRuleList;
import org.w3c.dom.css.CSSStyleDeclaration;
import org.w3c.dom.css.CSSStyleRule;
import org.w3c.dom.css.CSSStyleSheet;
import org.w3c.dom.css.CSSValue;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;

import java.net.URI;
import java.net.URL;

import java.nio.channels.FileChannel;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Copies one local HTML template page, plus the CSS, JavaScript and image files
 * it references, from a template package into a new directory while keeping
 * the original directory layout.
 *
 * <p>
 * Relative references are resolved with {@link URI#resolve(URI)} against a fake
 * {@code http://} base; the fake prefix is then swapped for the source or the
 * target root directory to obtain real file paths. Style sheets are parsed as
 * well so that images used by {@code background} / {@code background-image}
 * declarations are copied too.
 */
public class HTML2extract {
	/** Root directory of the complete template package (source of the copy). */
	private static String	oldDir		= "D:/IDE/web_ui/Metronic v4.1.0/";
	/** Root directory that receives the extracted files. */
	private static String	newDir		= "D:/MyMetronic/";
	/** Fake URL prefix standing in for the root directory during URI resolution. */
	private static String	_temp		= "http://";
	/** Directory of the page, relative to {@link #oldDir}. */
	private static String	subdir		= "v4.1.0/theme/templates/admin3/";
	/** Base URI ({@code _temp + subdir}) used to resolve the page's references. */
	private static URI		base		= null;
	/** Full path of the page to extract. */
	private static String	htmlpage	= oldDir + subdir + "index.html";

	/** Matches one {@code url(...)} token of a CSS value; group 1 is the reference. */
	private static final Pattern	CSS_URL	= Pattern.compile( "url\\((.*?)\\)" );

	/**
	 * Copies the configured page and every local resource it references.
	 *
	 * @param args
	 *            ignored
	 * @throws Exception
	 *             if the base URI is malformed or the page cannot be parsed
	 */
	public static void main(String[] args) throws Exception {
		base = new URI( _temp + subdir );

		File input = new File( htmlpage );

		// The directory has to be created in the TARGET tree; creating it in
		// the source tree (where it already exists) makes the copy below fail.
		String target = htmlpage.replace( oldDir, newDir );
		mkDir( target.substring( 0, target.lastIndexOf( "/" ) ) );
		fCopy( htmlpage, target );

		Document doc = Jsoup.parse( input, "UTF-8" );
		List<String> list = new ArrayList<String>();
		// css
		getUrl( list, doc, "link", "href" );
		// js
		getUrl( list, doc, "script", "src" );
		// img
		getUrl( list, doc, "img", "src" );

		for (String url : list) {
			if ((url == null) || (url.trim().length() == 0)) {
				continue;
			}

			xf( url );

			// Style sheets may reference further images.
			if (url.toLowerCase().trim().endsWith( ".css" )) {
				List<String> l2 = new ArrayList<String>();
				cssParser( l2, url.replace( _temp, oldDir ) );

				String cssDir = url.substring( 0, url.lastIndexOf( "/" ) );

				for (String cimg : l2) {
					xf( getRealUrl( cssDir, cimg ) );
				}
			}
		}
	}

	/**
	 * Joins a directory and a relative reference, consuming one trailing
	 * directory of {@code d1} for every leading {@code ../} of {@code d2}.
	 *
	 * @param d1
	 *            directory without trailing slash
	 * @param d2
	 *            reference relative to {@code d1}
	 * @return the combined path
	 */
	private static String getRealUrl(String d1, String d2) {
		while (d2.startsWith( "../" )) {
			int cut = d1.lastIndexOf( "/" );

			if (cut < 0) {
				// Nothing left to climb; keep the remaining "../" instead of
				// failing with an index error.
				break;
			}

			d1 = d1.substring( 0, cut );
			d2 = d2.substring( 3 );
		}

		return d1 + "/" + d2;
	}

	/**
	 * Copies the file behind a fake-base URL from the source tree to the same
	 * relative location in the target tree, creating directories as needed.
	 *
	 * @param url
	 *            resource URL starting with the fake prefix
	 */
	private static void xf(String url) {
		// source path
		String s = url.replace( _temp, oldDir );
		System.out.println( s );

		// target path
		String t = url.replace( _temp, newDir );

		if (s.equals( t )) {
			// Not a fake-base URL: copying would overwrite the file with itself.
			return;
		}

		int slash = t.lastIndexOf( "/" );

		if (slash > 0) {
			mkDir( t.substring( 0, slash ) );
		}

		fCopy( s, t );
	}

	/**
	 * Collects the local resources referenced by all elements with the given
	 * tag, resolved against {@link #base}.
	 *
	 * <p>
	 * Elements without the attribute (for example inline scripts) and references
	 * to other hosts are skipped because no file below {@link #oldDir}
	 * corresponds to them.
	 *
	 * @param list
	 *            receives the resolved URLs
	 * @param doc
	 *            parsed page
	 * @param tag
	 *            element name to look for
	 * @param attr
	 *            attribute holding the reference
	 */
	private static void getUrl(List<String> list, Document doc, String tag, String attr) {
		Elements els = doc.getElementsByTag( tag );

		for (Element el : els) {
			String ref = el.attr( attr ).trim();

			if (ref.length() == 0) {
				continue;
			}

			try {
				URI rel = new URI( ref );

				// A scheme ("http:", "data:", "javascript:") or an authority
				// ("//cdn.example.com/x.js") means the resource is not local.
				if (rel.isAbsolute() || (rel.getRawAuthority() != null)) {
					System.out.println( "skip external reference: " + ref );
					continue;
				}

				URL absURL = base.resolve( rel ).toURL();
				list.add( absURL.toString() );
			} catch (Exception e) {
				System.err.println( "cannot resolve reference '" + ref + "': " + e );
			}
		}
	}

	/**
	 * Parses a style sheet and collects the local files referenced through
	 * {@code url(...)} in {@code background-image} (or, if absent,
	 * {@code background}) declarations. {@code @import} rules are not followed.
	 *
	 * @param l
	 *            receives the references exactly as written in the style sheet
	 *            (minus quotes)
	 * @param cssf
	 *            path of the style sheet file
	 */
	private static void cssParser(List<String> l, String cssf) {
		String URIPath = "file:///" + cssf;
		CSSOMParser cssparser = new CSSOMParser();
		CSSStyleSheet css = null;

		try {
			css = cssparser.parseStyleSheet( new org.w3c.css.sac.InputSource( URIPath ), null, null );
		} catch (Exception e) {
			System.out.println( "解析css文件异常:" + e );
		}

		if (css == null) {
			return;
		}

		CSSRuleList cssrules = css.getCssRules();

		for (int i = 0; i < cssrules.getLength(); i++) {
			CSSRule rule = cssrules.item( i );

			if (!(rule instanceof CSSStyleRule)) {
				continue;
			}

			CSSStyleDeclaration styles = ((CSSStyleRule) rule).getStyle();
			CSSValue cssval = styles.getPropertyCSSValue( "background-image" );

			if (cssval == null) {
				cssval = styles.getPropertyCSSValue( "background" );
			}

			if (cssval == null) {
				continue;
			}

			// A value may list several images, so take every url(...) token.
			Matcher mat = CSS_URL.matcher( cssval.getCssText() );

			while (mat.find()) {
				String ref = unquote( mat.group( 1 ) );

				if (isLocalReference( ref )) {
					l.add( ref );
				}
			}
		}
	}

	/** Removes surrounding whitespace and one pair of matching quotes. */
	private static String unquote(String ref) {
		ref = ref.trim();

		if (ref.length() >= 2) {
			char first = ref.charAt( 0 );
			char last = ref.charAt( ref.length() - 1 );

			if (((first == '"') || (first == '\'')) && (first == last)) {
				return ref.substring( 1, ref.length() - 1 ).trim();
			}
		}

		return ref;
	}

	/** Tells whether a CSS reference can name a file on disk (no data URI, no remote URL). */
	private static boolean isLocalReference(String ref) {
		return (ref.length() > 0) && !ref.startsWith( "data:" ) && !ref.startsWith( "//" ) && (ref.indexOf( "://" ) < 0);
	}

	/**
	 * Creates a directory, including missing parents, if it does not exist yet.
	 *
	 * @param fileurl
	 *            directory path
	 */
	public static void mkDir(String fileurl) {
		File saveDirFile = new File( fileurl );

		if (!saveDirFile.exists() && !saveDirFile.mkdirs()) {
			System.err.println( "cannot create directory: " + fileurl );
		}
	}

	/**
	 * Copies a file. Failures are reported on standard error and otherwise
	 * ignored so that one missing resource does not stop the extraction.
	 *
	 * @param s
	 *            source file path
	 * @param t
	 *            target file path
	 */
	public static void fCopy(String s, String t) {
		FileInputStream fi = null;
		FileOutputStream fo = null;
		FileChannel in = null;
		FileChannel out = null;

		try {
			// Open the source first so a missing source never creates an
			// empty target file.
			fi = new FileInputStream( s );
			fo = new FileOutputStream( t );
			in = fi.getChannel();
			out = fo.getChannel();

			// transferTo may move fewer bytes than requested, so repeat until
			// the whole file has been written.
			long size = in.size();
			long pos = 0;

			while (pos < size) {
				long moved = in.transferTo( pos, size - pos, out );

				if (moved <= 0) {
					break;
				}

				pos += moved;
			}
		} catch (Exception e) {
			System.err.println( "copy failed: " + s + " -> " + t + " (" + e + ")" );
		} finally {
			closeQuietly( in );
			closeQuietly( out );
			closeQuietly( fi );
			closeQuietly( fo );
		}
	}

	/** Closes a resource that may never have been opened; close errors are only reported. */
	private static void closeQuietly(java.io.Closeable c) {
		if (c == null) {
			return;
		}

		try {
			c.close();
		} catch (Exception e) {
			System.err.println( "close failed: " + e );
		}
	}
}

 

 

2.扫描整个项目jsp (maven结构)

 

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.w3c.dom.css.CSSImportRule;
import org.w3c.dom.css.CSSRule;
import org.w3c.dom.css.CSSRuleList;
import org.w3c.dom.css.CSSStyleDeclaration;
import org.w3c.dom.css.CSSStyleRule;
import org.w3c.dom.css.CSSStyleSheet;
import org.w3c.dom.css.CSSValue;

import com.steadystate.css.parser.CSSOMParser;

/**
 * Walks a project source tree, copies every {@code .jsp} file together with the
 * CSS, JavaScript and image files it references into a new directory, and keeps
 * the original directory layout. Resources that no page references are thereby
 * left behind.
 *
 * <p>
 * Only references written with the {@code ${basePath}} placeholder are mapped
 * to files: the placeholder is replaced by the path of the {@code webapp}
 * directory that contains the page. Any other reference is skipped.
 */
public class JSP2extract {
	/** Root of the source tree that is scanned. */
	private static String	oldDir		= "D:/IDE/workspace_2015/chinasport/src/";
	/** Root directory that receives the extracted files. */
	private static String	newDir		= "D:/myJsp/";

	/** Matches {@code src/<x>/webapp/}; group 1 is the segment between them. */
	private static final Pattern	SRC_WEBAPP	= Pattern.compile( "src/(.*?)/webapp/" );
	/** Matches one {@code url(...)} token of a CSS value; group 1 is the reference. */
	private static final Pattern	CSS_URL		= Pattern.compile( "url\\((.*?)\\)" );

	/**
	 * Scans {@link #oldDir} and extracts all JSP pages found below it.
	 *
	 * @param args
	 *            ignored
	 * @throws Exception
	 *             declared for compatibility; errors are reported per file
	 */
	public static void main(String[] args) throws Exception {
		fdir( oldDir );
	}

	/**
	 * Rewrites the segment between {@code src/} and {@code /webapp/} to
	 * {@code main}.
	 *
	 * @param d
	 *            file path
	 * @return the rewritten path, or {@code d} itself if the pattern is absent
	 */
	private static String rdir(String d) {
		Matcher matcher = SRC_WEBAPP.matcher( d );

		if (!matcher.find()) {
			return d;
		}

		// Replace only the matched segment; a plain String.replace would also
		// change identically named directories elsewhere in the path.
		return d.substring( 0, matcher.start( 1 ) ) + "main" + d.substring( matcher.end( 1 ) );
	}

	/**
	 * Recursively visits a file or directory and hands every {@code .jsp} file
	 * to {@link #cdir(String)}.
	 *
	 * @param s
	 *            file or directory path
	 */
	private static void fdir(String s) {
		File f = new File( s );

		if (f.isDirectory()) {
			File[] children = f.listFiles();

			if (children == null) {
				// listFiles() yields null when the directory cannot be read.
				System.err.println( "cannot list directory: " + f.getPath() );
				return;
			}

			for (File child : children) {
				fdir( child.getPath() );
			}
		} else if (f.getPath().toLowerCase().endsWith( ".jsp" )) {
			cdir( f.getPath() );
		}
	}

	/**
	 * Copies one JSP page and the local resources it references into the target
	 * tree.
	 *
	 * @param f
	 *            path of the JSP file; {@code null} is ignored
	 */
	private static void cdir(String f) {
		if (f == null) {
			return;
		}

		try {
			f = f.replace( "\\", "/" );

			String target = f.replace( oldDir, newDir );

			if (target.equals( f )) {
				// The page is not below oldDir, so there is no target location.
				System.out.println( "skip=" + f );
				return;
			}

			int slash = target.lastIndexOf( "/" );

			if (slash > 0) {
				mkDir( target.substring( 0, slash ) );
			}

			fCopy( f, target );

			Document doc = Jsoup.parse( new File( f ), "UTF-8" );
			List<String> list = new ArrayList<String>();
			// css
			getUrl( list, doc, "link", "href" );
			// js
			getUrl( list, doc, "script", "src" );
			// img
			getUrl( list, doc, "img", "src" );

			// Path of the webapp directory containing this page; it stands in
			// for ${basePath}. Absent when the page is not below a "webapp".
			int webappAt = f.indexOf( "webapp" );
			String webappRoot = (webappAt < 0) ? null : f.substring( 0, webappAt + "webapp".length() );

			for (String url : list) {
				if ((url == null) || (url.trim().length() == 0)) {
					continue;
				}

				if (webappRoot != null) {
					url = url.replace( "${basePath}", webappRoot );
				}

				url = rdir( url );

				if (url.indexOf( oldDir ) < 0) {
					// External or unresolved reference: no local file to copy.
					System.out.println( "skip=" + url );
					continue;
				}

				xf( url );

				// Style sheets may reference further images.
				if (url.toLowerCase().trim().endsWith( ".css" )) {
					List<String> l2 = new ArrayList<String>();
					cssParser( l2, url );

					String cssDir = url.substring( 0, url.lastIndexOf( "/" ) );

					for (String cimg : l2) {
						xf( getRealUrl( cssDir, cimg ) );
					}
				}
			}
		} catch (Exception e) {
			System.err.println( "cannot process " + f + ": " + e );
		}
	}

	/**
	 * Joins a directory and a relative reference, consuming one trailing
	 * directory of {@code d1} for every leading {@code ../} of {@code d2}.
	 *
	 * @param d1
	 *            directory without trailing slash
	 * @param d2
	 *            reference relative to {@code d1}
	 * @return the combined path
	 */
	private static String getRealUrl(String d1, String d2) {
		while (d2.startsWith( "../" )) {
			int cut = d1.lastIndexOf( "/" );

			if (cut < 0) {
				// Nothing left to climb; keep the remaining "../" instead of
				// failing with an index error.
				break;
			}

			d1 = d1.substring( 0, cut );
			d2 = d2.substring( 3 );
		}

		return d1 + "/" + d2;
	}

	/**
	 * Copies a file from the source tree to the same relative location in the
	 * target tree, creating directories as needed.
	 *
	 * @param url
	 *            path of the source file
	 */
	private static void xf(String url) {
		// source path
		String s = url;

		// target path
		String t = url.replace( oldDir, newDir );

		if (t.equals( s )) {
			// Source and target coincide; opening the target for writing would
			// truncate the source file.
			System.out.println( "skip=" + url );
			return;
		}

		System.out.println( "new=" + t );

		int slash = t.lastIndexOf( "/" );

		if (slash > 0) {
			mkDir( t.substring( 0, slash ) );
		}

		fCopy( s, t );
	}

	/**
	 * Collects the raw attribute values of all elements with the given tag.
	 * Elements without the attribute (for example inline scripts) contribute
	 * nothing.
	 *
	 * @param list
	 *            receives the attribute values
	 * @param doc
	 *            parsed page
	 * @param tag
	 *            element name to look for
	 * @param attr
	 *            attribute holding the reference
	 */
	private static void getUrl(List<String> list, Document doc, String tag, String attr) {
		Elements els = doc.getElementsByTag( tag );

		for (Element el : els) {
			String url = el.attr( attr );

			if ((url != null) && (url.trim().length() > 0)) {
				list.add( url );
			}
		}
	}

	/**
	 * Parses a style sheet and collects the local files referenced through
	 * {@code url(...)} in {@code background-image} (or, if absent,
	 * {@code background}) declarations. {@code @import} rules are not followed.
	 *
	 * @param l
	 *            receives the references exactly as written in the style sheet
	 *            (minus quotes)
	 * @param cssf
	 *            path of the style sheet file
	 */
	private static void cssParser(List<String> l, String cssf) {
		String URIPath = "file:///" + cssf;
		CSSOMParser cssparser = new CSSOMParser();
		CSSStyleSheet css = null;

		try {
			css = cssparser.parseStyleSheet( new org.w3c.css.sac.InputSource( URIPath ), null, null );
		} catch (Exception e) {
			System.out.println( "解析css文件异常:" + e );
		}

		if (css == null) {
			return;
		}

		CSSRuleList cssrules = css.getCssRules();

		for (int i = 0; i < cssrules.getLength(); i++) {
			CSSRule rule = cssrules.item( i );

			if (!(rule instanceof CSSStyleRule)) {
				continue;
			}

			CSSStyleDeclaration styles = ((CSSStyleRule) rule).getStyle();
			CSSValue cssval = styles.getPropertyCSSValue( "background-image" );

			if (cssval == null) {
				cssval = styles.getPropertyCSSValue( "background" );
			}

			if (cssval == null) {
				continue;
			}

			// A value may list several images, so take every url(...) token.
			Matcher mat = CSS_URL.matcher( cssval.getCssText() );

			while (mat.find()) {
				String ref = unquote( mat.group( 1 ) );

				if (isLocalReference( ref )) {
					l.add( ref );
				}
			}
		}
	}

	/** Removes surrounding whitespace and one pair of matching quotes. */
	private static String unquote(String ref) {
		ref = ref.trim();

		if (ref.length() >= 2) {
			char first = ref.charAt( 0 );
			char last = ref.charAt( ref.length() - 1 );

			if (((first == '"') || (first == '\'')) && (first == last)) {
				return ref.substring( 1, ref.length() - 1 ).trim();
			}
		}

		return ref;
	}

	/** Tells whether a CSS reference can name a file on disk (no data URI, no remote URL). */
	private static boolean isLocalReference(String ref) {
		return (ref.length() > 0) && !ref.startsWith( "data:" ) && !ref.startsWith( "//" ) && (ref.indexOf( "://" ) < 0);
	}

	/**
	 * Creates a directory, including missing parents, if it does not exist yet.
	 *
	 * @param fileurl
	 *            directory path
	 */
	public static void mkDir(String fileurl) {
		File saveDirFile = new File( fileurl );

		if (!saveDirFile.exists() && !saveDirFile.mkdirs()) {
			System.err.println( "cannot create directory: " + fileurl );
		}
	}

	/**
	 * Copies a file. Failures are reported on standard error and otherwise
	 * ignored so that one missing resource does not stop the extraction.
	 *
	 * @param s
	 *            source file path
	 * @param t
	 *            target file path
	 */
	public static void fCopy(String s, String t) {
		FileInputStream fi = null;
		FileOutputStream fo = null;
		FileChannel in = null;
		FileChannel out = null;

		try {
			// Open the source first so a missing source never creates an
			// empty target file.
			fi = new FileInputStream( s );
			fo = new FileOutputStream( t );
			in = fi.getChannel();
			out = fo.getChannel();

			// transferTo may move fewer bytes than requested, so repeat until
			// the whole file has been written.
			long size = in.size();
			long pos = 0;

			while (pos < size) {
				long moved = in.transferTo( pos, size - pos, out );

				if (moved <= 0) {
					break;
				}

				pos += moved;
			}
		} catch (Exception e) {
			System.err.println( "copy failed: " + s + " -> " + t + " (" + e + ")" );
		} finally {
			closeQuietly( in );
			closeQuietly( out );
			closeQuietly( fi );
			closeQuietly( fo );
		}
	}

	/** Closes a resource that may never have been opened; close errors are only reported. */
	private static void closeQuietly(java.io.Closeable c) {
		if (c == null) {
			return;
		}

		try {
			c.close();
		} catch (Exception e) {
			System.err.println( "close failed: " + e );
		}
	}
}

 

 

你可能感兴趣的:(扫描本地页面文件.html或jsp,按照原目录结构,提取页面相关的js.css.img等)