公司在做一个美国的项目,由于JSP页面写得不规范,包含大量的style属性(内联的CSS),美国方面就要求我们"Proper use of HTML":
Currently the implementation of HTML in the UI is poor. Start with the most basic of fundamentals:
1.) Put content in the html
2.) Put style information in css
There should be no position or style information in the HTML; the only exception that comes to mind is the layout (not style) of tabular data, which belongs in a table.
这种苦差事当然落在我这个新手上了。手动去一个个找出来,那我可以两天不休息了,还得把眼睛找花。没有办法,总得找出点方法来对付吧。想想HTML Parser提取页面,得到含有style属性的节点,修改后再写入。代码如下:package comet.servlet; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.htmlparser.Node; import org.htmlparser.NodeFilter; import org.htmlparser.Parser; import org.htmlparser.filters.HasAttributeFilter; import org.htmlparser.filters.TagNameFilter; import org.htmlparser.nodes.TagNode; import org.htmlparser.util.NodeIterator; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; public class PullStyle { List<File> processFile = new ArrayList<File>(); /** * 提取style,生成class,单个class名称为父名称_子名称_style_i * * @param args * @throws IOException * @throws ParserException */ public static void main(String[] args) throws ParserException, IOException { // TODO Auto-generated method stub PullStyle pullStyle = new PullStyle(); File toFile = new File("E://css//test_.css");//生成的css文件用的位置 toFile.createNewFile(); pullStyle.getFiles("D:/Program Files/eclipse/works/router2/www",0); for(File file : pullStyle.processFile){ String fileName = file.getName(); pullStyle.processStyle(file,toFile,"style",file.getParentFile().getName()+"_"+fileName.substring(0, fileName.lastIndexOf("."))); } } /** * 第一个参数需要处理的文件的位置,可以是文件夹, * 第二个参数没有什么特别意思,是用来记录从当前文件起的目录深度 * * @param rootPath * @param level * @return */ public List<File> getFiles(String rootPath,int level){ File root = new File(rootPath); String line = ""; for(int i=0;i<level;i++){ line+="--"; } //System.out.println(line+rootPath+"--"+root.isDirectory()); level++; if(root.isDirectory()) { File[] children = root.listFiles(); for(File child : children) { getFiles(rootPath+"/"+child.getName(),level); } }else { if(rootPath.endsWith(".jsp")) { processFile.add(root); } } return processFile; } /** * 抽取出页面文件中styleType属性的内容, * 
并并写到一个文件,在这个过程中为含有styleType属性的标签加入相应内容的class属性。 * * @param file * @param content * @param styleType * @param name * @throws ParserException * @throws IOException */ public void processStyle(File fromFile,File toFile,String styleType,String name) throws ParserException, IOException { String content = file2String(fromFile);//文件转化 StringBuffer buffer = new StringBuffer(); Parser parser = new Parser(); parser.setInputHTML(content); //HasAttributeFilter,看名字就知道了,对提取页面属性太棒了 NodeFilter hasAttributeFilter = new HasAttributeFilter("style"); NodeList nodeList = parser.extractAllNodesThatMatch(hasAttributeFilter); int i = 0; for(NodeIterator iterator=nodeList.elements();iterator.hasMoreNodes();){ Node node = iterator.nextNode(); TagNode tagNode = (TagNode)node; String style; if((style = tagNode.getAttribute(styleType))!=null ){ //System.out.println(style); String oldHtml = tagNode.toHtml();//保存修改前的html /** * 组成所需要的class */ String className = name +"_style_"+(i++); buffer.append("."+className+"{"); buffer.append(style); buffer.append("}/n"); tagNode.removeAttribute(styleType);//除去style属性 String oldClass = tagNode.getAttribute("class")== null ?"":tagNode.getAttribute("class"); tagNode.setAttribute("class", oldClass+" " + className);//添加class属性 content = content.replace(oldHtml, tagNode.toHtml());//这是一招迫不得己的方法(一种有创意的方法^_^),,以修改后的内容替换修改前内容 } } //System.out.println(content); /** * 生成的class文件当然得写上页面。 * 这是取css/allPage.css这个名字 */ parser.setInputHTML(content); NodeFilter linkFilter = new TagNameFilter("link"); nodeList = parser.extractAllNodesThatMatch(linkFilter); System.out.println(nodeList.asString()); for(NodeIterator iterator=nodeList.elements();iterator.hasMoreNodes();){ Node node = iterator.nextNode(); TagNode tagNode = (TagNode)node; String oldHtml = tagNode.toHtml(); String newHtml = oldHtml+"/n"+"<link href="css/allPage.css" mce_href="css/allPage.css" rel='stylesheet' type='text/css' />"; System.out.println(newHtml); content = content.replace(oldHtml, newHtml); break; } 
writeFile(toFile,buffer.toString(),true);//写入class文件 writeFile(fromFile,content,false);//把修改后的内容写回原文件 } /** * 读文件并把内容转化为string * * @param file * @return * @throws IOException */ public String file2String(File file) throws IOException{ StringBuffer buffer = new StringBuffer(); FileReader fileReader = null; BufferedReader bufferedReader = null; String line; try { fileReader = new FileReader(file); bufferedReader = new BufferedReader(fileReader); while((line = bufferedReader.readLine()) != null) { buffer.append(line); buffer.append("/n"); } }catch(IOException e) { throw e; }finally { if(bufferedReader != null) { bufferedReader.close(); } if(fileReader != null) { fileReader.close(); } } return buffer.toString(); } /** * 把内容写入文件 * * @param file * @param content * @throws IOException */ public void writeFile(File file,String content,boolean canWrite) throws IOException { FileWriter fileWriter = null; BufferedWriter bufferWriter = null; try { if(file != null && file.canWrite()) { fileWriter = new FileWriter(file,canWrite); bufferWriter = new BufferedWriter(fileWriter); bufferWriter.append(content); bufferWriter.append("/n"); bufferWriter.flush(); } }catch(IOException e) { throw e; }finally { if(bufferWriter != null) { bufferWriter.close(); } if(fileWriter != null) { fileWriter.close(); } } } /** * html parser的测试方法。TagNameFilter测试td * */ public void testHtmlPage() { String inputHTML = "<html>" + "<head>" + "<title>Welcome to the HTMLParser website</title>" + "</head>" + "<body>" + "Welcome to HTMLParser" + "<table id=’table1′ >" + "<tr><td>1-11</td><td>1-12</td><td>1-13</td>" + "<tr><td>1-21</td><td>1-22</td><td>1-23</td>" + "<tr><td>1-31</td><td>1-32</td><td>1-33</td></table>" + "<table id=’table2′ >" + "<tr><td>2-11</td><td>2-12</td><td>2-13</td>" + "<tr><td>2-21</td><td>2-22</td><td>2-23</td>" + "<tr><td>2-31</td><td>2-32</td><td>2-33</td></table>" + "</body>" + "</html>"; Parser parser = new Parser(); try { parser.setInputHTML(inputHTML); 
parser.setEncoding(parser.getURL()); NodeFilter nf = new TagNameFilter("td"); NodeList list = parser.parse(nf); int i = 0; for (NodeIterator iterator = list.elements(); iterator .hasMoreNodes();) { TagNode node = (TagNode)iterator.nextNode(); System.out.println("testHtmlPage -node is :" + node.toHtml()); String old = node.toHtml(); node.setAttribute("class", "testClass"+i); i++; inputHTML = inputHTML.replace(old, node.toHtml()); System.out.println("testHtmlPage -node is=========== :" + node.toHtml()); } System.out.println(inputHTML); } catch (ParserException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * html parser的测试方法。TagNameFilter测试link * */ public void linkTest() throws ParserException { Parser parser = new Parser(); String content = "<head><title>Link Test</title>" + "<link href="’/test01/css.css’" mce_href="’/test01/css.css’" text=’text/css’ rel=’stylesheet’ />" + "<link href="’/test02/css.css’" mce_href="’/test02/css.css’" text=’text/css’ rel=’stylesheet’ />" + "</head>" + "<body>"; parser .setInputHTML(content); NodeFilter linkFilter = new TagNameFilter("link"); NodeList nodeList = parser.extractAllNodesThatMatch(linkFilter); for(NodeIterator iterator=nodeList.elements();iterator.hasMoreNodes();){ Node node = iterator.nextNode(); TagNode tagNode = (TagNode)node; String oldHtml = tagNode.toHtml(); String newHtml = oldHtml+"/n"+"<link href="css/allPage.css" mce_href="css/allPage.css" rel='stylesheet' type='text/css' />"; System.out.println(newHtml); content = content.replace(oldHtml, newHtml); break; } } } 方法可能有点不笨,望大家指教。