自己实现HTML-Beautify - 2


文章截图 - 更好的排版
源代码下载

上一篇文章,我们按照 http://jsbeautifier.org/ 的基本框架,用我们自己的语言实现了对Html简单的格式化功能。
今天我们来完善这段JavaScript脚本,主要添加如下功能:
  • 删除多余的空格,回车换行以及制表符
  • 保持script和style标签里面的内容格式不变化
  • 注释部分的格式不变化
  • 标签属性赋值等号前后不能有空格(type="text/javascript")
  • 自封闭标签/>之前必须有空格(<br />)
源代码:
//  优化过的HTML-Beautify
     function  HtmlBeautify(source, indent_value) {
        
this .source  =  source;
        
this .indent_value  =  indent_value;
        
this .result  =   "" ;

        
this .parse();
    }

    
//  分析并产生输出到this.result
    HtmlBeautify.prototype.parse  =   function () {
        
var  that  =   this ;
        
//  当前分析到哪个字符,当前标记值,标记类型,
         //  输出数组,缩进级别,当前格式化内容(去掉多余空格)
         var  pos  =   0 , token_value  =   "" , token_type  =   "" ,
        output 
=  [], indent_level  =   0 , is_format_content  =   true ;

        
//  把这些标签作为Single Tag
         var  single_token  =   " br,input,link,meta,!doctype,basefont,base,area,hr,wbr,param,img,isindex,?xml,embed " .split( ' , ' );
        
var  white_space  =   " \r\n\t  " .split( "" );

        
//  获取下一个标记(首先获取正文,如果正文为空则获取标签)
         function  nextToken() {
            
var  token_value_array  =  [], val  =   "" , space  =   false ;

            
//  "&lt;"之前的所有内容作为正文标签
             while  ((val  =  that.source[pos])  !==   " &lt; " ) {
                
if  (pos  & gt; =  that.source.length) {
                    token_type 
=   " END " ;
                    
return ;
                }

                
if  (is_format_content) {
                    
if  ($.inArray(val, white_space)  & gt; =   0 ) {
                        space 
=   true ;
                        pos
++ ;
                        
continue ;
                    }

                    
if  (space) {
                        token_value_array.push(
"   " );
                        space 
=   false ;
                    }
                }
                token_value_array.push(val);
                pos
++ ;
            }

            token_value 
=  token_value_array.join( "" ).replace( / ^\n+|\n$ / g, "" );
            
if  ($.trim(token_value)  ===   "" ) {
                
//  如果正文标记为空,则获取标签标记
                 if ( ! is_format_content) {
                    is_format_content 
=   true ;
                }
                nextTokenTag();
            } 
else  {
                token_type 
=   " CONTENT " ;
            }
        }

        
//  下一个标签标记
         function  nextTokenTag() {
            
var  token_value_array  =  [], val  =   "" ,
                tagName 
=   "" , space  =   false , is_comment  =   false ;
            
            
//  这是一个注释标签
             if (that.source[pos  +   1 ===   " ! "   &&  
                that.source[pos 
+   2 ===   " - "   &&
                that.source[pos 
+   3 ===   " - " ) {
                is_comment 
=   true ;
            }

            
//  获取标签标记,直到遇到"&gt;"
             do  {
                val 
=  that.source[pos];

                
if  ( ! is_comment) {
                    
//  如果此字符为空格换行制表符,则跳过此字符
                     if  ($.inArray(val, white_space)  & gt; =   0 ) {
                        space 
=   true ;
                        pos
++ ;
                        
continue ;
                    }

                    
if  (space) {
                        
if (token_value_array[token_value_array.length  -   1 !==   " = "   &&  val  !==   " = " ) {
                            token_value_array.push(
"   " );
                        }
                        space 
=   false ;
                    }
                }

                
if (val  ===   " / "   &&  that.source[pos  +   1 ===   " &gt; "   &&  token_value_array[token_value_array.length  -   1 !==   "   " ) {
                    token_value_array.push(
"   " );
                }

                token_value_array.push(val);
                pos
++ ;
            } 
while  (val  !==   " &gt; " );

            token_value 
=  $.trim(token_value_array.join( "" ));
            
//  当前标签的名称(小写)
            tagName  =  getTagName();
            
            
if (is_comment) {
                token_type 
=   " SINGLE_TAG " ;
            } 
else  {
                
if  (token_value[ 1 ===   " / " ) {
                    
//  token_value以"&lt;/"开始,则认为是结束标签
                    token_type  =   " END_TAG " ;
                } 
else   if  ($.inArray(tagName, single_token)  & gt; =   0   ||  token_value[token_value.length  -   2 ===   " / " ) {
                    
//  如果标签在single_token或者token_value以"/&gt;"结尾,则认为是独立标签
                     //  这种判断没有考虑这种情况:"&lt;br&gt;&lt;/br&gt;"
                    token_type  =   " SINGLE_TAG " ;
                } 
else  {
                    token_type 
=   " START_TAG " ;
                    
if  (tagName  ===   " script "   ||  tagName  ===   " style " ) {
                        is_format_content 
=   false ;
                    }
                }
            }
        }

        
function  getTagName() {
            
var  tagName  =  token_value.substr( 1 , token_value.length  -   2 );
            
var  spaceIndex  =  tagName.indexOf( "   " );
            
if  (spaceIndex  & gt;  0 ) {
                tagName 
=  tagName.substr( 0 , spaceIndex);
            }
            
return  tagName.toLowerCase();
        }

        
//  输出当前标记
         function  outputToken() {
            output.push(token_value);
        }
        
//  输出新行
         function  outputLine() {
            output.push(
" \n " );
        }
        
//  输出缩进
         function  outputIndent() {
            
for  ( var  i  =   0 ; i  & lt; indent_level; i ++ ) {
                output.push(that.indent_value);
            }
        }

        
//  parse的主体函数,循环获取下一个Token
         while  ( true ) {
            nextToken();

            
//  当前Token为结束标记
             if  (token_type  ===   " END " ) {
                
break ;
            }

            
switch  (token_type) {
                
case   " START_TAG " :
                    
//  我们对缩进的控制非常简单,开始标签后缩进一个单位
                    outputLine();
                    outputIndent();
                    outputToken();
                    indent_level
++ ;
                    
break ;
                
case   " END_TAG " :
                    
//  结束标签前减少一个单位缩进
                    indent_level -- ;
                    outputLine();
                    outputIndent();
                    outputToken();
                    
break ;
                
case   " SINGLE_TAG " :
                    outputLine();
                    outputIndent();
                    outputToken();
                    
break ;
                
case   " CONTENT " :
                    outputLine();
                    
if (is_format_content) {
                        outputIndent();
                    }
                    outputToken();
                    
break ;
            }
        }
        
//  去除最前面的"\n"
         this .result  =  output.join( "" ).substr( 1 );
    };

    $(
function () {
        $(
" #format " ).click( function () {

            
//  实例化HtmlBeautify,传递需要解析的HTML片段和缩进字符串
             var  beautify  =   new  HtmlBeautify($( " #content " ).val(),  "      " );
            $(
" #content " ).val(beautify.result);

        });
    });

此时的代码已经有点复杂了,而对于更加复杂的环境,比如对JavaScript的格式化如果还是这样手工解析势必会更加复杂,或许有更好的解决办法。

你可能感兴趣的:(html)