去除网页源代码中的 HTML 标签、只保留文字部分的 ASP 函数

<%           
Function RemoveHTML( strText )
     ' Returns strText with known HTML tags removed, keeping the text between them.
     '
     ' strText : the HTML source to clean. It is copied into a local working
     '           string, so the caller's variable is left untouched even though
     '           VBScript passes arguments ByRef by default.
     ' Returns : the remaining text as a string.
     '
     ' Rules applied:
     '   * "<...>" is removed only when its name appears in TAGLIST; any other
     '     "<" (for example in "1 < 2") is kept as literal text.
     '   * For an opening tag listed in BLOCKTAGLIST, everything up to and
     '     including the matching closing tag is removed. When no closing tag
     '     exists, the rest of the input is removed.
     '   * "<!-- ... -->" is removed up to the terminating "-->", so a ">" inside
     '     a comment does not end it early.
     Dim TAGLIST
     TAGLIST = ";!--;!DOCTYPE;A;ACRONYM;ADDRESS;APPLET;AREA;B;BASE;BASEFONT;" &_
               "BGSOUND;BIG;BLOCKQUOTE;BODY;BR;BUTTON;CAPTION;CENTER;CITE;CODE;" &_
               "COL;COLGROUP;COMMENT;DD;DEL;DFN;DIR;DIV;DL;DT;EM;EMBED;FIELDSET;" &_
               "FONT;FORM;FRAME;FRAMESET;HEAD;H1;H2;H3;H4;H5;H6;HR;HTML;I;IFRAME;IMG;" &_
               "INPUT;INS;ISINDEX;KBD;LABEL;LAYER;LEGEND;LI;LINK;LISTING;MAP;MARQUEE;" &_
               "MENU;META;NOBR;NOFRAMES;NOSCRIPT;OBJECT;OL;OPTION;P;PARAM;PLAINTEXT;" &_
               "PRE;Q;S;SAMP;SCRIPT;SELECT;SMALL;SPAN;STRIKE;STRONG;STYLE;SUB;SUP;" &_
               "TABLE;TBODY;TD;TEXTAREA;TFOOT;TH;THEAD;TITLE;TR;TT;U;UL;VAR;WBR;XMP;"

     ' Tags whose content is dropped together with the tag itself.
     Const BLOCKTAGLIST = ";APPLET;EMBED;FRAMESET;HEAD;NOFRAMES;NOSCRIPT;OBJECT;SCRIPT;STYLE;"

     Dim strRest          ' part of the input that has not been scanned yet
     Dim nPos1            ' position of the current "<"
     Dim nPos2            ' position of the last character to drop
     Dim nPos3            ' scratch position
     Dim strResult
     Dim strTagName
     Dim bRemove
     Dim bSearchForBlock
     Dim bComment

     strRest = strText
     strResult = ""

     nPos1 = InStr(strRest, "<")
     Do While nPos1 > 0
         ' A comment ends at "-->", not at the first ">".
         bComment = False
         nPos2 = 0
         If Mid(strRest, nPos1, 4) = "<!--" Then
             nPos3 = InStr(nPos1 + 4, strRest, "-->")
             If nPos3 > 0 Then
                 bComment = True
                 nPos2 = nPos3 + 2
             End If
         End If

         ' Ordinary tag, or an unterminated comment: end at the next ">".
         If Not bComment Then
             nPos2 = InStr(nPos1 + 1, strRest, ">")
         End If

         If nPos2 > 0 Then
             If bComment Then
                 bRemove = True
             Else
                 ' The tag name is everything up to the first whitespace.
                 strTagName = Mid(strRest, nPos1 + 1, nPos2 - nPos1 - 1)
                 strTagName = Replace(Replace(Replace(strTagName, vbCr, " "), vbLf, " "), vbTab, " ")

                 nPos3 = InStr(strTagName, " ")
                 If nPos3 > 0 Then
                     strTagName = Left(strTagName, nPos3 - 1)
                 End If

                 If Left(strTagName, 1) = "/" Then
                     ' Closing tag: never starts a block.
                     strTagName = Mid(strTagName, 2)
                     bSearchForBlock = False
                 Else
                     bSearchForBlock = True
                 End If

                 ' "<br/>" style: drop the trailing slash so the name matches.
                 ' Such a tag has no content, so no closing tag is searched for.
                 If Len(strTagName) > 1 And Right(strTagName, 1) = "/" Then
                     strTagName = Left(strTagName, Len(strTagName) - 1)
                     bSearchForBlock = False
                 End If

                 If InStr(1, TAGLIST, ";" & strTagName & ";", vbTextCompare) > 0 Then
                     bRemove = True
                     If bSearchForBlock Then
                         If InStr(1, BLOCKTAGLIST, ";" & strTagName & ";", vbTextCompare) > 0 Then
                             ' Default: drop to the end when no closing tag is found.
                             nPos2 = Len(strRest)
                             nPos3 = InStr(nPos1 + 1, strRest, "</" & strTagName, vbTextCompare)
                             If nPos3 > 0 Then
                                 nPos3 = InStr(nPos3 + 1, strRest, ">")
                             End If

                             If nPos3 > 0 Then
                                 nPos2 = nPos3
                             End If
                         End If
                     End If
                 Else
                     bRemove = False
                 End If
             End If

             If bRemove Then
                 ' Keep the text before the tag, skip the tag (or whole block).
                 strResult = strResult & Left(strRest, nPos1 - 1)
                 strRest = Mid(strRest, nPos2 + 1)
             Else
                 ' Not a known tag: keep the "<" as text and rescan after it.
                 strResult = strResult & Left(strRest, nPos1)
                 strRest = Mid(strRest, nPos1 + 1)
             End If
         Else
             ' A "<" with no closing ">" at all: the remainder is plain text.
             strResult = strResult & strRest
             strRest = ""
         End If

         nPos1 = InStr(strRest, "<")
     Loop
     strResult = strResult & strRest

     RemoveHTML = strResult
End Function
'---------------------------------------------

'注:以上是我翻译的 www.codeproject.com 上的一篇老外的文章,自认为比较好。

'--------------------------------------------

Function stripHTML(strtext)
' Removes every "<...>" sequence from strtext and returns the remaining text.
'
' strtext : the HTML source to clean.
' Returns : the text with tags removed. Any "<" or ">" left over (a "<" that
'           never had a matching ">", or a stray ">") is escaped as "&lt;" /
'           "&gt;". "&nbsp;" entities are removed.
'
' NOTE(review): the three entity literals below appear to have been decoded
' to plain characters when this code was published; as found, the first two
' Replace calls were no-ops and the third deleted every space. They are
' restored here on that assumption - confirm the "&nbsp;" replacement (removal)
' is what callers want.
dim arysplit, i, j, strOutput

' Split() returns an empty array for "" and fails on Null, so arysplit(0)
' below would raise an error; there is nothing to strip in either case.
if len(strtext & "") = 0 then
   stripHTML = ""
   exit function
end if

arysplit = split(strtext, "<")

' Element 0 is the text before the first "<". When it is non-empty it is
' plain text and is skipped (j = 1). When it is empty the loop turns it into
' a single "<" that the Mid() after the Join() drops again (j = 0).
if len(arysplit(0)) > 0 then j = 1 else j = 0

for i = j to ubound(arysplit)
   if instr(arysplit(i), ">") then
     ' "tag>text": keep only what follows the first ">"
     arysplit(i) = mid(arysplit(i), instr(arysplit(i), ">") + 1)
   else
     ' no ">" in this piece: the "<" was literal text, put it back
     arysplit(i) = "<" & arysplit(i)
   end if
next

strOutput = join(arysplit, "")
strOutput = mid(strOutput, 2 - j)
strOutput = replace(strOutput, ">", "&gt;")
strOutput = replace(strOutput, "<", "&lt;")
strOutput = replace(strOutput, "&nbsp;", "")
stripHTML = strOutput
End Function
 %>

你可能感兴趣的:(JOIN,function,asp)