取HTML的中文字

/// <summary>
/// Extracts the plain text from an HTML string by removing its markup.
/// </summary>
/// <remarks>
/// Everything between a "&lt;" and the next "&gt;" is removed, including tags
/// whose attributes wrap across several lines. Any angle brackets that remain
/// afterwards and every "&amp;nbsp;" entity are then deleted (not replaced by a
/// space). Other HTML entities are left as they are, and the text inside
/// elements such as script or style is not removed.
/// </remarks>
/// <param name="strHtml">The HTML to strip; may be null or empty.</param>
/// <returns>
/// The input with markup removed, or an empty string when
/// <paramref name="strHtml"/> is null or empty.
/// </returns>
public static string GetHtmlContentText(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return "";
    }

    // Remove everything between "<" and ">". Singleline makes "." match line
    // breaks too, so a tag that spans several lines is removed as a whole
    // instead of leaving its name and attributes behind in the output.
    // The static Regex.Replace overload reuses the framework's pattern cache,
    // so no Regex object has to be constructed on every call.
    string strOutput = Regex.Replace(strHtml, "<.+?>", "", RegexOptions.Singleline);

    // Drop angle brackets that were not part of a complete tag.
    strOutput = strOutput.Replace("<", "");
    strOutput = strOutput.Replace(">", "");

    // Non-breaking-space entities are deleted outright.
    strOutput = strOutput.Replace("&nbsp;", "");

    return strOutput;
}

你可能感兴趣的:(取HTML的中文字)