命名空间
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;
1. 我们采集到的数据中，有些日期很不规则，带有中文，比如“2013年5月8日”“2013年5月”。这里提供一个方法，把这类不规则的日期转换成 DateTime 类型。
/// <summary>
/// Converts a loosely formatted date string (for example "2013年5月8日",
/// "2013年5月", "2013年" or text that embeds "2013-05-08") into a <see cref="DateTime"/>.
/// </summary>
/// <remarks>
/// Three strategies are tried in order: a direct parse with the current culture,
/// extraction of an embedded dashed date, and finally extraction of Chinese
/// year/month/day components. Missing month or day components default to 1.
/// </remarks>
/// <param name="a">The raw text. <c>null</c> yields <see cref="DateTime.MinValue"/>,
/// which is what <c>Convert.ToDateTime(null)</c> returns.</param>
/// <returns>The parsed date.</returns>
/// <exception cref="FormatException">No date could be recognised in <paramref name="a"/>.</exception>
public DateTime ConvertTime(string a)
{
    // Keep the historical contract of Convert.ToDateTime((string)null).
    if (a == null)
    {
        return DateTime.MinValue;
    }

    DateTime dt;
    if (DateTime.TryParse(a, out dt)
        || TryParseDashedDate(a, out dt)
        || TryParseChineseDate(a, out dt))
    {
        return dt;
    }

    throw new FormatException("No recognizable date was found in \"" + a + "\".");
}

/// <summary>
/// Extracts the first parseable dashed date (such as "2013-05-08") embedded in free text.
/// </summary>
/// <param name="a">The text to search.</param>
/// <returns>The first dashed date that parses successfully.</returns>
/// <exception cref="ArgumentNullException"><paramref name="a"/> is <c>null</c>.</exception>
/// <exception cref="FormatException">The text contains no parseable dashed date.</exception>
public DateTime ClearDate(string a)
{
    if (a == null)
    {
        throw new ArgumentNullException("a");
    }

    DateTime parsed;
    if (TryParseDashedDate(a, out parsed))
    {
        return parsed;
    }

    throw new FormatException("No dashed date was found in \"" + a + "\".");
}

// Tries every "number-number-number" run in order and stops at the first one that parses.
// Candidates are evaluated one by one rather than concatenated, because gluing two dates
// together ("2013-5-8" + "2014-1-1") can never be parsed.
private static bool TryParseDashedDate(string text, out DateTime result)
{
    foreach (Match candidate in Regex.Matches(text, @"([1-9]\d*\-{1}\d*\-{1}\d*)"))
    {
        if (DateTime.TryParse(candidate.Value, out result))
        {
            return true;
        }
    }

    result = DateTime.MinValue;
    return false;
}

// Pulls year/month/day numbers out of Chinese-style dates and parses them as "y-M-d".
private static bool TryParseChineseDate(string text, out DateTime result)
{
    // Regex.Split keeps the text of every capturing group that took part in a match,
    // so the numeric components show up as separate elements of the array.
    string[] pieces = Regex.Split(
        text,
        @"(\d{2,4})年(\d{1,2})月(\d{1,2})日*|(\d{2,4})年(\d{1,2})月*|(\d{1,2})月(\d{1,2})日*");

    // Only the leading four elements (text before the first match plus up to three
    // captured numbers) are considered, so later matches do not pollute the result.
    List<string> parts = new List<string>();
    int limit = Math.Min(pieces.Length, 4);
    for (int i = 0; i < limit; i++)
    {
        int ignored;
        if (int.TryParse(pieces[i], out ignored))
        {
            parts.Add(pieces[i]);
        }
    }

    // Year-only input such as "2013年". This runs only when nothing more specific was
    // found, so a stray number in front of a complete date cannot hijack the result.
    if (parts.Count == 0)
    {
        Match yearOnly = Regex.Match(text, @"(\d{2,4})年");
        if (yearOnly.Success)
        {
            parts.Add(yearOnly.Groups[1].Value);
        }
    }

    if (parts.Count == 0)
    {
        result = DateTime.MinValue;
        return false;
    }

    string candidate;
    if (parts.Count == 1)
    {
        // A lone number is treated as a year; default to January 1st.
        candidate = parts[0] + "-1-1";
    }
    else
    {
        for (int i = 0; i < parts.Count; i++)
        {
            // A zero month or day is not a valid date component; fall back to 1.
            if (parts[i] == "0" || parts[i] == "00")
            {
                parts[i] = "1";
            }
        }

        candidate = string.Join("-", parts.ToArray());
    }

    return DateTime.TryParse(candidate, out result);
}
直接调用 ConvertTime 即可完成转换。
2. 带中文的字符串只保留数字和小数点，并转换成 double 类型
/// <summary>
/// Extracts the numeric content of a string that mixes digits with other text
/// (for example "价格12.5元" or "1,005元") and returns it as a <see cref="double"/>.
/// </summary>
/// <remarks>
/// Every run of ASCII digits, optionally followed by a decimal point and more digits,
/// is kept; all runs are concatenated in order, so separators between digit groups
/// are dropped. Signs are ignored. The decimal point is always '.', independent of
/// the current culture.
/// </remarks>
/// <param name="a">The raw text; <c>null</c> or empty yields 0.</param>
/// <returns>The extracted number, or 0 when the text contains no digits.</returns>
/// <exception cref="FormatException">The concatenated digit runs do not form a valid
/// number (for example when the text contains two separate decimal numbers).</exception>
public double ConvertNumber(string a)
{
    if (string.IsNullOrEmpty(a))
    {
        return 0.0;
    }

    // Runs may start with '0'. Requiring a leading 1-9 would silently drop the zeros
    // of "1,005" and turn it into 15.
    StringBuilder digits = new StringBuilder();
    foreach (Match run in Regex.Matches(a, @"[0-9]+\.?[0-9]*"))
    {
        digits.Append(run.Value);
    }

    if (digits.Length == 0)
    {
        return 0.0;
    }

    // The pattern hard-codes '.' as the decimal separator, so parse culture-independently.
    return Convert.ToDouble(digits.ToString(), CultureInfo.InvariantCulture);
}
3. 去除 HTML 标签
/// <summary>
/// Reduces an HTML fragment to plain text: removes scripts, styles, comments and tags,
/// strips stray angle brackets, and decodes a small set of character entities.
/// </summary>
/// <remarks>
/// The result is plain text, not markup. Because entities such as <c>&amp;lt;</c> are
/// decoded, the returned string may contain '&lt;' or '&gt;' characters and must be
/// HTML-encoded again before it is embedded in a page.
/// Numeric entities other than the handful decoded here are removed.
/// </remarks>
/// <param name="Htmlstring">The HTML to clean; <c>null</c> or empty yields an empty string.</param>
/// <returns>The trimmed plain-text content.</returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Singleline lets '.' cross line breaks; without it multi-line <script>/<style>
    // bodies and comments survive and leak into the text.
    const RegexOptions multiLine = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    string text = Htmlstring;

    // 1. Remove elements whose content is never visible text.
    text = Regex.Replace(text, @"<script[^>]*?>.*?</script>", "", multiLine);
    text = Regex.Replace(text, @"<style[^>]*?>.*?</style>", "", multiLine);
    text = Regex.Replace(text, @"<!--.*?-->", "", multiLine);

    // 2. Remove the remaining tags, line-break indentation and comment residue.
    text = Regex.Replace(text, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    text = Regex.Replace(text, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    text = Regex.Replace(text, @"-->", "", RegexOptions.IgnoreCase);
    text = Regex.Replace(text, @"<!--.*", "", RegexOptions.IgnoreCase);

    // 3. Drop stray markup characters. Strings are immutable, so the result of
    //    Replace must be assigned. This happens before entity decoding so that
    //    text written as &lt; / &gt; is preserved instead of being deleted.
    text = text.Replace("<", "").Replace(">", "").Replace("\r\n", "");

    // 4. Decode entities in a single pass so that nothing is decoded twice
    //    ("&amp;lt;" is the literal text "&lt;", not "<").
    text = Regex.Replace(
        text,
        @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        DecodeEntity,
        RegexOptions.IgnoreCase);

    return text.Trim();
}

// Maps one matched entity to its replacement text; unknown numeric entities are removed.
private static string DecodeEntity(Match entity)
{
    switch (entity.Groups[1].Value.ToLowerInvariant())
    {
        case "quot":
        case "#34":
            return "\"";
        case "amp":
        case "#38":
            return "&";
        case "lt":
        case "#60":
            return "<";
        case "gt":
        case "#62":
            return ">";
        case "nbsp":
        case "#160":
            return " ";
        case "iexcl":
        case "#161":
            return "\u00A1";
        case "cent":
        case "#162":
            return "\u00A2";
        case "pound":
        case "#163":
            return "\u00A3";
        case "copy":
        case "#169":
            return "\u00A9";
        default:
            return string.Empty;
    }
}