<tr>
<td>5345454354</td><td>2010-3-29 13:48:33</td><td>周杰伦</td>
</tr>
<tr>
<td>6565465466</td><td>2010-3-29 15:34:38</td><td>张学友</td>
</tr>
<tr>
<td>6546546546</td><td>2010-3-30 19:30:50</td><td>刘德华</td>
</tr>
<tr>
<td>9875646545</td><td>2010-3-31 2:20:58</td><td>郭富城</td>
</tr>
<tr>
<td>7868768768</td><td>2010-3-31 8:03:11</td><td>梁朝伟</td>
</tr>
若想取标记 <td> 与 </td> 之间的内容, 可以这样分析:
<td>(.*?)</td>
string str = "..........";
string pstr = "<td>(.*?)</td>";
MatchCollection mc = Regex.Matches(str, pstr);
for (int i = 0; i < mc.Count; i++)
{
Response.Write(mc[i].Result("$1"));
}
MatchCollection mc = Regex.Matches(html,@"(?is)(?<=<td>).+?(?=</td>)");
foreach(Match m in mc)
{
//Response.Write(m.Value);//web
MessageBox.Show(m.Value);
}
表达式说明
(?<=Expression)
逆序肯定环视,表示所在位置左侧能够匹配Expression
(?<!Expression)
逆序否定环视,表示所在位置左侧不能匹配Expression
(?=Expression)
顺序肯定环视,表示所在位置右侧能够匹配Expression
(?!Expression)
顺序否定环视,表示所在位置右侧不能匹配Expression
(?is)(?<=<td>).+?(?=</td>)
(?is)
模式修饰,i表示忽略大小写,s表示单行模式,能匹配回车换行
(?<=<td>)
逆序肯定环视,需要匹配的结果以<td>开头,但是<td>不匹配,结果中不包含<td>
.+?
任意字符,每次匹配到符合的(任意字符),即尝试匹配后面的表达式,直到后面的表达式失败,回溯上一次匹配结果。
(?=</td>)
顺序肯定环视,匹配的结果最后要以</td>结尾,但</td>不匹配,结果中不包含</td>
正则解析xml内容比dom4j快50倍?
long t1 = System.nanoTime();
String str = "1416900555 2348714844 1 1 1 0 ";
// Document doc = null;
// try {
// doc = DocumentHelper.parseText(str);
// } catch (DocumentException e) {
// log.error("解析群发xml错误:"+e.getMessage(), e);
// }
//
// Element root = doc.getRootElement();
// String msgid = root.elementTextTrim("MsgID");
// String Status = root.elementTextTrim("Status");
// String TotalCount = root.elementTextTrim("TotalCount");
// String FilterCount = root.elementTextTrim("FilterCount");
// String SentCount = root.elementTextTrim("SentCount");
// String ErrorCount = root.elementTextTrim("ErrorCount");
String msgid = RegExp.getString(str,
"(?<=<MsgID>)[\\s\\S]*?(?=</MsgID>)").trim();
String Status = RegExp.getString(str,
"(?<= )")
.trim();
String TotalCount = RegExp.getString(str,
"(?<=<TotalCount>)[\\s\\S]*?(?=</TotalCount>)")
.trim();
String FilterCount = RegExp.getString(str,
"(?<=<FilterCount>)[\\s\\S]*?(?=</FilterCount>)")
.trim();
String SentCount = RegExp.getString(str,
"(?<=<SentCount>)[\\s\\S]*?(?=</SentCount>)")
.trim();
String ErrorCount = RegExp.getString(str,
"(?<=<ErrorCount>)[\\s\\S]*?(?=</ErrorCount>)")
.trim();
long t2 = System.nanoTime();
log.info(t2-t1);
log.info((t2-t1)*0.000001);
log.info(msgid+", "+Status+", "+TotalCount+", "+FilterCount+", "+SentCount+", "+ErrorCount);
dom4j运行结果:
2014-11-26 15:25:29,716 INFO [Test] 70 - <220279310>
2014-11-26 15:25:29,719 INFO [Test] 71 - <220.27930999999998>《==看这里
2014-11-26 15:25:29,719 INFO [Test] 72 - <2348714844, send success, 1, 1, 1, 0>
正则运行结果:
2014-11-26 15:28:08,575 INFO [Test] 70 - <4633684>
2014-11-26 15:28:08,578 INFO [Test] 71 - <4.633684>《==看这里
2014-11-26 15:28:08,578 INFO [Test] 72 - <2348714844, , 1, 1, 1, 0>
public class RegExp {
public static ArrayList<String> getStrs(String source, String regex) {
Pattern p = Pattern.compile(regex);
Matcher m = p.matcher(source);
ArrayList<String> list = new ArrayList();
while (m.find()) {
list.add(source.substring(m.start(), m.end()));
}
return list;
}
public static String getString(String source, String regex) {
ArrayList<String> list = getStrs(source, regex);
if (list.size() > 0) {
return (String) list.get(0);
}
return "";
}
public static ArrayList<String> getStrs(String source, String beginStr,
String endStr, boolean isLong) {
if (isLong) {
return getStrs(source,
"(?<=" + replay(beginStr) + ")[\\s\\S]*(?=" + replay(endStr) +
")");
}
return getStrs(source,
"(?<=" + replay(beginStr) + ")[\\s\\S]*?(?=" + replay(endStr) +
")");
}
public static String getString(String source, String beginStr,
String endStr, boolean isLong) {
if (isLong) {
return getString(source,
"(?<=" + replay(beginStr) + ")[\\s\\S]*(?=" + replay(endStr) +
")");
}
return getString(source,
"(?<=" + replay(beginStr) + ")[\\s\\S]*?(?=" + replay(endStr) +
")");
}
private static String replay(String source) {
String result = "";
result = source.replace("\\", "\\\\");
result = source.replace(".", "\\.");
result = result.replace("(", "\\(");
result = result.replace(")", "\\)");
result = result.replace("[", "\\[");
result = result.replace("]", "\\]");
result = result.replace("{", "\\{");
result = result.replace("}", "\\}");
result = result.replace("$", "\\$");
result = result.replace("?", "\\?");
result = result.replace("&", "\\&");
result = result.replace("*", "\\*");
result = result.replace("!", "\\!");
result = result.replace("^", "\\^");
result = result.replace("+", "\\+");
result = result.replace("#", "\\#");
return result;
}
}