分析页面 提取email地址 程序

先下载一个包含大量 email 地址的论坛帖子(例如那种留下 email 求赠电子书的帖子),保存到 D 盘,
比如 http://topic.csdn.net/t/20020908/19/1006665.html
保存的文件名为 emailPage.htm(需与代码中读取的路径 D:\emailPage.htm 保持一致)



package i.tommy.test.Test;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Scans a saved HTML page line by line and prints every e-mail address found in it
 * to standard output, one address per line.
 *
 * <p>The page is read from the path given as the first command-line argument, or from
 * {@code D:\emailPage.htm} when no argument is supplied.
 */
public class emailGetTest
{
    /** Page that is scanned when no path is passed on the command line. */
    private static final String DEFAULT_PAGE_PATH = "D:\\emailPage.htm";

    /**
     * Loose e-mail matcher: a local part of word characters, dots, {@code +} and
     * {@code -}, an {@code @}, then a domain of word characters, dots and hyphens that
     * ends in a dot followed by word characters.
     *
     * <p>Compiled once because {@link Pattern} is immutable and recompiling it for
     * every input line is wasted work. This is a pragmatic scraper pattern, not an
     * RFC 5322 validator.
     */
    private static final Pattern EMAIL_PATTERN = Pattern.compile("[\\w.+-]+@[\\w.-]+\\.\\w+");

    /**
     * Reads the page and prints each e-mail address it contains.
     *
     * <p>I/O failures are reported on standard error; they are not rethrown.
     *
     * @param args optional; {@code args[0]} is the path of the page to scan. When absent,
     *             {@code D:\emailPage.htm} is used.
     */
    public static void main(String[] args)
    {
        String pagePath = (args != null && args.length > 0) ? args[0] : DEFAULT_PAGE_PATH;

        // try-with-resources closes the reader (and the underlying file) on every path,
        // including when readLine() throws.
        // NOTE: FileReader(String) decodes with the platform default charset on older JDKs;
        // e-mail addresses are ASCII, so matching is unaffected for ASCII-compatible encodings.
        try (BufferedReader reader = new BufferedReader(new FileReader(pagePath)))
        {
            String line;
            while ((line = reader.readLine()) != null)
            {
                parseLine(line);
            }
        } catch (FileNotFoundException e)
        {
            System.err.println("Page not found: " + pagePath);
            e.printStackTrace();
        } catch (IOException e)
        {
            System.err.println("Failed to read page: " + pagePath);
            e.printStackTrace();
        }
    }

    /**
     * Prints every e-mail address that occurs in a single line of text.
     *
     * @param line one line of the page; may contain zero or more addresses
     */
    private static void parseLine(String line)
    {
        Matcher m = EMAIL_PATTERN.matcher(line);
        while (m.find())
        {
            System.out.println(m.group());
        }
    }
}



结果

[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]

哥下一步是不是要发垃圾邮件了?? 呵呵

你可能感兴趣的:(分析页面 提取email地址 程序)