Using a simple crawler to extract email addresses from a file or web page and store them in a database. The example below reads a page (or, alternatively, a local file) line by line, matches addresses with a regular expression, and inserts every match into a MySQL table via JDBC.

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Crawler {

	public static void main(String[] args) {
		getMail();

	}
	
	public static void getMail() {
		try {
			// Open the page that will be scanned for email addresses
			URL url = new URL("http://localhost:8080/hibernate/");
			URLConnection conn = url.openConnection();
			BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
			
			// Alternatively, read from a local file instead of a URL:
//			BufferedReader reader = new BufferedReader(new FileReader("src/mail.txt"));
			
			// Simple email pattern: word characters, "@", then one or more dot-separated parts
			String mailreg = "\\w+@\\w+(\\.\\w+)+";
			Pattern pattern = Pattern.compile(mailreg);
			
			String line;
			Matcher matcher;
			// Load the MySQL driver and open a connection to the local "test" database
			Class.forName("com.mysql.jdbc.Driver");
			Connection connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", "root", "root");
			Statement statement = connection.createStatement();
			// Scan every line for email addresses and insert each match into the email table
			while ((line = reader.readLine()) != null) {
				matcher = pattern.matcher(line);
				while (matcher.find()) {
					// The first column is assumed to be an auto-increment id, so null is inserted for it
					String sql = "insert into email values(null, '" + matcher.group() + "')";
					statement.executeUpdate(sql);
					
					System.out.println(matcher.group());
				}
			}
			
			// Release the database and stream resources
			statement.close();
			connection.close();
			reader.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
		
		
	}

}
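
The insert above concatenates the matched address straight into the SQL string, which works for well-formed addresses but breaks if the matched text ever contains a quote. As a hedged alternative, the same insert can be done with a PreparedStatement; the sketch below is a minimal variant, assuming the same connection settings and a two-column email table (an auto-increment id plus an address column) as in the code above. MailInserter and its insert method are illustrative names, not part of the original code.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

public class MailInserter {

	// Inserts a single address using a parameterized statement instead of string concatenation.
	// Connection details (localhost:3306/test, root/root) mirror the example above;
	// the two-column table layout is an assumption, not shown in the original source.
	public static void insert(String address) throws Exception {
		Class.forName("com.mysql.jdbc.Driver");
		try (Connection connection = DriverManager.getConnection(
				"jdbc:mysql://localhost:3306/test", "root", "root");
				PreparedStatement ps = connection.prepareStatement(
						"insert into email values(null, ?)")) {
			ps.setString(1, address); // the driver handles quoting and escaping
			ps.executeUpdate();
		}
	}
}

Inside the crawler loop, statement.executeUpdate(sql) would then be replaced by a call such as MailInserter.insert(matcher.group()).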
