网络爬虫并将结果存入 MySQL 数据库

这次爬了天气网上北京某月的历史数据,最下面附了表的设计以及最后数据库中的结果

package net;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.Connection;
import java.util.Date;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * One day's weather observation scraped from the history page.
 * Field names are pinyin: zuigao = daily high temperature, zuidi = daily low
 * (presumably in °C — confirm against the source site), tianqi = weather
 * description, fengxiang = wind direction, fengli = wind force.
 */
class Point{
	private Date date;        // observation day, parsed from "yyyy-MM-dd"
	private int zuigao;       // daily high temperature
	private int zuidi;        // daily low temperature
	private String tianqi;    // weather description, e.g. "晴"
	private String fengxiang; // wind direction, e.g. "北风"
	private String fengli;    // wind force, e.g. "3级"

	public Date getDate() {
		return date;
	}

	public void setDate(Date date) {
		this.date = date;
	}

	public int getZuigao() {
		return zuigao;
	}

	public void setZuigao(int zuigao) {
		this.zuigao = zuigao;
	}

	public int getZuidi() {
		return zuidi;
	}

	public void setZuidi(int zuidi) {
		this.zuidi = zuidi;
	}

	public String getTianqi() {
		return tianqi;
	}

	public void setTianqi(String tianqi) {
		this.tianqi = tianqi;
	}

	public String getFengxiang() {
		return fengxiang;
	}

	public void setFengxiang(String fengxiang) {
		this.fengxiang = fengxiang;
	}

	public String getFengli() {
		return fengli;
	}

	public void setFengli(String fengli) {
		this.fengli = fengli;
	}

	public Point() {
		super();
	}

	/**
	 * Builds a record from scraped strings.
	 *
	 * @param date textual date in yyyy-MM-dd form
	 * @throws IllegalArgumentException if {@code date} cannot be parsed.
	 *         The original swallowed the ParseException with
	 *         printStackTrace(), silently leaving {@code date} null;
	 *         failing fast keeps bad rows out of the database.
	 */
	public Point(String date, int zuigao, int zuidi, String tianqi, String fengxiang, String fengli) {
		super();
		try {
			// SimpleDateFormat is not thread-safe, so a fresh instance per call.
			this.date = new SimpleDateFormat("yyyy-MM-dd").parse(date);
		} catch (ParseException e) {
			throw new IllegalArgumentException("expected yyyy-MM-dd, got: " + date, e);
		}
		this.zuigao = zuigao;
		this.zuidi = zuidi;
		this.tianqi = tianqi;
		this.fengxiang = fengxiang;
		this.fengli = fengli;
	}
}

public class SpiderTest {
	
	public static String getContent(String urlStr, String s) {
		StringBuilder sb = new StringBuilder();
		URL url;
		try {
			url = new URL(urlStr);
			BufferedReader bf = new BufferedReader(new InputStreamReader(url.openStream(), s));
			String temp = "";
			while((temp=bf.readLine())!=null) {
				sb.append(temp);
			}
		} catch (MalformedURLException e) {
			e.printStackTrace();
		} catch (UnsupportedEncodingException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return sb.toString();
		
	}
	
	public static List getStr(String des, String regex){
		Pattern p = Pattern.compile(regex);
		Matcher m = p.matcher(des);
		List list = new ArrayList();
		while(m.find()) {
			Point pp = new Point(m.group(1),Integer.parseInt(m.group(2)),Integer.parseInt(m.group(3)),m.group(4),m.group(5),m.group(6));
			list.add(pp);
		}
		return list;
	}
	
	public static void mySql(String sql,List list) {
		String jdbc = "com.mysql.jdbc.Driver";
		String user = "root";
		String password = "123456";
		String url = "jdbc:mysql://localhost/t_user";
		Connection con = null;
		PreparedStatement ps = null;
		try {
			Class.forName(jdbc);
			con = DriverManager.getConnection(url, user, password);
			
			ps = con.prepareStatement(sql);
			for(int i = 0; i list = getStr(des, "href.*?>([\\d-]+)[\\s\\S]+?(\\d+)[\\s\\S\\w]+?(\\d+)[\\s\\S\\w]+?([\\u4e00-\\u9fa5]+)[\\s\\S\\w]+?([\\u4e00-\\u9fa5]+)[\\s\\S\\w]+?([\\d\\u4e00-\\u9fa5]+)");
		mySql("insert into tianqi(date,zuigao,zuidi,tianqi,fengxiang,fengli)values(?,?,?,?,?,?)",list);
		for(Point temp: list) {
			System.out.println(prt(temp));
		}
	}
}



网络爬虫并存入MySql数据库_第1张图片
网络爬虫并存入MySql数据库_第2张图片

你可能感兴趣的:(网络爬虫并存入MySql数据库)