获取免费天气(Java抓取百度天气)

因为项目要用到天气信息,我在网上找了一些免费的 API,但它们不是有调用次数限制,就是返回的结果不令人满意,对比之后觉得百度搜索结果页里的天气卡片比较合适,于是决定用爬虫把它抓下来

在百度上搜索"北京天气",将浏览器的地址复制,作为请求地址



添加jar包

	
    
	<dependency>
		<groupId>org.jsoup</groupId>
		<artifactId>jsoup</artifactId>
		<version>1.10.3</version>
	</dependency>
	

抓取程序

package com.orange.utils;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;


public class TestWeather {
	private static String weatherUrl = "http://www.baidu.com/baidu?tn=monline_3_dg&ie=utf-8&wd=%E5%8C%97%E4%BA%AC%E5%A4%A9%E6%B0%94";
	
	public static void getWeather() {
		String userAgent = UserAgentUtil.getUserAgents();
		try {
			Document doc = Jsoup.connect(weatherUrl).userAgent(userAgent).timeout(5000).get();
			Elements a = doc.getElementsByClass("op_weather4_twoicon").get(0).getElementsByTag("a");

			for (Element element : a) {
				String quality = "";
				String current = "";
				String today = "";

				//只有当天才有实时温度
				if (!element.getElementsByClass("op_weather4_twoicon_shishi_title").isEmpty()) {
					current = element.getElementsByClass("op_weather4_twoicon_shishi_title").text();
				}
				//空气质量
				if (!element.getElementsByClass("op_weather4_twoicon_aqi_text_today").isEmpty()) {
					quality = element.getElementsByClass("op_weather4_twoicon_aqi_text_today").text();
				}else {
					quality = element.getElementsByClass("op_weather4_twoicon_aqi_text").text();
				}
				//日期
				if (!element.getElementsByClass("op_weather4_twoicon_date").isEmpty()) {
					today = element.getElementsByClass("op_weather4_twoicon_date").text();
				}else {
					today = element.getElementsByClass("op_weather4_twoicon_date_day").text();
				}
				//风
				String wind = element.getElementsByClass("op_weather4_twoicon_wind").text();
				//天气
				String weath = element.getElementsByClass("op_weather4_twoicon_weath").text();
				//气温
				String temp = element.getElementsByClass("op_weather4_twoicon_temp").text();
				
				System.out.println(quality);
				System.out.println(current);
				System.out.println(today);
				System.out.println(wind);
				System.out.println(weath);
				System.out.println(temp);
				System.out.println("=============================");
			}
			
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
		
	public static void main(String[] args) {
		getWeather();
	}

}、

UserAgentUtil 类 (作用:设置浏览器版本,每次抓取的时候随机取一个浏览器版本,模拟不同浏览器访问,可能并没什么用)

package com.orange.utils;

import java.util.Random;

/**
 * Supplies browser User-Agent strings from a fixed built-in list, either at
 * random or by index.
 */
public class UserAgentUtil {

	/** Built-in pool of User-Agent header values. */
	private static final String[] USER_AGENTS = {
		"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
		"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
		"Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
		"Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
		"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
		"Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
		"Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
		"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
		"Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
		"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
		"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
		"Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
		"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
		"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
		"Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52"
	};

	/** Shared generator, so a new Random is not allocated on every call. */
	private static final Random RANDOM = new Random();

	/**
	 * Returns a User-Agent string picked at random from the built-in list.
	 *
	 * @return one of the built-in User-Agent strings
	 */
	public static String getUserAgents() {
		return USER_AGENTS[RANDOM.nextInt(USER_AGENTS.length)];
	}

	/**
	 * Returns the User-Agent string at the given position in the built-in
	 * list, or a random one when the index is out of range.
	 *
	 * @param index zero-based position in the list
	 * @return the entry at {@code index}, or a random entry if {@code index}
	 *         is negative or not less than the list length
	 */
	public static String getUserAgents(int index) {
		// Valid indices are 0 .. length-1, so index == length must also fall
		// back to a random pick instead of reading past the end of the array.
		if (index < 0 || index >= USER_AGENTS.length) {
			return getUserAgents();
		}
		return USER_AGENTS[index];
	}

}
 
  

运行程序:

=============================
轻度
35
06月05日 周二 农历四月廿二 
西南风4-5级
多云
25 ~ 38℃
=============================
中度


周三
北风3级
多云
25 ~ 36℃
=============================



周四
南风3级
多云
23 ~ 34℃
=============================



周五
北风3级
多云转雷阵雨
20 ~ 33℃
=============================



周六
北风3级
雷阵雨转小雨
17 ~ 26℃

你可能感兴趣的:(数据抓取/爬虫)