刚开始做的时候,在网上找了一下代码。发现WebXml.com.cn上面有直接可调用的webServices地址为http://webservice.webxml.com.cn/WebServices/WeatherWS.asmx。不过他对免费调用的用户有一定限制,请看http://www.webxml.com.cn/zh_cn/web_services_user.aspx。所以我打算自己写。
我发现很多网站包括WebXml都是从中国天气网(www.weather.com.cn)抓取的。经过分析,发现可以有2种方法获取数据:
1.直接调用它的JS获取数据的方式。
这种方法获取简单,但数据量不多。
它本身就有一个地址可直接获取JSON数据:http://m.weather.com.cn/data/天气代码.html。获取到的数据如下:
{"weatherinfo":{"city":"广州","city_en":"guangzhou","date_y":"2012年2月8日","date":"","week":"星期三","fchh":"11","cityid":"101280101","temp1":"14℃~10℃","temp2":"16℃~12℃","temp3":"18℃~10℃","temp4":"18℃~13℃","temp5":"20℃~15℃","temp6":"22℃~17℃","tempF1":"57.2℉~50℉","tempF2":"60.8℉~53.6℉","tempF3":"64.4℉~50℉","tempF4":"64.4℉~55.4℉","tempF5":"68℉~59℉","tempF6":"71.6℉~62.6℉","weather1":"阴","weather2":"多云","weather3":"阴转小雨","weather4":"小雨转阴","weather5":"多云转阴","weather6":"多云转雾","img1":"2","img2":"99","img3":"1","img4":"99","img5":"2","img6":"7","img7":"7","img8":"2","img9":"1","img10":"2","img11":"1","img12":"18","img_single":"2","img_title1":"阴","img_title2":"阴","img_title3":"多云","img_title4":"多云","img_title5":"阴","img_title6":"小雨","img_title7":"小雨","img_title8":"阴","img_title9":"多云","img_title10":"阴","img_title11":"多云","img_title12":"雾","img_title_single":"阴","wind1":"微风","wind2":"微风","wind3":"微风","wind4":"微风","wind5":"微风","wind6":"微风","fx1":"微风","fx2":"微风","fl1":"小于3级","fl2":"小于3级","fl3":"小于3级","fl4":"小于3级","fl5":"小于3级","fl6":"小于3级","index":"温凉","index_d":"建议着夹衣或西服套装加薄羊毛衫等春秋服装。年老体弱者宜着夹衣或风衣加羊毛衫。","index48":"温凉","index48_d":"建议着夹衣或西服套装加薄羊毛衫等春秋服装。年老体弱者宜着夹衣或风衣加羊毛衫。","index_uv":"最弱","index48_uv":"弱","index_xc":"较适宜","index_tr":"很适宜","index_co":"较舒适","st1":"14","st2":"9","st3":"16","st4":"12","st5":"19","st6":"10","index_cl":"较适宜","index_ls":"不太适宜","index_ag":"不易发"}}
要获取他,先建立一个实体类WeatherSimpleInfo:
/// <summary>
/// Simple weather entity: a flat subset of the "weatherinfo" JSON object.
/// Property names are lowercase because they mirror the JSON keys used for
/// deserialization; do not rename them.
/// </summary>
public class WeatherSimpleInfo
{
    /// <summary>City name.</summary>
    public string city { get; set; }

    /// <summary>Date.</summary>
    public string date_y { get; set; }

    /// <summary>Lunar-calendar date.</summary>
    public string date { get; set; }

    /// <summary>City ID.</summary>
    public string cityid { get; set; }

    /// <summary>Temperature (numbered entries; presumably one per forecast day — confirm).</summary>
    public string temp1 { get; set; }
    public string temp2 { get; set; }

    /// <summary>Weather description.</summary>
    public string weather1 { get; set; }
    public string weather2 { get; set; }

    /// <summary>Weather image identifiers.</summary>
    public string img1 { get; set; }
    public string img2 { get; set; }
    public string img3 { get; set; }
    public string img4 { get; set; }

    /// <summary>Wind.</summary>
    public string wind1 { get; set; }
    public string wind2 { get; set; }

    /// <summary>Clothing index, short form.</summary>
    public string index { get; set; }

    /// <summary>Clothing index, detailed advice.</summary>
    public string index_d { get; set; }

    /// <summary>UV index, short form.</summary>
    public string index_uv { get; set; }

    /// <summary>Car-wash index, short form.</summary>
    public string index_xc { get; set; }

    /// <summary>The "index_tr" value of the feed (presumably the travel index — confirm).</summary>
    public string index_tr { get; set; }

    /// <summary>
    /// Builds a multi-line, human-readable summary of the first forecast entry.
    /// </summary>
    /// <returns>One "label:value" pair per line, each terminated by CR LF.</returns>
    public override string ToString()
    {
        var sb = new StringBuilder();
        // The line terminator must be the escape sequence "\r\n"; the previous
        // "/r/n" (forward slashes) was emitted verbatim and never broke the line.
        sb.AppendFormat("城市:{0}\r\n", this.city);
        sb.AppendFormat("时间:{0}\r\n", this.date_y);
        sb.AppendFormat("天气:{0}\r\n", this.weather1);
        sb.AppendFormat("温度:{0}\r\n", this.temp1);
        sb.AppendFormat("风:{0}\r\n", this.wind1);
        sb.AppendFormat("穿衣指数:({0})。{1}\r\n", this.index, this.index_d);
        sb.AppendFormat("紫外线指数:{0}\r\n", this.index_uv);
        sb.AppendFormat("洗车指数:{0}\r\n", this.index_xc);
        return sb.ToString();
    }
}
抓取类:WeatherSimple:
/// <summary>
/// Simple weather forecast access: downloads the forecast JSON directly.
/// The class also serves as the deserialization root, whose single
/// "weatherinfo" key wraps the actual payload.
/// </summary>
public class WeatherSimple
{
    /// <summary>URL template; {0} is replaced with the city code.</summary>
    private const string __Url = "http://m.weather.com.cn/data/{0}.html";

    public WeatherSimple() { }

    /// <summary>Payload of the top-level "weatherinfo" JSON key.</summary>
    public WeatherSimpleInfo weatherinfo { get; set; }

    /// <summary>Gets the simple weather forecast for a city.</summary>
    /// <param name="code">City code inserted into the request URL.</param>
    /// <returns>
    /// The forecast, or <c>null</c> when the response deserializes to nothing
    /// or carries no "weatherinfo" object.
    /// </returns>
    /// <exception cref="System.Net.WebException">The download failed.</exception>
    public static WeatherSimpleInfo Get(string code)
    {
        using (WebClient wc = new WebClient())
        {
            wc.Encoding = Encoding.UTF8;
            var url = string.Format(__Url, code);
            string json = wc.DownloadString(url);
            var root = SerializationHelper.JsonDeserialize<WeatherSimple>(json);
            // Guard the dereference: a null root used to surface as a
            // NullReferenceException instead of "no data".
            return root == null ? null : root.weatherinfo;
        }
    }
}
2.抓取Html并分析提取数据的方式。
这种方法获取复杂,但可以获取较多天气数据。
天气网的http://www.weather.com.cn/html/weather/天气代码.shtml地址可以显示相关地区的天气信息。
用WebClient抓取数据后提取。很多人提取数据是使用正则表达式的方式,不过本人正则表达式用的比较烂,就选择XML的方式了。
众所周知,html本身是不标准的xml,不能直接通过XmlDocument来读取。这里我借助了SgmlReaderDll,它可以把Html转换成xml。
转换成功后我们就可以用xpath的方式来提取数据了,根据需求还可以提取不同的天气指数。具体提取请看源代码。
这样就完成了天气预报的提取。但这还不够,还必须有获取城市代码的方法.
获取城市代码可以直接使用http://www.weather.com.cn/data/listinfo/city上级城市代码.xml去获取。获取到的数据如下:
01|北京,02|上海,03|天津,04|重庆,05|黑龙江,06|吉林,07|辽宁
我们可以通过Split把它提取成包括名称和代码的城市数组。
有了城市代码后我们还需要通过城市名称获取代码的方法,所以我把它的所有城市和代码都下载到WeatherCity.Xml文件内了。见源代码。
按照公司的需求,还要有通过访问者IP获取天气的功能。这里我就想到了纯真IP库。用它直接获取IP所在的城市,但纯真IP库里面的城市是比较复杂的,他包括县、城市、区、甚至某个网吧的信息。这里我们要分析他,具体代码如下
/// <summary>Gets the weather city code for an IP address.</summary>
/// <remarks>
/// The location text returned by the IP database is scanned for a district
/// ("…区"), a city ("…市") and a province ("…省"). Candidate names are then
/// tried in the order district, city, province, and finally the first two
/// characters of the location text; the first one that resolves wins.
/// NOTE(review): the lookup result is read back from a static property, so
/// concurrent calls may interfere — confirm IPLocation's threading behavior.
/// </remarks>
/// <param name="ipDataPath">Path of the IP database.</param>
/// <param name="ip">IP address.</param>
/// <returns>The city code, or an empty string when no candidate resolves.</returns>
public static string GetCityCodeByIp(string ipDataPath, string ip)
{
    IPLocation.IPLocation.IPLocate(ipDataPath, ip);
    var country = IPLocation.IPLocation.Country;

    // Nothing to parse: Regex.Matches would throw on null, and an empty
    // location cannot yield any candidate name.
    if (string.IsNullOrEmpty(country))
    {
        return string.Empty;
    }

    var matCity = Regex.Matches(country, "([^市省]+)市");
    var matArea = Regex.Matches(country, "([^市省区]+)区");
    var matProvince = Regex.Matches(country, "([^省]+)省");

    // District and province use the first match; the city uses the LAST match.
    var area = matArea.Count > 0 ? matArea[0].Result("$1") : string.Empty;
    var province = matProvince.Count > 0 ? matProvince[0].Result("$1") : string.Empty;
    var city = matCity.Count > 0 ? matCity[matCity.Count - 1].Result("$1") : string.Empty;

    // Last-resort candidate: the leading two characters. Substring(0, 2)
    // throws on shorter text, so fall back to the whole string.
    var prefix = country.Length > 2 ? country.Substring(0, 2) : country;

    // Most specific name first; stop at the first successful lookup.
    foreach (var name in new[] { area, city, province, prefix })
    {
        if (string.IsNullOrEmpty(name))
        {
            continue;
        }

        var code = GetCityCodeByName(name);
        if (!string.IsNullOrEmpty(code))
        {
            return code;
        }
    }

    return string.Empty;
}
如此整个天气预报获取就完成了,请下载代码。其中Flashado.Weather.WInForm工程是测试程序,下载后大家可直接用它测试。
天气预报源代码 http://www.dyinfor.com/files/Flashado.Weather.rar