使用.NET实现一个从大众点评抓取一些基础数据(商家名称,地址,电话,经纬度)的小程序。
实现逻辑:
1、以异步方式从列表上获取前三项(商家名称、地址、电话);
2、把获取下来的数据保存到数据库表里;
3、把存在数据表里的地址信息读取出来,通过调用QQ地图API把地址转化成经纬度;
4、按行更新GIS信息。
代码如下:
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Data;
using System.Data.SqlClient;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Xml;

namespace DianPing_MeiFa
{
    /// <summary>
    /// WebForms page that drives a small scraper: it can download shop listing
    /// pages, store name/address/phone rows in <c>t_meifa</c>, and resolve each
    /// stored address to latitude/longitude through the QQ Maps geocoder API.
    /// </summary>
    public partial class test : System.Web.UI.Page
    {
        /// <summary>
        /// Page entry point. Currently runs only the geocoding step.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // The crawl step is kept here, disabled, so it can be re-enabled by hand.
            // It walks listing pages 1..50 and stores every shop found:
            //
            //this.lblNames.Text = "begin...";
            //for (int i = 1; i < 51; i++)
            //{
            //    string url = "http://www.dianping.com/search/category/2/50/p";
            //    url += i;
            //    this.SaveMeiFaData(url);
            //}

            this.setMapLocation();
        }

        /// <summary>
        /// Loads every row of <c>t_meifa</c>.
        /// </summary>
        /// <returns>The first table of the result set.</returns>
        private DataTable GetMeifa()
        {
            string sql = "SELECT * FROM t_meifa";
            DataSet ds = SqlHelper.ExecuteDataset(SqlHelper.GetConnection(), CommandType.Text, sql);
            return ds.Tables[0];
        }

        /// <summary>
        /// Reads all stored shops, geocodes each address and writes the
        /// coordinates back row by row.
        /// </summary>
        private void setMapLocation()
        {
            DataTable dt = this.GetMeifa();
            IList<MeiFa> mfList = new List<MeiFa>();

            // Convert the DataTable rows into objects first.
            foreach (DataRow dr in dt.Rows)
            {
                MeiFa mf = new MeiFa
                {
                    Id = int.Parse(dr["id"].ToString()),
                    Name = dr["name"].ToString(),
                    Address = dr["address"].ToString()
                };
                mfList.Add(mf);
            }

            // Resolve and persist the coordinates.
            foreach (MeiFa mf in mfList)
            {
                QQMapGeocoder qmg = this.GetGeocoder(mf.Address);

                // A response without a result/location (e.g. the address could not
                // be resolved) must not abort the whole batch with a
                // NullReferenceException; skip that row instead.
                if (qmg == null || qmg.result == null || qmg.result.location == null)
                {
                    Debug.WriteLine(string.Format(
                        "Geocoding failed for id={0}: status={1}, message={2}",
                        mf.Id,
                        qmg == null ? "(no response)" : qmg.status.ToString(),
                        qmg == null ? string.Empty : qmg.message));
                    continue;
                }

                mf.lat = qmg.result.location.lat;
                mf.lng = qmg.result.location.lng;
                this.UpdateMeiFaLocation(mf);
            }
        }

        /// <summary>
        /// Writes the longitude/latitude of one shop to the database.
        /// </summary>
        /// <param name="mf">Shop whose <c>Id</c>, <c>lng</c> and <c>lat</c> are persisted.</param>
        private void UpdateMeiFaLocation(MeiFa mf)
        {
            // NOTE(review): rows are read from t_meifa but updated in t_meifa_bak;
            // confirm that writing to the backup table is intended.
            string sql = "UPDATE t_meifa_bak SET lng=@lng,lat=@lat WHERE id=@id";
            SqlParameter[] sps =
            {
                // A parameter whose value is a null reference is not sent at all,
                // so null coordinates have to be passed as DBNull.
                new SqlParameter("@lng", (object)mf.lng ?? DBNull.Value),
                new SqlParameter("@lat", (object)mf.lat ?? DBNull.Value),
                new SqlParameter("@id", mf.Id),
            };
            SqlHelper.ExecuteNonQuery(SqlHelper.GetConnection(), CommandType.Text, sql, sps);
        }

        /// <summary>
        /// Resolves an address to coordinates through the QQ Maps geocoder API.
        /// </summary>
        /// <param name="address">Street address to look up.</param>
        /// <returns>The deserialized API response.</returns>
        private QQMapGeocoder GetGeocoder(string address)
        {
            // NOTE(review): the API key is hard-coded in source; consider moving it
            // to configuration.
            string apiMapUrl = "http://apis.map.qq.com/ws/geocoder/v1/?region=北京&address={0}&key=Y5QBZ-DEDR4-3W3U7-XL37W-VVMT6-3KB6K";

            // Escape the address so characters such as '&', '#', '+' or spaces
            // cannot corrupt the query string.
            apiMapUrl = string.Format(apiMapUrl, Uri.EscapeDataString(address ?? string.Empty));

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(apiMapUrl);
            request.Method = "GET";

            string json = ReadResponseText(request);
            return QQMapGeocoder.DeserializeGeocoder(json);
        }

        /// <summary>
        /// Downloads one listing page, extracts the shops and saves them to the database.
        /// </summary>
        /// <param name="url">Listing page URL.</param>
        private void SaveMeiFaData(string url)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "GET";
            request.KeepAlive = true;
            request.ContentType = "application/x-www-form-urlencoded";
            request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
            request.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.2; zh-CN; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8";

            string outString = ReadResponseText(request);
            IList<MeiFa> mfList = this.getMeiFaList(outString);
            this.InsertDb(mfList);
        }

        /// <summary>
        /// Executes the request and returns the response body decoded as UTF-8,
        /// disposing the response, stream and reader afterwards.
        /// </summary>
        /// <param name="request">Fully configured request.</param>
        /// <returns>The response body text.</returns>
        private static string ReadResponseText(HttpWebRequest request)
        {
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Concatenates all strings of the list in order.
        /// </summary>
        /// <param name="list">Strings to join.</param>
        /// <returns>The concatenation, or an empty string for an empty list.</returns>
        private string GetContent(IList<string> list)
        {
            return string.Concat(list);
        }

        /// <summary>
        /// Extracts every <c>&lt;ul class="detail"&gt;</c> fragment from the page
        /// and converts each one into a <see cref="MeiFa"/>.
        /// </summary>
        /// <param name="html">Listing page HTML.</param>
        /// <returns>One entry per matched fragment; fragments that could not be parsed yield an empty object.</returns>
        public IList<MeiFa> getMeiFaList(string html)
        {
            IList<MeiFa> mfList = new List<MeiFa>();
            string reg = @"<ul[^>]*class=""detail""[^>]*>[\s\S]*?</ul>";
            MatchCollection mc = Regex.Matches(html, reg);
            foreach (Match m in mc)
            {
                // &nbsp; is not a predefined XML entity, so XmlDocument.LoadXml
                // would reject the fragment; replace it (and a literal
                // non-breaking space) with a plain space before parsing.
                string strDom = m.Value
                    .Replace("&nbsp;", " ")
                    .Replace("\u00A0", " ");
                mfList.Add(MeiFa.CreateMeifa(strDom));
            }
            return mfList;
        }

        /// <summary>
        /// Removes everything that looks like a tag (<c>&lt;...&gt;</c>) from the text.
        /// </summary>
        /// <param name="HTMLStr">Markup to strip.</param>
        /// <returns>The text with all tags removed.</returns>
        public string ReplaceHtml(string HTMLStr)
        {
            return Regex.Replace(HTMLStr, "<[^>]*>", "");
        }

        /// <summary>
        /// Inserts every shop that has a name. A failing insert is logged and
        /// skipped so that the remaining shops are still stored.
        /// </summary>
        /// <param name="mfs">Shops to insert.</param>
        public void InsertDb(IList<MeiFa> mfs)
        {
            foreach (var mf in mfs)
            {
                if (mf == null || string.IsNullOrEmpty(mf.Name))
                {
                    continue;
                }

                try
                {
                    this.InsertDb(mf);
                }
                catch (Exception ex)
                {
                    // Best effort by design, but leave a trace instead of
                    // swallowing the failure silently.
                    Debug.WriteLine(string.Format("Insert failed for '{0}': {1}", mf.Name, ex.Message));
                }
            }
        }

        /// <summary>
        /// Inserts one shop into <c>t_meifa</c>.
        /// </summary>
        /// <param name="mf">Shop to insert.</param>
        public void InsertDb(MeiFa mf)
        {
            string sql = "INSERT INTO t_meifa(name,address,tel) values(@name,@address,@tel)";
            SqlParameter[] sps =
            {
                // Null strings must be sent as DBNull, otherwise the parameter is
                // omitted from the command.
                new SqlParameter("@name", (object)mf.Name ?? DBNull.Value),
                new SqlParameter("@address", (object)mf.Address ?? DBNull.Value),
                new SqlParameter("@tel", (object)mf.Tel ?? DBNull.Value),
            };
            SqlHelper.ExecuteNonQuery(SqlHelper.GetConnection(), CommandType.Text, sql, sps);
        }

        /// <summary>
        /// One shop record: identity, contact data and optional coordinates.
        /// </summary>
        public class MeiFa
        {
            public int Id { get; set; }
            public string Name { get; set; }
            public string Address { get; set; }
            public string Tel { get; set; }
            public float? lng { get; set; }
            public float? lat { get; set; }

            public MeiFa()
            {
            }

            /// <summary>
            /// Builds a shop from one <c>&lt;ul class="detail"&gt;</c> fragment.
            /// </summary>
            /// <param name="domStr">Fragment markup; it must be well-formed XML to be parsed.</param>
            /// <returns>
            /// A populated shop, or an empty one when the fragment is not
            /// well-formed, a required node is missing, or the address text does
            /// not contain both an address and a phone token.
            /// </returns>
            public static MeiFa CreateMeifa(string domStr)
            {
                Debug.WriteLine(domStr);
                if (string.IsNullOrEmpty(domStr))
                {
                    return new MeiFa();
                }

                try
                {
                    XmlDocument dom = new XmlDocument();
                    dom.LoadXml(domStr);

                    XmlNode nameNode = dom.SelectSingleNode("//ul/li[@class=\"shopname\"]/a[@data-hippo-type=\"shop\"]");
                    XmlNode addressNode = dom.SelectSingleNode("//ul/*/li[@class=\"address\"]");
                    if (nameNode == null || addressNode == null)
                    {
                        return new MeiFa();
                    }

                    // The address node text holds the address followed by the phone
                    // number, separated by spaces.
                    string[] ats = getArr(ReplaceAddress(addressNode.InnerText));
                    if (ats.Length < 2)
                    {
                        return new MeiFa();
                    }

                    MeiFa m = new MeiFa();
                    m.Name = nameNode.InnerText;
                    m.Address = ats[0];
                    m.Tel = ats[1];
                    return m;
                }
                catch (XmlException ex)
                {
                    // Scraped HTML is frequently not well-formed XML; treat that as
                    // "no data" rather than failing the whole page.
                    Debug.WriteLine("Unparseable shop fragment: " + ex.Message);
                    return new MeiFa();
                }
            }

            /// <summary>
            /// Splits the text on spaces and drops empty tokens.
            /// </summary>
            /// <param name="str">Text to split.</param>
            /// <returns>The non-empty tokens in their original order.</returns>
            private static string[] getArr(string str)
            {
                return str.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            }

            /// <summary>
            /// Strips the "地址:" label and line feeds from the address text.
            /// </summary>
            /// <param name="s">Raw inner text of the address node.</param>
            /// <returns>The cleaned text.</returns>
            private static string ReplaceAddress(string s)
            {
                s = s.Replace("地址:", "");
                // Also accept the full-width colon, which Chinese pages commonly use.
                s = s.Replace("地址：", "");
                s = s.Replace("\n", "");
                return s;
            }
        }
    }
}
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;

namespace DianPing_MeiFa
{
    /// <summary>
    /// Top-level object of a geocoder response. Property names are lower-case
    /// so that they line up with the JSON member names during deserialization.
    /// </summary>
    public class QQMapGeocoder
    {
        /// <summary>Status code reported by the service.</summary>
        public int status { get; set; }

        /// <summary>Status text reported by the service.</summary>
        public string message { get; set; }

        /// <summary>Payload of the response; null when the JSON carries no <c>result</c> member.</summary>
        public QQMapResult result { get; set; }

        /// <summary>
        /// Deserializes a geocoder JSON document with Json.NET using default
        /// serializer settings.
        /// </summary>
        /// <param name="jsonStr">JSON text returned by the geocoder endpoint.</param>
        /// <returns>The deserialized response object.</returns>
        public static QQMapGeocoder DeserializeGeocoder(string jsonStr)
        {
            return JsonConvert.DeserializeObject<QQMapGeocoder>(jsonStr, new JsonSerializerSettings());
        }
    }

    /// <summary>
    /// The <c>result</c> member of a geocoder response.
    /// </summary>
    public class QQMapResult
    {
        /// <summary>Resolved coordinates.</summary>
        public QQMapLocation location { get; set; }

        /// <summary>Structured parts of the resolved address.</summary>
        public QQMapAddressComponents address_components { get; set; }

        /// <summary>Similarity value as delivered by the service, kept as text.</summary>
        public string similarity { get; set; }
    }

    /// <summary>
    /// A longitude/latitude pair.
    /// </summary>
    public class QQMapLocation
    {
        /// <summary>Longitude.</summary>
        public float? lng { get; set; }

        /// <summary>Latitude.</summary>
        public float? lat { get; set; }
    }

    /// <summary>
    /// Address broken down into administrative parts.
    /// </summary>
    public class QQMapAddressComponents
    {
        public string province { get; set; }

        public string city { get; set; }

        public string district { get; set; }

        public string street { get; set; }

        public string street_number { get; set; }
    }
}
注:仅供学习使用!