获取大众点评数据

使用.NET实现一个从大众点评抓取一些基础数据(商家名称,地址,电话,经纬度)的小程序。

实现逻辑:

1、通过HTTP请求抓取列表页(代码中使用的是同步的GetResponse调用),从中解析出前三项(商家名称、地址、电话);

2、把获取下来的数据保存到数据库表里;

3、把存在数据表里的地址信息读取出来,通过调用QQ地图API把地址转化成经纬度;

4、把得到的经纬度(GIS信息)按行更新回数据表。

 

代码如下:

using Newtonsoft.Json;

using System;

using System.Collections.Generic;

using System.Data;

using System.Data.SqlClient;

using System.Diagnostics;

using System.IO;

using System.Linq;

using System.Net;

using System.Text;

using System.Text.RegularExpressions;

using System.Web;

using System.Web.UI;

using System.Web.UI.WebControls;

using System.Xml;



namespace DianPing_MeiFa

{

    public partial class test : System.Web.UI.Page

    {

        /// <summary>
        /// Page entry point: runs the geocoding pass over the shops already stored in the database.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event arguments (not used).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // The scraping step is disabled. The original loop, kept here for reference,
            // walked list pages 1..50 and saved the shops found on each page:
            //
            //   this.lblNames.Text = "begin...";
            //   for (int i = 1; i < 51; i++)
            //   {
            //       string url = "http://www.dianping.com/search/category/2/50/p";
            //       url += i;
            //       this.SaveMeiFaData(url);
            //   }

            // Geocode every stored address and write the coordinates back.
            setMapLocation();
        }



        /// <summary>
        /// Reads all rows of the <c>t_meifa</c> table.
        /// </summary>
        /// <returns>The first table of the data set returned by the query.</returns>
        private DataTable GetMeifa()
        {
            const string query = "SELECT * FROM t_meifa";

            return SqlHelper
                .ExecuteDataset(SqlHelper.GetConnection(), CommandType.Text, query)
                .Tables[0];
        }





        /// <summary>
        /// Geocodes the address of every row returned by <see cref="GetMeifa"/> and stores the
        /// resulting longitude/latitude through <see cref="UpdateMeiFaLocation"/>.
        /// </summary>
        /// <remarks>
        /// Rows whose address is empty, or for which the geocoder response carries no usable
        /// location, are skipped (and reported on the debug output) so that a single bad
        /// address no longer aborts the whole batch with a NullReferenceException.
        /// </remarks>
        private void setMapLocation()
        {
            DataTable dt = this.GetMeifa();

            foreach (DataRow dr in dt.Rows)
            {
                // Convert the row into an object.
                MeiFa mf = new MeiFa
                {
                    Id = int.Parse(dr["id"].ToString()),
                    Name = dr["name"].ToString(),
                    Address = dr["address"].ToString()
                };

                // Nothing to geocode; avoid a pointless HTTP round trip.
                if (string.IsNullOrEmpty(mf.Address))
                {
                    Debug.WriteLine("setMapLocation: row " + mf.Id + " has no address, skipped.");
                    continue;
                }

                QQMapGeocoder qmg = this.GetGeocoder(mf.Address);

                // The response only contains a location when the lookup succeeded.
                QQMapLocation location = (qmg == null || qmg.result == null) ? null : qmg.result.location;
                if (location == null || !location.lng.HasValue || !location.lat.HasValue)
                {
                    Debug.WriteLine(
                        "setMapLocation: no coordinates for row " + mf.Id +
                        (qmg == null ? string.Empty : " (status=" + qmg.status + ", message=" + qmg.message + ")"));
                    continue;
                }

                mf.lat = location.lat;
                mf.lng = location.lng;

                this.UpdateMeiFaLocation(mf);
            }
        }



        /// <summary>
        /// Writes the longitude/latitude of <paramref name="mf"/> to the row with the same id.
        /// </summary>
        /// <param name="mf">Shop whose <c>Id</c>, <c>lng</c> and <c>lat</c> are persisted.</param>
        private void UpdateMeiFaLocation(MeiFa mf)
        {
            // NOTE(review): this updates t_meifa_bak, while the rows are read from and inserted
            // into t_meifa elsewhere in this class - confirm the target table is intentional.
            string sql = "UPDATE t_meifa_bak SET lng=@lng,lat=@lat WHERE id=@id";

            // A parameter whose value is a CLR null is not sent as SQL NULL; map missing
            // coordinates to DBNull.Value so the command stays valid.
            SqlParameter[] sps =
            {
                new SqlParameter("@lng", (object)mf.lng ?? DBNull.Value),
                new SqlParameter("@lat", (object)mf.lat ?? DBNull.Value),
                new SqlParameter("@id", mf.Id),
            };

            SqlHelper.ExecuteNonQuery(SqlHelper.GetConnection(), CommandType.Text, sql, sps);
        }



        /// <summary>
        /// Looks up the coordinates of an address through the QQ Map geocoder web service.
        /// </summary>
        /// <param name="address">Address to geocode; the lookup region is fixed to 北京 in the URL.</param>
        /// <returns>The deserialized geocoder response.</returns>
        /// <exception cref="WebException">The HTTP request failed.</exception>
        private QQMapGeocoder GetGeocoder(string address)
        {
            // NOTE(review): the API key is embedded in source; consider moving it to configuration.
            string apiMapUrl = "http://apis.map.qq.com/ws/geocoder/v1/?region=北京&address={0}&key=Y5QBZ-DEDR4-3W3U7-XL37W-VVMT6-3KB6K";

            // Escape the address so characters such as '&', '#', '+' or spaces cannot
            // truncate or alter the query string.
            apiMapUrl = string.Format(apiMapUrl, Uri.EscapeDataString(address ?? string.Empty));

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(apiMapUrl);
            request.Method = "GET";

            // Dispose the response, stream and reader so the underlying connection is
            // released even when reading or deserializing throws.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
            {
                string json = reader.ReadToEnd();
                return QQMapGeocoder.DeserializeGeocoder(json);
            }
        }



        /// <summary>
        /// Downloads one list page, extracts the shops it contains and saves them to the database.
        /// </summary>
        /// <param name="url">Absolute URL of the list page to download.</param>
        /// <exception cref="WebException">The HTTP request failed.</exception>
        private void SaveMeiFaData(string url)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "GET";
            request.KeepAlive = true;
            request.ContentType = "application/x-www-form-urlencoded";

            // Present the request as coming from a desktop browser.
            request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
            request.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.2; zh-CN; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8";

            string outString;

            // Dispose the response, stream and reader so the connection is released
            // before the (slower) parsing and database work starts.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
            {
                outString = reader.ReadToEnd();
            }

            IList<MeiFa> mfList = this.getMeiFaList(outString);
            this.InsertDb(mfList);
        }





        /// <summary>
        /// Concatenates all strings of <paramref name="list"/> in order, without a separator.
        /// </summary>
        /// <param name="list">Strings to join; null entries contribute nothing.</param>
        /// <returns>The concatenation, or an empty string when the list is null or empty.</returns>
        private string GetContent(IList<string> list)
        {
            if (list == null)
            {
                return string.Empty;
            }

            // string.Concat builds the result in one pass instead of reallocating per element.
            return string.Concat(list);
        }



        /// <summary>
        /// Extracts every <c>&lt;ul class="detail"&gt;</c> fragment from a list page and turns
        /// each one into a <see cref="MeiFa"/> object.
        /// </summary>
        /// <param name="html">HTML source of the list page.</param>
        /// <returns>One entry per matched fragment, in document order.</returns>
        public IList<MeiFa> getMeiFaList(string html)
        {
            const string detailListPattern = @"<ul[^>]*class=""detail""[^>]*>[\s\S]*?</ul>";

            return Regex.Matches(html, detailListPattern)
                .Cast<Match>()
                // &nbsp; is replaced by a plain space before the fragment is parsed.
                .Select(fragment => MeiFa.CreateMeifa(fragment.Value.Replace("&nbsp;", " ")))
                .ToList();
        }



        /// <summary>
        /// Removes everything that looks like a markup tag (<c>&lt;...&gt;</c>) from a string.
        /// </summary>
        /// <param name="HTMLStr">Text that may contain tags.</param>
        /// <returns>The input with every tag removed; text between tags is kept.</returns>
        public string ReplaceHtml(string HTMLStr)
        {
            const string tagPattern = "<[^>]*>";
            string withoutTags = Regex.Replace(HTMLStr, tagPattern, string.Empty);
            return withoutTags;
        }



        /// <summary>
        /// Saves a batch of shops, one row each, on a best-effort basis.
        /// </summary>
        /// <param name="mfs">Shops to save; a null list is treated as empty.</param>
        /// <remarks>
        /// Entries that are null or have no name are skipped. A failure while saving one entry
        /// is reported on the debug output and does not stop the remaining entries.
        /// </remarks>
        public void InsertDb(IList<MeiFa> mfs)
        {
            if (mfs == null)
            {
                return;
            }

            foreach (var mf in mfs)
            {
                if (mf == null || string.IsNullOrEmpty(mf.Name))
                {
                    continue;
                }

                try
                {
                    this.InsertDb(mf);
                }
                catch (Exception ex)
                {
                    // Keep going with the rest of the batch, but leave a trace of the failure.
                    Debug.WriteLine("InsertDb: could not save '" + mf.Name + "': " + ex.Message);
                }
            }
        }



        /// <summary>
        /// Inserts one shop (name, address, telephone) into the <c>t_meifa</c> table.
        /// </summary>
        /// <param name="mf">Shop to insert.</param>
        /// <exception cref="ArgumentNullException"><paramref name="mf"/> is null.</exception>
        public void InsertDb(MeiFa mf)
        {
            if (mf == null)
            {
                throw new ArgumentNullException("mf");
            }

            string sql = "INSERT INTO t_meifa(name,address,tel) values(@name,@address,@tel)";

            // A parameter whose value is a CLR null is not sent as SQL NULL; use DBNull.Value
            // for missing fields so the command stays valid.
            SqlParameter[] sps =
            {
                new SqlParameter("@name", (object)mf.Name ?? DBNull.Value),
                new SqlParameter("@address", (object)mf.Address ?? DBNull.Value),
                new SqlParameter("@tel", (object)mf.Tel ?? DBNull.Value),
            };

            SqlHelper.ExecuteNonQuery(SqlHelper.GetConnection(), CommandType.Text, sql, sps);
        }





        /// <summary>
        /// One shop: database id, name, address, telephone and coordinates.
        /// </summary>
        public class MeiFa
        {
            /// <summary>Row id in the database.</summary>
            public int Id { get; set; }

            /// <summary>Shop name.</summary>
            public string Name { get; set; }

            /// <summary>Shop address.</summary>
            public string Address { get; set; }

            /// <summary>Shop telephone number.</summary>
            public string Tel { get; set; }

            /// <summary>
            /// Longitude, or null when unknown.
            /// NOTE(review): float keeps about 7 significant digits; consider whether that is
            /// precise enough for coordinates.
            /// </summary>
            public float? lng { get; set; }

            /// <summary>Latitude, or null when unknown.</summary>
            public float? lat { get; set; }

            /// <summary>Creates an empty shop.</summary>
            public MeiFa()
            {
            }

            /// <summary>
            /// Builds a shop from one list fragment (a <c>&lt;ul&gt;</c> element) by parsing it as XML.
            /// </summary>
            /// <param name="domStr">Markup of a single <c>&lt;ul&gt;</c> fragment.</param>
            /// <returns>
            /// A shop with name, address and telephone filled in, or an empty <see cref="MeiFa"/>
            /// when the fragment is null/empty, is not well-formed XML, lacks the expected
            /// nodes, or its address text does not contain at least two space-separated parts.
            /// </returns>
            public static MeiFa CreateMeifa(string domStr)
            {
                Debug.WriteLine(domStr);

                if (string.IsNullOrEmpty(domStr))
                {
                    return new MeiFa();
                }

                XmlDocument dom = new XmlDocument();

                // The markup comes from a remote site: never resolve external DTDs/entities.
                dom.XmlResolver = null;

                try
                {
                    dom.LoadXml(domStr);
                }
                catch (XmlException ex)
                {
                    Debug.WriteLine("CreateMeifa: fragment is not well-formed XML: " + ex.Message);
                    return new MeiFa();
                }

                XmlNode nameNode = dom.SelectSingleNode("//ul/li[@class=\"shopname\"]/a[@data-hippo-type=\"shop\"]");
                XmlNode addressNode = dom.SelectSingleNode("//ul/*/li[@class=\"address\"]");
                if (nameNode == null || addressNode == null)
                {
                    Debug.WriteLine("CreateMeifa: shop name or address node not found.");
                    return new MeiFa();
                }

                // The address text is expected to hold the address followed by the telephone,
                // separated by spaces.
                string[] ats = getArr(ReplaceAddress(addressNode.InnerText));
                if (ats.Length < 2)
                {
                    Debug.WriteLine("CreateMeifa: address text does not contain both address and telephone.");
                    return new MeiFa();
                }

                return new MeiFa
                {
                    Name = nameNode.InnerText,
                    Address = ats[0],
                    Tel = ats[1]
                };
            }

            /// <summary>
            /// Splits a string on spaces and drops the empty parts.
            /// </summary>
            /// <param name="str">Text to split.</param>
            /// <returns>The non-empty parts, in order.</returns>
            private static string[] getArr(string str)
            {
                return str.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            }

            /// <summary>
            /// Removes the "地址:" label and all line-feed characters from the address text.
            /// </summary>
            /// <param name="s">Raw text of the address node.</param>
            /// <returns>The cleaned text.</returns>
            private static string ReplaceAddress(string s)
            {
                s = s.Replace("地址:", "");
                s = s.Replace("\n", "");
                return s;
            }
        }

    }

}

 

using Newtonsoft.Json;

using System;

using System.Collections.Generic;

using System.Linq;

using System.Web;



namespace DianPing_MeiFa

{

    //[JsonProperty]

    /// <summary>
    /// Top-level object of a geocoder response; property names match the JSON field names.
    /// </summary>
    public class QQMapGeocoder
    {
        /// <summary>Value of the <c>status</c> field.</summary>
        public int status { get; set; }

        /// <summary>Value of the <c>message</c> field.</summary>
        public string message { get; set; }

        /// <summary>Value of the <c>result</c> field; null when the response has none.</summary>
        public QQMapResult result { get; set; }

        /// <summary>
        /// Deserializes a geocoder response from JSON using default serializer settings.
        /// </summary>
        /// <param name="jsonStr">JSON text of the response.</param>
        /// <returns>The deserialized response object.</returns>
        public static QQMapGeocoder DeserializeGeocoder(string jsonStr)
        {
            return JsonConvert.DeserializeObject<QQMapGeocoder>(jsonStr, new JsonSerializerSettings());
        }
    }



    /// <summary>
    /// The <c>result</c> part of a geocoder response.
    /// </summary>
    public class QQMapResult
    {
        /// <summary>Coordinates found for the address.</summary>
        public QQMapLocation location { get; set; }

        /// <summary>Address broken down into its components.</summary>
        public QQMapAddressComponents address_components { get; set; }

        /// <summary>Value of the <c>similarity</c> field, kept as text.</summary>
        public string similarity { get; set; }
    }



    /// <summary>
    /// A longitude/latitude pair; each value is null when absent.
    /// </summary>
    public class QQMapLocation
    {
        /// <summary>Longitude.</summary>
        public float? lng { get; set; }

        /// <summary>Latitude.</summary>
        public float? lat { get; set; }
    }



    /// <summary>
    /// The <c>address_components</c> part of a geocoder result.
    /// </summary>
    public class QQMapAddressComponents
    {
        /// <summary>Province.</summary>
        public string province { get; set; }

        /// <summary>City.</summary>
        public string city { get; set; }

        /// <summary>District.</summary>
        public string district { get; set; }

        /// <summary>Street.</summary>
        public string street { get; set; }

        /// <summary>Street number.</summary>
        public string street_number { get; set; }
    }

    

}

 

注:仅供学习使用!

你可能感兴趣的:(数据)