多站点图片爬虫框架

调用方法:

 protected void Page_Load(object sender, EventArgs e)
    {
        IDatabaseDAO objBook = DAOFactory.CreateBookDAO();

        string sql = string.Format("select top 1 * from BookInfo with(nolock) where Img is null or img=''");
        DataTable dt = objBook.ExecuteDataSetSQL(sql).Tables[0];
        StringBuilder sb = new StringBuilder();

        for (int i = 0; i < dt.Rows.Count; i++)
        {
            NewImg ni = new NewImg(ImgSpec.imgWidth, ImgSpec.imgHeight);
            ni.filltype = 0;
            ImgManager im = new ImgManager(ni, dt.Rows[i]["Title"].ToString(), 1);
            string msg = "";
            List<NewImg> lstni = new List<NewImg>();
            lstni = im.getImgList(ref msg);
            if (lstni.Count > 0)
            {
                NewImg ni2 = lstni[0];
                ni2.imgid = int.Parse(dt.Rows[i]["bid"].ToString());
                imgSave(ni2);
            }
            else
            {
                if (sb.Length > 0) sb.Append("|");
                sb.Append(HttpUtility.UrlEncode(dt.Rows[i]["Title"].ToString()));
            }
            if (sb.Length > 100)
            {
                configSrv.configSoapClient csc = new configSrv.configSoapClient();
                csc.addWordToQueue(sb.ToString(), "Google", 2239);//2239是用来识别哪个站点发起的,可以是任意整数
                sb = new StringBuilder();
            }
            Thread.Sleep(1000);
        }
        if (sb.Length > 0)
        {
            configSrv.configSoapClient csc = new configSrv.configSoapClient();
            csc.addWordToQueue(sb.ToString(), "Google", 2239);//2239是用来识别哪个站点发起的,可以是任意整数
        }

    }

    private void imgSave(NewImg ni)
    {
        try
        {
            IDatabaseDAO objBook = DAOFactory.CreateBookDAO();
            //string sql = "update BookInfo set Img=@Img,ImgThumb=@ImgThumb where bid=@bid";
            string sql = "update BookInfo set Img=@Img where bid=@bid";
            SqlParameter[] para = {
                                      new SqlParameter("@Img",SqlDbType.VarChar),
                                      new SqlParameter("@ImgThumb",SqlDbType.VarChar),
                                      new SqlParameter("@bid",SqlDbType.Int)
                                      };
            string subPath = "\\Images\\{0}×{1}\\{2}";
            string Img = string.Format(subPath, ni.imgwidth, ni.imghight, ni.imgid + ni.ext);
            string imgPath = Sxmobi.FileHelper.GetMapPath("~") + Img;
            Sxmobi.FileHelper.EnsureDir(Path.GetDirectoryName(imgPath));

            File.Copy(ni.generatePath, imgPath,true);
            File.Delete(ni.generatePath);//删除temp文件

            string ImgThumb = string.Format(subPath, ImgSpec.imgThumbWidth, ImgSpec.imgThumbHeight, ni.imgid + ni.ext);
            ni.imgwidth = ImgSpec.imgThumbWidth;//图片新宽
            ni.imghight = ImgSpec.imgThumbHeight;//图片新高
            ni.generatePath = Sxmobi.FileHelper.GetMapPath("~") + ImgThumb;
            ImgManager.ImgMgrLocal(ni);
            if (File.Exists(ni.srcPath))
                File.Delete(ni.srcPath);//删除temp文件

            para[0].Value = Img;
            para[1].Value = ImgThumb;
            para[2].Value = ni.imgid;
            objBook.ExecuteNonQuerySQL(sql, para);
        }
        catch (Exception ex)
        {
            Sxmobi.LogHelper.Error(this.GetType().ToString(), ex.Message, ex);
        }
    }

 

(以上所涉及的类均可在下面爬虫框架的打包文件中找到)

爬虫框架下载地址:http://download.csdn.net/detail/dasihg/4133312

你可能感兴趣的:(框架)