/// <summary>
/// Spider definition that crawls a JD product listing page and extracts one
/// <see cref="Product"/> entity per matching SKU element.
/// </summary>
public class JdSkuSpider : ISpiderContext
{
    /// <summary>
    /// Assembles the spider configuration (name, threading, site encoding, scheduler,
    /// start URLs, pipeline and downloader) and wraps it in a builder bound to
    /// <see cref="Product"/>.
    /// </summary>
    /// <remarks>
    /// Side effect: assigns <c>Log.TaskId</c> on every call.
    /// </remarks>
    /// <returns>
    /// A <c>SpiderContextBuilder</c> created from the configured context and
    /// <c>typeof(Product)</c>.
    /// </returns>
    public SpiderContextBuilder GetBuilder()
    {
        Log.TaskId = "JD SKU Weekly";

        SpiderContext context = new SpiderContext
        {
            // The run id suffix comes from DateTimeUtils.MONDAY_RUN_ID (declared elsewhere).
            SpiderName = "JD SKU " + DateTimeUtils.MONDAY_RUN_ID,
            CachedSize = 1,
            ThreadNum = 8,
            Site = new Site { EncodingName = "UTF-8" },
            // NOTE(review): the Redis password is an empty string here — confirm this is
            // intentional for the target environment.
            Scheduler = new RedisScheduler { Host = "redis", Port = 6379, Password = "" },
            StartUrls = new Dictionary<string, Dictionary<string, object>>
            {
                {
                    // Each start URL carries a bag of extra values; the "name" and "cat3"
                    // keys match the expressions used by Product.CategoryName and
                    // Product.CategoryId below.
                    "http://list.jd.com/list.html?cat=9987,653,655&page=1&go=0&JL=6_0_0&ms=5",
                    new Dictionary<string, object> { { "name", "手机" }, { "cat3", "9987" } }
                },
            },
            // NOTE(review): the connection string is empty in source — presumably supplied
            // or patched elsewhere before the spider runs; verify.
            Pipeline = new MysqlPipeline { ConnectString = "" },
            Downloader = new HttpDownloader()
        };

        return new SpiderContextBuilder(context, typeof(Product));
    }

    /// <summary>
    /// Entity describing one product tile on the listing page. Each element matching
    /// <c>//div[contains(@class,'j-sku-item')]</c> is declared as one entity
    /// (<c>Multi = true</c>), targeting schema <c>jd</c>, table <c>sku_v2</c> with a
    /// Monday table suffix and <c>sku</c> as the primary index.
    /// </summary>
    /// <remarks>
    /// <c>ExtractType.Enviroment</c> is spelled exactly as the enum member referenced by
    /// the original code; the enum itself is declared outside this file.
    /// </remarks>
    [Schema("jd", "sku_v2", Suffix = TableSuffix.Monday)]
    [TargetUrl(new[] { @"page=[0-9]+" }, "//*[@id=\"J_bottomPage\"]")]
    [TypeExtractBy(Expression = "//div[contains(@class,'j-sku-item')]", Multi = true)]
    [Indexes(Primary = "sku")]
    public class Product : ISpiderEntity
    {
        /// <summary>Category display name, read from the "name" extra of the start URL.</summary>
        [StoredAs("category", DataType.String, 20)]
        [PropertyExtractBy(Expression = "name", Type = ExtractType.Enviroment)]
        public string CategoryName { get; set; }

        /// <summary>Category id, read from the "cat3" extra of the start URL.</summary>
        // NOTE(review): the property is an int while the column is declared as
        // String(20) — confirm the pipeline converts as expected.
        [StoredAs("cat3", DataType.String, 20)]
        [PropertyExtractBy(Expression = "cat3", Type = ExtractType.Enviroment)]
        public int CategoryId { get; set; }

        /// <summary>Link taken from the <c>href</c> of the anchor in the tile's first div.</summary>
        [StoredAs("url", DataType.Text)]
        [PropertyExtractBy(Expression = "./div[1]/a/@href")]
        public string Url { get; set; }

        /// <summary>SKU identifier from the tile's <c>data-sku</c> attribute (primary index).</summary>
        [StoredAs("sku", DataType.String, 25)]
        [PropertyExtractBy(Expression = "./@data-sku")]
        public string Sku { get; set; }

        /// <summary>Value of the anchor inside the <c>p-commit</c> div.</summary>
        // NOTE(review): the property is a long while the column is declared as
        // String(20) — confirm the pipeline converts as expected.
        [StoredAs("commentscount", DataType.String, 20)]
        [PropertyExtractBy(Expression = "./div[@class='p-commit']/strong/a")]
        public long CommentsCount { get; set; }

        /// <summary>Shop name from the first anchor of the first span in the shop div.</summary>
        // The predicate is an exact match on the class attribute value 'p-shop hide'.
        // NOTE(review): this literal was broken across a line boundary in the file and has
        // been rejoined with a single space — confirm against the page markup.
        [StoredAs("shopname", DataType.String, 100)]
        [PropertyExtractBy(Expression = "./div[@class='p-shop hide']/span[1]/a[1]")]
        public string ShopName { get; set; }

        /// <summary>Product title from the <c>em</c> element under the <c>p-name</c> div.</summary>
        [StoredAs("name", DataType.String, 50)]
        [PropertyExtractBy(Expression = "./div[@class='p-name']/a/em")]
        public string Name { get; set; }

        /// <summary>Shop id column.</summary>
        // NOTE(review): no PropertyExtractBy is declared for this property, so nothing in
        // this file populates it — presumably filled elsewhere or left empty; verify.
        [StoredAs("shopid", DataType.String, 25)]
        public string ShopId { get; set; }

        /// <summary>Vendor id from the tile's <c>venderid</c> attribute.</summary>
        [StoredAs("venderid", DataType.String, 25)]
        [PropertyExtractBy(Expression = "./@venderid")]
        public string VenderId { get; set; }

        /// <summary>Value of the tile's <c>jdzy_shop_id</c> attribute.</summary>
        [StoredAs("jdzy_shop_id", DataType.String, 25)]
        [PropertyExtractBy(Expression = "./@jdzy_shop_id")]
        public string JdzyShopId { get; set; }

        /// <summary>
        /// Creation timestamp column. The getter returns <see cref="DateTime.Now"/> on
        /// every read; the property has no setter.
        /// </summary>
        [StoredAs("cdate", DataType.Time)]
        [PropertyExtractBy(Expression = "now", Type = ExtractType.Enviroment)]
        public DateTime CDate => DateTime.Now;
    }
}