1、JSON数据采集
/**
 * Fetch basic item information from Taobao's mobile detail JSON endpoint.
 *
 * Requests http://hws.m.taobao.com/cache/wdetail/5.0/?id=<serial>, decodes the
 * JSON body and the JSON string nested in data.apiStack[0].value, and flattens
 * the fields of interest into a single array.
 *
 * A price string containing '-' is treated as a range: it is split on '-' and
 * the smaller of the first two parts is kept.
 *
 * @param string|int $serial Item id appended to the request URL.
 *
 * @return array Keys: title, volume, coupon_price, price, pic_url, nick,
 *               sellerId. A field that cannot be read is null (pic_url: empty
 *               string); this includes the case where the request or the JSON
 *               decoding fails.
 */
function getTaobaoShopInfo($serial)
{
    $info = array(
        'title'        => null,
        'volume'       => null,
        'coupon_price' => null,
        'price'        => null,
        'pic_url'      => '',
        'nick'         => null,
        'sellerId'     => null,
    );

    $ch = curl_init();
    if ($ch === false) {
        return $info;
    }
    curl_setopt($ch, CURLOPT_URL, "http://hws.m.taobao.com/cache/wdetail/5.0/?id=" . $serial);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 1);
    $contents = curl_exec($ch);
    curl_close($ch);

    // With CURLOPT_RETURNTRANSFER set, curl_exec() yields the body or false.
    if (!is_string($contents) || $contents === '') {
        return $info;
    }

    // Decode the variable that actually holds the response body.
    $data = json_decode($contents, true);
    if (!is_array($data) || !isset($data['data']) || !is_array($data['data'])) {
        return $info;
    }
    $root = $data['data'];

    // data.apiStack[0].value is itself a JSON-encoded string.
    $stack = array();
    if (isset($root['apiStack'][0]['value']) && is_string($root['apiStack'][0]['value'])) {
        $decoded = json_decode($root['apiStack'][0]['value'], true);
        if (is_array($decoded)) {
            $stack = $decoded;
        }
    }

    if (isset($root['itemInfoModel']['title'])) {
        $info['title'] = $root['itemInfoModel']['title'];
    }

    if (isset($stack['data']['itemInfoModel']['totalSoldQuantity'])) {
        $info['volume'] = $stack['data']['itemInfoModel']['totalSoldQuantity'];
    }

    // priceUnits[0] feeds coupon_price, priceUnits[1] feeds price.
    $units = array();
    if (isset($stack['data']['itemInfoModel']['priceUnits'])
        && is_array($stack['data']['itemInfoModel']['priceUnits'])
    ) {
        $units = $stack['data']['itemInfoModel']['priceUnits'];
    }
    foreach (array('coupon_price' => 0, 'price' => 1) as $field => $index) {
        if (!isset($units[$index]['price'])) {
            continue;
        }
        $value = $units[$index]['price'];
        if (is_string($value) && substr_count($value, '-')) {
            $parts = explode('-', $value);
            $value = min($parts[0], $parts[1]);
        }
        $info[$field] = $value;
    }

    if (isset($root['itemInfoModel']['picsPath'][0]) && is_string($root['itemInfoModel']['picsPath'][0])) {
        // Strip the thumbnail size suffix from the first picture URL.
        $info['pic_url'] = str_replace("_320x320.jpg", "", $root['itemInfoModel']['picsPath'][0]);
    }

    if (isset($root['seller']['nick'])) {
        $info['nick'] = $root['seller']['nick'];
    }
    if (isset($root['seller']['userNumId'])) {
        $info['sellerId'] = $root['seller']['userNumId'];
    }

    return $info;
}
2、alimama接口采集
/**
 * Look up a Taobao/Tmall item through the TOP SDK (TbkItemInfoGetRequest).
 *
 * @param string|int $serial Id of the Taobao/Tmall item; passed to setNumIids().
 *
 * @return mixed The ['results']['n_tbk_item'] entry of the response after it
 *               has been converted by $this->object_to_array(), or null when
 *               that entry is absent.
 */
public function Get_Info($serial)
{
    // require_once instead of require: a plain require re-includes the SDK
    // bootstrap on every call, which fails if that file declares symbols.
    require_once 'taobao/TopSdk.php';

    // NOTE(review): $appkey and $secretKey are not defined anywhere in this
    // method; they must be supplied (e.g. from configuration) before this
    // code can authenticate. Left as in the original because their intended
    // source is not visible here.
    $c = new TopClient();
    $c->appkey = $appkey;
    $c->secretKey = $secretKey;

    $req = new TbkItemInfoGetRequest();
    $req->setFields("num_iid,title,pict_url,small_images,reserve_price,zk_final_price,user_type,provcity,item_url");
    $req->setPlatform("2");
    $req->setNumIids($serial);

    $resp = $c->execute($req);
    $list = $this->object_to_array($resp);

    // An error response may carry no results; avoid reading a missing offset.
    if (!is_array($list) || !isset($list['results']['n_tbk_item'])) {
        return null;
    }

    return $list['results']['n_tbk_item'];
}
3、页面采集
/**
 * Scrape item details from a Taobao item page.
 *
 * Downloads https://item.taobao.com/item.htm?id=<serial> and extracts fields
 * with regular expressions. The title is converted from GBK to UTF-8.
 *
 * NOTE(review): the regex literals below look truncated (for example '//is'
 * has no capture group although group 1 is read, and several patterns start
 * with a bare line break). The HTML-tag portions were presumably lost when
 * this snippet was copied; they are kept exactly as found and must be restored
 * from the original article/page markup before this function can match.
 *
 * @param string|int $serial Item id appended to the page URL.
 *
 * @return array Keys: pic_url, title, price, inventory, small_images. Fields
 *               that cannot be matched keep their defaults (null, '', '',
 *               null, empty array), also when the download fails.
 */
function FileGetTaobao($serial)
{
    $data = array(
        'pic_url'      => null,
        'title'        => '',
        'price'        => '',
        'inventory'    => null,
        'small_images' => array(),
    );

    $text = file_get_contents("https://item.taobao.com/item.htm?id=" . $serial);
    if ($text === false) {
        return $data;
    }

    $img_reg = '//is';
    preg_match_all($img_reg, $text, $img);
    if (isset($img[1][0])) {
        $data['pic_url'] = $img[1][0];
    }

    // The line break inside the literal is part of the pattern as found.
    $title_reg = '/
(.*?)<\/h3>/is';
    preg_match_all($title_reg, $text, $title);
    if (isset($title[2][0])) {
        $data['title'] = iconv("GBK", "UTF-8", trim($title[2][0]));
    }

    $price_reg = '/(.*?)<\/em>/is';
    preg_match_all($price_reg, $text, $price);
    if (isset($price[1][0])) {
        $rawPrice = $price[1][0];
        // For a range keep the part before the last '-'. strrpos() returns
        // false when there is no '-', in which case the whole value is the
        // price (cutting at "false" would yield an empty string).
        $dashAt = strrpos($rawPrice, '-');
        $data['price'] = ($dashAt === false) ? $rawPrice : substr($rawPrice, 0, $dashAt);
    }

    $inventory_reg = '/([0-9]+)<\/span>/is';
    preg_match_all($inventory_reg, $text, $inventory);
    if (isset($inventory[1][0])) {
        $data['inventory'] = $inventory[1][0];
    }

    // Two passes: first isolate a <ul> body, then collect entries inside it.
    $small_pic_1 = '/
(.*?)<\/ul>/is';
    preg_match_all($small_pic_1, $text, $pic_1);
    if (isset($pic_1[1][0])) {
        $small_pic_2 = '/
/';
        preg_match_all($small_pic_2, $pic_1[1][0], $pic_2);
        if (isset($pic_2[1])) {
            $data['small_images'] = $pic_2[1];
        }
    }

    return $data;
}

/**
 * Scrape item details from a Tmall item page.
 *
 * Downloads https://detail.tmall.com/item.htm?id=<serial> and extracts fields
 * with regular expressions. Title and seller nick are converted from GBK to
 * UTF-8.
 *
 * NOTE(review): as in FileGetTaobao(), the regex literals look truncated (no
 * capture groups where groups 1 and 2 are read); they are kept exactly as
 * found and need to be restored before this function can match anything.
 *
 * @param string|int $serial Item id appended to the page URL.
 *
 * @return array Keys: title, pic_url, small_images, nick. Fields that cannot
 *               be matched keep their defaults ('', null, empty array, ''),
 *               also when the download fails.
 */
function FileGetTmall($serial)
{
    $data = array(
        'title'        => '',
        'pic_url'      => null,
        'small_images' => array(),
        'nick'         => '',
    );

    $text = file_get_contents("https://detail.tmall.com/item.htm?id=" . $serial);
    if ($text === false) {
        return $data;
    }

    // One pattern is expected to capture both the title (1) and the image (2).
    $img_reg = '/
/';
    preg_match_all($img_reg, $text, $img);
    if (isset($img[1][0])) {
        $data['title'] = iconv("GBK", "UTF-8", trim($img[1][0]));
    }
    if (isset($img[2][0])) {
        $data['pic_url'] = $img[2][0];
    }

    // Two passes: first isolate a <ul> body, then collect entries inside it.
    $small_pic_1 = '/
(.*?)<\/ul>/is';
    preg_match_all($small_pic_1, $text, $pic_1);
    if (isset($pic_1[1][0])) {
        $small_pic_2 = '/
/';
        preg_match_all($small_pic_2, $pic_1[1][0], $pic_2);
        if (isset($pic_2[1])) {
            $data['small_images'] = $pic_2[1];
        }
    }

    $ww_reg = '/(.*?)<\/strong>/is';
    preg_match_all($ww_reg, $text, $ww);
    if (isset($ww[1][0])) {
        $data['nick'] = iconv("GBK", "UTF-8", $ww[1][0]);
    }

    return $data;
}