Python 爬虫系列之爬取京东手机数据

用 Python 抓取京东手机数据

作者:vpoet

mail:[email protected]

 #coding=utf-8
"""Scrape mobile-phone prices from JD.com search result pages.

Written for Python 2 (it relies on ``urllib2``); HTML parsing is done with
lxml.  The script is unfinished: only the price extraction is active, the
remaining extraction steps are listed as notes near the bottom of the file.
"""

import re  # not used yet; needed by the regex-based steps noted below
import urllib2

from lxml import etree

# Search URL template.  ``%s`` receives the page number; every ``%%`` collapses
# to a literal ``%`` when the template is formatted, which restores the
# percent-encoded keyword.
# NOTE(review): the run of spaces after "click=3-" looks like a copy/paste
# artifact -- confirm the intended value before relying on this URL.
MAIN_URL = """http://search.jd.com/Search?keyword=%%E6%%89%%8B%%E6%%9C%%BA&enc=utf-8&suggest=0#keyword=%%E6%%89%%8B%%E6%%9C%%BA&enc=utf-8&qrst=1&ps=addr&rt=1&stop=1&sttr=1&cid3=655&click=3-                655&psort=3&page=%s"""

# Number of result pages to fetch.
PAGE_NUM = 1

# Selects the <strong> element that carries the price of each listed phone.
PRICE_XPATH = "//*[@id='plist']/ul[@class='list-h clearfix']/li/div/div[@class='p-price']/strong"


def fetch_tree(url):
    """Download ``url`` and return the page parsed by ``etree.HTML``.

    The HTTP response is always closed, even when reading it fails.
    """
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()
    return etree.HTML(html)


def main():
    """Print the price attribute of every phone found on each result page.

    Each output line is ``<value>\\t<index>`` followed by a blank line, where
    ``index`` is a running counter that starts at 1 and keeps increasing
    across pages.  ``over`` is printed once all pages have been processed.
    """
    index = 1
    # NOTE(review): range() starts at 0, so the first request uses page=0 --
    # confirm whether the site expects 1-based page numbers.
    for page in range(PAGE_NUM):
        tree = fetch_tree(MAIN_URL % page)
        for phone_price in tree.xpath(PRICE_XPATH):
            # values() lists the element's attribute values; the original
            # draft reads the second one.
            # NOTE(review): presumably that attribute holds the price --
            # verify against the live markup.
            # A parenthesised single argument prints identically under the
            # Python 2 print statement.
            print(phone_price.values()[1] + '\t' + str(index) + '\n')
            index += 1
    print("over")


# Unfinished extraction steps, kept from the original draft as notes:
#   phone names:
#       //div[@id='plist']/ul/li/div[@class='lh-wrap']/div[@class='p-name']/a/text()
#   phone picture URLs (the draft printed element.values()[3]):
#       //div[@class='lh-wrap']/div[@class='p-img']/a/img
#   prices, shorter alternative query:
#       //div[@class='p-price']/strong
#   number of comments, parsed with re.findall(r'.{2}(\d+).{3}', text):
#       //div[@class='extra']/a/text()
#   positive-rating percentage, parsed with
#   re.findall(r'\((\d{2})%.{2}\)', text):
#       //div[@class='extra']/span[@class='reputation']/text()


if __name__ == '__main__':
    main()

 

这个还没写完,先保存在这里,有时间再完成。(目前只实现了价格的抓取,名称、图片、评价数和好评率的抓取尚未完成。)

你可能感兴趣的:(python)