Class 1217, Assignment 1-3: Scraping Xiaozhu for My Hometown on My Own

Summary: it took almost an entire evening to get this working. Slow going, but rewarding, and the Xiaozhu data finally came through. Also, this time I finally got it done under Ubuntu.
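For reference, the script only depends on requests, BeautifulSoup (bs4) and the lxml parser; a minimal import check (a sketch, assuming the packages were installed under Ubuntu with something like pip3 install requests beautifulsoup4 lxml) looks like this:

#!/usr/bin/env python3
# Quick dependency check: if any of these imports fails, the scraper below will not run.
import requests
import bs4
from lxml import etree

print("requests:", requests.__version__)
print("beautifulsoup4:", bs4.__version__)
print("lxml:", etree.__version__)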
Results

(Screenshot of the scraped output: 20161130.PNG)

My code is as follows:

#!/usr/bin/env python3
#-*- coding: utf-8 -*-

from bs4 import BeautifulSoup
import requests

def get_lorder_sex(nannv):
    # The landlord avatar's icon has class 'member_ico1' for female hosts;
    # anything else is treated as male.
    if nannv == ['member_ico1']:
        return 'nv'      # female
    else:
        return 'yemen'   # male

def get_links(url):
    web_data = requests.get(url)              # fetch one list page
    soup = BeautifulSoup(web_data.text, 'lxml')
    links = soup.select('a.resule_img_a')     # each search result links to a detail page
    for link in links:
        href = link.get("href")
        get_detail(href)

def get_detail(url):
    web_data2 = requests.get(url)             # second request, for the detail page
    soup = BeautifulSoup(web_data2.text, 'lxml')

    # CSS selectors copied from the browser's "Copy selector" tool
    titles   = soup.select("body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em")
    addrs    = soup.select("body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span")
    prices   = soup.select("#pricePart > div.day_l > span")
    images   = soup.select("#curBigImage")
    avartars = soup.select("#floatRightBox > div.js_box.clearfix > div.member_pic > a > img")
    names    = soup.select("#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a")
    sexs     = soup.select("#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > span")

    print(titles)                             # debug: confirm the title selector actually matched

    for title, addr, price, image, avartar, name, sex in zip(titles, addrs, prices, images, avartars, names, sexs):
        data = {
            "title"  : title.get_text(),
            "addr"   : addr.get_text(),
            "price"  : price.get_text(),
            "image"  : image.get("src"),
            "avartar": avartar.get("src"),
            "name"   : name.get_text(),
            "sex"    : get_lorder_sex(sex.get("class"))  # map the icon's class list to a readable label
        }
        print(data)

urls=["http://yantai.xiaozhu.com/penglai-duanzufang-p{}-8/".format(i) for i in range(1,7)]
for one_url in urls:
    get_links(one_url)





'''
# The first test: grab the detail-page links from a single list page
url = 'http://yantai.xiaozhu.com/penglai-duanzufang-8/'
web_data = requests.get(url)
soup = BeautifulSoup(web_data.text, 'lxml')

titles = soup.select('a.resule_img_a')
# print(titles)

for title in titles:
    link = title.get("href")
    print(link)
'''
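Right now every listing is only printed. As a possible next step (not part of this assignment), the data dicts could be collected into a list and written to a JSON file, with a short pause between list pages so the site is not hit too fast. A rough sketch, assuming get_detail is changed to append each data dict to results instead of printing it, and with the output filename as just an example:

import json
import time

results = []  # get_detail would do results.append(data) instead of print(data)

for one_url in urls:
    get_links(one_url)  # same crawl as above
    time.sleep(2)       # pause two seconds between list pages

# write everything collected to a UTF-8 JSON file
with open("xiaozhu_penglai.json", "w", encoding="utf-8") as f:
    json.dump(results, f, ensure_ascii=False, indent=2)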





