抓取电话号码的例子

import os
import sys
from urllib.parse import urljoin

import requests
from lxml import etree

root_url = 'http://www.51hao.cc/'
PAGE_ENCODING = 'gb2312'  # encoding forced on every response before decoding
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server hangs the crawl


def fetch_tree(session, url):
    """Download *url* and return it parsed as an lxml HTML tree.

    Raises:
        requests.RequestException: on connection failure, timeout or an
            HTTP error status.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = PAGE_ENCODING
    return etree.HTML(response.text)


def parse_carrier_label(label):
    """Split a section heading into ``(name, total)``.

    The name is the text before the first ``(``.  The total is the text
    between that ``(`` and the following ``)`` with the characters '共'
    and '个' removed.  A label without ``(`` yields an empty total instead
    of raising ``IndexError``.
    """
    parts = label.split('(')
    name = parts[0]
    if len(parts) < 2:
        return name, ''
    total = parts[1].split(')')[0].replace('共', '').replace('个', '')
    return name, total


def scrape_city(session, province, city_name, city_url):
    """Fetch one city page and print one line per number found on it.

    Each printed line is: province, city name, the section's "nums" text,
    the section name, the section total and the number text.

    Raises:
        requests.RequestException: if the page cannot be downloaded.
    """
    tree = fetch_tree(session, city_url)
    if tree is None:
        # Defensive: nothing to query if the parser produced no document.
        return

    # One "num_bg" div per section (carrier, per the original comment).
    sections = tree.xpath('//div[@class="all"]//div[@class="num_bg"]')
    for index in range(len(sections)):
        # Parameterized XPath: sections are addressed positionally,
        # starting at the second child div of div.all.
        div_pos = index + 2
        prefixes = tree.xpath(
            '//div[@class="all"]/div[%s]//span[@class="nums"]/text()' % div_pos)
        labels = tree.xpath(
            '//div[@class="all"]/div[%s]/div[1]/text()' % div_pos)
        if not prefixes or not labels:
            # Malformed or unexpected section: skip it rather than abort.
            continue

        carrier, total = parse_carrier_label(labels[0])

        # NOTE(review): this matches every <li> that is the (index + 1)-th
        # <li> of its parent anywhere under div.all; it is not scoped to the
        # current section's div. Kept as in the original -- confirm against
        # the real page structure.
        mobiles = tree.xpath(
            '//div[@class="all"]//li[%s]/a/text()' % (index + 1))
        for mobile in mobiles:
            print(province, city_name, prefixes[0], carrier, total, mobile)


def main():
    """Crawl the index page, then every city page linked from it."""
    # A single session reuses connections across the many city requests.
    with requests.Session() as session:
        root = fetch_tree(session, root_url)
        province_blocks = root.xpath(
            '//div[@class="fkce"]/div[@class="fkt"][position()>1]')
        print(len(province_blocks))

        for block in province_blocks:
            province_names = block.xpath('div[@class="fkbj"]/p/a/text()')
            if not province_names:
                continue
            province = province_names[0]

            # Walk the anchors themselves so each name stays paired with its
            # own href even when an anchor has no direct text node.
            for anchor in block.xpath('div[@class="fklk"]/p/a'):
                city_href = anchor.get('href')
                if not city_href:
                    continue
                city_name = ''.join(anchor.xpath('text()'))
                print(province, city_name, city_href)

                try:
                    # urljoin leaves absolute hrefs untouched and resolves
                    # relative ones against the site root.
                    scrape_city(session, province, city_name,
                                urljoin(root_url, city_href))
                except requests.RequestException as exc:
                    # One unreachable city must not end the whole crawl.
                    print('failed to fetch %s: %s' % (city_href, exc),
                          file=sys.stderr)


if __name__ == '__main__':
    main()

你可能感兴趣的:(抓取电话号码的例子)