爬取百度logo2

#!/usr/bin/env python

# _*_ coding:utf-8 _*_

import logging
import os
import shutil
from urllib.parse import urljoin

import requests
from lxml import etree

# Script: fetch the Baidu home page, locate the logo <img> via XPath and
# save the image to ``logo1.png`` in the current working directory.

logging.basicConfig(level=logging.INFO)

url = "https://www.baidu.com"

# A browser-like User-Agent is sent with every request.
headers = {}
headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36"

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
timeout = 10

try:
    # The request lives inside the ``try`` so that a failed connection is
    # actually reported by the handler at the bottom.
    res = requests.get(url, headers=headers, timeout=timeout)

    if res.status_code == 200:
        # Parse the page source into an element tree.
        selector = etree.HTML(res.text)

        # Serialise the parsed tree back to markup and log it, to show the
        # whole document that lxml built.
        result = etree.tostring(selector)
        logging.info(result)

        # Extract the logo's ``src`` attribute with XPath.
        img_urls = selector.xpath("//*[@id='lg']/img[1]/@src")
        print(img_urls)

        if img_urls:
            # urljoin resolves absolute, protocol-relative ("//host/x.png")
            # and path-relative ("/img/x.png") values against the page URL.
            img_url = urljoin(url, img_urls[0])
            logging.info(img_url)

            filename = "logo1.png"

            # Stream the image; the context manager releases the connection
            # even if writing the file fails.
            with requests.get(
                img_url, headers=headers, stream=True, timeout=timeout
            ) as img_res:
                if img_res.status_code == 200:
                    # Remove any previous copy only once the download is
                    # known to be available, so a failed request does not
                    # destroy the existing file.
                    if os.path.isfile(filename):
                        os.remove(filename)

                    # Have urllib3 undo gzip/deflate transfer encoding;
                    # otherwise the raw stream may be written compressed.
                    img_res.raw.decode_content = True

                    with open(filename, "wb") as out_file:
                        shutil.copyfileobj(img_res.raw, out_file)
                else:
                    print("网页异常")
        else:
            logging.info("查找元素失败")
    else:
        print("网页异常")

# requests raises its own ConnectionError, which is not a subclass of the
# builtin ConnectionError; Timeout is included because read timeouts are
# not ConnectionError subclasses.
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
    print("连接异常")

你可能感兴趣的:(爬取百度logo2)