Python 爬虫之简单模板

包含异常处理的爬虫模板

如果 url 访问有问题,函数不会向调用方抛出异常,而是返回一条错误提示字符串

import requests
def getHtml(url):
    """Fetch ``url`` with a GET request and return the response body as text.

    Args:
        url: Address to request.

    Returns:
        The decoded response text on success. If the request fails or the
        server answers with an error status, the fixed message
        "产生状态码异常" is returned instead; no exception reaches the caller.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()  # turn 4xx/5xx responses into an HTTPError
        # Decode with the encoding detected from the body instead of the
        # one guessed from the response headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-related failures are handled here; a bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        return "产生状态码异常"
    
# Fetch two addresses in turn and print whatever getHtml returns for each.
# NOTE(review): url2 has no "http://" scheme and ends in ".con" — presumably
# chosen on purpose to demonstrate the failure path; confirm.
url1 = "http://www.baidu.com"
url2 = "www.baidu.con"
for target in (url1, url2):
    print(getHtml(target))

添加url参数

import requests
def getHtml(url, data):
    """Fetch ``url`` with query-string parameters and return the body text.

    Args:
        url: Address to request.
        data: Mapping sent as the URL query string.

    Returns:
        The decoded response text on success. On a request failure or an
        error status, prints "爬取有误" and returns ``None``.
    """
    try:
        # Pass the mapping by keyword so it is clearly the query string and
        # not a request body.
        r = requests.get(url, params=data, timeout=30)
        r.raise_for_status()  # turn 4xx/5xx responses into an HTTPError
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-related failures are handled here; a bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        print("爬取有误")
        return None

# Request the search endpoint with a single query parameter "wd" and print
# the value getHtml returns.
url1 = "http://www.baidu.com/s"
data = dict(wd='生活')
print(getHtml(url1, data))

伪装头部

import requests
def getHtml(url, header):
    """Fetch ``url`` using custom request headers and return the body text.

    Args:
        url: Address to request.
        header: Mapping of HTTP header names to values sent with the request.

    Returns:
        The decoded response text on success. On a request failure or an
        error status, prints "爬取有误" and returns ``None``.
    """
    try:
        r = requests.get(url, timeout=30, headers=header)
        r.raise_for_status()  # turn 4xx/5xx responses into an HTTPError
        # The attribute must be spelled ``encoding``; a misspelled name only
        # creates an unused attribute and leaves the decoding unchanged.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-related failures are handled here; a bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        print("爬取有误")
        return None

url2 = "https://www.amazon.cn/dp/B07746N2J9"

# Browser-like request headers sent instead of the requests defaults.
# Header values must not start with whitespace: requests rejects such values
# with InvalidHeader before anything is sent.
header = {
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept-Encoding": "gzip,deflate",
    "Accept-Language": "zh-CN,zh;q=0.8",
}
print(getHtml(url2, header))
 

爬取图片

import requests
# Download one image and save the raw response bytes to a local file.
r = requests.get("https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1542865260406&di=58fd4edef623772af33422d71b8be0ad&imgtype=0&src=http%3A%2F%2Fs6.cdn.deahu.com%2Fshow%2Flfile%2F8E9CDE37D4E5AFBFEB9A6FB2CC594187.jpg", timeout=30)
# Stop on an HTTP error so an error page is not written to disk as if it
# were the image.
r.raise_for_status()
# NOTE(review): the src parameter in the URL ends in .jpg while the output
# file is named .png — confirm which extension is intended.
with open('wallpaper.png', 'wb') as f:  # 'wb': open for writing in binary mode
    f.write(r.content)

模拟知乎登录

import requests

# Request the Zhihu home page, sending a Cookie header copied from a browser
# session together with matching Host and User-Agent headers.
# NOTE(review): the Cookie value is a hard-coded session credential; it should
# not live in source control — consider loading it from configuration.
headers={
        'Cookie':'d_c0="AICk0gizWQ6PTkm_Prxgrk4SpW2vsJ-LFy8=|1539317790"; _zap=51a38663-f556-41e3-a9bb-fc9f051076d4; __utmv=51854390.100--|2=registration_date=20151202=1^3=entry_date=20151202=1; tst=r; __gads=ID=d46da9e018289437:T=1539774107:S=ALNI_MZTBeYxCc0xdsW7aOMfzlBWCX-oBw; _xsrf=5142c5f9-9faf-4231-be03-cc43edf4f953; __utma=51854390.357429689.1539317791.1540120015.1540650169.3; __utmc=51854390; __utmz=51854390.1540650169.3.3.utmcsr=zhihu.com|utmccn=(referral)|utmcmd=referral|utmcct=/people/song-bing-75-75/collections; capsion_ticket="2|1:0|10:1542014542|14:capsion_ticket|44:NThjYTNhMjI2ZTA2NDEzZGJiNTk5MTllYzA1MWRhMjA=|6975888841c5bd8cb4866877d89148937038cf2cd9869bcc7d7e8ed9a74a0f44"; z_c0="2|1:0|10:1542014553|4:z_c0|92:Mi4xVUpwWEFnQUFBQUFBZ0tUU0NMTlpEaVlBQUFCZ0FsVk5XWlRXWEFCME5EaE9JUDNIcHF3ekVfTFRjUEl6end0Zm9n|fee57031f076e80c0d16dc6041df6423bd602e97d17c8633ac1a37a478216dcf"; q_c1=c67f1d517d1540929ea774a2bb66f863|1542183224000|1539317788000; tgw_l7_route=4902c7c12bebebe28366186aba4ffcde',
        'Host':'www.zhihu.com',
        'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3602.2 Safari/537.36'
        }
# requests waits indefinitely unless a timeout is given, so bound the wait.
r=requests.get('http://www.zhihu.com',headers=headers,timeout=30)
print(r.text)

 

你可能感兴趣的:(学习)