网络爬虫5个小实例

京东商品页面爬取

# Fetch a JD.com product page and print its HTML.
import requests  # imported here so the snippet runs on its own

url = "https://item.jd.com/5821455.html"
try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url, timeout=30)
    # Turn 4xx/5xx responses into an exception instead of printing an error page.
    r.raise_for_status()
    # Decode with the encoding detected from the body content.
    r.encoding = r.apparent_encoding
    print(r.text)
except requests.RequestException:
    # Only network/HTTP failures are reported here; programming errors
    # (e.g. a NameError) surface instead of being silently hidden.
    print("爬取失败")

亚马逊商品页面爬取

import requests

# Fetch an Amazon.cn product page and print a 1000-character slice of its HTML.
url = "https://www.amazon.cn/dp/B077V5Q9L2?_encoding=UTF8&ref_=pc_cxrd_658407051_bestTab_658407051_a_best_1&pf_rd_p=797efab3-8794-41b4-a33a-eca4c6cf5ae1&pf_rd_s=merchandised-search-5&pf_rd_t=101&pf_rd_i=658407051&pf_rd_m=A1AJ19PSB66TGU&pf_rd_r=1GD0NPS97SJE6EHCK2K7&pf_rd_r=1GD0NPS97SJE6EHCK2K7&pf_rd_p=797efab3-8794-41b4-a33a-eca4c6cf5ae1"

# Override the default requests User-Agent with a browser-like one
# (presumably the site rejects the library's default UA -- verify).
kv = {"user-agent": 'Mozilla/5.0'}

try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url, headers=kv, timeout=30)
    # Turn 4xx/5xx responses into an exception.
    r.raise_for_status()
    # Decode with the encoding detected from the body content.
    r.encoding = r.apparent_encoding
    # Print only characters 1000-1999 to keep the output short.
    print(r.text[1000:2000])
except requests.RequestException:
    # Catch only network/HTTP failures so real bugs are not masked.
    print("爬取失败")

百度搜索引擎关键字提交接口

百度关键词接口:

https://www.baidu.com/s?wd=关键词

import requests
# Submit a search keyword to Baidu and print the status code, the final
# request URL, and the length of the response text.
kv = {'wd': '金刚狼'}
url = "https://www.baidu.com/s"
try:
    # `params` lets requests build and URL-encode the ?wd=... query string.
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url, params=kv, timeout=30)
    # The status code is printed rather than raised on, so non-200 responses
    # are still reported in full.
    print(r.status_code)
    print(r.url)
    print(len(r.text))
except requests.RequestException:
    # Catch only network failures so real bugs are not masked.
    print("爬取失败")

网络图片的爬取和存储

import requests
import os
# Download an image and store it under `root`, skipping the download when the
# target file already exists.
url = "https://images-cn.ssl-images-amazon.com/images/I/51zQ8gQpDHL._SX350_BO1,204,203,200_.jpg"
root = "/Users/apple/Desktop/"
# Name the local file after the last path segment of the URL.
path = os.path.join(root, url.split('/')[-1])
try:
    # makedirs also creates missing parent directories and is a no-op when
    # the directory already exists.
    os.makedirs(root, exist_ok=True)
    if not os.path.exists(path):
        # A timeout keeps the script from hanging forever on a stalled connection.
        r = requests.get(url, timeout=30)
        # Fail on 4xx/5xx so an HTTP error body is never saved as the image.
        r.raise_for_status()
        # The with-block closes the file; no explicit close() is needed.
        with open(path, "wb") as f:
            f.write(r.content)
        print("文件保存成功")
    else:
        print("文件已存在")
except (requests.RequestException, OSError):
    # Network/HTTP failures and filesystem errors are reported; programming
    # errors propagate instead of being hidden.
    print("爬取失败")
import requests
# Query ip138 for an IP address and print the last 500 characters of the
# returned page.
url = "http://m.ip138.com/ip.asp?ip="
try:
    # The IP to look up is appended directly to the query string.
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url + "196.168.6.39", timeout=30)
    # Turn 4xx/5xx responses into an exception.
    r.raise_for_status()
    # Decode with the encoding detected from the body content.
    r.encoding = r.apparent_encoding
    # Print only the tail of the page to keep the output short.
    print(r.text[-500:])
except requests.RequestException:
    # Catch only network/HTTP failures so real bugs are not masked.
    print("爬取失败")


你可能感兴趣的:(爬虫)