python入门学习笔记-day17(6.21)之爬虫详解

python如何访问互联网

URL + lib ----> urllib
URL的一般格式为(带方括号[]的为可选项):
     protocol://hostname[:port]/path/[;parameters][?query]#fragment


     
URL由三部分组成:
第一部分是协议:http,https,ftp,file,ed2k…
第二部分是存放资源的服务器的域名或IP地址(有时候要包含端口号,各种传输协议都有默认的端口号,如http的默认端口为80)
第三部分是资源的具体地址:如目录或文件名等


访问网站并保存图片

import urllib.request

# Download the picture. The with-block closes the HTTP response as soon as
# the body has been read, even if read() raises.
with urllib.request.urlopen('http://placekitten.com/g/500/600') as response:
    cat_img = response.read()

# Write in binary mode: read() returned raw bytes, not text.
with open('cat_500_600.jpg', 'wb') as f:
    f.write(cat_img)

版本二

import urllib.request

# Same download as before, but going through an explicit Request object,
# which urlopen() accepts in place of a URL string.
req = urllib.request.Request('http://placekitten.com/g/500/600')

# The with-block closes the response once the body is read. The name
# `response` stays bound afterwards, so it can still be inspected
# (geturl(), info(), getcode()).
with urllib.request.urlopen(req) as response:
    cat_img = response.read()

# Write in binary mode: read() returned raw bytes, not text.
with open('cat_500_600.jpg', 'wb') as f:
    f.write(cat_img)

python入门学习笔记-day17(6.21)之爬虫详解_第1张图片

>>> response.geturl()      #得到网址
'http://placekitten.com/g/500/600'

>>> response.info()
<http.client.HTTPMessage object at 0x000001E7DF5930D0>

>>> print(response.info())
Date: Sat, 20 Jun 2020 13:10:14 GMT
Content-Type: image/jpeg
Transfer-Encoding: chunked
Connection: close
Set-Cookie: __cfduid=d19f60a1226135d28a59d7e4ed53889281592658614; expires=Mon, 20-Jul-20 13:10:14 GMT; path=/; domain=.placekitten.com; HttpOnly; SameSite=Lax
Cache-Control: public, max-age=86400
Expires: Thu, 31 Dec 2020 20:00:00 GMT
Vary: User-Agent, Accept-Encoding
Access-Control-Allow-Origin: *
CF-Cache-Status: HIT
Age: 33658
cf-request-id: 037372f2000000197ad4357200000001
Server: cloudflare
CF-RAY: 5a65ba966b58197a-HKG

>>> response.getcode()
200



访问有道并翻译

import urllib.parse
import urllib.request

url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Form fields sent with the request. Only 'i' carries the text to translate;
# the remaining values are fixed strings (presumably copied from a browser
# session -- verify before relying on them).
form = {
    'i': 'I love you',
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': '15926650709527',
    'sign': '3250ce8e0ffaf134fc2bb3e275c9434f',
    'ts': '1592665070952',
    'bv': '7e14dfdb6b3686cc5af5e5294aaded19',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_CLICKBUTTION',
}
# urlopen() needs the body as bytes, so URL-encode the form and encode it.
data = urllib.parse.urlencode(form).encode('utf-8')

# Passing `data` makes urlopen() issue a POST. The with-block closes the
# response even if reading or decoding fails.
with urllib.request.urlopen(url, data) as response:
    html = response.read().decode('utf-8')

print(html)

python入门学习笔记-day17(6.21)之爬虫详解_第2张图片
python入门学习笔记-day17(6.21)之爬虫详解_第3张图片

================= RESTART: C:\Users\zdb\Desktop\translation.py =================
                          {"type":"EN2ZH_CN","errorCode":0,"elapsedTime":2,"translateResult":[[{"src":"I love you","tgt":"我爱你"}]]}

>>> import json
>>> json.loads(html)
{'type': 'EN2ZH_CN', 'errorCode': 0, 'elapsedTime': 2, 'translateResult': [[{'src': 'I love you', 'tgt': '我爱你'}]]}
>>> target = json.loads(html)
>>> type(target)
<class 'dict'>
>>> target['translateResult']
[[{'src': 'I love you', 'tgt': '我爱你'}]]
>>> target['translateResult'][0][0]
{'src': 'I love you', 'tgt': '我爱你'}
>>> target['translateResult'][0][0]['tgt']
'我爱你'

python入门学习笔记-day17(6.21)之爬虫详解_第4张图片




版本二

import json
import urllib.parse
import urllib.request

content = input('请输入需要翻译的内容:')

url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Form fields sent with the request. Only 'i' varies (the user's text);
# the remaining values are fixed strings (presumably copied from a browser
# session -- verify before relying on them).
form = {
    'i': content,
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': '15926650709527',
    'sign': '3250ce8e0ffaf134fc2bb3e275c9434f',
    'ts': '1592665070952',
    'bv': '7e14dfdb6b3686cc5af5e5294aaded19',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_CLICKBUTTION',
}
# urlopen() needs the body as bytes, so URL-encode the form and encode it.
data = urllib.parse.urlencode(form).encode('utf-8')

# Passing `data` makes urlopen() issue a POST. The with-block closes the
# response even if reading or decoding fails.
with urllib.request.urlopen(url, data) as response:
    html = response.read().decode('utf-8')

# The reply is JSON; the translated text is nested under
# translateResult -> first group -> first entry -> 'tgt'.
target = json.loads(html)
print('翻译结果: %s' % (target['translateResult'][0][0]['tgt']))

python入门学习笔记-day17(6.21)之爬虫详解_第5张图片
python入门学习笔记-day17(6.21)之爬虫详解_第6张图片



import json
import urllib.parse
import urllib.request

content = input('请输入需要翻译的内容:')
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# New in this version: send a browser-like User-Agent header by passing a
# headers dict to Request().
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36',
}

# Form fields sent with the request. Only 'i' varies (the user's text);
# the remaining values are fixed strings (presumably copied from a browser
# session -- verify before relying on them).
form = {
    'i': content,
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': '15926650709527',
    'sign': '3250ce8e0ffaf134fc2bb3e275c9434f',
    'ts': '1592665070952',
    'bv': '7e14dfdb6b3686cc5af5e5294aaded19',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_CLICKBUTTION',
}
# The request body must be bytes, so URL-encode the form and encode it.
data = urllib.parse.urlencode(form).encode('utf-8')

# `req` is kept at module level so its headers can be inspected afterwards.
req = urllib.request.Request(url, data, head)

# The with-block closes the response even if reading or decoding fails.
with urllib.request.urlopen(req) as response:
    html = response.read().decode('utf-8')

# The reply is JSON; the translated text is nested under
# translateResult -> first group -> first entry -> 'tgt'.
target = json.loads(html)
print('翻译结果: %s' % (target['translateResult'][0][0]['tgt']))

python入门学习笔记-day17(6.21)之爬虫详解_第7张图片

================= RESTART: C:\Users\zdb\Desktop\translation.py =================
请输入需要翻译的内容:love
翻译结果: 爱
>>> req.headers
{'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'}



import json
import urllib.parse
import urllib.request

content = input('请输入需要翻译的内容:')
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Alternative not used here: build a headers dict and pass it to Request():
#   head = {}
#   head['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'
# This version attaches the header with Request.add_header() instead.

# Form fields sent with the request. Only 'i' varies (the user's text);
# the remaining values are fixed strings (presumably copied from a browser
# session -- verify before relying on them).
form = {
    'i': content,
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': '15926650709527',
    'sign': '3250ce8e0ffaf134fc2bb3e275c9434f',
    'ts': '1592665070952',
    'bv': '7e14dfdb6b3686cc5af5e5294aaded19',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_CLICKBUTTION',
}
# The request body must be bytes, so URL-encode the form and encode it.
data = urllib.parse.urlencode(form).encode('utf-8')

# `req` is kept at module level so its headers can be inspected afterwards.
req = urllib.request.Request(url, data)
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36')

# The with-block closes the response even if reading or decoding fails.
with urllib.request.urlopen(req) as response:
    html = response.read().decode('utf-8')

# The reply is JSON; the translated text is nested under
# translateResult -> first group -> first entry -> 'tgt'.
target = json.loads(html)
print('翻译结果: %s' % (target['translateResult'][0][0]['tgt']))
================= RESTART: C:\Users\zdb\Desktop\translation.py =================
请输入需要翻译的内容:爱国
翻译结果: patriotic
>>> req.headers
{'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'}

python入门学习笔记-day17(6.21)之爬虫详解_第8张图片
python入门学习笔记-day17(6.21)之爬虫详解_第9张图片

import json
import time
import urllib.parse
import urllib.request

# Loop-invariant request settings, defined once instead of on every pass.
TRANSLATE_URL = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'


def build_form(content):
    """Return the URL-encoded request body (bytes) for translating *content*.

    Only the 'i' field varies; the remaining fields are fixed strings
    (presumably copied from a browser session -- verify before relying
    on them).
    """
    fields = {
        'i': content,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': '15926650709527',
        'sign': '3250ce8e0ffaf134fc2bb3e275c9434f',
        'ts': '1592665070952',
        'bv': '7e14dfdb6b3686cc5af5e5294aaded19',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_CLICKBUTTION',
    }
    return urllib.parse.urlencode(fields).encode('utf-8')


def translate(content):
    """Send *content* to the translate endpoint and return the translated text.

    Network and JSON errors propagate to the caller unchanged.
    """
    req = urllib.request.Request(TRANSLATE_URL, build_form(content))
    req.add_header('User-Agent', USER_AGENT)

    # The with-block closes the response even if reading or decoding fails.
    with urllib.request.urlopen(req) as response:
        html = response.read().decode('utf-8')

    # The translated text is nested under
    # translateResult -> first group -> first entry -> 'tgt'.
    target = json.loads(html)
    return target['translateResult'][0][0]['tgt']


def main():
    """Prompt for text repeatedly and print each translation until 'q!'."""
    while True:
        content = input('请输入需要翻译的内容(输入‘q!’退出程序:')
        if content == 'q!':
            break

        print('翻译结果: %s' % (translate(content)))
        # Pause between requests, as the original script did.
        time.sleep(5)


if __name__ == '__main__':
    main()
================= RESTART: C:\Users\zdb\Desktop\translation.py =================
请输入需要翻译的内容(输入‘q!’退出程序:爱
翻译结果: love
请输入需要翻译的内容(输入‘q!’退出程序:爱过
翻译结果: To have loved
请输入需要翻译的内容(输入‘q!’退出程序:

python入门学习笔记-day17(6.21)之爬虫详解_第10张图片
python入门学习笔记-day17(6.21)之爬虫详解_第11张图片

import urllib.request

url = 'http://whatismyip.com.tw'

# Route plain-HTTP requests through the given proxy.
proxy_support = urllib.request.ProxyHandler({'http': '213.226.11.149:41878'})

opener = urllib.request.build_opener(proxy_support)

# Make this opener the default one used by urllib.request.urlopen().
urllib.request.install_opener(opener)

# The with-block closes the response even if reading or decoding fails.
with urllib.request.urlopen(url) as response:
    html = response.read().decode('utf-8')

print(html)
import socket
import time
import urllib.error
import urllib.request

socket.setdefaulttimeout(20)  # socket-level default timeout: 20 seconds

url = 'http://whatismyip.com.tw'

# Route plain-HTTP requests through the given proxy.
proxy_support = urllib.request.ProxyHandler({'http': '213.226.11.149:41878'})

opener = urllib.request.build_opener(proxy_support)

# Make this opener the default one used by urllib.request.urlopen().
urllib.request.install_opener(opener)

# The with-block guarantees the response is closed, including when read()
# or decode() raises (a bare response.close() afterwards would be skipped).
with urllib.request.urlopen(url) as response:
    html = response.read().decode('utf-8')

print(html)

time.sleep(1)  # pause; adjust as needed
import urllib.request
# Optional extras from the earlier variant, left disabled here:
# import random
# import time
# import socket
# import urllib.error

# socket.setdefaulttimeout(20)  # socket-level default timeout: 20 seconds

url = 'http://whatismyip.com.tw'

# Alternative: pick a proxy at random from a pool instead of hard-coding one.
# iplist = ['171.35.166.80:9999', '113.195.19.107:9999', '171.35.162.62:9999']
# proxy_support = urllib.request.ProxyHandler({'http': random.choice(iplist)})

proxy_support = urllib.request.ProxyHandler({'http': '113.121.76.0:9999'})

opener = urllib.request.build_opener(proxy_support)
# Header list attached to the opener: a browser-like User-Agent.
opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36')]

# Make this opener the default one used by urllib.request.urlopen().
urllib.request.install_opener(opener)

# The with-block closes the response even if reading or decoding fails.
with urllib.request.urlopen(url) as response:
    html = response.read().decode('utf-8')

print(html)

# time.sleep(5)  # optional pause; adjust as needed

你可能感兴趣的:(python,python)