爬虫模拟提交文件的时候遇到下面的问题:
点击查看源代码后,发现看不懂(文件内容那一行显示成了乱码:test 文件的内容是按 GBK 编码保存的"作为Test",对应字节 \xd7\xf7\xce\xaa,而浏览器没有按 GBK 解码):
------WebKitFormBoundarytZTJQrWcjjcJIMVQ
Content-Disposition: form-data; name="upload"; filename="好好学习.txt"
Content-Type: application/octet-stream
×÷ΪTest
------WebKitFormBoundarytZTJQrWcjjcJIMVQ--
还有一个问题就是请求头的 boundary 值问题,看上去是随机值:
Content-Type: multipart/form-data; boundary=----WebKitFormBoundarytZTJQrWcjjcJIMVQ
百度了一番,找到了几篇介绍类似提交方法的文章,这里直接把链接贴出来【PS. 虽然还是没有解决我原来的问题,难受啊】
HTTP协议之multipart/form-data请求分析
文章4:multipart/form-data详细介绍
Python爬虫杂记 - POST之multipart/form-data请求
python3使用requests和requests_toolbelt上传文件
1.正常上传
import requests
def test():
    """Upload ``test.txt`` to httpbin.org as a multipart/form-data POST.

    The multipart body is built by ``requests`` itself from the ``files``
    argument, so no boundary is chosen here. The function prints the
    response text, the raw request body and the request headers for
    inspection.
    """
    params = {'path': 'test.txt',
              'token': '123456',
              'num': 0,
              'offset': 0,
              'limit': 8}
    # Open the file in a ``with`` block so the handle is always closed,
    # even if the request raises.
    with open('test.txt', 'rb') as upload:
        response = requests.post('http://httpbin.org/post',
                                 params=params,
                                 files={'upload': upload})
    print("1: ", response.text)
    print("2: ", response.request.body)
    print("3: ", response.request.headers)


if __name__ == '__main__':
    test()
1: {
"args": {
"limit": "8",
"num": "0",
"offset": "0",
"path": "test.txt",
"token": "123456"
},
"data": "",
"files": {
"upload": "data:application/octet-stream;base64,1/fOqlRlc3Q="
},
"form": {},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Content-Length": "154",
"Content-Type": "multipart/form-data; boundary=c889e2cd4e2470630d99dc2fe26a443d",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.19.1"
},
"json": null,
"origin": "xxx.xx.xxx.xx, xxx.xx.xxx.xx",
"url": "https://httpbin.org/post?path=test.txt&token=123456&num=0&offset=0&limit=8"
}
2: b'--c889e2cd4e2470630d99dc2fe26a443d\r\nContent-Disposition: form-data; name="upload"; filename="test.txt"\r\n\r\n\xd7\xf7\xce\xaaTest\r\n--c889e2cd4e2470630d99dc2fe26a443d--\r\n'
3: {'User-Agent': 'python-requests/2.19.1',
'Accept-Encoding': 'gzip, deflate',
'Accept': '*/*',
'Connection': 'keep-alive',
'Content-Length': '154',
'Content-Type': 'multipart/form-data; boundary=c889e2cd4e2470630d99dc2fe26a443d'}
2.使用 requests_toolbelt 库
from requests_toolbelt import MultipartEncoder
import requests
def test():
    """Upload ``test.txt`` using ``MultipartEncoder`` with a fixed boundary.

    The encoder builds the multipart body with the boundary given here, and
    its ``content_type`` attribute supplies the matching Content-Type
    header. The function prints the response text, the request body object
    and the request headers for inspection.
    """
    params = {'path': 'test.txt',
              'token': '123456',
              'num': 0, 'offset': 0,
              'limit': 8}
    # The encoder is passed as a streaming body and reads the file while
    # the request is being sent, so the POST must happen while the file is
    # still open. The ``with`` block guarantees the handle is closed after.
    with open('test.txt', 'rb') as upload:
        # NOTE: a bare file object is sent without a filename, so httpbin
        # reports it under "form" rather than "files". To send it as a file
        # part, pass a (filename, fileobj, content_type) tuple instead.
        m = MultipartEncoder(fields={'upload': upload},
                             boundary='----WebKitFormBoundarytZTJQrWcjjcJIMVQ')
        response = requests.post('http://httpbin.org/post',
                                 params=params,
                                 data=m,
                                 headers={'Content-Type': m.content_type})
        print("1: ", response.text)
        print("2: ", response.request.body)
        print("3: ", response.request.headers)


if __name__ == '__main__':
    test()
1: {
"args": {
"limit": "8",
"num": "0",
"offset": "0",
"path": "test.txt",
"token": "123456"
},
"data": "",
"files": {},
"form": {
"upload": "\ufffd\ufffd\u03aaTest"
},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Content-Length": "145",
"Content-Type": "multipart/form-data; boundary=----WebKitFormBoundarytZTJQrWcjjcJIMVQ",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.19.1"
},
"json": null,
"origin": "xxx.xx.xxx.xx, xxx.xx.xxx.xx",
"url": "https://httpbin.org/post?path=test.txt&token=123456&num=0&offset=0&limit=8"
}
2: <MultipartEncoder: {'upload': <_io.BufferedReader name='test.txt'>}>
3: {'User-Agent': 'python-requests/2.19.1',
'Accept-Encoding': 'gzip, deflate',
'Accept': '*/*',
'Connection': 'keep-alive',
'Content-Type': 'multipart/form-data; boundary=----WebKitFormBoundarytZTJQrWcjjcJIMVQ',
'Content-Length': '145'}
boundary值可以指定
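3.使用 encode_multipart_formdata 函数
注意:Content-Type 请求头必须写成 multipart/form-data; boundary=<boundary 值> 的形式;如果缺少 "boundary="(如下面输出的 headers 所示),服务器无法解析请求体,返回结果中的 files 和 form 都会是空的。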
from collections import OrderedDict
from urllib3 import encode_multipart_formdata
import requests
def test():
    """Upload ``test.txt`` using urllib3's ``encode_multipart_formdata``.

    The multipart body is encoded up front with a caller-chosen boundary
    and sent as raw ``data``. The function prints the encoded body, the
    response text, the request body and the request headers for inspection.
    """
    # Read the payload inside a ``with`` block so the file handle is closed.
    with open("test.txt", 'rb') as upload:
        payload = upload.read()
    # Field value is (filename, data, content type); filename is None, so
    # the part carries no ``filename`` attribute.
    files = OrderedDict([("upload", (None, payload, 'application/octet-stream'))])
    boundary = '----WebKitFormBoundaryKPjN0GYtWEjAni5F'
    # encode_multipart_formdata returns (body, content_type); content_type
    # is already "multipart/form-data; boundary=<boundary>".
    body, content_type = encode_multipart_formdata(files, boundary=boundary)
    print("0", body)
    params = {'path': 'test.txt',
              'token': '123456',
              'num': 0,
              'offset': 0,
              'limit': 8}
    # The header must name the boundary as a ``boundary=`` parameter.
    # Sending "multipart/form-data; <boundary>" without it leaves the
    # server unable to split the body, and both "files" and "form" come
    # back empty.
    response = requests.post('http://httpbin.org/post',
                             params=params,
                             data=body,
                             headers={'Content-Type': content_type})
    print("1: ", response.text)
    print("2: ", response.request.body)
    print("3: ", response.request.headers)


if __name__ == '__main__':
    test()
0 b'------WebKitFormBoundaryKPjN0GYtWEjAni5F\r\nContent-Disposition: form-data; name="upload"\r\nContent-Type: application/octet-stream\r\n\r\n\xd7\xf7\xce\xaaTest\r\n------WebKitFormBoundaryKPjN0GYtWEjAni5F--\r\n'
1: {
"args": {
"limit": "8",
"num": "0",
"offset": "0",
"path": "test.txt",
"token": "123456"
},
"data": "",
"files": {},
"form": {},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Content-Length": "185",
"Content-Type": "multipart/form-data; ----WebKitFormBoundaryKPjN0GYtWEjAni5F",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.19.1"
},
"json": null,
"origin": "xxx.xx.xxx.xx, xxx.xx.xxx.xx",
"url": "https://httpbin.org/post?path=test.txt&token=123456&num=0&offset=0&limit=8"
}
2: b'------WebKitFormBoundaryKPjN0GYtWEjAni5F\r\nContent-Disposition: form-data; name="upload"\r\nContent-Type: application/octet-stream\r\n\r\n\xd7\xf7\xce\xaaTest\r\n------WebKitFormBoundaryKPjN0GYtWEjAni5F--\r\n'
3: {'User-Agent': 'python-requests/2.19.1',
'Accept-Encoding': 'gzip, deflate',
'Accept': '*/*',
'Connection': 'keep-alive',
'Content-Type': 'multipart/form-data; ----WebKitFormBoundaryKPjN0GYtWEjAni5F',
'Content-Length': '185'}
可以通过以上几种办法修改上传时的 boundary 值,并完成 multipart/form-data 的 POST 请求。相关内容整理放在这里,方便以后遇到时再回来看看。
Fn.