Python requests库处理 multipart/form-data 请求以及 boundary值问题

原因

爬虫模拟提交文件的时候遇到下面的问题:
(此处原为一张截图,图片未能保留)
点击查看源代码后,发现看不懂:

------WebKitFormBoundarytZTJQrWcjjcJIMVQ
Content-Disposition: form-data; name="upload"; filename="好好学习.txt"
Content-Type: application/octet-stream

×÷ΪTest
------WebKitFormBoundarytZTJQrWcjjcJIMVQ--

还有一个问题就是请求头的 boundary 值问题,看上去是随机值:
Content-Type: multipart/form-data; boundary=----WebKitFormBoundarytZTJQrWcjjcJIMVQ

百度了一番,找到了一些实现类似提交的方法,就直接把链接贴在这里【PS.虽然还是没有解决我原来的问题,难受啊】

链接直通车

HTTP协议之multipart/form-data请求分析
文章4:multipart/form-data详细介绍
Python爬虫杂记 - POST之multipart/form-data请求
python3使用requests和requests_toolbelt上传文件

相关测试

1.正常上传

import requests

def test():
    """Upload test.txt to httpbin as multipart/form-data and print the exchange.

    Uses the built-in ``files=`` support of requests; the boundary is not
    specified here, so the library chooses it. Prints the response text,
    the request body that was sent, and the request headers.
    """
    params = {
        'path': 'test.txt',
        'token': '123456',
        'num': 0,
        'offset': 0,
        'limit': 8,
    }
    # Open the file in a context manager so the handle is always closed,
    # even if the request raises.
    with open('test.txt', 'rb') as upload:
        response = requests.post('http://httpbin.org/post',
                                 params=params,
                                 files={'upload': upload})
    print("1: ", response.text)
    print("2: ", response.request.body)
    print("3: ", response.request.headers)


if __name__ == '__main__':
    test()
1:  {
  "args": {
    "limit": "8", 
    "num": "0", 
    "offset": "0", 
    "path": "test.txt", 
    "token": "123456"
  }, 
  "data": "", 
  "files": {
    "upload": "data:application/octet-stream;base64,1/fOqlRlc3Q="
  }, 
  "form": {}, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Content-Length": "154", 
    "Content-Type": "multipart/form-data; boundary=c889e2cd4e2470630d99dc2fe26a443d", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.19.1"
  }, 
  "json": null, 
  "origin": "xxx.xx.xxx.xx, xxx.xx.xxx.xx", 
  "url": "https://httpbin.org/post?path=test.txt&token=123456&num=0&offset=0&limit=8"
}

2:  b'--c889e2cd4e2470630d99dc2fe26a443d\r\nContent-Disposition: form-data; name="upload"; filename="test.txt"\r\n\r\n\xd7\xf7\xce\xaaTest\r\n--c889e2cd4e2470630d99dc2fe26a443d--\r\n'
3:  {'User-Agent': 'python-requests/2.19.1', 
	'Accept-Encoding': 'gzip, deflate', 
	'Accept': '*/*', 
	'Connection': 'keep-alive', 
	'Content-Length': '154', 
	'Content-Type': 'multipart/form-data; boundary=c889e2cd4e2470630d99dc2fe26a443d'}

2.使用 requests_toolbelt 库

from requests_toolbelt import MultipartEncoder
import requests

def test():
    """Upload test.txt to httpbin using MultipartEncoder with a fixed boundary.

    The boundary string is passed to the encoder explicitly, and the
    encoder's ``content_type`` is sent as the Content-Type header so that
    the header and the body agree on the boundary. Prints the response
    text, the request body object, and the request headers.
    """
    params = {
        'path': 'test.txt',
        'token': '123456',
        'num': 0,
        'offset': 0,
        'limit': 8,
    }
    # The encoder keeps a reference to the open file object, so the request
    # and the prints stay inside the ``with`` block; the handle is closed
    # afterwards instead of being leaked.
    with open('test.txt', 'rb') as upload:
        # NOTE: a bare file object is sent without a filename; in the
        # captured run httpbin listed it under "form", not "files".
        m = MultipartEncoder(fields={'upload': upload},
                             boundary='----WebKitFormBoundarytZTJQrWcjjcJIMVQ')
        response = requests.post('http://httpbin.org/post',
                                 params=params,
                                 data=m,
                                 headers={'Content-Type': m.content_type})

        print("1: ", response.text)
        print("2: ", response.request.body)
        print("3: ", response.request.headers)


# The guard must start at column 0; any leading space is an IndentationError.
if __name__ == '__main__':
    test()
1:  {
  "args": {
    "limit": "8", 
    "num": "0", 
    "offset": "0", 
    "path": "test.txt", 
    "token": "123456"
  }, 
  "data": "", 
  "files": {}, 
  "form": {
    "upload": "\ufffd\ufffd\u03aaTest"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Content-Length": "145", 
    "Content-Type": "multipart/form-data; boundary=----WebKitFormBoundarytZTJQrWcjjcJIMVQ", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.19.1"
  }, 
  "json": null, 
  "origin": "xxx.xx.xxx.xx, xxx.xx.xxx.xx", 
  "url": "https://httpbin.org/post?path=test.txt&token=123456&num=0&offset=0&limit=8"
}

2:  <MultipartEncoder: {'upload': <_io.BufferedReader name='test.txt'>}>
3:  {'User-Agent': 'python-requests/2.19.1', 
'Accept-Encoding': 'gzip, deflate', 
'Accept': '*/*', 
'Connection': 'keep-alive', 
'Content-Type': 'multipart/form-data; boundary=----WebKitFormBoundarytZTJQrWcjjcJIMVQ', 
'Content-Length': '145'}

可以看到,使用 MultipartEncoder 时 boundary 值可以自行指定,请求头中的 boundary 与代码里指定的值一致。

3.使用 encode_multipart_formdata 函数

from collections import OrderedDict
from urllib3 import encode_multipart_formdata
import requests

def test():
    """Upload test.txt to httpbin with a body built by encode_multipart_formdata.

    The multipart body is encoded up front with an explicit boundary and
    sent as raw ``data``. Prints the encoded body, the response text, the
    request body, and the request headers.
    """
    # Read the payload inside a context manager so the handle is closed.
    with open("test.txt", 'rb') as fh:
        content = fh.read()

    # (filename, data, content type); filename is None, so the part is sent
    # without a filename parameter.
    files = OrderedDict([("upload", (None, content, 'application/octet-stream'))])
    boundary = '----WebKitFormBoundaryKPjN0GYtWEjAni5F'

    # encode_multipart_formdata returns (body, content_type). The returned
    # content type has the form "multipart/form-data; boundary=<boundary>".
    # Concatenating the bare boundary onto "multipart/form-data; " drops the
    # "boundary=" key, and the server then cannot split the body into parts
    # (the captured run that followed this snippet shows that malformed
    # header and empty "files"/"form").
    body, content_type = encode_multipart_formdata(files, boundary=boundary)
    print("0", body)

    params = {
        'path': 'test.txt',
        'token': '123456',
        'num': 0,
        'offset': 0,
        'limit': 8,
    }
    response = requests.post('http://httpbin.org/post',
                             params=params,
                             data=body,
                             headers={'Content-Type': content_type})

    print("1: ", response.text)
    print("2: ", response.request.body)
    print("3: ", response.request.headers)


if __name__ == '__main__':
    test()
0 b'------WebKitFormBoundaryKPjN0GYtWEjAni5F\r\nContent-Disposition: form-data; name="upload"\r\nContent-Type: application/octet-stream\r\n\r\n\xd7\xf7\xce\xaaTest\r\n------WebKitFormBoundaryKPjN0GYtWEjAni5F--\r\n'
1:  {
  "args": {
    "limit": "8", 
    "num": "0", 
    "offset": "0", 
    "path": "test.txt", 
    "token": "123456"
  }, 
  "data": "", 
  "files": {}, 
  "form": {}, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Content-Length": "185", 
    "Content-Type": "multipart/form-data; ----WebKitFormBoundaryKPjN0GYtWEjAni5F", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.19.1"
  }, 
  "json": null, 
  "origin": "xxx.xx.xxx.xx, xxx.xx.xxx.xx", 
  "url": "https://httpbin.org/post?path=test.txt&token=123456&num=0&offset=0&limit=8"
}

2:  b'------WebKitFormBoundaryKPjN0GYtWEjAni5F\r\nContent-Disposition: form-data; name="upload"\r\nContent-Type: application/octet-stream\r\n\r\n\xd7\xf7\xce\xaaTest\r\n------WebKitFormBoundaryKPjN0GYtWEjAni5F--\r\n'
3:  {'User-Agent': 'python-requests/2.19.1', 
'Accept-Encoding': 'gzip, deflate', 
'Accept': '*/*', 
'Connection': 'keep-alive', 
'Content-Type': 'multipart/form-data; ----WebKitFormBoundaryKPjN0GYtWEjAni5F', 
'Content-Length': '185'}

总结

通过以上几种办法,都可以完成 multipart/form-data 的 post 请求;其中第 1 种方法的 boundary 值由 requests 自动生成,后两种方法可以自行指定上传时的 boundary 值。相关内容整理放在这里,方便以后遇到时再回来看看。
 

点我回顶部 ☚

 
 
 
 
 
 
 
Fn.

你可能感兴趣的:(我的爬虫之旅)