Python 模拟app进行访问

网页压缩技术主要有两种:gzip和deflate。

DEFLATE是一个无专利的压缩算法,它可以实现无损数据压缩,有众多开源的实现算法。
GZIP是一种使用DEFLATE算法压缩数据的格式,它在DEFLATE数据流之外加上了文件头和校验信息。

现在普遍支持gzip压缩,deflate只是一种过时的网页压缩方式。

if __name__ == "__main__":
    # Fetch a web page, undo any HTTP content compression, guess the text
    # encoding with chardet and print the decoded page.
    #
    # NOTE(review): this snippet relies on the names `request` (presumably
    # `from urllib import request`), `zlib` and `chardet` already being
    # imported; none of those imports is shown next to it -- confirm.
    url = 'http://www.qq.com/'
    req = request.Request(url)

    # The context manager closes the connection once the body and the
    # header we need have been read, even if reading raises.
    with request.urlopen(req, timeout=120) as response:
        html = response.read()
        encoding = response.info().get('Content-Encoding')
    print(encoding)

    # The header may be absent (None) or use different letter case, so
    # normalise it before comparing.
    content_coding = (encoding or '').strip().lower()
    if content_coding == 'gzip':
        # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
        html = zlib.decompress(html, 16 + zlib.MAX_WBITS)
    elif content_coding == 'deflate':
        try:
            # Negative wbits: raw DEFLATE stream without any wrapper.
            html = zlib.decompress(html, -zlib.MAX_WBITS)
        except zlib.error:
            # Fall back to a zlib-wrapped stream (default wbits).
            html = zlib.decompress(html)

    # chardet reports an encoding of None when it cannot make a guess
    # (e.g. for an empty body); bytes.decode() would then raise TypeError,
    # so fall back to UTF-8.
    charset = chardet.detect(html)["encoding"] or 'utf-8'
    print(charset)
    print(html.decode(charset, 'ignore'))
 

import urllib.request
import zlib

# Imitate the Android app's HTTP client: POST a form-encoded body with the
# app's request headers, then decode and print the reply.

loginUrl = 'https://api.nfapp.southcn.com/nanfang_if/getArticleContent?articleId=2055802&colID=1207&location=%E5%B9%BF%E5%B7%9E'

# Form-encoded request body, sent as bytes. Built before the headers so
# that Content-Length can be derived from it.
loginData = 'id=2055802&userID=0&siteID=1&userOtherID=ffffffff-d83e-9eed-ffff-ffffef05ac4a&eventType=0&type=0&'.encode('UTF-8')

# Headers copied from the app's traffic. Most device fields look
# Base64-encoded (for example 'YW5kcm9pZA==' decodes to 'android').
headers = {
    'Accept-Encoding': 'gzip',
    'User-Agent': 'okhttp/3.11.0',
    # Derived from the payload instead of a hard-coded '97' so it stays
    # correct if loginData is edited.
    'Content-Length': str(len(loginData)),
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'androidId': 'NzgwZTJjNTAyM2MxMzViNQ==',
    'bluetooth': '',
    'brand': 'dGVuY2VudA==',
    'channel': 'eWluZ3lvbmdiYW8=',
    'deviceId': 'ZmZmZmZmZmYtZDgzZS05ZWVkLWZmZmYtZmZmZmVmMDVhYzRh',
    'imei': 'NjY2NjYwMDgyMzMzOTg2',
    'latitude': 'MC4w',
    'longitude': 'MC4w',
    'mac': 'MDg6MDA6Mjc6QzQ6NDY6QzA=',
    'manufacturer': 'VGVuY2VudA==',
    'model': 'dmlydHVhbG1hY2hpbmUy',
    'networkType': 'V2lGaQ==',
    'operator': '',
    'os': 'YW5kcm9pZA==',
    'osVersion': 'MTk=',
    'screen': 'NzIweDEyODA=',
    'version': 'NS4yLjU=',
    'versionCode': 'NTI1MA==',
    'Connection': 'close',
    'Host': 'api.nfapp.southcn.com',
}

# Passing a data argument makes urllib issue a POST.
request = urllib.request.Request(loginUrl, loginData, headers)

# The context manager closes the connection after the body is read.
with urllib.request.urlopen(request) as res:
    body = res.read()
    content_coding = (res.headers.get('Content-Encoding') or '').strip().lower()

# Only gunzip when the server says the body is gzip-compressed; an
# uncompressed reply would otherwise make zlib.decompress raise zlib.error.
if content_coding == 'gzip':
    # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
    html = zlib.decompress(body, 16 + zlib.MAX_WBITS)
else:
    html = body

data = html.decode('UTF-8', 'ignore')
print(data)

 

你可能感兴趣的:(教程)