Basic web applications with Python

Sharing a few simple scripts

Simple page crawling, 3.x and 2.x

    import urllib.request  # Python 3.x

    url = 'http://www.baidu.com/'

    def getHtml(url):
        # urlopen returns a response object; read() gives the raw bytes of the page
        page = urllib.request.urlopen(url)
        html = page.read().decode(encoding='utf-8', errors='strict')
        return html

    print(getHtml(url))

    import requests  # 2.x version (the requests library works the same way on 3.x)

    headers = {
        'Connection': 'Keep-Alive',
        'Accept': 'text/html, application/xhtml+xml, */*',
        'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 LB'
    }

    url = "https://www.qq.com"

    # headers must be passed as a keyword argument; the second positional argument of get() is params
    res = requests.get(url, headers=headers)

    print(res.text)
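
A practical difference between the two approaches: urlopen returns raw bytes that you decode yourself, while requests guesses the response encoding and uses it to decode res.text. A minimal sketch of inspecting that guess, reusing the qq.com URL above:

    import requests

    res = requests.get("https://www.qq.com")
    print(res.encoding)    # the encoding requests inferred from the response headers
    print(res.text[:200])  # text is decoded with that encoding; res.content would be the raw bytes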

Passing POST parameters, setting a cookie, and slicing a fixed-length piece of the returned page, 2.x

    import requests

    url = 'http://106.75.72.168:2222/index.php'
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, compress',
        'Accept-Language': 'en-us;q=0.5,en;q=0.3',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100101 Firefox/22.0',
        'cookie': 'Hm_lvt_9d483e9e48ba1faa0dfceaf6333de846=1542198011; role=Zjo1OiJucXp2YSI7'
    }
    payload = {'filename': '1.php', 'data[]': ''}

    # POST the form fields with the custom headers (including the cookie)
    r = requests.post(url, headers=headers, data=payload)
    # slice a fixed-length fragment out of the response body and use it as the next URL
    url = "http://106.75.72.168:2222" + r.content[82:128]
    r = requests.get(url)
    print r.content
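
The same flow ported to Python 3 needs only small changes: print becomes a function, and r.content is bytes, so the slice must be decoded before it can be concatenated with a str. A minimal sketch under those assumptions (same endpoint, same 82:128 offsets):

    import requests

    url = 'http://106.75.72.168:2222/index.php'
    payload = {'filename': '1.php', 'data[]': ''}
    r = requests.post(url, data=payload)  # the headers dict above would be passed the same way
    # decode the sliced bytes before joining them to the base URL string
    path = r.content[82:128].decode('utf-8', errors='replace')
    r = requests.get('http://106.75.72.168:2222' + path)
    print(r.text)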

Building a wordlist, works on any version


    with open('wordlist.txt', 'w+') as f:
        for i in range(0, 10):
            for j in range(0, 10):
                for k in range(0, 10):
                    for h in range(0, 10):
                        f.write('1391040' + str(i) + str(j) + str(k) + str(h) + '\n')
    # the with statement closes the file automatically, so no explicit f.close() is needed

This writes a wordlist to the local file wordlist.txt: numbers of the form 1391040xxxx, with every combination of the last four digits generated.
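
The four nested loops just enumerate 0000 through 9999, so the same wordlist can be produced with a single zero-padded counter; a minimal equivalent sketch, assuming the same 1391040 prefix and output file:

    with open('wordlist.txt', 'w') as f:
        for n in range(10000):
            # {:04d} zero-pads the counter to four digits, e.g. 7 -> '0007'
            f.write('1391040{:04d}\n'.format(n))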

Logging in to a website with Python, 3.x

    from urllib import request          # the request module inside urllib
    from urllib import parse            # the parse module does the URL encoding
    from urllib.request import urlopen

    values = {'zhanghao': 'admin', 'mima': 'admin'}
    # The request body cannot be a str, it must be bytes. parse.urlencode converts the dict
    # into URL-encoded parameters, and encode('utf-8') turns them into bytes that can be
    # attached to the HTTP request.
    data = parse.urlencode(values).encode('utf-8')
    url = 'http://127.0.0.1/login.php'
    req = request.Request(url, data)    # renamed from "request" so it does not shadow the module
    response = urlopen(req)
    print(response.read().decode())     # decode() is needed to render the page text (e.g. Chinese) correctly
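
The same login is shorter with the requests library used earlier, since it handles the form encoding itself; a minimal sketch assuming the same zhanghao/mima fields and the local login.php endpoint:

    import requests

    # requests URL-encodes the form dict and sets the Content-Type header for us
    resp = requests.post('http://127.0.0.1/login.php',
                         data={'zhanghao': 'admin', 'mima': 'admin'})
    print(resp.text)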

A collection of snippets, 3.x version

    import urllib.parse    # urlencode
    import urllib.request  # Request, urlopen

    '''
    response = urllib.request.urlopen("http://127.0.0.1")
    print(response.read().decode())
    '''

    # set headers and data
    '''
    url = 'http://127.0.0.1/login.php'
    user_agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36'
    values = {'zhanghao': 'admin', 'mima': 'admin'}
    headers = {'User-Agent': user_agent}
    data = urllib.parse.urlencode(values).encode('utf-8')
    request = urllib.request.Request(url, data, headers)
    response = urllib.request.urlopen(request)
    page = response.read()
    print(page.decode())
    '''

    # set a proxy, to avoid being blocked because one IP made too many requests
    '''
    enable_proxy = True
    proxy_handler = urllib.request.ProxyHandler({"http": 'http://some-proxy.com:8080'})
    null_proxy_handler = urllib.request.ProxyHandler({})
    if enable_proxy:
        opener = urllib.request.build_opener(proxy_handler)
    else:
        opener = urllib.request.build_opener(null_proxy_handler)
    urllib.request.install_opener(opener)
    '''

    # set a timeout
    # on the difference between urlopen and Request: https://blog.csdn.net/tao3741/article/details/75207879
    '''
    response = urllib.request.urlopen('http://127.0.0.1', timeout=10)
    print(response.read().decode())
    '''

    # POST, PUT and other request methods
    '''
    request = urllib.request.Request(url, data, headers)      # POST: put the body in data
    request = urllib.request.Request('http://127.0.0.1?a=1')  # GET: put the parameters in the URL
    request = urllib.request.Request(url, data=data)          # PUT and DELETE:
    request.get_method = lambda: 'PUT'  # or 'DELETE'         # override the method name
    '''

    # use DebugLog to print the sent and received traffic to the screen
    '''
    httpHandler = urllib.request.HTTPHandler(debuglevel=1)
    httpsHandler = urllib.request.HTTPSHandler(debuglevel=1)
    opener = urllib.request.build_opener(httpHandler, httpsHandler)
    urllib.request.install_opener(opener)
    response = urllib.request.urlopen('http://127.0.0.1', timeout=5)
    '''

    # inspecting the attributes of a URLError
    '''
    request = urllib.request.Request('http://127.0.0.999')
    try:
        urllib.request.urlopen(request)
    except urllib.error.URLError as e:
        if hasattr(e, "code"):      # hasattr checks whether the object has the given attribute
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    else:
        print("OK")
    '''

Reference: https://www.cnblogs.com/dplearning/p/4854746.html
