记录--函数

抓取页面编码格式


import requests
def monkey_patch():
    """Patch ``requests.models.Response.content`` to fix mis-labelled bodies.

    After patching, reading ``content`` on a response whose ``encoding`` is
    ``'ISO-8859-1'`` looks for a charset declared inside the body (falling
    back to ``apparent_encoding``), stores the result in ``self.encoding``,
    and replaces the body with the same text re-encoded as UTF-8.

    Note that ``self.encoding`` ends up naming the *detected source*
    encoding, while the bytes returned by ``content`` are UTF-8.
    """
    prop = requests.models.Response.content

    def content(self):
        _content = prop.fget(self)
        # Nothing to transcode for an empty or missing body.
        if _content and self.encoding == 'ISO-8859-1':
            # Charset declarations are plain ASCII, so search an ASCII view
            # of the body; this keeps the text-based patterns in
            # get_encodings_from_content usable when the body is bytes.
            encodings = requests.utils.get_encodings_from_content(
                _content.decode('ascii', 'replace'))
            if encodings:
                self.encoding = encodings[0]
            else:
                # apparent_encoding reads self.content, i.e. this very
                # property. Clear the trigger value first so the nested
                # access returns the raw body instead of recursing forever.
                self.encoding = None
                # Detection may yield None; fall back to UTF-8 in that case.
                self.encoding = self.apparent_encoding or 'utf-8'
            _content = _content.decode(self.encoding, 'replace').encode('utf8', 'replace')
            self._content = _content
        return _content

    requests.models.Response.content = property(content)


monkey_patch()

批量处理url格式

import urlparse
# Reduce every URL listed in b.txt (one per line) to "scheme://hostname",
# echo it, and write it on its own line to new_url.txt.
# Both files are managed by the with-statement so they are always closed.
with open('new_url.txt', 'w+') as f, open('b.txt') as b:
    for i in b:
        # strip() removes the complete line ending; chaining strip('\r') and
        # strip('\n') left the '\r' of a "\r\n" ending in place.
        i = i.strip()
        if not i:
            continue
        url = urlparse.urlparse(i)
        # Lines without a scheme or host cannot be rebuilt (hostname is None),
        # so skip them rather than fail on the concatenation below.
        if not url.scheme or not url.hostname:
            continue
        url = url.scheme + "://" + url.hostname
        print(url)
        f.write(url)
        f.write('\n')

判断某个字符串是否包含在另一个字符串中

import urllib2
#----------------------------------------------------------------------
def waf_url(url):
    """Placeholder that accepts ``url`` and does nothing with it yet.

    No body has been written, so every call returns ``None``.
    """
    return None
# Fetch a page, show its response headers and its Server header, then report
# whether the header text contains "date" (compared case-insensitively).
resp = urllib2.urlopen('http://www.xxxxx.cn')
try:
    header_text = str(resp.headers)
    print(header_text)
    # .get() yields None when the server sends no Server header, where
    # indexing with ['Server'] would raise KeyError.
    print(resp.headers.get('Server'))
finally:
    # The body is never read, so release the connection right away.
    resp.close()

if 'DATE' in header_text.upper():
    print('yes')
else:
    print('NO')

# Substring membership test: print a marker when "2" occurs in the URL.
wwwww = 'http://www.22222.com'
if "2" in wwwww:
    # The print must be indented under the if; at column 0 it is a syntax error.
    print('111111')


你可能感兴趣的:(记录--函数)