Python【requests】封装自用

文章目录

  • function版
  • class版
    • request.py
    • conf.py
    • mysql.py

function版

import requests
from random import choice
from time import strftime

# Pool of User-Agent header values; the request helpers in this listing pick
# one at random (``choice(ua)``) for every attempt.
# NOTE(review): the first entry has no closing ')' and looks truncated -- confirm.
ua = ['Mozilla/5.0(compatible;MSIE9.0;WindowsNT6.1;Trident/5.0;',
      'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;360SE)']

def write_log(*args):
    """Append the ``repr`` of every argument to today's log file.

    The file is named ``<YYYYMMDD>.txt`` (relative to the current working
    directory) and is opened in append mode as UTF-8.  Each argument is
    written on its own line and a blank line terminates the record.

    :param args: arbitrary objects to record (URL, exception, retry count...).
    """
    with open('%s.txt' % strftime('%Y%m%d'), 'a', encoding='utf-8') as f:
        # repr() rather than '%r' % a: the % operator unpacks a tuple operand,
        # so a tuple argument would raise TypeError or be logged incorrectly.
        f.writelines('%s\n' % repr(a) for a in args)
        f.write('\n')

def get(url, encode='utf-8', times=10):
    """Fetch *url* with HTTP GET and return the decoded body, retrying on failure.

    Every attempt uses a randomly chosen User-Agent and a 30 second timeout.
    An exception or a non-200 status is recorded with ``write_log`` and the
    request is tried again until the retry budget is used up.

    :param url: address to request.
    :param encode: encoding assigned to the response before reading its text.
    :param times: number of retries allowed after the first attempt; a
        negative value means no request is made at all.
    :return: the response text, or ``''`` when every attempt failed.
    """
    remaining = times
    while remaining >= 0:
        try:
            resp = requests.get(url, headers={'User-Agent': choice(ua)}, timeout=30)
        except Exception as exc:
            write_log(url, exc, remaining)
        else:
            if resp.status_code == 200:
                resp.encoding = encode
                return resp.text
            write_log(url, resp.status_code, remaining)
        remaining -= 1
    return ''

def post(url, data, encode='utf-8', times=10):
    """Send *data* to *url* with HTTP POST and return the decoded body.

    Every attempt uses a randomly chosen User-Agent and a 30 second timeout.
    An exception or a non-200 status is recorded with ``write_log`` (together
    with the payload) and the request is tried again until the retry budget
    is used up.

    :param url: address to request.
    :param data: payload handed to ``requests.post`` as ``data``.
    :param encode: encoding assigned to the response before reading its text.
    :param times: number of retries allowed after the first attempt; a
        negative value means no request is made at all.
    :return: the response text, or ``''`` when every attempt failed.
    """
    remaining = times
    while remaining >= 0:
        try:
            resp = requests.post(url, headers={'User-Agent': choice(ua)}, data=data, timeout=30)
        except Exception as exc:
            write_log(url, data, exc, remaining)
        else:
            if resp.status_code == 200:
                resp.encoding = encode
                return resp.text
            write_log(url, data, resp.status_code, remaining)
        remaining -= 1
    return ''

def get_img(url, times=3):
    """Download an image and return its raw bytes, retrying on failure.

    Every attempt uses a randomly chosen User-Agent and a 30 second timeout.
    An exception or a non-200 status is recorded with ``write_log`` before
    the next attempt.

    :param url: address of the image.
    :param times: number of retries allowed after the first attempt; a
        negative value means no request is made at all.
    :return: the response body as bytes, or ``b''`` when every attempt failed.
    """
    remaining = times
    while remaining >= 0:
        try:
            resp = requests.get(url, headers={'User-Agent': choice(ua)}, timeout=30)
        except Exception as exc:
            write_log(url, exc, remaining)
        else:
            if resp.status_code == 200:
                return resp.content
            write_log(url, resp.status_code, remaining)
        remaining -= 1
    return b''

class版

Python【requests】封装自用_第1张图片

request.py

from time import time, sleep, strftime
from requests import get, post
from random import choice
from chardet import detect  # 自动检测网页编码
from core.conf import REQUEST as R, Color as C


def write_log(*args, **kwargs):
    """Print a failure record in red and append it to today's log file.

    The file is named ``<YYYYMMDD>.txt`` (relative to the current working
    directory) and is opened in append mode as UTF-8.  Positional arguments
    are written one ``repr`` per line, keyword arguments as
    ``repr(key)<TAB>repr(value)`` lines, and a blank line ends the record.

    :param args: positional objects to record (typically the URL).
    :param kwargs: named details to record (error, status_code, data...).
    """
    C.red(args, kwargs)
    with open('%s.txt' % strftime('%Y%m%d'), 'a', encoding='utf-8') as f:
        for i in args:
            # repr() rather than '%r' % i: the % operator unpacks a tuple
            # operand, so a tuple argument would raise TypeError or be
            # logged incorrectly.
            f.write('%s\n' % repr(i))
        for k, v in kwargs.items():
            f.write('%r\t%r\n' % (k, v))
        f.write('\n')


def get_proxies():
    """Return the proxy mapping passed to each request.

    Currently always a fresh empty dict, i.e. no proxy is configured.
    """
    proxies = dict()
    return proxies


class Response:
    """Empty stand-in handed back when a request ran out of retries.

    It offers only ``text`` and ``json()``, both yielding empty values.
    """

    # Body text of the placeholder: always the empty string.
    text = str()

    @classmethod
    def json(cls):
        """Return an empty dict in place of a decoded JSON body."""
        return {}


class Request:
    """HTTP helper that retries GET/POST requests with a random User-Agent.

    The User-Agent pool, timeout, default retry count and sleep bound are read
    from ``R``.  Headers live on the instance and persist between calls; a
    successful ``get`` stores its URL as the ``Referer`` for later requests.
    """

    def __init__(self, header=None):
        """Record the creation time and set up the instance headers.

        :param header: optional mapping of initial request headers.
        """
        self.t = time()  # creation time, reported by __del__
        # Copy the mapping: User-Agent/Referer are rewritten on every request
        # and must not leak into a dict that belongs to the caller.
        self.headers = dict(header) if header else dict()

    def __del__(self):
        """Print, in yellow, how many minutes this instance has existed."""
        minutes = (time() - self.t) / 60
        C.yellow('%.2f分钟' % minutes)

    def _send(self, method, url, times, body, kwargs, set_referer):
        """Run the shared retry loop behind ``get`` and ``post``.

        :param method: the ``requests`` function to call (``get`` or ``post``).
        :param url: address to request.
        :param times: retries allowed after the first attempt; negative means
            no request is made.
        :param body: extra fields sent with the request and repeated in every
            log record (``{'data': ...}`` for POST, empty for GET).
        :param kwargs: caller-supplied keyword arguments forwarded to *method*.
        :param set_referer: when true, remember *url* as ``Referer`` on success.
        :return: the successful response, or an empty ``Response``.
        """
        request_kwargs = dict(body)
        request_kwargs.update(kwargs)
        attempts_left = times
        while attempts_left >= 0:
            sleep(choice(range(R.sleep)))  # random pause before each attempt
            self.headers['User-Agent'] = choice(R.ua)
            try:
                r = method(
                    url=url,
                    headers=self.headers,
                    timeout=R.timeout,
                    proxies=get_proxies(),
                    **request_kwargs
                )
            except Exception as error:
                failure = {'error': error}
            else:
                if r.status_code == 200:
                    # Let chardet decide the encoding from the raw bytes.
                    r.encoding = detect(r.content)['encoding']
                    if set_referer:
                        self.headers['Referer'] = url
                    return r
                failure = {'status_code': r.status_code}
            # Log record layout: body fields, then the failure, then kwargs.
            log_fields = dict(body)
            log_fields.update(failure)
            log_fields.update(kwargs)
            write_log(url, **log_fields)
            attempts_left -= 1
        return Response()  # empty response: every attempt failed

    def get(self, url, times=R.times, **kwargs):
        """GET *url*, retrying on exceptions and non-200 statuses.

        :param url: address to request.
        :param times: retries allowed after the first attempt.
        :param kwargs: extra keyword arguments forwarded to ``requests.get``.
        :return: the response (encoding set from chardet), or an empty
            ``Response`` when every attempt failed.
        """
        return self._send(get, url, times, {}, kwargs, True)

    def post(self, url, data, times=R.times, **kwargs):
        """POST *data* to *url*, retrying on exceptions and non-200 statuses.

        :param url: address to request.
        :param data: payload handed to ``requests.post`` as ``data``.
        :param times: retries allowed after the first attempt.
        :param kwargs: extra keyword arguments forwarded to ``requests.post``.
        :return: the response (encoding set from chardet), or an empty
            ``Response`` when every attempt failed.
        """
        return self._send(post, url, times, {'data': data}, kwargs, False)


if __name__ == '__main__':
    # Demo: fetch two article-list pages and print details of each response,
    # one colour per field.
    r = Request()
    for page in (2, 3):
        response = r.get('https://blog.csdn.net/Yellow_python/article/list/%d?' % page)
        for paint, attr in (
                (C.green, 'url'),
                (C.cyan, 'encoding'),
                (C.blue, 'cookies'),
                (C.purple, 'headers'),
                (C.darkcyan, 'request'),
                (C.yellow, 'elapsed'),  # request duration
        ):
            # Attributes are read one at a time, in the listed order.
            paint(getattr(response, attr))
        C.underline(r.headers)
        print()

conf.py

class REQUEST:
    """Settings read by the ``Request`` helper (imported there as ``R``)."""

    # Pool of User-Agent strings; one is chosen at random for every attempt.
    # NOTE(review): the first entry has no closing ')' and looks truncated -- confirm.
    ua = [
        'Mozilla/5.0(compatible;MSIE9.0;WindowsNT6.1;Trident/5.0;',  # IE 9.0
        'Mozilla/4.0(compatible;MSIE8.0;WindowsNT6.0;Trident/4.0)',  # IE 8.0
        'Mozilla/4.0(compatible;MSIE7.0;WindowsNT6.0)',  # IE 7.0
        'Mozilla/4.0(compatible;MSIE6.0;WindowsNT5.1)',  # IE 6.0
        'Mozilla/5.0(Macintosh;IntelMacOSX10.6;rv:2.0.1)Gecko/20100101Firefox/4.0.1',  # Firefox 4.0.1 - Mac
        'Mozilla/5.0(WindowsNT6.1;rv:2.0.1)Gecko/20100101Firefox/4.0.1',  # Firefox 4.0.1 - Windows
        'Opera/9.80(Macintosh;IntelMacOSX10.6.8;U;en)Presto/2.8.131Version/11.11',  # Opera 11.11 - Mac
        'Opera/9.80(WindowsNT6.1;U;en)Presto/2.8.131Version/11.11',  # Opera 11.11 - Windows
        'Mozilla/5.0(Macintosh;IntelMacOSX10_7_0)AppleWebKit/535.11(KHTML,likeGecko)Chrome/17.0.963.56Safari/535.11',
        'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;Maxthon2.0)',  # Maxthon
        'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;TencentTraveler4.0)',  # Tencent Traveler (TT)
        'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;360SE)',  # 360 Browser
        'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;TheWorld)',  # TheWorld 3.x
        'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',
    ]
    # Exclusive upper bound, in seconds, of the random pause before each
    # attempt (a value is drawn from range(sleep)); must be >= 1.
    sleep = 1  # SLEEP >= 1
    # Timeout passed to every requests call.
    timeout = 10
    # Default number of retries after the first attempt.
    times = 10


class MySQL:
    """Connection settings for the MySQL database used by the crawler."""

    # NOTE(review): credentials are hard-coded in source.
    host, user = 'localhost', 'root'
    password = 'yellow'
    charset = 'utf8'
    db = 'zzz'  # database name


class Color:
    """Print values to stdout wrapped in ANSI colour/style escape codes."""

    @classmethod
    def _wrap_colour(cls, colour, *args):
        """Print each argument on its own line between *colour* and a reset code."""
        for item in args:
            print('{}{}\033[0m'.format(colour, item))

    @classmethod
    def blue(cls, *args):
        """Print every argument in bright blue."""
        return cls._wrap_colour('\033[94m', *args)

    @classmethod
    def bold(cls, *args):
        """Print every argument in bold."""
        return cls._wrap_colour('\033[1m', *args)

    @classmethod
    def cyan(cls, *args):
        """Print every argument in bright cyan."""
        return cls._wrap_colour('\033[96m', *args)

    @classmethod
    def darkcyan(cls, *args):
        """Print every argument in dark cyan."""
        return cls._wrap_colour('\033[36m', *args)

    @classmethod
    def green(cls, *args):
        """Print every argument in bright green."""
        return cls._wrap_colour('\033[92m', *args)

    @classmethod
    def purple(cls, *args):
        """Print every argument in bright magenta."""
        return cls._wrap_colour('\033[95m', *args)

    @classmethod
    def red(cls, *args):
        """Print every argument in bright red."""
        return cls._wrap_colour('\033[91m', *args)

    @classmethod
    def underline(cls, *args):
        """Print every argument underlined."""
        return cls._wrap_colour('\033[4m', *args)

    @classmethod
    def yellow(cls, *args):
        """Print every argument in bright yellow."""
        return cls._wrap_colour('\033[93m', *args)

mysql.py

import pymysql, re
from time import strftime
from core.conf import MySQL as M, Color as C


class Mysql:
    """Small pymysql wrapper bound to a single table.

    Connection settings come from ``M``; executed statements and errors are
    echoed through ``C``.
    """

    def __init__(self, table):
        """Open the connection and a cursor.

        :param table: name of the table that ``insert``/``update`` target.
        """
        # Keyword arguments: PyMySQL 1.0+ no longer accepts positional
        # connection parameters.
        self.db = pymysql.connect(host=M.host, user=M.user, password=M.password,
                                  charset=M.charset, database=M.db)
        self.cursor = self.db.cursor()
        self.table = table

    def __del__(self):
        """Close the cursor and the connection if they were created."""
        # __init__ may have failed before either attribute was assigned.
        for name in ('cursor', 'db'):
            resource = getattr(self, name, None)
            if resource is not None:
                resource.close()

    @staticmethod
    def _param(value):
        """Return *value* in a form safe to bind as one SQL parameter.

        Containers are stored as their string form; the driver would
        otherwise expand them into a multi-value SQL list or reject them.
        """
        if isinstance(value, (list, tuple, set, frozenset, dict)):
            return str(value)
        return value

    def fetchall(self, query):
        """Execute *query* and return all rows."""
        self.cursor.execute(query)
        return self.cursor.fetchall()

    def commit(self, sql, args=None):
        """Execute one statement and commit it; errors are printed, not raised.

        :param sql: statement text, with ``%s`` placeholders when *args* is given.
        :param args: optional sequence of values bound to the placeholders and
            escaped by the driver.
        """
        statement = sql
        try:
            if args is not None:
                # Render the final statement once so that exactly what is
                # executed is also what gets printed.
                statement = self.cursor.mogrify(sql, args)
            self.cursor.execute(statement)
            self.db.commit()
            C.yellow(statement)
        except Exception as error:
            C.red(error, statement)

    def insert(self, dt):
        """Insert one row built from *dt* into the table.

        ``dt['collect_date']`` is set to today's date (the dict is modified in
        place) and keys whose value is ``None`` are skipped.  Values are bound
        as parameters, so they are stored unmodified instead of having
        quotes, commas, parentheses and semicolons stripped.

        :param dt: mapping of column name to value.
        """
        dt['collect_date'] = strftime('%Y-%m-%d')
        columns = [k for k, v in dt.items() if v is not None]
        values = [self._param(dt[k]) for k in columns]
        sql = 'INSERT %s (%s) VALUES (%s);' % (
            self.table, ','.join(columns), ','.join(['%s'] * len(columns)))
        self.commit(sql, values)

    def update(self, dt_update, dt_condition):
        """Update the rows matching every ``column=value`` pair in *dt_condition*.

        Values are bound as parameters; ``None`` is sent as SQL ``NULL`` (note
        that ``column=NULL`` in the WHERE clause matches no row).

        :param dt_update: mapping of column name to new value.
        :param dt_condition: mapping of column name to required value.
        """
        assignments = ','.join('%s=%%s' % k for k in dt_update)
        conditions = ' AND '.join('%s=%%s' % k for k in dt_condition)
        sql = 'UPDATE %s SET %s WHERE %s;' % (self.table, assignments, conditions)
        values = [self._param(v) for v in dt_update.values()]
        values.extend(self._param(v) for v in dt_condition.values())
        self.commit(sql, values)

你可能感兴趣的:(爬虫)