几点要素:
1.存储模块的搭建:
需要存储的内容无非就是账号,密码,cookies这三样。
cookies可以存为json字符串格式在redis里,我们需要将cookies和账号对应,以及账号和密码的对应,所以在redis中我们将这两种映射存为hash结构。
account这个hash的key是账号,value是密码;cookies这个hash的key同样是账号,value是cookies的json字符串。
这里考虑到可扩展性,将hash的名称做了2级分类,即“类别:网站”,例如account:weibo和cookies:weibo;若要用于例如知乎的账号和cookies存储,再添加account:zhihu、cookies:zhihu等即可。
创建存储模块
import random
import redis
class Redisclient():
    """Wrapper around a single Redis hash named ``type:website``.

    The fields of the hash are usernames; what the values hold depends on the
    ``type`` chosen by the caller.
    """

    def __init__(self, type, website, host='192.168.37.38', port='6379'):
        """Create the Redis client and remember which hash to operate on.

        Args:
            type: First part of the hash name.
            website: Second part of the hash name.
            host: Redis server host.
            port: Redis server port.
        """
        self.type = type
        self.website = website
        # decode_responses=True makes the client return str instead of bytes.
        self.db = redis.StrictRedis(
            host=host,
            port=port,
            password='',
            decode_responses=True,
        )

    def name(self):
        """Return the hash name, i.e. ``type`` and ``website`` joined by ':'."""
        return f'{self.type}:{self.website}'

    def set(self, username, value):
        """Write ``value`` under the field ``username``; return HSET's reply."""
        hash_name = self.name()
        return self.db.hset(hash_name, username, value)

    def get(self, username):
        """Look up the value stored under the field ``username``."""
        hash_name = self.name()
        return self.db.hget(hash_name, username)

    def delete(self, username):
        """Remove the field ``username``; return HDEL's reply."""
        hash_name = self.name()
        return self.db.hdel(hash_name, username)

    def count(self):
        """Return the number of fields in the hash."""
        hash_name = self.name()
        return self.db.hlen(hash_name)

    def random(self):
        """Return one randomly chosen value from the hash."""
        stored_values = self.db.hvals(self.name())
        return random.choice(stored_values)

    def username(self):
        """Return every field (username) of the hash."""
        hash_name = self.name()
        return self.db.hkeys(hash_name)

    def all(self):
        """Return the complete field -> value mapping of the hash."""
        hash_name = self.name()
        return self.db.hgetall(hash_name)
这里创建了一个redis数据库类,其初始化函数用来传入存储类别(account还是cookies)、网站类别,以及redis的主机和端口等。
然后将对该hash的读写操作(写入、读取、删除、计数、随机取值、获取全部key、获取全部键值对)进行了封装。
2.账号入库
from cunchu import Redisclient
class zhanghaoruku():
    """Import account credentials from a text file into the account hash."""

    def duquzhanghao(self, path='zhanghao.txt'):
        """Yield ``(username, password)`` pairs parsed from the account file.

        Every useful line has the form ``username----password``. Blank lines
        and lines without the ``----`` separator are skipped.

        Args:
            path: Account file to read. Defaults to ``zhanghao.txt`` in the
                current working directory.

        Yields:
            tuple: The username and the password, without the line break.
        """
        # Read everything up front so the file is closed before any pair is
        # handed to the consumer.
        with open(path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
        for line in lines:
            # str is immutable, so the stripped result must be kept; dropping
            # it would leave a trailing '\n' on every password.
            line = line.rstrip('\n')
            fields = line.split('----')
            if len(fields) == 1:
                # No separator on this line (blank or malformed): skip it.
                continue
            yield fields[0], fields[1]

    def ruku(self):
        """Store every pair produced by ``duquzhanghao`` in ``account:weibo``."""
        db = Redisclient('account', 'weibo')
        for u, p in self.duquzhanghao():
            print(f'正在存储账号:{u},密码:{p}')
            db.set(u, p)
if __name__ == '__main__':
    # Script entry point: load the accounts from the text file into Redis.
    zhanghaoruku().ruku()
这一步就是读取txt文件里的账号(每行格式为“账号----密码”)并存入数据库。
3.cookies生成模块的搭建
from selenium import webdriver
import time
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from cunchu import Redisclient
import json
class new_cookies():
    """Generate login cookies for stored accounts that have none yet."""

    def __init__(self):
        """Open the ``account:weibo`` and ``cookies:weibo`` hashes."""
        self.account_db = Redisclient('account', 'weibo')
        self.cookies_db = Redisclient('cookies', 'weibo')

    def duquzhanghao(self):
        """Yield every username stored in the account hash."""
        yield from self.account_db.username()

    def baocun(self, username, cookies):
        """Store ``cookies`` for ``username``, serialized as a JSON string."""
        self.cookies_db.set(username, json.dumps(cookies))

    def n_cookies(self, username):
        """Log in through a Chrome window and save the resulting cookies.

        The username and password are typed automatically. After the
        verification-code field is clicked, the method waits up to 100
        seconds for the logged-in page, leaving time to finish the
        verification by hand in the browser window.

        Args:
            username: Account to log in with; its password is read from the
                account hash.
        """
        password = self.account_db.get(username)
        driver = webdriver.Chrome()
        try:
            driver.delete_all_cookies()
            time.sleep(1)
            driver.get('https://weibo.com/')
            login_box = WebDriverWait(driver, 20, 0.5).until(
                EC.visibility_of_element_located((By.ID, 'loginname'))
            )
            login_box.send_keys(username)
            # find_element(By.X, ...) works on Selenium 3 and 4; the
            # find_element_by_* helpers were deprecated in Selenium 4 and
            # later removed.
            driver.find_element(By.NAME, 'password').send_keys(password)
            driver.find_element(
                By.XPATH,
                '//*[@id="pl_login_form"]/div/div[3]/div[6]/a/span',
            ).click()
            time.sleep(1)
            driver.find_element(By.NAME, 'verifycode').click()
            # 'B_index' is a class that only exists on the page shown after
            # login; once it is visible the login is considered complete and
            # the cookies can be collected.
            WebDriverWait(driver, 100, 0.5).until(
                EC.visibility_of_element_located((By.CLASS_NAME, 'B_index'))
            )
            self.baocun(username, driver.get_cookies())
        finally:
            # quit() also ends the chromedriver process, and running it in
            # ``finally`` keeps a failed login from leaking a browser.
            driver.quit()

    def main(self):
        """Generate cookies for every account missing from the cookies hash."""
        # Hash fields are unique, so one snapshot of the existing cookie
        # owners is enough; no need to query Redis again for each account.
        have_cookies = set(self.cookies_db.username())
        for user in self.duquzhanghao():
            print(user)
            if user not in have_cookies:
                print(f'正在生成cookies...用户名为:{user}')
                self.n_cookies(user)
if __name__ == '__main__':
    # Script entry point: create cookies for accounts that lack them.
    new_cookies().main()
这里我们创建了一个获取cookies的类,在初始化方法中连接account和cookies两类数据库。
首先从account数据库中获取账号,和cookies数据库中的数据做对比,如果账号不在cookies数据库中,就进行cookies的获取。
由于微博登录验证经常改变,一会滑块一会点击验证码,所以这里的验证步骤需要手动完成。
4.验证模块
import json
import requests
from cunchu import Redisclient
from requests.exceptions import ConnectionError
class tester():
    """Base class for validating the stored cookies of one website.

    Subclasses implement ``test``; ``run`` calls it for every stored entry.
    """

    def __init__(self, website):
        """Open the cookies and account hashes of ``website``."""
        self.website = website
        self.cookies_db = Redisclient('cookies', self.website)
        self.account_db = Redisclient('account', self.website)

    def test(self, username=None, cookies=None):
        """Validate one cookies entry; subclasses must override this.

        The parameters match what ``run`` passes in, so a subclass that
        forgets to override gets NotImplementedError rather than a TypeError
        about unexpected arguments. They are optional only so that a call
        without arguments keeps working.

        Args:
            username: Field name of the entry in the cookies hash.
            cookies: Value stored for that field.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def run(self):
        """Call ``test`` once for every entry in the cookies hash."""
        for username, cookies in self.cookies_db.all().items():
            self.test(username, cookies)
class Weibotester(tester):
    """Cookie validator for Weibo."""

    def __init__(self, website='weibo'):
        """Initialize the base class for ``website``."""
        # __init__ is overridden here, so the base initializer has to be
        # called explicitly to set up the two Redis handles.
        super().__init__(website)

    def test(self, username, cookies):
        """Check whether the stored cookies of ``username`` still work.

        Malformed cookies are deleted without sending a request. Otherwise a
        page that requires login is requested: status 200 means the cookies
        are valid; for any other status the operator is asked whether to
        delete them.

        Args:
            username: Field name of the entry in the cookies hash.
            cookies: JSON string holding a list of cookie dicts, each with
                ``name`` and ``value`` keys.
        """
        print('正在测试Cookies','用户名',username)
        try:
            # Flatten the list of cookie dicts into the name -> value mapping
            # accepted by requests.get().
            coo_dict = {coo['name']: coo['value'] for coo in json.loads(cookies)}
        except (TypeError, ValueError, KeyError):
            # ValueError covers malformed JSON (json.JSONDecodeError),
            # KeyError covers entries without 'name' or 'value'.
            print('Cookies不合法',username)
            self.cookies_db.delete(username)
            print('删除Cookies', username)
            # Nothing usable to send: stop instead of requesting with a
            # partial cookie set.
            return
        test_url='https://weibo.com/p/1005055091382550/follow?relate=fans&from=100505&wvr=6&mod=headfans&current=fans#place'
        try:
            # This page is only reachable when logged in.
            # allow_redirects=False keeps the redirect to the login page from
            # being followed, so a stale session does not show up as 200.
            # The timeout keeps one stalled request from blocking the run.
            response = requests.get(
                test_url,
                cookies=coo_dict,
                allow_redirects=False,
                timeout=10,
            )
        except (ConnectionError, requests.exceptions.Timeout) as e:
            print('发生异常', e.args)
            return
        if response.status_code == 200:
            print('Cookies有效',username,'\n-----------------------\n')
            return
        print(response.status_code, response.headers)
        print('Cookies失效', username)
        # Let the operator decide: an empty answer (just Enter) deletes.
        shanfou = input('是否删除该cookies?输入回车删除,输入n 不删除')
        if shanfou == '':
            self.cookies_db.delete(username)
            print('删除Cookies', username)
        else:
            print('没有删除该cookies')
if __name__ == '__main__':
    # Script entry point: validate every stored Weibo cookies entry.
    weibo_checker = Weibotester()
    weibo_checker.run()
考虑到模块的可扩展性,这里写了一个子类Weibotester继承父类tester;其他网站只需再写一个实现了test方法的子类即可。