功能:
要求:
根据不同的系统下载相应的geckodriver
将geckodriver放在firefox的文件夹下,如下图
项目效果展示:可以看到我的鼠标可是没有动的
from selenium import webdriver
import numpy as np
import time
# 1、登录csdn
# 模拟登录
def login(username="你的csdn账号", password="你的csdn密码",
          driver_path=r'C:\Program Files\Mozilla Firefox\geckodriver.exe'):
    """Open Firefox, fill in the CSDN login form and return the driver.

    Args:
        username: Text typed into the account field. The default is a
            placeholder and must be replaced with a real account.
        password: Text typed into the password field. The default is a
            placeholder as well.
        driver_path: Location of the geckodriver executable; it depends on
            where geckodriver was installed on this machine.

    Returns:
        The Firefox WebDriver instance, so later steps can keep using the
        same browser session.
    """
    # The default path is a raw string so its backslashes are never read as
    # escape sequences.
    drive = webdriver.Firefox(executable_path=driver_path)
    drive.get("https://passport.csdn.net/login?code=public")
    drive.maximize_window()
    # NOTE(review): presumably this link switches the dialog to the
    # account/password form — confirm against the live page layout.
    drive.find_element_by_xpath("/html/body/div[2]/div/div/div[1]/div[2]/div[5]/ul/li[2]/a").click()
    drive.find_element_by_xpath('//*[@id="all"]').send_keys(username)
    drive.find_element_by_xpath('//*[@id="password-number"]').send_keys(password)
    drive.find_element_by_xpath('/html/body/div[2]/div/div/div[1]/div[2]/div[5]/div/div[6]/div/button').click()
    # Fixed pause after submitting the form before handing the driver back.
    time.sleep(4)
    return drive
# Keep the returned driver: category_handle() reads the module-level name
# `drive`, so discarding the return value would leave it undefined.
drive = login()
运行结果:可能csdn能检测到我这是自动化,运行完有如下的一个验证页面,不要慌,自己登录进去即可
登录进去后就是如下图所示的页面,这时候已经获取了session,之后的代码如果操作页面,都会默认以已登录的状态进行
# 2、生成评论
def generate_review():
    """Return one canned comment chosen at random.

    The index is drawn with ``np.random.randint`` over the number of
    available comments, so every entry can be selected.
    """
    canned_comments = (
        '大佬,厉害厉害!!!',
        "看的出来博主用心了",
        "博客很全面,很仔细,点赞",
        "有所收获,感谢",
        "内容不错,三连了",
        '八千里路的云与月,全部在阅读中走进了我心灵的深处',
        "文章本天成,妙手偶得之。粹然无疵瑕,岂复须人为?",
        "学习了",
        "哈哈 很棒棒哦",
        "感觉很厉害的亚子",
        "好文章",
        "博主写的好好,一起加油吖,向Python大佬学习",
        "大佬牛啊,再下佩服",
        "支持,帮大忙了",
        "好像很腻害的样子大佬有空带带我",
        '博主写的非常好,有理论有例子,非常容易看懂,感谢博主!',
        "学起来,坚持~欢迎回访一起交流!",
        "看三遍也看不够的好文,mark~",
        "哈哈",
        '学到了,收藏一波~欢迎回访一起交流!',
        "看完大佬的文章,我的心情竟是久久不能平静。正如老子所云:大音希声,大象无形。我现在终于明白我缺乏的是什么了。",
        "牛蛙牛蛙,以后跟着大佬学习",
        "收藏了,趁着春节好好学习",
        '总结的太棒了,这是一位宝藏博主啊,mark一波,小弟期待您的关注哦。',
        "风雨过后天空几度平静的苍白,你走后我的心几分速bai跳的空白",
        "欲寄彩笺兼尺素,山长水阔知何处。——晏殊《鹊踏枝》",
        "红豆生南国,春来发几枝。",
        '人生苦短,我用python',
        "没有字母的日子,如同一堆温暖的木头,被人们记住,是一种大脑的烟雾。",
        "行到水穷处 坐看云起时",
        "三十功名尘与土,八千里路云和月",
        '所爱隔山海 山海不可平',
        "代码之路任重道远,愿跟博主努力习之。",
        "爱了爱了",
        "览君荆山作,江鲍堪动色",
        "快进我的收藏夹吃灰吧",
        "七月的风,八月的云。",
    )
    # randint(n) draws from [0, n), i.e. always a valid index.
    pick = np.random.randint(len(canned_comments))
    return canned_comments[pick]
def scroll2Bottom(drive, times=3):
    """Scroll the current page down via JavaScript, pausing after each pass.

    Args:
        drive: Object exposing ``execute_script`` (the WebDriver in use).
        times: Number of scroll-and-wait passes to perform.
    """
    scroll_script = "var q=document.documentElement.scrollTop=20000"
    for _ in range(times):
        # Each pass sets scrollTop to 20000 and then waits one second.
        drive.execute_script(scroll_script)
        time.sleep(1)
这一步就是我们模拟评论的核心代码喽。这部分比较多,我讲一下我的思路。
def category_handle(url, sleep_time):
    """Visit each article listed on a ranking page, then comment on and like it.

    Uses the module-level WebDriver ``drive``. Every article is opened in a
    new window, handled there, and the window is closed again before moving
    on to the next one.

    Args:
        url: Address of the ranking page whose articles are processed.
        sleep_time: Seconds to wait after each article before its window is
            closed.

    Returns:
        The number of articles visited. The same value is also stored in the
        module-level name ``articles``, which the calling script prints.
    """
    # Without this declaration the assignment at the end would only bind a
    # local name and the script-level counter would never change.
    global articles
    article_times = 0
    drive.get(url)
    scroll2Bottom(drive, 5)
    # find_elements (plural) returns every node matching the locator.
    all_articles = drive.find_elements_by_xpath('/html/body/div[2]/div/div/div/div/div/div/div/div[2]/div[1]/div/div[2]/div')
    for article in all_articles:
        article_times = article_times + 1
        a = article.find_element_by_tag_name("a")
        href = a.get_attribute('href')
        # Hand the URL over as a script argument instead of splicing it into
        # the JavaScript source, so quotes inside it cannot break the script.
        drive.execute_script("window.open(arguments[0]);", href)
        # Move the driver's focus to the newly opened window.
        current_window = drive.current_window_handle
        for handle in drive.window_handles:
            if handle != current_window:
                drive.switch_to_window(handle)
                break
        # Comment, then like.
        try:
            time.sleep(2)
            # 1. Open the comment box and type a canned comment.
            toolbox = drive.find_element_by_class_name("toolbox-list")
            toolbox.find_element_by_xpath("li[2]/a").click()
            drive.find_element_by_xpath('//*[@id="comment_content"]').send_keys(generate_review())
            time.sleep(2)
            # 1.1 Submit the comment. The button is located through the
            # rightBox container; other locators proved unreliable here.
            rightBox = drive.find_element_by_xpath('//*[@id="rightBox"]')
            rightBox.find_element_by_tag_name("input").click()
            # 2. Like the article.
            time.sleep(3)
            drive.find_element_by_xpath('//*[@id="is-like-span"]').click()
            print("自动化第" + str(article_times) + "个页面,该页面成功了")
        except Exception:
            # Best effort: a failing page is reported and skipped. Catching
            # Exception (not a bare except) lets Ctrl+C still stop the run.
            print("自动化第" + str(article_times) + "个页面,该页面失败了")
        finally:
            time.sleep(sleep_time)
            # Only close when focus really moved to another window;
            # otherwise this would close the listing page itself.
            if drive.current_window_handle != current_window:
                drive.close()
            drive.switch_to_window(current_window)
    articles = article_times
    return article_times
# Ranking pages to process, one per category.
url_lists = [
    "https://blog.csdn.net/rank/list?type=c%2Fc%2B%2B",
    "https://blog.csdn.net/rank/list?type=java",
    "https://blog.csdn.net/rank/list?type=javascript",
    "https://blog.csdn.net/rank/list?type=php",
    "https://blog.csdn.net/rank/list?type=python",
    "https://blog.csdn.net/rank/list?type=%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD",
    "https://blog.csdn.net/rank/list?type=%E5%8C%BA%E5%9D%97%E9%93%BE",
    "https://blog.csdn.net/rank/list?type=%E5%A4%A7%E6%95%B0%E6%8D%AE",
    "https://blog.csdn.net/rank/list?type=%E7%A7%BB%E5%8A%A8%E5%BC%80%E5%8F%91",
    "https://blog.csdn.net/rank/list?type=%E5%B5%8C%E5%85%A5%E5%BC%8F",
    "https://blog.csdn.net/rank/list?type=%E5%BC%80%E5%8F%91%E5%B7%A5%E5%85%B7",
    "https://blog.csdn.net/rank/list?type=%E6%95%B0%E6%8D%AE%E7%BB%93%E6%9E%84%E4%B8%8E%E7%AE%97%E6%B3%95",
    "https://blog.csdn.net/rank/list?type=%E6%B5%8B%E8%AF%95",
    "https://blog.csdn.net/rank/list?type=%E6%B8%B8%E6%88%8F",
    "https://blog.csdn.net/rank/list?type=%E7%BD%91%E7%BB%9C",
    "https://blog.csdn.net/rank/list?type=%E8%BF%90%E7%BB%B4",
]
articles = 0
# Pause in seconds after each article; set to 10 here.
sleep_time = 10
# Index into url_lists (0-based) of the first ranking page to process.
start_url_num = 6
time_start = time.time()
for i in range(start_url_num, len(url_lists)):
    # The two counters below are reset per page but not read anywhere in
    # the visible code.
    cur_review_success_times = 0
    cur_like_success_times = 0
    articles = 0
    ranking_url = url_lists[i]
    print("第" + str(i) + "个链接进行selenium :" + ranking_url)
    category_handle(ranking_url, sleep_time)
    print("访问共" + str(articles) + "条")
time_end = time.time()
# Total wall-clock duration of the run, reported in minutes.
elapsed_minutes = (time_end - time_start) / 60
print('all time cost', elapsed_minutes, 'min')
运行结果:
from selenium import webdriver
import numpy as np
import time
# 1、登录csdn
# 模拟登录
def login(username="你的csdn账号", password="你的csdn密码",
          driver_path=r'C:\Program Files\Mozilla Firefox\geckodriver.exe'):
    """Open Firefox, fill in the CSDN login form and return the driver.

    Args:
        username: Text typed into the account field. The default is a
            placeholder; the script cannot run as-is without a real account.
        password: Text typed into the password field. The default is a
            placeholder as well.
        driver_path: Location of the geckodriver executable; it depends on
            where geckodriver was installed on this machine.

    Returns:
        The Firefox WebDriver instance, so later steps can keep using the
        same browser session.
    """
    # The default path is a raw string so its backslashes are never read as
    # escape sequences.
    drive = webdriver.Firefox(executable_path=driver_path)
    drive.get("https://passport.csdn.net/login?code=public")
    drive.maximize_window()
    # NOTE(review): presumably this link switches the dialog to the
    # account/password form — confirm against the live page layout.
    drive.find_element_by_xpath("/html/body/div[2]/div/div/div[1]/div[2]/div[5]/ul/li[2]/a").click()
    # Real credentials are required here; the placeholders will not log in.
    drive.find_element_by_xpath('//*[@id="all"]').send_keys(username)
    drive.find_element_by_xpath('//*[@id="password-number"]').send_keys(password)
    drive.find_element_by_xpath('/html/body/div[2]/div/div/div[1]/div[2]/div[5]/div/div[6]/div/button').click()
    # Fixed pause after submitting the form before handing the driver back.
    time.sleep(4)
    return drive
# Log in once and keep the driver in the module-level name `drive`, which
# category_handle() reads directly.
drive = login()
# 2、生成评论
def generate_review():
    """Return one canned comment chosen at random.

    The index is drawn with ``np.random.randint`` over the number of
    available comments, so every entry can be selected.
    """
    canned_comments = (
        '大佬,厉害厉害!!!',
        "看的出来博主用心了",
        "博客很全面,很仔细,点赞",
        "有所收获,感谢",
        "内容不错,三连了",
        '八千里路的云与月,全部在阅读中走进了我心灵的深处',
        "文章本天成,妙手偶得之。粹然无疵瑕,岂复须人为?",
        "学习了",
        "哈哈 很棒棒哦",
        "感觉很厉害的亚子",
        "好文章",
        "博主写的好好,一起加油吖,向Python大佬学习",
        "大佬牛啊,再下佩服",
        "支持,帮大忙了",
        "好像很腻害的样子大佬有空带带我",
        '博主写的非常好,有理论有例子,非常容易看懂,感谢博主!',
        "学起来,坚持~欢迎回访一起交流!",
        "看三遍也看不够的好文,mark~",
        "哈哈",
        '学到了,收藏一波~欢迎回访一起交流!',
        "看完大佬的文章,我的心情竟是久久不能平静。正如老子所云:大音希声,大象无形。我现在终于明白我缺乏的是什么了。",
        "牛蛙牛蛙,以后跟着大佬学习",
        "收藏了,趁着春节好好学习",
        '总结的太棒了,这是一位宝藏博主啊,mark一波,小弟期待您的关注哦。',
        "风雨过后天空几度平静的苍白,你走后我的心几分速bai跳的空白",
        "欲寄彩笺兼尺素,山长水阔知何处。——晏殊《鹊踏枝》",
        "红豆生南国,春来发几枝。",
        '人生苦短,我用python',
        "没有字母的日子,如同一堆温暖的木头,被人们记住,是一种大脑的烟雾。",
        "行到水穷处 坐看云起时",
        "三十功名尘与土,八千里路云和月",
        '所爱隔山海 山海不可平',
        "代码之路任重道远,愿跟博主努力习之。",
        "爱了爱了",
        "览君荆山作,江鲍堪动色",
        "快进我的收藏夹吃灰吧",
        "七月的风,八月的云。",
    )
    # randint(n) draws from [0, n), i.e. always a valid index.
    pick = np.random.randint(len(canned_comments))
    return canned_comments[pick]
def scroll2Bottom(drive, times=3):
    """Scroll the current page down via JavaScript, pausing after each pass.

    Args:
        drive: Object exposing ``execute_script`` (the WebDriver in use).
        times: Number of scroll-and-wait passes to perform.
    """
    scroll_script = "var q=document.documentElement.scrollTop=20000"
    for _ in range(times):
        # Each pass sets scrollTop to 20000 and then waits one second.
        drive.execute_script(scroll_script)
        time.sleep(1)
# 3、主要内容
def category_handle(url, sleep_time):
    """Visit each article listed on a ranking page, then comment on and like it.

    Uses the module-level WebDriver ``drive``. Every article is opened in a
    new window, handled there, and the window is closed again before moving
    on to the next one.

    Args:
        url: Address of the ranking page whose articles are processed.
        sleep_time: Seconds to wait after each article before its window is
            closed.

    Returns:
        The number of articles visited. The same value is also stored in the
        module-level name ``articles``, which the calling script prints.
    """
    # Without this declaration the assignment at the end would only bind a
    # local name and the script-level counter would never change.
    global articles
    article_times = 0
    drive.get(url)
    scroll2Bottom(drive, 5)
    # find_elements (plural) returns every node matching the locator.
    all_articles = drive.find_elements_by_xpath('/html/body/div[2]/div/div/div/div/div/div/div/div[2]/div[1]/div/div[2]/div')
    for article in all_articles:
        article_times = article_times + 1
        a = article.find_element_by_tag_name("a")
        href = a.get_attribute('href')
        # Hand the URL over as a script argument instead of splicing it into
        # the JavaScript source, so quotes inside it cannot break the script.
        drive.execute_script("window.open(arguments[0]);", href)
        # Move the driver's focus to the newly opened window.
        current_window = drive.current_window_handle
        for handle in drive.window_handles:
            if handle != current_window:
                drive.switch_to_window(handle)
                break
        # Comment, then like.
        try:
            time.sleep(2)
            # 1. Open the comment box and type a canned comment.
            toolbox = drive.find_element_by_class_name("toolbox-list")
            toolbox.find_element_by_xpath("li[2]/a").click()
            drive.find_element_by_xpath('//*[@id="comment_content"]').send_keys(generate_review())
            time.sleep(2)
            # 1.1 Submit the comment. The button is located through the
            # rightBox container; other locators proved unreliable here.
            rightBox = drive.find_element_by_xpath('//*[@id="rightBox"]')
            rightBox.find_element_by_tag_name("input").click()
            # 2. Like the article.
            time.sleep(3)
            drive.find_element_by_xpath('//*[@id="is-like-span"]').click()
            print("自动化第" + str(article_times) + "个页面,该页面成功了")
        except Exception:
            # Best effort: a failing page is reported and skipped. Catching
            # Exception (not a bare except) lets Ctrl+C still stop the run.
            print("自动化第" + str(article_times) + "个页面,该页面失败了")
        finally:
            time.sleep(sleep_time)
            # Only close when focus really moved to another window;
            # otherwise this would close the listing page itself.
            if drive.current_window_handle != current_window:
                drive.close()
            drive.switch_to_window(current_window)
    articles = article_times
    return article_times
# Ranking pages to process, one per category.
url_lists = [
    "https://blog.csdn.net/rank/list?type=c%2Fc%2B%2B",
    "https://blog.csdn.net/rank/list?type=java",
    "https://blog.csdn.net/rank/list?type=javascript",
    "https://blog.csdn.net/rank/list?type=php",
    "https://blog.csdn.net/rank/list?type=python",
    "https://blog.csdn.net/rank/list?type=%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD",
    "https://blog.csdn.net/rank/list?type=%E5%8C%BA%E5%9D%97%E9%93%BE",
    "https://blog.csdn.net/rank/list?type=%E5%A4%A7%E6%95%B0%E6%8D%AE",
    "https://blog.csdn.net/rank/list?type=%E7%A7%BB%E5%8A%A8%E5%BC%80%E5%8F%91",
    "https://blog.csdn.net/rank/list?type=%E5%B5%8C%E5%85%A5%E5%BC%8F",
    "https://blog.csdn.net/rank/list?type=%E5%BC%80%E5%8F%91%E5%B7%A5%E5%85%B7",
    "https://blog.csdn.net/rank/list?type=%E6%95%B0%E6%8D%AE%E7%BB%93%E6%9E%84%E4%B8%8E%E7%AE%97%E6%B3%95",
    "https://blog.csdn.net/rank/list?type=%E6%B5%8B%E8%AF%95",
    "https://blog.csdn.net/rank/list?type=%E6%B8%B8%E6%88%8F",
    "https://blog.csdn.net/rank/list?type=%E7%BD%91%E7%BB%9C",
    "https://blog.csdn.net/rank/list?type=%E8%BF%90%E7%BB%B4",
]
articles = 0
# Pause in seconds after each article; set to 10 here.
sleep_time = 10
# Index into url_lists (0-based) of the first ranking page to process.
start_url_num = 1
time_start = time.time()
for i in range(start_url_num, len(url_lists)):
    # The two counters below are reset per page but not read anywhere in
    # the visible code.
    cur_review_success_times = 0
    cur_like_success_times = 0
    articles = 0
    ranking_url = url_lists[i]
    print("第" + str(i) + "个链接进行selenium :" + ranking_url)
    category_handle(ranking_url, sleep_time)
    print("访问共" + str(articles) + "条")
time_end = time.time()
# Total wall-clock duration of the run, reported in minutes.
elapsed_minutes = (time_end - time_start) / 60
print('all time cost', elapsed_minutes, 'min')
函数内部做了异常处理,兼容性应该很强,直接跑代码试试。有疑问欢迎评论区下留言,尽量及时回复。