本文主要参考:https://blog.csdn.net/Oh_Python/article/details/126899483
由于参考文章发布较早,CSDN 此后重新调整了页面排版,原文中的代码已经无法运行。我根据当前最新的页面版本对代码进行了修正,现在可以正常完成功能。
原理:通过 Python 库 selenium 控制网页组件,模拟人工手动点击的过程。
步骤:先执行test1.py,再执行test2.py
test1.py 用于获取并保存登录信息(cookies),运行后需要在 10 秒内手动完成登录;test2.py 读取保存的登录信息,模拟手动操作的全过程。
from selenium import webdriver
from time import sleep
import json
if __name__ == '__main__':
    # Open the CSDN login page, give the user a fixed window to log in by
    # hand, then dump the session cookies to csdn_cookies.txt as JSON.
    driver = webdriver.Chrome()
    try:
        driver.maximize_window()
        driver.get('https://passport.csdn.net/login?code=public')
        sleep(10)  # the manual login has to be finished within these 10 seconds

        cookies = driver.get_cookies()  # list of cookie dicts for the session
        # Write as UTF-8 explicitly: the script that consumes this file opens
        # it with encoding='utf8', so do not rely on the platform default.
        with open('csdn_cookies.txt', 'w', encoding='utf8') as f:
            f.write(json.dumps(cookies))
        print('cookies保存成功!')
    finally:
        # Always end the browser session, even if saving the cookies failed.
        driver.quit()
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import json
class CSDN(object):
    """Switch the author's original CSDN articles to "fans only" through Chrome.

    The workflow is: restore a saved login from ``csdn_cookies.txt``, open the
    content-management page, collect every article URL whose type label is
    ``原创`` across all result pages, then open each article and republish it
    with the fans-only option selected.

    Only the markdown editor layout is handled. The original script also
    carried a disabled variant for the rich-text editor (locators under
    ``#moreDiv``); it is not implemented here.
    """

    # All locators live here so a CSDN page redesign only needs edits in one place.
    _XPATH_CONTENT_MENU = '//ul[@role="menu"]/li/a[text()="内容管理"]'
    _XPATH_ARTICLE_LINK = '//p[@class="article-list-item-txt"]/a'
    _XPATH_ARTICLE_TYPE = '//div[@class="article-list-type"]/span[1]'
    _XPATH_NEXT_PAGE = '//*[@id="view-containe"]/div/div/div[4]/div/button[2]'
    # First button clicked in the markdown editor; presumably it opens the
    # publish-settings panel -- confirm against the live page.
    _XPATH_EDITOR_BUTTON = '/html/body/div[1]/div[1]/div[1]/div/div[3]/button'
    _XPATH_FANS_ONLY = '/html/body/div[1]/div[2]/div/div[1]/div[1]/div[7]/div/div[3]'
    _XPATH_PUBLISH = '/html/body/div[1]/div[2]/div/div[1]/div[2]/button[2]'

    _MANAGE_URL = 'https://mp.csdn.net/mp_blog/manage/article?'
    _COOKIE_FILE = 'csdn_cookies.txt'
    _ORIGINAL_LABEL = '原创'
    _SCROLL_TO_BOTTOM_JS = 'document.documentElement.scrollTop=1000000'
    _WAIT_SECONDS = 1000

    def __init__(self):
        # Launch the Chrome instance that every other method drives.
        self.driver = webdriver.Chrome()

    @staticmethod
    def _to_browser_cookie(cookie: dict) -> dict:
        """Build the dict passed to ``add_cookie`` from one saved cookie.

        Only ``name`` and ``value`` are taken from the saved cookie; every
        other field is forced to a fixed value scoped to ``.csdn.net``.
        """
        return {
            'domain': '.csdn.net',
            'name': cookie.get('name'),
            'value': cookie.get('value'),
            "expires": '',
            'path': '/',
            'httpOnly': False,
            'HostOnly': False,
            'Secure': False
        }

    @classmethod
    def _select_original_urls(cls, page_urls, page_types) -> list:
        """Return the URLs whose paired type label marks an original article.

        URLs and labels are paired positionally; if the two sequences differ
        in length the unpaired tail is ignored rather than raising.
        """
        return [
            url
            for url, label in zip(page_urls, page_types)
            if label == cls._ORIGINAL_LABEL
        ]

    def login(self) -> None:
        """Restore the saved login and open the content-management view.

        Raises whatever ``open``/``json`` raise if the cookie file is missing
        or malformed, and whatever Selenium raises if the menu never appears.
        """
        # Load a CSDN page first: Selenium only accepts cookies for the
        # domain of the document that is currently open.
        self.driver.get(self._MANAGE_URL)

        with open(self._COOKIE_FILE, 'r', encoding='utf8') as f:
            saved_cookies = json.load(f)
        for cookie in saved_cookies:
            self.driver.add_cookie(self._to_browser_cookie(cookie))

        # The injected cookies only take effect after a reload.
        self.driver.refresh()
        self.driver.maximize_window()

        # Wait for the "content management" menu entry, then click the very
        # element the wait returned instead of looking it up a second time.
        menu_link = WebDriverWait(self.driver, self._WAIT_SECONDS).until(
            EC.presence_of_element_located((By.XPATH, self._XPATH_CONTENT_MENU))
        )
        menu_link.click()

    def parse_page(self) -> None:
        """Collect original-article URLs from every list page and update them.

        Blocks on a console prompt first so the user can pick the desired
        tab in the browser; the text typed at the prompt is ignored.
        """
        input('输入1继续:')

        # Wait until at least one article link has been rendered.
        WebDriverWait(self.driver, self._WAIT_SECONDS).until(
            EC.presence_of_element_located((By.XPATH, self._XPATH_ARTICLE_LINK))
        )
        time.sleep(2)

        page_urls = []   # article URLs, in list order
        page_types = []  # type label of each article, same order
        while True:
            try:
                # Record the current page before trying to move on.
                page_urls += [
                    link.get_attribute('href')
                    for link in self.driver.find_elements(By.XPATH, self._XPATH_ARTICLE_LINK)
                ]
                page_types += [
                    tag.text
                    for tag in self.driver.find_elements(By.XPATH, self._XPATH_ARTICLE_TYPE)
                ]

                next_button = self.driver.find_element(By.XPATH, self._XPATH_NEXT_PAGE)
                # A disabled "next" button means this was the last page; stop
                # here rather than re-reading the same page forever.
                if not next_button.is_enabled():
                    break
                next_button.click()
                time.sleep(3)
            except Exception:
                # No usable "next" button (or the page changed underneath us):
                # treat it as the end of pagination.
                break

        self.change_fans(self._select_original_urls(page_urls, page_types))

    def _accept_pending_alerts(self, max_alerts: int = 5) -> None:
        """Accept JavaScript alerts until none is left (at most ``max_alerts``)."""
        for _ in range(max_alerts):
            try:
                self.driver.switch_to.alert.accept()
            except Exception:
                # Looking up the alert failed -- nothing is blocking the page.
                return

    def _publish_as_fans_only(self) -> None:
        """Run the markdown-editor click sequence on the currently open article."""
        driver = self.driver
        driver.execute_script(self._SCROLL_TO_BOTTOM_JS)

        driver.find_element(By.XPATH, self._XPATH_EDITOR_BUTTON).click()
        driver.execute_script(self._SCROLL_TO_BOTTOM_JS)
        time.sleep(2)

        # Choose the fans-only visibility option.
        driver.find_element(By.XPATH, self._XPATH_FANS_ONLY).click()
        # Optional step that the original flow kept disabled (article level "初级"):
        #   /html/body/div[1]/div[2]/div/div[1]/div[1]/div[9]/div/div/label[1]

        # Publish the article.
        driver.find_element(By.XPATH, self._XPATH_PUBLISH).click()
        try:
            # A confirmation prompt may need a second click on the same
            # button; when there is none this lookup/click simply fails.
            driver.find_element(By.XPATH, self._XPATH_PUBLISH).click()
        except Exception:
            pass

    def change_fans(self, page_urls) -> None:
        """Open each URL in ``page_urls`` and republish it as fans-only.

        Prints the number of URLs, then each URL as it is processed. A failure
        on one article is reported and skipped so the remaining ones still run.
        """
        print(len(page_urls))
        for page_url in page_urls:
            self.driver.get(page_url)
            print(page_url)
            try:
                # An open alert would block every later command on the page.
                self._accept_pending_alerts()
                self._publish_as_fans_only()
            except Exception as exc:
                print(f'failed to update {page_url}: {exc!r}')
            time.sleep(2)

    def run(self) -> None:
        """Log in, then collect and update the articles."""
        self.login()
        self.parse_page()
# Script entry: build the automation object (this launches Chrome) and run
# the whole workflow -- login followed by collecting and updating articles.
csdn = CSDN()
csdn.run()