专栏 |
---|
Python零基础入门篇 |
Python网络蜘蛛 |
Python数据分析 |
Django基础入门宝典 |
小玩意儿 |
Web前端学习 |
tkinter学习笔记 |
Excel自动化处理 |
相逢即是缘,来者皆是客,确定不看完再走?
几日不见,如隔几日呀!近段因为课业繁忙,《小玩意儿》专栏停更了一段时间,实在是Sorry
不知道为什么,总想着更新《小玩意儿》专栏的文章,可能是因为很多人喜欢,也有可能是因为写这类的文章比较放松,总而言之,就是喜欢……
不过写文章的过程是轻松的,打代码就不一样了,你看那头发,长得就不轻松……
有时候一个Bug出现,可以成功拿走我生命中的两个小时,最重要的是代码的测试,测试的过程是宛如……不说了不说了,它直接拿走我的生命!
不过!经历了Bug无数次的疯狂抽打,这点对我来说不痛不痒(浅浅装一下)
今天给大家带来的是一个搬家脚本,搬家搬家,顾名思义就是搬家……
大概一周前,我无意间接触到文章搬家的这个“领域”,当时我还不知道搬家是啥,请原谅我没见过世面。后来慢慢了解,才知道“搬家”原来是将一个社区的文章搬到另一个社区中,悟了悟了!
buling~的一下,突然就想到,诶,我之前写过一个将CSDN文章转移到印象笔记中的脚本,那我是不是也可以在这个基础上做一个自动搬家的脚本,将CSDN的文章搬到其他社区发表?后来看了一下其他社区,就随机抽了一下,一只羊、两只羊、三只羊……就抽到了51CTO(不要问为什么,因为我也不知道)。
咱们说干就干,搬!!!
根据之前写的文章:CSDN文章自动转移到印象笔记?一怒之下的我“揍”出了代码~
本次的设计思路也差不多:
(1)用户输入搬家的文章数,获取目标文章的链接和标题;
(2)通过循环,进入每一篇文章,然后搬家;
(3)将搬家成功的文章标题保存下来,写入excel文件中,以后在搬家的过程中就会读取已搬过的文章,筛选出未搬的文章,这就避免了重复搬家的情况。
一一实现的过程请往下看
《小玩意儿》专栏的文章看来被Get_cookie.py下了咒,没错,又是它,实现自动登录的基石,想要了解可以看上边提到的文章哦!
话不多说,直接贴代码:
from selenium import webdriver
from time import sleep
import json
if __name__ == '__main__':
    # Open the CSDN login page in Chrome; the user signs in by hand while
    # the script waits, then the session cookies are dumped to disk so the
    # migration script can reuse the login.
    driver = webdriver.Chrome()
    try:
        driver.maximize_window()
        driver.get('https://passport.csdn.net/login?code=public')
        sleep(10)  # fixed window in which the user must finish logging in
        dictCookies = driver.get_cookies()  # list of cookie dicts
        jsonCookies = json.dumps(dictCookies)  # serialise to a string
        # Explicit encoding so the file matches the utf8 read in the
        # migration script regardless of the platform default.
        with open('csdn_cookies.txt', 'w', encoding='utf8') as f:
            f.write(jsonCookies)
        print('cookies保存成功!')
    finally:
        # Always release the browser, even if saving the cookies failed.
        driver.quit()
本次搬家的文章主要是对Markdown文章进行搬家,如果不是Markdown编辑器编写的文章搬不了哦~
提供思路,大家自由发挥哈~
import json
import os
import time

import openpyxl
import pyautogui
import win32api
import win32con
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
class CSDN(object):
    """Move Markdown articles from CSDN to 51CTO by driving the screen.

    Selenium collects article URLs and titles from the CSDN content
    manager; pyautogui then copies each article into a 51CTO editor by
    matching reference screenshots stored under ``./photo_51``. Titles of
    selected articles are recorded in an Excel workbook so later runs can
    skip them.
    """

    EXCEL_PATH = './已转移文章.xlsx'  # workbook listing already-handled titles
    SHEET_NAME = 'Trans_artical'      # worksheet that holds those titles
    IMAGE_DIR = './photo_51'          # reference screenshots for pyautogui
    POLL_INTERVAL = 0.2               # seconds between screen-match retries

    def __init__(self, artical_num):
        """Start Chrome and remember how many articles to move.

        :param artical_num: maximum number of articles to move in this run
        """
        self.driver = webdriver.Chrome()
        self.artical_num = artical_num
        self.page_urls = []          # URLs of every article found
        self.all_titles = []         # titles of every article found
        self.To_transfer_url = []    # URLs selected for this run
        self.To_transfer_title = []  # titles selected for this run
        self.max_row = 0             # last used row of the worksheet

    # --------------------------- login ---------------------------
    def login(self):
        """Log in to CSDN by replaying cookies saved in ``csdn_cookies.txt``."""
        self.driver.get('https://mp.csdn.net/mp_blog/manage/article?')
        # Load the cookies written by the cookie-saving script.
        with open('csdn_cookies.txt', 'r', encoding='utf8') as f:
            listCookies = json.loads(f.read())
        # Add every saved cookie to the browser session.
        for cookie in listCookies:
            cookie_dict = {
                'domain': '.csdn.net',
                'name': cookie.get('name'),
                'value': cookie.get('value'),
                "expires": '',
                'path': '/',
                'httpOnly': False,
                'HostOnly': False,
                'Secure': False
            }
            self.driver.add_cookie(cookie_dict)
        self.driver.refresh()  # the cookies only take effect after a reload

    # --------------------------- workbook I/O ---------------------------
    def open_excel(self):
        """Load the workbook and read the titles stored in column A."""
        self.f = openpyxl.load_workbook(self.EXCEL_PATH)
        self.sheet = self.f[self.SHEET_NAME]
        self.Transferred_titles = [cell.value for cell in self.sheet['A']]
        self.max_row = self.sheet.max_row

    def save_excel(self):
        """Append this run's selected titles to column A and save the workbook."""
        for offset, title in enumerate(self.To_transfer_title, start=1):
            self.sheet.cell(self.max_row + offset, 1).value = title
        self.f.save(self.EXCEL_PATH)
        print('文件保存成功!')

    # --------------------------- article transfer ---------------------------
    def Filter_articles(self):
        """Select up to ``artical_num`` articles that are not yet recorded.

        Walks ``page_urls`` / ``all_titles`` in order and appends every
        article whose title is absent from ``Transferred_titles`` to
        ``To_transfer_url`` / ``To_transfer_title``.
        """
        already_moved = set(self.Transferred_titles)
        for url, title in zip(self.page_urls, self.all_titles):
            # Check the quota BEFORE appending; checking afterwards used to
            # select one article more than requested.
            if len(self.To_transfer_url) >= self.artical_num:
                break
            if title not in already_moved:
                self.To_transfer_url.append(url)
                self.To_transfer_title.append(title)

    def parse_page(self):
        """Collect the URL and title of every article in the chosen column.

        Opens the content manager, waits for the user to pick a column and
        confirm on the console, then pages through the article list. Both
        result lists are reversed at the end.
        """
        # Wait until the "内容管理" menu entry exists, then click it.
        WebDriverWait(self.driver, 1000).until(
            EC.presence_of_element_located((By.XPATH, '//ul[@role="menu"]/li/a[text()="内容管理"]'))
        )
        self.driver.find_element(By.XPATH, '//ul[@role="menu"]/li/a[text()="内容管理"]').click()
        print('\n请确认是否已选择专栏,点击搜索……')
        input('确认无误后请输入1,进行下一步操作……:')
        # Wait for the article links to be present.
        WebDriverWait(self.driver, 1000).until(
            EC.presence_of_element_located((By.XPATH, '//p[@class="article-list-item-txt"]/a'))
        )
        time.sleep(2)
        while True:
            try:
                links = self.driver.find_elements(By.XPATH, '//p[@class="article-list-item-txt"]/a')
                # Read both fields before extending either list so a failure
                # halfway through a page cannot leave them misaligned.
                entries = [(link.get_attribute('href'), link.text) for link in links]
                self.page_urls += [href for href, _ in entries]
                self.all_titles += [text for _, text in entries]
                # Go to the next page; stop when there is none.
                next_button = self.driver.find_element(
                    By.XPATH, '//*[@id="view-containe"]/div/div/div[4]/div/button[2]')
                if not next_button.is_enabled():
                    break
                next_button.click()
                time.sleep(3)
            except WebDriverException:
                # Any Selenium failure (missing button, blocked click, ...)
                # ends pagination, as the original catch-all did.
                break
        # Reverse the collected order.
        self.page_urls = self.page_urls[::-1]
        self.all_titles = self.all_titles[::-1]

    def get_urls(self):
        """Open each selected article and move it unless it is rich text."""
        # Never index past the articles actually selected.
        count = min(self.artical_num, len(self.To_transfer_url))
        for i in range(count):
            self.driver.get(self.To_transfer_url[i])
            time.sleep(3)
            try:
                # This button is only looked up to detect the rich-text editor.
                self.driver.find_element(By.XPATH, '//div[@id="moreDiv"]/div[10]/div/div/div[2]/button')
            except NoSuchElementException:
                # Button absent: not a rich-text article, so move it.
                self.move_to_51()
                print(f'{self.To_transfer_title[i]} 搬家成功!')

    def _locate(self, image_name):
        """Return the screen box matching a reference image, or None."""
        # Newer PyAutoGUI raises instead of returning None when nothing
        # matches; the empty tuple keeps the except clause valid on
        # versions that lack the exception class.
        not_found = getattr(pyautogui, 'ImageNotFoundException', ())
        try:
            return pyautogui.locateOnScreen(f'{self.IMAGE_DIR}/{image_name}')
        except not_found:
            return None

    def _wait_for_image(self, image_name):
        """Block until the reference image is on screen; return its centre."""
        while True:
            box = self._locate(image_name)
            if box is not None:
                return pyautogui.center(box)
            time.sleep(self.POLL_INTERVAL)

    def _click_image(self, image_name, double=False):
        """Wait for the reference image, then left- or double-click its centre."""
        x, y = self._wait_for_image(image_name)
        if double:
            pyautogui.doubleClick(x, y)
        else:
            pyautogui.leftClick(x, y)

    def move_to_51(self):
        """Copy the open CSDN article into the 51CTO editor and publish it.

        Every step waits for a reference screenshot to appear on screen, so
        both the CSDN editor and the 51CTO editor must be visible.
        """
        # Select the CSDN title (double click followed by a single click)
        # and copy it.
        x, y = self._wait_for_image('1.png')
        pyautogui.doubleClick(x, y)
        pyautogui.leftClick(x, y)
        pyautogui.hotkey('ctrl', 'c')
        time.sleep(1)
        # Paste the title into the 51CTO title field.
        self._click_image('2.png', double=True)
        pyautogui.hotkey('ctrl', 'v')
        # Focus the CSDN article body, select everything (ctrl + a), copy.
        self._click_image('3.png', double=True)
        pyautogui.hotkey('ctrl', 'a')
        pyautogui.hotkey('ctrl', 'c')
        # Paste the body into the 51CTO editor.
        self._click_image('4.png')
        pyautogui.hotkey('ctrl', 'v')
        time.sleep(1)
        # Click "publish article".
        self._click_image('5.png')
        # Dismiss the sensitive-word warning if it shows up (up to 4 looks).
        for _ in range(4):
            warning = self._locate('19.png')
            if warning is not None:
                wx, wy = pyautogui.center(warning)
                pyautogui.leftClick(wx, wy)
                break
        # Open the category list and pick the category.
        self._click_image('6.png')
        self._click_image('7.png')
        # Scroll down with repeated mouse-wheel events.
        for _ in range(1, 800):
            win32api.mouse_event(win32con.MOUSEEVENTF_WHEEL, 0, 0, -1)
        time.sleep(1)  # wait one second after scrolling
        # Topic, single-image cover, image picker and image, in that order.
        for image_name in ('9.png', '10.png', '11.png', '12.png', '17.png'):
            self._click_image(image_name)
        pyautogui.press('enter')
        # Click "publish".
        self._click_image('14.png', double=True)
        # If the click came before the image finished loading, retry at the
        # current mouse position while the notice is still visible.
        for _ in range(5):
            if self._locate('18.png') is None:
                break
            pyautogui.doubleClick()
        pyautogui.leftClick()
        # Return to the home page and open a fresh editor for the next article.
        self._click_image('15.png', double=True)
        self._click_image('16.png', double=True)

    # --------------------------- main flow ---------------------------
    def run(self):
        """Log in, collect articles, move the selected ones and record them."""
        try:
            self.login()       # log in to CSDN
            self.parse_page()  # collect titles and URLs
            if os.path.exists(self.EXCEL_PATH):
                # A record exists: skip titles that are already in it.
                self.open_excel()
                self.Filter_articles()
            else:
                # First run: take the first artical_num articles and start
                # a new workbook.
                self.To_transfer_url = self.page_urls[:self.artical_num]
                self.To_transfer_title = self.all_titles[:self.artical_num]
                self.f = openpyxl.Workbook()
                self.sheet = self.f.create_sheet(self.SHEET_NAME)
            self.get_urls()    # move the selected articles
            self.save_excel()  # record the selected titles
        finally:
            # Close the browser even when a step above failed.
            self.driver.quit()
def _ask_article_count():
    """Prompt until the user enters a positive whole number and return it."""
    while True:
        answer = input('\n请输入搬家文章的数量:')
        try:
            count = int(answer)
        except ValueError:
            # Non-numeric input used to crash the script; ask again instead.
            print('请输入一个正整数!')
            continue
        if count > 0:
            return count
        print('请输入一个正整数!')


if __name__ == '__main__':
    print('------ 搬家开始!请做好准备! ------')
    artical_num = _ask_article_count()
    time.sleep(2)  # short pause before the browser is launched
    csdn = CSDN(artical_num)
    csdn.run()
非常感谢大家一直以来的支持 今天的分享就到这里啦!
如果喜欢这篇文章,那就旋个三连吧~ 点赞 收藏 关注哦 您的支持,就是我更新的最大动力!感谢
“See you next time”
peace~