# CNKI paper crawler with automatic download, based on Selenium.
import time
import os
from selenium import webdriver
def _download_result_rows(driver, rows=20):
    """Click the download icon of the first *rows* rows of the result grid.

    Raises whatever Selenium raises when a row's icon cannot be located
    (for example when the page lists fewer than *rows* results).
    """
    from selenium.webdriver.common.by import By

    for row in range(1, rows + 1):
        # Re-select the second browser window before every click, as the
        # original code did -- presumably because a download click can open
        # another tab and steal focus (TODO confirm).
        driver.switch_to.window(driver.window_handles[1])
        icon = driver.find_element(
            By.XPATH,
            '//*[@id="gridTable"]/table/tbody/tr[' + str(row) + ']/td[9]/a[1]/i')
        # Click through JavaScript rather than WebElement.click().
        driver.execute_script("arguments[0].click();", icon)
        time.sleep(1)


def _wait_for_downloads(save_dir, timeout=60):
    """Wait until *save_dir* holds no Chrome partial-download files.

    Chrome names unfinished downloads ``*.crdownload``. Returns as soon as
    none are left, when the directory cannot be listed, or after *timeout*
    seconds, whichever comes first.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            names = os.listdir(save_dir)
        except OSError:
            return
        if not any(name.endswith('.crdownload') for name in names):
            return
        time.sleep(1)


def download(key, page, save_dir=None):
    """Log in through the BIT portal, search CNKI for *key* and download results.

    Args:
        key: Search keyword typed into the CNKI search box.
        page: Number of result pages to download (20 rows are attempted per
            page).
        save_dir: Directory Chrome saves downloads into. When omitted, the
            module-level ``path`` variable is used, as in the original code.

    Errors raised while clicking download icons or page links are printed and
    end the download phase early. Errors raised before that (login, search)
    propagate to the caller. The browser is always shut down on exit.
    """
    # Imported locally so the block does not depend on a new file-level import.
    from selenium.webdriver.common.by import By

    if save_dir is None:
        # Backward compatibility: fall back to the global set by the script's
        # __main__ section.
        save_dir = path

    url = "https://web.bit.edu.cn/login"
    # Credentials are intentionally blank here; fill them in before running.
    username = ""
    password = ""

    opt = webdriver.ChromeOptions()
    opt.add_experimental_option('prefs', {
        'profile.default_content_setting.popups': 0,
        'download.default_directory': save_dir,
    })
    driver = webdriver.Chrome(options=opt)
    try:
        # --- log in ---------------------------------------------------------
        driver.get(url)
        time.sleep(2)
        driver.find_element(By.ID, 'cas_login').click()
        time.sleep(1)
        driver.find_element(By.ID, 'username').send_keys(username)
        driver.find_element(By.ID, 'password').send_keys(password)
        driver.find_element(By.ID, 'login_submit').click()
        time.sleep(1)

        # --- open CNKI from the portal and run the search ---------------------
        zhiwang = driver.find_element(
            By.XPATH,
            '/html/body/div[1]/div[2]/div[2]/div[4]/div[18]/div/div[1]/div/span')
        driver.execute_script("arguments[0].click();", zhiwang)
        time.sleep(0.5)
        # Continue in the most recently opened window.
        driver.switch_to.window(driver.window_handles[-1])
        driver.find_element(By.ID, 'txt_SearchText').send_keys(key)
        search = driver.find_element(
            By.XPATH, '/html/body/div[1]/div[2]/div/div[1]/input[2]')
        driver.execute_script("arguments[0].click();", search)
        time.sleep(1)

        # --- download page 1, then pages 2..page ------------------------------
        try:
            _download_result_rows(driver)
            time.sleep(1)
            # Upper bound is page + 1 so that exactly `page` pages are
            # processed; the previous range(2, page) stopped one page short.
            for page_no in range(2, page + 1):
                driver.find_element(By.ID, 'page' + str(page_no)).click()
                _download_result_rows(driver)
                time.sleep(1)
        except Exception as e:
            # Best effort: report the problem and keep what was downloaded.
            print(e)

        time.sleep(5)
        # quit() below ends the whole browser, so give pending downloads a
        # chance to complete first.
        _wait_for_downloads(save_dir)
    finally:
        # quit() closes every window and stops chromedriver; close() only
        # closed the current window and leaked the rest.
        driver.quit()
if __name__ == '__main__':
    # Script entry point: ask for the keyword and the page count, make sure
    # the target folder exists, then start the download.
    kw = input("请输入要搜索的关键词:")
    pg = int(input("请输入要下载的页数:"))

    # `path` must stay a module-level name: download() reads it to configure
    # Chrome's download directory. Each keyword gets its own sub-folder.
    path = '\\'.join((r'F:\论文下载', kw))
    if not os.path.exists(path):
        os.makedirs(path)

    try:
        download(kw, pg)
    except Exception as err:
        # Top-level boundary: report the failure instead of a raw traceback.
        print(err)