# Selenium + Python crawler: scrapes part-time job listings from jianzhimao.com.
#
# For learning purposes only; will be removed upon request by the rights holder.

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import time
URL = "https://guangzhou.jianzhimao.com/dbx_zbx_5/"
class main(object):
    """Scrape part-time job listings from guangzhou.jianzhimao.com.

    Opens the listing page in Chrome, pages through the pagination links,
    opens every posting in a new tab, extracts its details, prints one
    record per posting, and closes the tab again.
    """

    def __init__(self):
        self.driver = webdriver.Chrome()
        # Fix the viewport so the absolute-XPath layout assumptions hold.
        self.driver.set_window_size(1920, 1080)
        self.driver.get(URL)
        # Wait up to 5 s for elements to appear before a lookup fails.
        self.driver.implicitly_wait(5)

    def run(self):
        """Walk every result page and print a dict per job posting.

        Side effects: drives the browser, prints to stdout, and quits the
        WebDriver when done (or on error).
        """
        try:
            self._crawl_all_pages()
        finally:
            # BUG FIX: the driver was never released, leaking a Chrome
            # process per run.
            self.driver.quit()

    def _crawl_all_pages(self):
        # Count the pagination links to learn how many pages exist.
        page_links = self.driver.find_elements_by_xpath(
            "/html/body/section[1]/article/div[2]/div[1]/div/ul/li/a")
        page_count = len(page_links)
        print(page_count)
        # A single result page still needs one pass through the loop below.
        if page_count == 1:
            page_count = 2
        for page in range(2, page_count):
            self._crawl_current_page()
            # Bring the pagination bar back into view before clicking.
            pager = self.driver.find_element_by_xpath(
                '/html/body/section[1]/article/div[2]/div[1]/div/ul/li[1]')
            self.driver.execute_script("arguments[0].scrollIntoView();", pager)
            if page_count != 2:
                self.driver.find_element_by_xpath(
                    '/html/body/section[1]/article/div[2]/div[1]/div/ul/li['
                    + str(page) + ']').click()

    def _crawl_current_page(self):
        # Count the job-title links on the page currently shown.
        titles = self.driver.find_elements_by_xpath(
            "/html/body/section[1]/article/div[2]/div[1]/ul/li/a")
        title_count = len(titles)
        print(title_count)
        for row in range(1, title_count + 1):
            print(row)
            row_xpath = ("/html/body/section[1]/article/div[2]/div[1]/ul/li["
                         + str(row) + "]/a")
            if row >= 9:
                # Rows below the fold must be scrolled into view first.
                # BUG FIX: the index was quoted into the literal
                # ('li["+str(i)+"]'), so the XPath never varied with the
                # row number and pointed at a non-existent element.
                anchor = self.driver.find_element_by_xpath(row_xpath)
                self.driver.execute_script(
                    "arguments[0].scrollIntoView();", anchor)
            time.sleep(1)
            self.driver.find_element_by_xpath(row_xpath).click()
            time.sleep(2)
            # The posting opens in a new tab; switch to it.
            # (switch_to_window is deprecated; switch_to.window replaces it.)
            self.driver.switch_to.window(self.driver.window_handles[1])
            print(self._scrape_posting())
            time.sleep(1)
            self.driver.close()
            time.sleep(2)
            self.driver.switch_to.window(self.driver.window_handles[0])

    def _scrape_posting(self):
        """Extract one posting's fields from the detail tab.

        Returns a dict of the eight detail fields.  BUG FIX: the original
        assigned these to locals and discarded them, so the crawler
        produced no output at all.
        """
        detail = '/html/body/section/article/div/div[1]/div[2]'
        text = lambda xp: self.driver.find_element_by_xpath(xp).text
        title = text(detail + '/div[1]/h1')
        company_name = text(detail + '/div[1]/p')
        time.sleep(0.5)
        return {
            # Job title
            'title': title,
            # Employer name
            'company': company_name,
            # Number of openings
            'hires': text(detail + '/div[2]/ul[1]/li[1]/span[2]'),
            # Full job description
            'description': text(detail + '/div[2]/div[1]/div[2]'),
            # Payment method
            'pay_method': text(detail + '/div[2]/ul[3]/li[1]/span[2]'),
            # Wage
            'wage': text(detail + '/div[2]/ul[3]/li[2]/span[2]'),
            # Working hours
            'work_time': text(detail + '/div[2]/ul[2]/li[2]/span[2]'),
            # Work location
            'place': text(detail + '/div[2]/ul[1]/li[2]/span[2]'),
        }
if __name__ == '__main__':
    # Script entry point: build the crawler and start scraping.
    main().run()

 

# (CSDN "you may also be interested in: Python" footer from the scraped page.)