selenium java 分析boss直聘岗位要求

pom.xml配置

        <dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>selenium-java</artifactId>
            <version>3.141.59</version>
            <classifier>sources</classifier>
        </dependency>

        <dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>selenium-api</artifactId>
            <version>3.141.59</version>
            <classifier>sources</classifier>
        </dependency>
        
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>5.1.2</version>
        </dependency>

selenium

语法学习参考1
语法学习参考2

驱动版本号要和chrome浏览器一致
驱动下载地址-老版本
驱动下载地址-最新版本

JAVA代码


import cn.hutool.core.date.DatePattern;
import cn.hutool.core.date.DateUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.RandomUtil;
import org.openqa.selenium.By;
import org.openqa.selenium.PageLoadStrategy;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;

import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;

/**
 * Drives a Chrome browser through Selenium to search BOSS Zhipin for a keyword,
 * opens every job card of every result page in turn and stores the job's area,
 * name, salary, URL and description text in one UTF-8 text file per job.
 *
 * <p>The program is interactive by design: whenever an expected element cannot be
 * located (typically because the site shows a captcha) it prints a hint and keeps
 * retrying so that the operator can solve the captcha by hand in the browser.
 * The browser is intentionally left open when scraping is finished.
 */
public class BossJobTest {

    /** Number of job cards assumed per result page; used only to number output files. */
    private static final int PAGE_SIZE = 30;

    /** User-Agent strings the browser can present; selected by index in {@link #main}. */
    private static final String[] USER_AGENTS = {
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15",
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.53 Safari/537.36 Edg/103.0.1264.37",
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
            "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)"};

    /**
     * Entry point: configures Chrome, runs the search and scrapes all result pages.
     *
     * @param args unused
     * @throws Exception if the thread is interrupted while waiting, or if a Selenium
     *                   or file operation outside the retry loops fails
     */
    public static void main(String[] args) throws Exception {
        String url = "https://www.zhipin.com/wuhan";
        // For checking how detectable the automated browser is, point url at
        // "https://bot.sannysoft.com" instead.
        String loginComputerName = "XXXXX";
        String searchKey = "JAVA";
        int uaHeadIndex = 2;
        // Date stamp (yyyyMMdd) used as the output sub-directory name.
        String fileNamePre = DateUtil.format(new Date(), DatePattern.PURE_DATE_FORMAT);

        ChromeOptions chromeOptions = new ChromeOptions();
        chromeOptions.setPageLoadStrategy(PageLoadStrategy.EAGER);  // eager (fast) page-load mode
        chromeOptions.addArguments("--incognito"); // private window
        chromeOptions.addArguments("--disable-blink-features=AutomationControlled"); // window.navigator.webdriver=false
        chromeOptions.addArguments("--disable-infobars"); // hide the "controlled by automated software" bar
        // Override the User-Agent. The Chrome switch is "--user-agent=<value>"; the
        // previous "User-Agent=<value>" form is not a switch and was silently ignored.
        chromeOptions.addArguments("--user-agent=" + USER_AGENTS[uaHeadIndex]);
        System.setProperty("webdriver.chrome.driver", "C:\\Users\\" + loginComputerName + "\\AppData\\Local\\Google\\Chrome\\Application\\chromedriver.exe");
        WebDriver driver = new ChromeDriver(chromeOptions);
        // Global implicit wait applied to every element lookup.
        driver.manage().timeouts().implicitlyWait(15, TimeUnit.SECONDS);
        // Handle of the search-result window, needed to return after each detail tab.
        String currentWindowHandle = driver.getWindowHandle();
        try {
            driver.get(url);
            WebElement searchInput = driver.findElement(By.className("ipt-search"));
            searchInput.sendKeys(searchKey);
            WebElement searchButton = driver.findElement(By.cssSelector(".btn-search"));
            searchButton.click();
        } catch (Exception e) {
            e.printStackTrace();
            // Deliberately park here forever so the browser stays open for inspection.
            while (true) {
                System.out.println("打开网页失败 》》》》");
                Thread.sleep(5000);
            }
        }

        // Declared outside the page loop so it really advances; it used to be reset
        // to 1 on every page, which made later pages overwrite earlier files.
        int nowPage = 1;
        boolean hasNextPage;
        do {
            Thread.sleep(5000);
            List<WebElement> jobCards = driver.findElements(By.className("job-card-wrapper"));
            for (int i = 0; i < jobCards.size(); i++) {
                WebElement jobCard = jobCards.get(i);

                // Retry until the card's link can be found; the captcha hint and the
                // pause now happen only when the lookup actually failed.
                WebElement a = null;
                while (null == a) {
                    try {
                        a = jobCard.findElement(By.tagName("a"));
                    } catch (Exception e) {
                        e.printStackTrace();
                        System.out.println(">>>>> 手动操作验证码 1111");
                        Thread.sleep(8000);
                    }
                }

                String jobUrl = a.getAttribute("href");
                String salary = jobCard.findElement(By.className("salary")).getText();
                String jobName = jobCard.findElement(By.className("job-name")).getText();
                String jobArea = jobCard.findElement(By.className("job-area-wrapper")).getText();

                // Retry the click until it succeeds. The old flag was never cleared,
                // so a single failure caused an endless loop even after a good click.
                boolean clicked = false;
                while (!clicked) {
                    try {
                        a.click();
                        clicked = true;
                    } catch (Exception e) {
                        e.printStackTrace();
                        Thread.sleep(6000);
                        System.out.println(">>>>> 点击不了 4444");
                    }
                }

                // Switch to the first window that is not the search-result window.
                Set<String> windowHandles = driver.getWindowHandles();
                for (String windowHandle : windowHandles) {
                    if (!windowHandle.equals(currentWindowHandle)) {
                        driver.switchTo().window(windowHandle);
                        break;
                    }
                }
                Thread.sleep(10000);

                // Retry until the job description is present (captcha may block it).
                WebElement elementJobText = null;
                while (null == elementJobText) {
                    try {
                        elementJobText = driver.findElement(By.className("job-sec-text"));
                    } catch (Exception e) {
                        e.printStackTrace();
                        System.out.println(">>>>> 手动操作验证码 22222");
                        Thread.sleep(8000);
                    }
                }
                String jobText = elementJobText.getText();
                System.out.println(jobArea + " || " + jobName + " || " + salary + " || " + jobUrl + " || " + jobText);

                List<String> lines = new ArrayList<>();
                lines.add(jobArea);
                lines.add(jobName);
                lines.add(salary);
                lines.add(jobUrl);
                lines.add(jobText);
                lines.add(" || ");

                // One file per job, numbered continuously across pages.
                FileUtil.writeUtf8Lines(lines, "D:\\bossJobFile\\" + fileNamePre + "\\" + (i + PAGE_SIZE * (nowPage - 1)) + ".txt");
                // Close the detail window and go back to the result list.
                driver.close();
                driver.switchTo().window(currentWindowHandle);
            }
            hasNextPage = goToNextPage(driver);
            nowPage++;
        } while (hasNextPage);

        // Finished: idle forever so the browser is not closed.
        while (true) {
            Thread.sleep(1000);
        }
    }

    /**
     * Clicks the "next page" control of the result list if one is available.
     *
     * <p>The control is located as the parent of the element carrying the
     * {@code ui-icon-arrow-right} class. The earlier XPath tested {@code class}
     * without {@code @}, i.e. a child element named "class", and so never matched.
     *
     * @param driver the driver positioned on the search-result window
     * @return {@code true} if the next-page control was clicked, {@code false} if it
     *         is missing or appears disabled
     */
    private static boolean goToNextPage(WebDriver driver) {
        // findElements returns an empty list instead of throwing when nothing matches.
        // NOTE(review): the tag is matched with '*' because the original 'li' could not
        // be confirmed against the live page — verify.
        List<WebElement> nextControls =
                driver.findElements(By.xpath("//*[contains(@class,'ui-icon-arrow-right')]/.."));
        if (nextControls.isEmpty()) {
            return false;
        }
        WebElement next = nextControls.get(0);
        String cssClass = next.getAttribute("class");
        // isEnabled() is generally true for non-form elements, so a CSS "disabled"
        // marker is checked as well.
        // NOTE(review): presumably the site marks the last page this way — confirm.
        if (!next.isEnabled() || (cssClass != null && cssClass.contains("disabled"))) {
            return false;
        }
        next.click();
        return true;
    }
}

合并文本信息到一个文件里

通过CMD 进入文件所在目录,执行以下代码

type *.txt >> total.txt

出现BOSS验证码

手动点击验证

你可能感兴趣的:(爬虫,selenium,java,测试工具)