<dependency>
    <groupId>org.seleniumhq.selenium</groupId>
    <artifactId>selenium-java</artifactId>
    <version>3.141.59</version>
    <classifier>sources</classifier>
</dependency>
<dependency>
    <groupId>org.seleniumhq.selenium</groupId>
    <artifactId>selenium-api</artifactId>
    <version>3.141.59</version>
    <classifier>sources</classifier>
</dependency>
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
    <version>5.1.2</version>
</dependency>
语法学习参考1
语法学习参考2
驱动版本号要和chrome浏览器一致
驱动下载地址-老版本
驱动下载地址-最新版本
import cn.hutool.core.date.DatePattern;
import cn.hutool.core.date.DateUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.RandomUtil;
import org.openqa.selenium.By;
import org.openqa.selenium.PageLoadStrategy;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
/**
 * Drives a Chrome browser through the job search on zhipin.com and saves
 * each job posting (area, name, salary, URL, description) to its own UTF-8
 * text file under {@code D:\bossJobFile\<yyyyMMdd>\}.
 *
 * <p>The program is semi-automatic: whenever an expected element cannot be
 * found it prints a prompt and keeps retrying, so that a human can solve the
 * captcha in the browser window. It never closes the browser on its own.
 */
public class BossJobTest {

    /** Pause between retries while waiting for a human to clear a captcha. */
    private static final long CAPTCHA_RETRY_MILLIS = 8000L;

    /** Pause between retries when a job link cannot be clicked. */
    private static final long CLICK_RETRY_MILLIS = 6000L;

    /**
     * Entry point: opens the search page, runs the search and walks through
     * the result pages, writing one file per job.
     *
     * @param args unused
     * @throws Exception if the driver cannot be started or a sleep is interrupted
     */
    public static void main(String[] args) throws Exception {
        String url = "https://www.zhipin.com/wuhan";
        // Browser-fingerprint check page, handy when tuning the options below:
        // String url = "https://bot.sannysoft.com";
        String loginComputerName = "XXXXX";
        String searchKey = "JAVA";
        int uaHeadIndex = 2;
        String fileNamePre = DateUtil.format(new Date(), DatePattern.PURE_DATE_FORMAT);

        ChromeOptions chromeOptions = new ChromeOptions();
        chromeOptions.setPageLoadStrategy(PageLoadStrategy.EAGER); // do not wait for all sub-resources
        chromeOptions.addArguments("--incognito"); // private window
        chromeOptions.addArguments("--disable-blink-features=AutomationControlled"); // window.navigator.webdriver=false
        chromeOptions.addArguments("--disable-infobars"); // hide the "controlled by automated software" bar

        // Candidate User-Agent strings; uaHeadIndex selects the one to send.
        String[] arr = {"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15",
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.53 Safari/537.36 Edg/103.0.1264.37",
                "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
                "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
                "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)"};
        // Chrome's switch is "--user-agent=<value>"; an argument without the
        // leading dashes is not interpreted as a switch at all.
        chromeOptions.addArguments("--user-agent=" + arr[uaHeadIndex]);

        System.setProperty("webdriver.chrome.driver", "C:\\Users\\" + loginComputerName + "\\AppData\\Local\\Google\\Chrome\\Application\\chromedriver.exe");
        WebDriver driver = new ChromeDriver(chromeOptions);
        // Global implicit wait applied to every element lookup.
        driver.manage().timeouts().implicitlyWait(15, TimeUnit.SECONDS);
        // Handle of the window that shows the result list.
        String currentWindowHandle = driver.getWindowHandle();

        try {
            driver.get(url);
            WebElement searchInput = driver.findElement(By.className("ipt-search"));
            searchInput.sendKeys(searchKey);
            WebElement searchButton = driver.findElement(By.cssSelector(".btn-search"));
            searchButton.click();
        } catch (Exception e) {
            e.printStackTrace();
            // Keep the process (and browser) alive so the failure can be inspected.
            while (true) {
                System.out.println("打开网页失败 》》》》");
                Thread.sleep(5000);
            }
        }

        // Declared outside the page loop so that the file index keeps growing
        // across pages instead of restarting at 0 and overwriting earlier files.
        int nowPage = 1;
        int pageSize = 30;
        boolean hasNextPage;
        do {
            Thread.sleep(5000);
            List<WebElement> jobCards = driver.findElements(By.className("job-card-wrapper"));
            for (int i = 0; i < jobCards.size(); i++) {
                String outputFile = "D:\\bossJobFile\\" + fileNamePre + "\\" + (i + pageSize * (nowPage - 1)) + ".txt";
                scrapeJobCard(driver, jobCards.get(i), currentWindowHandle, outputFile);
            }
            nowPage++;
            hasNextPage = goToNextPage(driver);
        } while (hasNextPage);

        // Finished, but deliberately keep the browser open.
        while (true) {
            Thread.sleep(1000);
        }
    }

    /**
     * Reads one job card from the result list, opens its detail page in the
     * new window the click produces, writes the collected fields to
     * {@code outputFile}, then closes the detail window and returns to the list.
     *
     * @param driver           the active driver, focused on the list window
     * @param jobCard          the {@code job-card-wrapper} element to process
     * @param listWindowHandle handle of the list window to switch back to
     * @param outputFile       absolute path of the text file to write
     * @throws InterruptedException if a retry sleep is interrupted
     */
    private static void scrapeJobCard(WebDriver driver, WebElement jobCard,
                                      String listWindowHandle, String outputFile) throws InterruptedException {
        // Retry until the link is present; prompt and pause only when the lookup failed.
        WebElement link = null;
        while (null == link) {
            try {
                link = jobCard.findElement(By.tagName("a"));
            } catch (Exception e) {
                e.printStackTrace();
                System.out.println(">>>>> 手动操作验证码 1111");
                Thread.sleep(CAPTCHA_RETRY_MILLIS);
            }
        }
        String jobUrl = link.getAttribute("href");
        String salary = jobCard.findElement(By.className("salary")).getText();
        String jobName = jobCard.findElement(By.className("job-name")).getText();
        String jobArea = jobCard.findElement(By.className("job-area-wrapper")).getText();

        // Retry the click until it succeeds; the flag flips on success so the
        // loop terminates after a transient failure.
        boolean clicked = false;
        while (!clicked) {
            try {
                link.click();
                clicked = true;
            } catch (Exception e) {
                e.printStackTrace();
                System.out.println(">>>>> 点击不了 4444");
                Thread.sleep(CLICK_RETRY_MILLIS);
            }
        }

        // Switch to the first window that is not the list window.
        Set<String> windowHandles = driver.getWindowHandles();
        for (String windowHandle : windowHandles) {
            if (!windowHandle.equals(listWindowHandle)) {
                driver.switchTo().window(windowHandle);
                break;
            }
        }
        Thread.sleep(10000);

        // Retry until the description block is present; prompt only on failure.
        WebElement jobTextElement = null;
        while (null == jobTextElement) {
            try {
                jobTextElement = driver.findElement(By.className("job-sec-text"));
            } catch (Exception e) {
                e.printStackTrace();
                System.out.println(">>>>> 手动操作验证码 22222");
                Thread.sleep(CAPTCHA_RETRY_MILLIS);
            }
        }
        String jobText = jobTextElement.getText();
        System.out.println(jobArea + " || " + jobName + " || " + salary + " || " + jobUrl + " || " + jobText);

        List<String> lines = new ArrayList<>();
        lines.add(jobArea);
        lines.add(jobName);
        lines.add(salary);
        lines.add(jobUrl);
        lines.add(jobText);
        lines.add(" || "); // record separator, useful once the files are concatenated
        FileUtil.writeUtf8Lines(lines, outputFile);

        driver.close();
        driver.switchTo().window(listWindowHandle);
    }

    /**
     * Clicks the "next page" control of the result list if there is one.
     *
     * @param driver the active driver, focused on the list window
     * @return {@code true} if the control was found and clicked,
     *         {@code false} if it is missing or looks disabled
     */
    private static boolean goToNextPage(WebDriver driver) {
        // "@class" selects the attribute; a bare "class" would compare a child
        // element named <class>. The wildcard avoids depending on the icon's tag name.
        List<WebElement> nextControls = driver.findElements(By.xpath("//*[@class='ui-icon-arrow-right']/.."));
        if (nextControls.isEmpty()) {
            return false;
        }
        WebElement nextControl = nextControls.get(0);
        // NOTE(review): assumes the site marks the last-page arrow with a
        // "disabled" CSS class; confirm against the live page markup.
        String cssClass = nextControl.getAttribute("class");
        boolean looksDisabled = cssClass != null && cssClass.contains("disabled");
        if (looksDisabled || !nextControl.isEnabled()) {
            return false;
        }
        nextControl.click();
        return true;
    }
}
通过 CMD 进入文件所在目录,执行以下命令,将所有 txt 文件合并到 total.txt:
type *.txt >> total.txt
手动点击验证