java爬虫(一)用selenium驱动IE和谷歌浏览器模拟点击网页

我们公司的需求总是那么折磨人,最近要做模拟人一步步点击拼多多商品发起支付然后得到最后一步的链接。那么之前学的selenium就派上用场了,在本篇博客我将带大家学会Java爬虫的第一步,用selenium模拟人点击一个个的标签,得到我们想要的网页或者链接。

一、驱动IE浏览器

1.1 准备工作 下载ie浏览器驱动,IEDriverServer的版本号和Selenium的版本号一定要一致,然后我把IEDriverServer.exe重命名为Quark.exe,放在本地C:\Program Files下面(也可以写个方法,把驱动文件放在项目里,启动项目时把它拷贝到C:\Program Files下面)。

    /**
     * Copies the bundled driver executable from the classpath resource
     * {@code tool/QuarkGG.exe} into the given directory, unless a file with
     * the same name already exists there.
     *
     * @param path target directory the executable is copied into
     * @return {@code true} if the resource could be read and the target file
     *         already existed or was written; {@code false} if an I/O error occurred
     */
    public static boolean doPrepare(String path) {
        ClassPathResource classPathResource = new ClassPathResource("tool/QuarkGG.exe");
        // Use the resource name as-is: re-decoding its UTF-8 bytes as ISO-8859-1
        // would garble any non-ASCII file name.
        String fileName = classPathResource.getFilename();
        File file = new File(path + File.separator + fileName);

        try {
            // Read first so that a missing resource is still reported as a failure.
            byte[] bytes = FileCopyUtils.copyToByteArray(classPathResource.getInputStream());
            if (!file.exists()) {
                // try-with-resources guarantees the file handle is flushed and released.
                try (FileOutputStream output = new FileOutputStream(file)) {
                    output.write(bytes);
                }
            }
            return true;
        } catch (IOException e) {
            // No logger is visible in this snippet; keep the original reporting channel.
            e.printStackTrace();
            return false;
        }
    }

1.2 创建Springboot项目

1.2.1添加maven依赖

  <!-- Selenium Internet Explorer driver bindings, version 3.141.59 -->
  <dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>selenium-ie-driver</artifactId>
            <version>3.141.59</version>
  </dependency>

1.2.2写核心类

import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.ie.InternetExplorerDriver;
import org.springframework.util.StringUtils;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;

import java.io.File;
import java.util.List;
import java.util.concurrent.TimeUnit;

/**
 * REST endpoint that drives a local Internet Explorer instance through the
 * purchase steps of a goods page and returns the URL the browser ends up on.
 */
@RestController
@Slf4j
public class OrderController {
    /** Directory that contains the renamed IE driver executable ({@code Quark.exe}). */
    private static final String DRIVER_DIR = "C:\\Program Files";

    /**
     * Opens the goods page, clicks the buy label, clicks every SKU option,
     * clicks the confirm bar and reads the resulting URL.
     *
     * @return the browser's current URL after the last click, or {@code null}
     *         when no driver directory is configured
     * @throws InterruptedException if the thread is interrupted during one of the fixed pauses
     */
    @GetMapping("/order")
    public String openAndLogin() throws InterruptedException {
        // Locate the local IE driver.
        if (StringUtils.isEmpty(DRIVER_DIR)) {
            log.warn("没有找到Quark");
            return null;
        }
        // Tell Selenium where the driver executable lives.
        System.setProperty("webdriver.ie.driver", DRIVER_DIR + File.separator + "Quark.exe");
        // To drive Chrome instead: set "webdriver.chrome.driver" to QuarkGG.exe and create a ChromeDriver.

        // The driver is kept local so concurrent requests do not overwrite each other's browser session.
        WebDriver driver = new InternetExplorerDriver();
        try {
            driver.manage().window().maximize();
            driver.manage().timeouts().implicitlyWait(5, TimeUnit.SECONDS);

            // Simulate the manual clicks, starting from the goods page.
            driver.get("https://mobile.yangkeduo.com/goods2.html?goods_id=7419459408&page_from=23&share_uin=S7GZQW52Z3VAWAYRWXMUOOWTI4_GEXDA&refer_share_id=d04900312af648d38e3d3cc3a49f0f1f&refer_share_uid=4480713700&refer_share_channel=");

            // TODO window switching: if a click opens a new window, pick the other handle from
            // driver.getWindowHandles() and activate it with driver.switchTo().window(handle).
            Thread.sleep(500);

            // Find the buy label and click it.
            driver.findElement(By.className("goods-buy-price")).click();
            Thread.sleep(1000);

            // Click every product attribute option.
            List<WebElement> list = driver.findElements(By.className("sku-spec-value"));
            for (WebElement element : list) {
                Thread.sleep(500);
                element.click();
            }
            Thread.sleep(1000);

            // Click the purchase button.
            driver.findElement(By.className("sku-selector-bottom")).click();
            Thread.sleep(1000);

            // Read the purchase link.
            String url = driver.getCurrentUrl();
            log.info(url);
            return url;
        } finally {
            // Always close the browser and stop the driver process, even when a step fails.
            driver.quit();
        }
    }
}

1.3 测试,启动项目浏览器地址栏输入:localhost:8080/order
java爬虫(一)用selenium驱动IE和谷歌浏览器模拟点击网页_第1张图片
最后得到支付链接, 搞定。

二、驱动谷歌浏览器

2.1下载驱动,本地谷歌浏览器和驱动的版本匹配很苛刻,根据下面图片来,我是将下载好的驱动chromedriver.exe重命名为QuarkGG.exe,然后放C:\Program Files下面。
java爬虫(一)用selenium驱动IE和谷歌浏览器模拟点击网页_第2张图片
2.2.1 在Springboot项目中添加谷歌驱动依赖

<!-- Selenium Chrome driver bindings, version 3.141.59 -->
<dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>selenium-chrome-driver</artifactId>
            <version>3.141.59</version>
        </dependency>

2.2.2 把核心类的驱动换到谷歌驱动

import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.ie.InternetExplorerDriver;
import org.springframework.util.StringUtils;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
import java.io.File;
import java.util.List;
import java.util.concurrent.TimeUnit;

/**
 * REST endpoint that drives a local Chrome instance through the purchase
 * steps of a goods page and returns the URL the browser ends up on.
 */
@RestController
@Slf4j
public class OrderController {
    /** Directory that contains the renamed Chrome driver executable ({@code QuarkGG.exe}). */
    private static final String DRIVER_DIR = "C:\\Program Files";

    /**
     * Opens the goods page, clicks the buy label, clicks every SKU option,
     * clicks the confirm bar and reads the resulting URL.
     *
     * @return the browser's current URL after the last click, or {@code null}
     *         when no driver directory is configured
     * @throws InterruptedException if the thread is interrupted during one of the fixed pauses
     */
    @GetMapping("/order")
    public String openAndLogin() throws InterruptedException {
        // Locate the local Chrome driver.
        if (StringUtils.isEmpty(DRIVER_DIR)) {
            log.warn("没有找到Quark");
            return null;
        }
        // Tell Selenium where the driver executable lives.
        System.setProperty("webdriver.chrome.driver", DRIVER_DIR + File.separator + "QuarkGG.exe");
        // To drive IE instead: set "webdriver.ie.driver" to Quark.exe and create an InternetExplorerDriver.

        // The driver is kept local so concurrent requests do not overwrite each other's browser session.
        WebDriver driver = new ChromeDriver();
        try {
            driver.manage().window().maximize();
            driver.manage().timeouts().implicitlyWait(5, TimeUnit.SECONDS);

            // Simulate the manual clicks, starting from the goods page.
            driver.get("https://mobile.yangkeduo.com/goods2.html?goods_id=7419459408&page_from=23&share_uin=S7GZQW52Z3VAWAYRWXMUOOWTI4_GEXDA&refer_share_id=d04900312af648d38e3d3cc3a49f0f1f&refer_share_uid=4480713700&refer_share_channel=");

            // TODO window switching: if a click opens a new window, pick the other handle from
            // driver.getWindowHandles() and activate it with driver.switchTo().window(handle).
            Thread.sleep(500);

            // Find the buy label and click it.
            driver.findElement(By.className("goods-buy-price")).click();
            Thread.sleep(1000);

            // Click every product attribute option.
            List<WebElement> list = driver.findElements(By.className("sku-spec-value"));
            for (WebElement element : list) {
                Thread.sleep(500);
                element.click();
            }
            Thread.sleep(1000);

            // Click the purchase button.
            driver.findElement(By.className("sku-selector-bottom")).click();
            Thread.sleep(1000);

            // Read the purchase link.
            String url = driver.getCurrentUrl();
            log.info(url);
            return url;
        } finally {
            // Always close the browser and stop the driver process, even when a step fails.
            driver.quit();
        }
    }
}

2.3 测试,启动项目浏览器地址栏输入:localhost:8080/order
java爬虫(一)用selenium驱动IE和谷歌浏览器模拟点击网页_第3张图片
测试成功。

你可能感兴趣的:(爬虫)