Java娱乐-利用JxBrowser进行网页数据抓取

用Java抓取网页最常见的方式,老司机们应该都知道,就是使用jsoup。这个工具我之前也玩过,但发现一个问题:如果要抓取的数据必须登录后才能访问,只用jsoup处理起来就比较麻烦;如果登录还涉及验证码,就更难操作了。
很早以前,就用jsoup尝试实现自动下载彼岸图网的高清图,但是他们的登录方式只有第三方登录,这样只用jsoup就会很难实现.
后来发现了JxBrowser这个工具,它可以很轻松地实现这样的需求。

下面通过抓取彼岸图网的案例,详细介绍JxBrowser的用法。
首先下载
https://jxbrowser.support.teamdev.com/support/home
具体如何下载这里就不详细说明了,注意一点我这里用的是 6.18版本,一定要使用6.18版本

将下载下来的压缩包解压,
获取如下两个jar包
jxbrowser-6.18.jar
jxbrowser-win32-6.18.jar

项目结构如下:
java娱乐-利用JXBrower进行网页数据抓取_第1张图片
新建文件 teamdev.licenses

Product: JxBrowser
Version: 6.x
Licensed to:
License type: Enterprise
License info: JxBrowser License
Expiration date: 01-01-9999
Support expiration date: NO SUPPORT
Generation date: 01-01-1970
Platforms: win32/x86;win32/x64;mac/x86;mac/x64;linux/x86;linux/x64
Company name: TeamDev Ltd.
SigB: 1
SigA: 1

main代码

这里提一下,博主装过WindowBuilder插件了,具体使用参考如下链接
https://blog.csdn.net/xiaoxiao123jun/article/details/77330734

package code;

import java.awt.BorderLayout;
import java.awt.EventQueue;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.math.BigInteger;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import javax.swing.JFrame;

import com.teamdev.jxbrowser.chromium.Browser;
import com.teamdev.jxbrowser.chromium.DownloadHandler;
import com.teamdev.jxbrowser.chromium.DownloadItem;
import com.teamdev.jxbrowser.chromium.az;
import com.teamdev.jxbrowser.chromium.dom.By;
import com.teamdev.jxbrowser.chromium.dom.DOMElement;
import com.teamdev.jxbrowser.chromium.events.DownloadEvent;
import com.teamdev.jxbrowser.chromium.events.DownloadListener;
import com.teamdev.jxbrowser.chromium.events.LoadAdapter;
import com.teamdev.jxbrowser.chromium.events.StatusEvent;
import com.teamdev.jxbrowser.chromium.events.StatusListener;
import com.teamdev.jxbrowser.chromium.swing.BrowserView;
import javax.swing.JButton;
import java.awt.Button;
import javax.swing.JMenuBar;
import javax.swing.JMenu;
import javax.swing.JPanel;
import java.awt.event.ActionListener;
import java.awt.event.ActionEvent;
import java.awt.event.MouseAdapter;
import java.awt.event.MouseEvent;
import java.io.File;
import java.util.concurrent.*;
/**
 * Swing window that embeds a JxBrowser view and, once the start button is pressed,
 * walks through the numbered picture pages of pic.netbian.com. On every page it
 * clicks the element with id {@code img}; downloads triggered by that click are
 * saved under {@link #DOWNLOAD_DIR} by the browser's download handler.
 *
 * <p>Threading: all Swing work happens on the event dispatch thread, the page loop
 * runs on a dedicated background thread, and JxBrowser invokes the load/download
 * callbacks on its own threads. State shared between them is {@code volatile}.
 */
public class Windos {

    /** Picture pages are addressed as PREFIX + page number + SUFFIX. */
    private static final String PAGE_URL_PREFIX = "http://pic.netbian.com/tupian/";
    private static final String PAGE_URL_SUFFIX = ".html";

    /** Directory that receives every downloaded file. */
    private static final String DOWNLOAD_DIR = "D:/file/";

    /** The crawl stops before this page number. */
    private static final int LAST_PAGE_EXCLUSIVE = 50000;

    /** Time granted to one page for loading and starting its download. */
    private static final long PAGE_INTERVAL_MILLIS = 15000L;

    /** Pause before a page whose download element could not be clicked is retried. */
    private static final long RETRY_DELAY_MILLIS = 60 * 1000 * 5;

    /** A page is loaded at most this many times before the crawler moves on. */
    private static final int MAX_ATTEMPTS_PER_PAGE = 2;

    private JFrame frame;

    // NOTE(review): this initializer uses reflection to strip `final` from two static
    // fields ("e" and "f") of JxBrowser's obfuscated class `az` and overwrites both with
    // BigInteger 1. It appears to exist only to get past JxBrowser's license validation.
    // That is not something to ship: obtain a valid (evaluation or commercial) license
    // from TeamDev and delete this block. The code itself is left untouched here.
    static {
        try {
            Field e = az.class.getDeclaredField("e");
            e.setAccessible(true);
            Field f = az.class.getDeclaredField("f");
            f.setAccessible(true);
            Field modifersField = Field.class.getDeclaredField("modifiers");
            modifersField.setAccessible(true);
            modifersField.setInt(e, e.getModifiers() & ~Modifier.FINAL);
            modifersField.setInt(f, f.getModifiers() & ~Modifier.FINAL);
            e.set(null, new BigInteger("1"));
            f.set(null, new BigInteger("1"));
            modifersField.setAccessible(false);
        } catch (Exception e1) {
            e1.printStackTrace();
        }
    }

    /**
     * Launches the application: builds the window on the event dispatch thread and
     * shows it once all components have been added.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        EventQueue.invokeLater(new Runnable() {
            @Override
            public void run() {
                try {
                    Windos window = new Windos();
                    window.frame.setVisible(true);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        });
    }

    /**
     * Creates the application window. The frame is not shown by the constructor.
     */
    public Windos() {
        initialize();
    }

    /** Kept for compatibility; not read anywhere in this class. */
    volatile boolean flag = true;

    /**
     * Number of the page currently being processed; also used as file-name prefix.
     * Written only by the crawler thread, read by JxBrowser callback threads.
     */
    volatile int i = 194;

    /** True while the crawler thread is running; gates the automatic click. */
    private volatile boolean crawling;

    /** Set by the load listener when the download element of the current page was clicked. */
    private volatile boolean clickSucceeded;

    /**
     * Builds the frame, the embedded browser, the download handler, the single load
     * listener and the start button, then loads the first picture page.
     */
    private void initialize() {
        final String url = "http://pic.netbian.com/tupian/1.html";
        final String title = "彼岸抓图工具";

        frame = new JFrame();
        frame.setTitle(title);
        // Without this, closing the window only hides it and the JVM keeps running.
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        frame.getContentPane().setEnabled(false);
        frame.setSize(1500, 1500);
        frame.getContentPane().setLayout(null);
        frame.setExtendedState(JFrame.MAXIMIZED_BOTH);
        frame.setLocationByPlatform(true);

        final Browser browser = new Browser();
        BrowserView view = new BrowserView(browser);
        view.setBounds(152, 39, 1322, 989);
        frame.getContentPane().add(view);

        browser.setDownloadHandler(createDownloadHandler());

        // Exactly one listener for the lifetime of the browser. Registering a new one
        // per page would make every later page load fire all earlier listeners too.
        browser.addLoadListener(new LoadAdapter() {
            @Override
            public void onFinishLoadingFrame(com.teamdev.jxbrowser.chromium.events.FinishLoadingEvent event) {
                // Ignore sub-frames and anything loaded before the user started the crawl.
                if (!crawling || !event.isMainFrame()) {
                    return;
                }
                if (clickDownloadElement(event.getBrowser())) {
                    clickSucceeded = true;
                }
            }
        });

        final JButton button = new JButton("开始抓图");
        button.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                // A disabled button fires no further actions, so only one crawler starts.
                button.setEnabled(false);
                startCrawler(browser);
            }
        });
        button.setBounds(49, 311, 93, 23);
        frame.getContentPane().add(button);

        browser.loadURL(url);
    }

    /**
     * Creates the handler that accepts every download and stores it in
     * {@link #DOWNLOAD_DIR}, prefixing the suggested file name with the current page number.
     */
    private DownloadHandler createDownloadHandler() {
        return new DownloadHandler() {
            @Override
            public boolean allowDownload(DownloadItem download) {
                File target = new File(DOWNLOAD_DIR + i + download.getDestinationFile().getName());
                File directory = target.getParentFile();
                if (directory != null && !directory.isDirectory() && !directory.mkdirs()) {
                    System.err.println("Cannot create download directory: " + directory.getAbsolutePath());
                    return false;
                }
                download.setDestinationFile(target);
                download.addDownloadListener(new DownloadListener() {
                    @Override
                    public void onDownloadUpdated(DownloadEvent event) {
                        if (event.getDownloadItem().isCompleted()) {
                            System.out.println("Download is completed!");
                        }
                    }
                });
                System.out.println("Dest file: " + target.getAbsolutePath());
                return true;
            }
        };
    }

    /**
     * Starts the page loop on a background thread so that the event dispatch thread
     * stays free and the window remains responsive.
     */
    private void startCrawler(final Browser browser) {
        crawling = true;
        Thread worker = new Thread(new Runnable() {
            @Override
            public void run() {
                crawl(browser);
            }
        }, "netbian-crawler");
        worker.setDaemon(true);
        worker.start();
    }

    /**
     * Loads the pages from the current value of {@link #i} up to
     * {@link #LAST_PAGE_EXCLUSIVE}, waiting {@link #PAGE_INTERVAL_MILLIS} per page.
     * A page whose download element was not clicked is retried after
     * {@link #RETRY_DELAY_MILLIS}, up to {@link #MAX_ATTEMPTS_PER_PAGE} loads in total,
     * and is then skipped. Returns early when the thread is interrupted.
     */
    private void crawl(Browser browser) {
        try {
            // i++ on a volatile is fine here: this thread is the only writer.
            for (; i < LAST_PAGE_EXCLUSIVE; i++) {
                for (int attempt = 1; attempt <= MAX_ATTEMPTS_PER_PAGE; attempt++) {
                    clickSucceeded = false;
                    browser.loadURL(PAGE_URL_PREFIX + i + PAGE_URL_SUFFIX);
                    Thread.sleep(PAGE_INTERVAL_MILLIS);
                    if (clickSucceeded) {
                        break;
                    }
                    System.err.println("下载出错");
                    if (attempt < MAX_ATTEMPTS_PER_PAGE) {
                        Thread.sleep(RETRY_DELAY_MILLIS);
                    }
                }
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt for whoever owns this thread and stop crawling.
            Thread.currentThread().interrupt();
        } finally {
            crawling = false;
        }
    }

    /**
     * Clicks the element with id {@code img} in the browser's current document.
     *
     * @return {@code true} if the element was found and clicked, {@code false} if the
     *         document or element is missing or the DOM call failed
     */
    private boolean clickDownloadElement(Browser browser) {
        try {
            if (browser.getDocument() == null) {
                return false;
            }
            DOMElement target = browser.getDocument().findElement(By.id("img"));
            if (target == null) {
                return false;
            }
            target.click();
            return true;
        } catch (RuntimeException e) {
            // Report a failed DOM call as "not clicked" instead of letting it escape
            // into the browser's listener thread; the crawler decides about a retry.
            e.printStackTrace();
            return false;
        }
    }
}

效果如下


java娱乐-利用JXBrower进行网页数据抓取_第2张图片

这只是一个简单的案例,更强大的功能大家可以去查阅JxBrowser的API文档。

你可能感兴趣的:(娱乐)