用 Java 抓取网页,最常见的方式就是使用 jsoup,老司机们应该都知道。这个工具我之前也用过,但发现一个问题:如果要抓取的数据必须登录后才能访问,就很难处理;如果登录还涉及验证码,就更难操作了。
很早以前,就用jsoup尝试实现自动下载彼岸图网的高清图,但是他们的登录方式只有第三方登录,这样只用jsoup就会很难实现.
后来发现了 JxBrowser 这个工具,它可以很轻松地实现这样的需求。
下面通过抓取彼岸图网的案例来详细介绍 JxBrowser。
首先下载
https://jxbrowser.support.teamdev.com/support/home
具体如何下载这里就不详细说明了,注意一点我这里用的是 6.18版本,一定要使用6.18版本
将下载下来的压缩包解压,
获取如下两个jar包
jxbrowser-6.18.jar
jxbrowser-win32-6.18.jar
Product: JxBrowser
Version: 6.x
Licensed to:
License type: Enterprise
License info: JxBrowser License
Expiration date: 01-01-9999
Support expiration date: NO SUPPORT
Generation date: 01-01-1970
Platforms: win32/x86;win32/x64;mac/x86;mac/x64;linux/x86;linux/x64
Company name: TeamDev Ltd.
SigB: 1
SigA: 1
main代码
这里提一下,博主装过WindowBuilder插件了,具体使用参考如下链接
https://blog.csdn.net/xiaoxiao123jun/article/details/77330734
package code;
import java.awt.BorderLayout;
import java.awt.EventQueue;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.math.BigInteger;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import javax.swing.JFrame;
import com.teamdev.jxbrowser.chromium.Browser;
import com.teamdev.jxbrowser.chromium.DownloadHandler;
import com.teamdev.jxbrowser.chromium.DownloadItem;
import com.teamdev.jxbrowser.chromium.az;
import com.teamdev.jxbrowser.chromium.dom.By;
import com.teamdev.jxbrowser.chromium.dom.DOMElement;
import com.teamdev.jxbrowser.chromium.events.DownloadEvent;
import com.teamdev.jxbrowser.chromium.events.DownloadListener;
import com.teamdev.jxbrowser.chromium.events.LoadAdapter;
import com.teamdev.jxbrowser.chromium.events.StatusEvent;
import com.teamdev.jxbrowser.chromium.events.StatusListener;
import com.teamdev.jxbrowser.chromium.swing.BrowserView;
import javax.swing.JButton;
import java.awt.Button;
import javax.swing.JMenuBar;
import javax.swing.JMenu;
import javax.swing.JPanel;
import java.awt.event.ActionListener;
import java.awt.event.ActionEvent;
import java.awt.event.MouseAdapter;
import java.awt.event.MouseEvent;
import java.io.File;
import java.util.concurrent.*;
/**
 * Swing front end that walks the numbered picture pages of pic.netbian.com inside an embedded
 * JxBrowser view and clicks the element with id {@code img} on each page to start a download.
 *
 * <p>NOTE: an earlier revision of this class contained a static initializer that used reflection
 * to overwrite the {@code static final} fields {@code e} and {@code f} of JxBrowser's internal
 * class {@code az}. That code has been removed: it tampers with the internals of a commercial
 * library (apparently to defeat license verification), and it cannot work on JDK 12+, where
 * {@code Field.class.getDeclaredField("modifiers")} throws {@code NoSuchFieldException}.
 * Supply a valid JxBrowser license on the classpath instead.
 */
public class Windos {

    /** Page shown when the window first opens. */
    private static final String START_URL = "http://pic.netbian.com/tupian/1.html";

    /** Prefix of every crawled page; the page number and ".html" are appended. */
    private static final String PAGE_URL_PREFIX = "http://pic.netbian.com/tupian/";

    /** Directory that receives the downloaded files. */
    private static final String DOWNLOAD_DIR = "D:/file/";

    /** Crawling stops before this page number is reached. */
    private static final int LAST_PAGE_EXCLUSIVE = 50000;

    /** Pause after each page so the triggered download has time to run. */
    private static final long PAGE_INTERVAL_MILLIS = 15000;

    /** Back-off before the single retry of a page whose download element was not found. */
    private static final long RETRY_DELAY_MILLIS = 60 * 1000 * 5;

    /** Upper bound on how long one page load is awaited. */
    private static final long LOAD_TIMEOUT_SECONDS = 60;

    private JFrame frame;

    /** The crawl keeps running while this is {@code true}; cleared when the window is closing. */
    volatile boolean flag = true;

    /**
     * Number of the page currently being processed; also prefixed to download file names.
     * Written only by the crawler thread, read by the download callback, hence volatile.
     */
    volatile int i = 194;

    /**
     * Launch the application.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        EventQueue.invokeLater(new Runnable() {
            @Override
            public void run() {
                try {
                    Windos window = new Windos();
                    window.frame.setVisible(true);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        });
    }

    /**
     * Create the application.
     */
    public Windos() {
        initialize();
    }

    /**
     * Initialize the contents of the frame: the browser view, the download handler and the
     * button that starts the crawl.
     */
    private void initialize() {
        frame = new JFrame();
        frame.setTitle("彼岸抓图工具");
        frame.getContentPane().setEnabled(false);
        frame.setSize(1500, 1500);
        frame.getContentPane().setLayout(null);
        frame.setExtendedState(JFrame.MAXIMIZED_BOTH);
        frame.setLocationByPlatform(true);
        // Without this the JVM (and the crawl) would keep running after the window is closed.
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

        final Browser browser = new Browser();
        BrowserView view = new BrowserView(browser);
        view.setBounds(152, 39, 1322, 989);
        frame.getContentPane().add(view);

        browser.setDownloadHandler(createDownloadHandler());

        final JButton button = new JButton("开始抓图");
        button.setBounds(49, 311, 93, 23);
        // ActionListener (not MouseListener) so keyboard activation works and a disabled
        // button cannot start a second crawl.
        button.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                button.setEnabled(false);
                // The crawl sleeps between pages, so it must not run on the Swing EDT.
                Thread crawler = new Thread(new Runnable() {
                    @Override
                    public void run() {
                        crawl(browser);
                    }
                }, "page-crawler");
                crawler.setDaemon(true);
                crawler.start();
            }
        });
        frame.getContentPane().add(button);

        frame.addWindowListener(new java.awt.event.WindowAdapter() {
            @Override
            public void windowClosing(java.awt.event.WindowEvent e) {
                flag = false;
                browser.dispose();
            }
        });

        // Show the frame only after all children are in place (null layout does not re-layout).
        frame.setVisible(true);
        browser.loadURL(START_URL);
    }

    /**
     * Builds the handler that accepts every download and stores it in {@link #DOWNLOAD_DIR}
     * under the suggested file name prefixed with the current page number.
     */
    private DownloadHandler createDownloadHandler() {
        return new DownloadHandler() {
            @Override
            public boolean allowDownload(DownloadItem download) {
                File dir = new File(DOWNLOAD_DIR);
                if (!dir.isDirectory() && !dir.mkdirs()) {
                    System.err.println("Cannot create download directory: " + dir.getAbsolutePath());
                    return false;
                }
                File target = new File(dir, i + download.getDestinationFile().getName());
                download.setDestinationFile(target);
                download.addDownloadListener(new DownloadListener() {
                    @Override
                    public void onDownloadUpdated(DownloadEvent event) {
                        if (event.getDownloadItem().isCompleted()) {
                            System.out.println("Download is completed!");
                        }
                    }
                });
                System.out.println("Dest file: " + target.getAbsolutePath());
                return true;
            }
        };
    }

    /**
     * Visits the pages from the current value of {@link #i} up to {@link #LAST_PAGE_EXCLUSIVE},
     * triggering the download on each. A page whose download element is missing is retried once
     * after {@link #RETRY_DELAY_MILLIS}, then skipped. Runs on the crawler thread.
     */
    private void crawl(Browser browser) {
        try {
            for (; flag && i < LAST_PAGE_EXCLUSIVE; i++) {
                String pageUrl = PAGE_URL_PREFIX + i + ".html";
                if (!clickDownload(browser, pageUrl)) {
                    System.err.println("下载出错");
                    Thread.sleep(RETRY_DELAY_MILLIS);
                    if (flag && !clickDownload(browser, pageUrl)) {
                        System.err.println("Skipping after retry: " + pageUrl);
                    }
                }
                Thread.sleep(PAGE_INTERVAL_MILLIS);
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt for whoever owns the thread and stop crawling.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Loads {@code pageUrl} and clicks the element with id {@code img}.
     *
     * @return {@code true} if the element was found and clicked
     */
    private boolean clickDownload(Browser browser, String pageUrl) throws InterruptedException {
        if (!loadAndWait(browser, pageUrl)) {
            return false;
        }
        try {
            if (browser.getDocument() == null) {
                return false;
            }
            DOMElement downloadElement = browser.getDocument().findElement(By.id("img"));
            if (downloadElement == null) {
                return false;
            }
            downloadElement.click();
            return true;
        } catch (RuntimeException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Loads {@code pageUrl} and blocks until the main frame has finished loading or
     * {@link #LOAD_TIMEOUT_SECONDS} elapse. The listener is registered for this one load only,
     * so listeners do not accumulate across pages.
     *
     * @return {@code true} if the main frame finished loading within the timeout
     */
    private boolean loadAndWait(Browser browser, String pageUrl) throws InterruptedException {
        final CountDownLatch mainFrameLoaded = new CountDownLatch(1);
        LoadAdapter listener = new LoadAdapter() {
            @Override
            public void onFinishLoadingFrame(com.teamdev.jxbrowser.chromium.events.FinishLoadingEvent event) {
                // Ignore iframes; only the main document carries the download element.
                if (event.isMainFrame()) {
                    mainFrameLoaded.countDown();
                }
            }
        };
        browser.addLoadListener(listener);
        try {
            browser.loadURL(pageUrl);
            return mainFrameLoaded.await(LOAD_TIMEOUT_SECONDS, TimeUnit.SECONDS);
        } finally {
            browser.removeLoadListener(listener);
        }
    }
}
效果如下
这只是一个简单的案例,更强大的功能大家可以去查阅它的 API 文档。