<!-- jsoup: HTML fetching and CSS-selector parsing (provides Jsoup, Document, Elements used below). -->
<!-- NOTE(review): 1.11.2 is an old release; consider checking for a newer version before reuse. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.2</version>
</dependency>
<!-- Lombok: supplies the @AllArgsConstructor annotation used on UrlScanner. -->
<!-- No version is declared here; presumably it is managed by a parent POM or dependencyManagement (confirm). -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<!-- optional: Maven does not pass this dependency on transitively to projects that depend on this one. -->
<optional>true</optional>
</dependency>
/**
 * Runs a fixed set of CSS-selector queries against a single parsed jsoup {@link Document}
 * to collect the elements that carry URLs (links, images, other {@code src} holders and
 * {@code <link>} imports).
 *
 * <p>Instances are obtained through the Lombok-generated static factory
 * {@code UrlScanner.n(document)}. The matched elements are returned exactly as jsoup
 * selects them; this class does not post-process their attribute values.
 */
@AllArgsConstructor(staticName = "n")
public class UrlScanner {

    /** The document every query runs against; assigned once by the generated factory. */
    private final Document document;

    /**
     * Selects all anchor elements that have an {@code href} attribute.
     *
     * @return the elements matching {@code a[href]}
     */
    public Elements getAllAHref() {
        return document.select("a[href]");
    }

    /**
     * Selects all image elements that have a {@code src} attribute.
     *
     * @return the elements matching {@code img[src]}
     */
    public Elements getAllImg() {
        return document.select("img[src]");
    }

    /**
     * Selects every element, of any tag, that has a {@code src} attribute.
     *
     * <p>The selector is not limited to one tag, so the result also contains whatever
     * {@link #getAllImg()} returns, plus elements such as {@code <script src=...>}.
     *
     * @return the elements matching {@code [src]}
     */
    public Elements getAllMedia() {
        return document.select("[src]");
    }

    /**
     * Selects all {@code <link>} elements that have an {@code href} attribute
     * (for example icons, search descriptors and prefetch hints).
     *
     * @return the elements matching {@code link[href]}
     */
    public Elements getAllImports() {
        return document.select("link[href]");
    }
}
/**
 * Demo entry point: downloads the Baidu home page, wraps the parsed document in a
 * {@code UrlScanner}, and prints each group of matched elements to standard output in
 * this order: images, anchors, all {@code src} holders, {@code <link>} imports.
 *
 * @param args command-line arguments; not used
 * @throws java.io.IOException if the page cannot be fetched or read
 */
public static void main(String[] args) throws java.io.IOException {
    // Connection.get() is the only call here that throws a checked exception (IOException),
    // so the signature declares exactly that instead of the blanket Exception.
    Document document = Jsoup.connect("https://www.baidu.com").get();
    UrlScanner scanner = UrlScanner.n(document);

    scanner.getAllImg().forEach(System.out::println);
    scanner.getAllAHref().forEach(System.out::println);
    scanner.getAllMedia().forEach(System.out::println);
    scanner.getAllImports().forEach(System.out::println);
}
输出结果
<img hidefocus="true" class="index-logo-src" src="//www.baidu.com/img/dong1_a1c52951c1f40e1496b46b9ae415c121.gif" width="270" height="129" usemap="#mp">
<img hidefocus="true" class="index-logo-srcnew" src="//www.baidu.com/img/dong1_a1c52951c1f40e1496b46b9ae415c121.gif" width="270" height="129" usemap="#mp">
<img class="index-logo-src" src="//www.baidu.com/img/baidu_jgylogo3.gif" alt="到百度首页" title="到百度首页">
<img class="index-logo-srcnew" src="//www.baidu.com/img/baidu_resultlogo@2.png" alt="到百度首页" title="到百度首页">
<a href="/" id="result_logo" onmousedown="return c({'fm':'tab','tab':'logo'})"><img class="index-logo-src" src="//www.baidu.com/img/baidu_jgylogo3.gif" alt="到百度首页" title="到百度首页"><img class="index-logo-srcnew" src="//www.baidu.com/img/baidu_resultlogo@2.png" alt="到百度首页" title="到百度首页"></a>
<a href="javascript:;" name="ime_hw">手写</a>
<a href="javascript:;" name="ime_py">拼音</a>
<a href="javascript:;" name="ime_cl">关闭</a>
<a class="toindex" href="/">百度首页</a>
<a href="javascript:;" name="tj_settingicon" class="pf">设置<i class="c-icon c-icon-triangle-down"></i></a>
<a href="https://passport.baidu.com/v2/?login&tpl=mn&u=http%3A%2F%2Fwww.baidu.com%2F&sms=5" name="tj_login" class="lb" onclick="return false;">登录</a>
<a href="http://news.baidu.com" name="tj_trnews" class="mnav">新闻</a>
<a href="https://www.hao123.com" name="tj_trhao123" class="mnav">hao123</a>
<a href="http://map.baidu.com" name="tj_trmap" class="mnav">地图</a>
<a href="http://v.baidu.com" name="tj_trvideo" class="mnav">视频</a>
<a href="http://tieba.baidu.com" name="tj_trtieba" class="mnav">贴吧</a>
<a href="http://xueshu.baidu.com" name="tj_trxueshu" class="mnav">学术</a>
<a href="https://passport.baidu.com/v2/?login&tpl=mn&u=http%3A%2F%2Fwww.baidu.com%2F&sms=5" name="tj_login" class="lb" onclick="return false;">登录</a>
<a href="http://www.baidu.com/gaoji/preferences.html" name="tj_settingicon" class="pf">设置</a>
<a href="http://www.baidu.com/more/" name="tj_briicon" class="bri" style="display: block;">更多产品</a>
<a href="//www.baidu.com/s?rtt=1&bsst=1&cl=2&tn=news&word=" wdfield="word" onmousedown="return c({'fm':'tab','tab':'news'})" sync="true">资讯</a>
<a href="http://tieba.baidu.com/f?kw=&fr=wwwt" wdfield="kw" onmousedown="return c({'fm':'tab','tab':'tieba'})">贴吧</a>
<a href="http://zhidao.baidu.com/q?ct=17&pn=0&tn=ikaslist&rn=10&word=&fr=wwwt" wdfield="word" onmousedown="return c({'fm':'tab','tab':'zhidao'})">知道</a>
<a href="http://music.taihe.com/search?fr=ps&ie=utf-8&key=" wdfield="key" onmousedown="return c({'fm':'tab','tab':'music'})">音乐</a>
<a href="http://image.baidu.com/search/index?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&word=" wdfield="word" onmousedown="return c({'fm':'tab','tab':'pic'})">图片</a>
<a href="http://v.baidu.com/v?ct=301989888&rn=20&pn=0&db=0&s=25&ie=utf-8&word=" wdfield="word" onmousedown="return c({'fm':'tab','tab':'video'})">视频</a>
<a href="http://map.baidu.com/m?word=&fr=ps01000" wdfield="word" onmousedown="return c({'fm':'tab','tab':'map'})">地图</a>
<a href="http://wenku.baidu.com/search?word=&lm=0&od=0&ie=utf-8" wdfield="word" onmousedown="return c({'fm':'tab','tab':'wenku'})">文库</a>
<a href="//www.baidu.com/more/" onmousedown="return c({'fm':'tab','tab':'more'})">更多»</a>
<a id="setf" href="//www.baidu.com/cache/sethelp/help.html" onmousedown="return ns_c({'fm':'behs','tab':'favorites','pos':0})" target="_blank">把百度设为主页</a>
<a onmousedown="return ns_c({'fm':'behs','tab':'tj_about'})" href="http://home.baidu.com">关于百度</a>
<a onmousedown="return ns_c({'fm':'behs','tab':'tj_about_en'})" href="http://ir.baidu.com">About Baidu</a>
<a onmousedown="return ns_c({'fm':'behs','tab':'tj_tuiguang'})" href="http://e.baidu.com/?refer=888">百度推广</a>
<a href="http://www.baidu.com/duty/" onmousedown="return ns_c({'fm':'behs','tab':'tj_duty'})">使用百度前必读</a>
<a href="http://jianyi.baidu.com/" class="cp-feedback" onmousedown="return ns_c({'fm':'behs','tab':'tj_homefb'})">意见反馈</a>
<a id="jgwab" target="_blank" href="http://www.beian.gov.cn/portal/registerSystemInfo?recordcode=11000002000001">京公网安备11000002000001号</a>
<img hidefocus="true" class="index-logo-src" src="//www.baidu.com/img/dong1_a1c52951c1f40e1496b46b9ae415c121.gif" width="270" height="129" usemap="#mp">
<img hidefocus="true" class="index-logo-srcnew" src="//www.baidu.com/img/dong1_a1c52951c1f40e1496b46b9ae415c121.gif" width="270" height="129" usemap="#mp">
<img class="index-logo-src" src="//www.baidu.com/img/baidu_jgylogo3.gif" alt="到百度首页" title="到百度首页">
<img class="index-logo-srcnew" src="//www.baidu.com/img/baidu_resultlogo@2.png" alt="到百度首页" title="到百度首页">
<script type="text/javascript" src="https://ss1.bdstatic.com/5eN1bjq8AAUYm2zgoY3K/r/www/cache/static/protocol/https/jquery/jquery-1.10.2.min_65682a2.js"></script>
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
<link rel="search" type="application/opensearchdescription+xml" href="/content-search.xml" title="百度搜索">
<link rel="icon" sizes="any" mask href="//www.baidu.com/img/baidu_85beaf5496f291521eb75ba38eacbd87.svg">
<link rel="dns-prefetch" href="//s1.bdstatic.com">
<link rel="dns-prefetch" href="//t1.baidu.com">
<link rel="dns-prefetch" href="//t2.baidu.com">
<link rel="dns-prefetch" href="//t3.baidu.com">
<link rel="dns-prefetch" href="//t10.baidu.com">
<link rel="dns-prefetch" href="//t11.baidu.com">
<link rel="dns-prefetch" href="//t12.baidu.com">
<link rel="dns-prefetch" href="//b1.bdstatic.com">