jsoup爬取虎牙直播的信息

使用 jsoup 爬取虎牙直播的信息,实时获取主播名字、直播间链接、推荐位、标题和人气等数据。

主要代码如下:

package main;

import java.util.ArrayList;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import Entity.HuyaDetails;
import Service.HuyaImpl;
import Service.IHuya;

/**
 * Command-line entry point that scrapes one Huya game-section listing page with jsoup.
 *
 * <p>For every live-room entry found on the page it extracts the streamer name, room link,
 * recommendation tag, title and popularity figure, prints the resulting {@link HuyaDetails},
 * and hands it to {@link IHuya#SaveDetails} for persistence. After the page has been processed
 * it prints the collected room links as a numbered list.
 */
public class QueryHuya {

    /** Base URL of the per-game listing pages; a section slug is appended to it. */
    private static final String BASE_URL = "http://www.huya.com/g/";

    /**
     * Section slug appended to {@link #BASE_URL}. "wzry" is the Honor of Kings section and
     * "lol" is League of Legends; change this value to scrape a different section.
     */
    private static final String SECTION = "wzry";

    /** CSS selector matching one list item per live room on the listing page. */
    private static final String ROOM_SELECTOR = "#js-live-list > li";

    /**
     * Fetches the listing page, extracts and saves the details of every room, then prints
     * all room links.
     *
     * <p>Any exception raised while fetching, parsing or saving aborts the run and its stack
     * trace is printed; nothing is rethrown.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.out.println("小乌龟开始爬了。。。。");
        IHuya ihuya = new HuyaImpl();

        try {
            Document document = Jsoup.connect(BASE_URL + SECTION).get();
            Elements rooms = document.select(ROOM_SELECTOR);

            // Room links, kept so they can be listed once scraping has finished.
            ArrayList<String> links = new ArrayList<String>();

            for (Element room : rooms) {
                String name = room.select("span > span.avatar.fl > i").text();        // streamer name
                String link = room.select("a.video-info.new-clickstat").attr("href"); // room link
                String tag = room.select("a.video-info.new-clickstat > em").text();   // recommendation tag
                String title = room.select("a.title.new-clickstat").text();           // room title
                String jsNum = room.select("span > span.num > i.js-num").text();      // popularity

                // Use a fresh object per room: reusing a single mutable instance would make
                // every saved row alias the same data if SaveDetails retains the reference.
                HuyaDetails huyaDetails = new HuyaDetails();
                huyaDetails.setName(name);
                huyaDetails.setTag(tag);
                huyaDetails.setTitle(title);
                huyaDetails.setJs_num(jsNum);
                huyaDetails.setLink(link);

                links.add(link);
                System.out.println(huyaDetails.toString());

                // Persists the row to the database; comment this line out if storage is not needed.
                ihuya.SaveDetails(huyaDetails);
            }
            System.out.println("小乌龟爬完了。。。。");

            // Print every collected room link with a 1-based index.
            for (int i = 0; i < links.size(); i++) {
                System.out.println("第" + (i + 1) + "个 : " + links.get(i));
            }
        } catch (Exception e) {
            // Top-level boundary: report the failure and let the program end normally.
            e.printStackTrace();
        }
    }
}

你可能感兴趣的:(记录)