package com.teamdev.jxbrowser.chromium.demo_sanya12.xiecheng.evment;
import java.awt.BorderLayout;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Timestamp;
import java.util.Random;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import javax.swing.JFrame;
import javax.swing.WindowConstants;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.hyjx.common.CommonUtil;
import com.hyjx.orclJdbcUtil.JDBCUtils;
import com.hyjx.xcUtil.XcTool;
import com.teamdev.jxbrowser.chromium.Browser;
import com.teamdev.jxbrowser.chromium.BrowserPreferences;
import com.teamdev.jxbrowser.chromium.JSValue;
import com.teamdev.jxbrowser.chromium.LoggerProvider;
import com.teamdev.jxbrowser.chromium.events.FinishLoadingEvent;
import com.teamdev.jxbrowser.chromium.events.LoadAdapter;
import com.teamdev.jxbrowser.chromium.swing.BrowserView;
/**
 * Scrapes hotel reviews from Ctrip through an embedded JxBrowser window.
 *
 * <p>{@link #main} reads the pending Ctrip hotel rows from {@code a_a_nm_good_12}; for each
 * row {@link #search} loads the hotel page, opens the review view and walks the review pages,
 * printing every review whose time text starts with {@link #TARGET_MONTH}.
 *
 * <p>NOTE: every JDBC {@code executeUpdate()} call in this class is disabled (commented out),
 * so the class currently only prints what it would store. The statements are still prepared
 * and bound so the writes can be re-enabled by uncommenting a single line each.
 */
public class sy_good_xc_comment {

    /** Year-month prefix a review's time text must start with to be collected. */
    private static final String TARGET_MONTH = "2016-12";

    /** Selects the Ctrip hotel rows that have not been processed yet. */
    private static final String SELECT_PENDING_SQL =
            " select * from a_a_nm_good_12 where plat_name='携程' and type ='酒店' and state is null ";

    /** Marks one hotel row as processed (execution is currently disabled in main). */
    private static final String MARK_DONE_SQL =
            " update a_a_nm_good_12 set state = '评论已抓' where good_id = ? ";

    /** Stores one review (execution is currently disabled in scrapeCurrentPage). */
    private static final String INSERT_REVIEW_SQL =
            " insert into a_a_nm_evment_12 " +
            " (ev_id, shop_id, ev_time, ev_user, ev_content,good_id) " +
            " values " +
            " (? , ? , ? , ? , ? ,? ) ";

    /** Line printed after each collected review. */
    private static final String SEPARATOR =
            "---------------------------------------------------------------------------------------------";

    /**
     * Opens the browser window, then scrapes the reviews of every pending hotel row.
     *
     * @param args unused
     * @throws Exception if the database cannot be reached or queried, or if {@link #search} fails
     */
    public static void main(String[] args) throws Exception {
        // Initialise the browser; only severe JxBrowser log records are kept.
        LoggerProvider.getBrowserLogger().setLevel(Level.SEVERE);
        LoggerProvider.getIPCLogger().setLevel(Level.SEVERE);
        LoggerProvider.getChromiumProcessLogger().setLevel(Level.SEVERE);
        final Browser browser = new Browser();
        BrowserView browserView = new BrowserView(browser);
        BrowserPreferences preferences = browser.getPreferences();
        preferences.setImagesEnabled(false);
        // The modified preferences have to be handed back to the browser to take effect
        // (pattern from the JxBrowser 6 documentation).
        browser.setPreferences(preferences);

        JFrame frame = new JFrame();
        frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
        frame.add(browserView, BorderLayout.CENTER);
        frame.setSize(700, 800);
        frame.setLocationRelativeTo(null);
        frame.setVisible(true);

        // Database access. A connection failure is fatal here: without the result set
        // there is nothing to iterate over.
        java.sql.Connection conOrcale = null;
        PreparedStatement ps = null;
        Statement st2 = null;
        ResultSet rs2 = null;
        try {
            conOrcale = JDBCUtils.getConnection();
            if (conOrcale == null) {
                throw new IllegalStateException("JDBCUtils.getConnection() returned null");
            }
            ps = conOrcale.prepareStatement(MARK_DONE_SQL);
            st2 = conOrcale.createStatement();
            rs2 = st2.executeQuery(SELECT_PENDING_SQL);

            int i = 1;
            while (rs2.next()) {
                System.out.println(i);
                i++;
                final String good_url = rs2.getString("good_url");
                String good_id = rs2.getString("good_id");
                String shop_id = rs2.getString("shop_id");
                search(good_url, shop_id, good_id, frame, browser);
                ps.setString(1, good_id);
                // Marking the row as processed is intentionally disabled:
                //ps.executeUpdate();
            }
        } finally {
            closeQuietly(rs2);
            closeQuietly(st2);
            closeQuietly(ps);
            closeQuietly(conOrcale);
        }
    }

    /**
     * Loads one hotel page and prints its reviews from {@link #TARGET_MONTH}.
     *
     * <p>After loading, the review view is opened, the second option of the sort drop-down is
     * selected (presumably "newest first" -- confirm against the page) and review pages are
     * walked via {@link #jumpNext}. Scraping of this hotel stops at the first review whose
     * time text does not start with the target month.
     *
     * <p>Database setup is best effort: if the connection or the insert statement cannot be
     * created, the error is printed and the reviews are still printed to the console.
     *
     * @param good_url URL of the hotel page
     * @param shop_id  shop id bound into the insert statement
     * @param good_id  hotel id bound into the insert statement
     * @param frame    unused; kept for signature compatibility
     * @param browser  browser used to load and script the page
     * @throws Exception declared for compatibility with existing callers
     */
    static void search(final String good_url, String shop_id, String good_id, JFrame frame,
            final Browser browser) throws Exception {
        java.sql.Connection conOrcale = null;
        PreparedStatement ps2 = null;
        try {
            try {
                conOrcale = JDBCUtils.getConnection();
                ps2 = conOrcale.prepareStatement(INSERT_REVIEW_SQL);
            } catch (Exception e) {
                // Best effort: keep scraping (console output only) without a statement.
                e.printStackTrace();
            }

            System.out.println("good_url:" + good_url);
            invokeAndWaitReady(browser, new Runnable() {
                public void run() {
                    browser.loadURL(good_url);
                }
            });
            pause(1000);

            // Pages without the review view element are skipped.
            Document document = Jsoup.parse(browser.getHTML());
            if (document.getElementsByAttributeValue("id", "id_comment_view").size() == 0) {
                return;
            }

            browser.executeJavaScriptAndReturnValue("document.getElementsByClassName('comment_view')[0].click();");
            pause(2000);
            browser.executeJavaScriptAndReturnValue("document.getElementsByClassName('select_sort')[0].options[1].selected=true");
            browser.executeJavaScriptAndReturnValue("document.getElementById('cPageBtn').click()");
            pause(2000);

            if (!scrapeCurrentPage(browser, ps2, good_url, shop_id, good_id, true)) {
                return;
            }
            // Keep going while a "next page" control exists.
            while (jumpNext(browser)) {
                pause(2000);
                if (!scrapeCurrentPage(browser, ps2, good_url, shop_id, good_id, false)) {
                    return;
                }
            }
        } finally {
            // One connection is opened per call, so it must be released per call.
            closeQuietly(ps2);
            closeQuietly(conOrcale);
        }
    }

    /**
     * Prints the reviews on the page currently shown by the browser.
     *
     * @param firstPage selects the console format; the first page and the following pages
     *                  have always printed slightly different labels
     * @param ps2       insert statement to bind, or {@code null} if it could not be prepared
     * @return {@code false} as soon as a review outside {@link #TARGET_MONTH} is seen
     *         (including a review with missing or too short time text), {@code true} if
     *         every review on the page was inside the target month
     */
    private static boolean scrapeCurrentPage(Browser browser, PreparedStatement ps2,
            String good_url, String shop_id, String good_id, boolean firstPage) {
        Document document = Jsoup.parse(browser.getHTML());
        Elements blocks = document.getElementsByAttributeValue("class", "comment_block J_asyncCmt");
        System.out.println("elements.size()" + blocks.size());

        for (Element block : blocks) {
            String EV_USER = block.getElementsByAttributeValue("class", "name").text();
            String EV_CONTENT = block.getElementsByAttributeValue("class", "J_commentDetail").text();
            String EV_TIME = block.getElementsByAttributeValue("class", "time").text()
                    .replace("发表于", "");

            // startsWith is safe for empty/short time text, unlike substring(0, 7).
            if (!EV_TIME.startsWith(TARGET_MONTH)) {
                return false;
            }

            if (firstPage) {
                System.out.println("good_url "+good_url);
                System.out.println("评论内容 "+EV_CONTENT);
                System.out.println("评论人 "+EV_USER);
                System.out.println("评论时间 "+EV_TIME);
                // The score is not scraped; the line is kept so console output is unchanged.
                System.out.println("评论分数 "+(String) null);
            } else {
                System.out.println("客体url:"+good_url);
                System.out.println("评论内容:"+EV_CONTENT);
                System.out.println("评论人: "+EV_USER);
                System.out.println("评论时间:"+EV_TIME);
            }

            if (ps2 != null) {
                try {
                    // (ev_id, shop_id, ev_time, ev_user, ev_content, good_id)
                    ps2.setString(1, CommonUtil.getUUID32());
                    ps2.setString(2, shop_id);
                    ps2.setString(3, EV_TIME);
                    ps2.setString(4, EV_USER);
                    ps2.setString(5, EV_CONTENT);
                    ps2.setString(6, good_id);
                    // Storing the review is intentionally disabled:
                    //ps2.executeUpdate();
                } catch (Exception e1) {
                    e1.printStackTrace();
                }
            }
            System.out.println(SEPARATOR);
        }
        return true;
    }

    /**
     * Clicks the "next page" control if the current page has one.
     *
     * @param browser browser showing the review list
     * @return {@code true} if an element whose class attribute is exactly {@code c_down}
     *         was found and a click was issued, {@code false} otherwise
     */
    public static boolean jumpNext(Browser browser) {
        Document document = Jsoup.parse(browser.getHTML());
        if (document.getElementsByAttributeValue("class", "c_down").size() == 0) {
            return false;
        }
        System.out.println("点击下一页");
        browser.executeJavaScriptAndReturnValue("document.getElementsByClassName('c_down')[0].click()");
        return true;
    }

    /**
     * Runs {@code runnable} and blocks until the browser reports that the main frame has
     * finished loading, or until 60 seconds have passed.
     *
     * <p>A timeout does not abort the caller; it is only reported on stderr.
     *
     * @param browser  browser whose load events are observed
     * @param runnable action that triggers the load (for example a {@code loadURL} call)
     */
    public static void invokeAndWaitReady(Browser browser, Runnable runnable) {
        final CountDownLatch latch = new CountDownLatch(1);
        LoadAdapter listener = new LoadAdapter() {
            @Override
            public void onFinishLoadingFrame(FinishLoadingEvent event) {
                if (event.isMainFrame()) {
                    latch.countDown();
                }
            }
        };
        browser.addLoadListener(listener);
        try {
            runnable.run();
            try {
                if (!latch.await(60, TimeUnit.SECONDS)) {
                    System.err.println("Timed out after 60s waiting for the main frame to finish loading");
                }
            } catch (InterruptedException ignore) {
                ignore.printStackTrace();
                Thread.currentThread().interrupt();
            }
        } finally {
            browser.removeLoadListener(listener);
        }
    }

    /** Sleeps for the given time; an interrupt is printed and the interrupt flag restored. */
    private static void pause(long millis) {
        try {
            Thread.sleep(millis);
        } catch (InterruptedException e1) {
            e1.printStackTrace();
            Thread.currentThread().interrupt();
        }
    }

    /** Closes a result set, printing (not throwing) any failure; {@code null} is ignored. */
    private static void closeQuietly(ResultSet resultSet) {
        if (resultSet != null) {
            try {
                resultSet.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /** Closes a statement, printing (not throwing) any failure; {@code null} is ignored. */
    private static void closeQuietly(Statement statement) {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /** Closes a connection, printing (not throwing) any failure; {@code null} is ignored. */
    private static void closeQuietly(java.sql.Connection connection) {
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}