本帖的方法已经过时。需要爬取数据的可以改用携程 App 端(移动站点)的接口:该接口未加密,可以直接请求获取数据。下面给出一个简单的思路:
访问页面https://m.ctrip.com/webapp/hotel/hoteldetail/dianping/7500321.html?&fr=detail&atime=20190510&days=1
请求地址:https://m.ctrip.com/restapi/soa2/16765/gethotelcomment?_fxpcqlniredt=09031069110301833861
带入json参数 ,参数可以自己去看一下:
String json = "{\"hotelId\":" + hotelId + ",\"pageIndex\":" + pageIndex + ",\"tagId\":0,\"pageSize\":10,\"groupTypeBitMap\":2,\"needStatisticInfo\":0,\"order\":0,\"basicRoomName\":\"\",\"travelType\":-1,\"head\":{\"cid\":\"09031121310402803767\",\"ctok\":\"\",\"cver\":\"1.0\",\"lang\":\"01\",\"sid\":\"8888\",\"syscode\":\"09\",\"auth\":\"\",\"extension\":[]}}";
只需要传入酒店 id 和 pageIndex 即可。
当返回结果中的该标志值为 1 时,说明已经是最后一页(原文未注明具体字段名,请自行查看接口返回的 JSON)。爬取时注意让程序适当休眠,否则可能会被限流,甚至被封 IP。
以下方法已经不可用
源码可以在我的资源中下载,谢谢!https://download.csdn.net/download/qq_39477018/10764634
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.http.*;
import org.springframework.http.HttpMethod;
import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
import javax.swing.plaf.metal.OceanTheme;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.regex.Matcher;
/**
* Created by Administrator on 2018/9/28.
*/
public class GetHotelJudge {
private static BlockingQueue
GET 或 POST 请求的代码如下:由好几个类组成,写得比较复杂,可以直接复制使用。
import javax.net.ssl.*;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* @Author: liaog
* @Date: 2018-08-01 13:28
* @description:
*/
public class JavaHttpClient extends AbstractHttpClient implements HttpClient{
protected String defaultCharset = "utf-8";
protected int timeout = 20000;
protected boolean useProxy;
protected String hostname;
protected int port;
public HttpResponse doRequest(HttpMethod method, String url, Map userHeaders, InputStream data,Map cookies) throws IOException {
URL urlObject = new URL(url);
HttpURLConnection urlConnection = openConnection(urlObject);
if (urlObject.getProtocol().equalsIgnoreCase("https")) {
HttpsURLConnection httpsURLConnection = (HttpsURLConnection) urlConnection;
prepareForHttps(httpsURLConnection);
}
String sessionid="";
if (cookies != null) {
for (Map.Entry entry : cookies.entrySet()) {
//urlConnection.addRequestProperty(entry.getKey(), entry.getValue());
sessionid=sessionid+entry.getKey()+"="+entry.getValue()+";";
}
urlConnection.addRequestProperty("Cookie", sessionid);
}
urlConnection.setRequestMethod(method.name());
urlConnection.setConnectTimeout(timeout);
urlConnection.setReadTimeout(timeout);
urlConnection.setInstanceFollowRedirects(false);
if (userHeaders != null) {
for (Map.Entry entry : userHeaders.entrySet()) {
urlConnection.addRequestProperty(entry.getKey(), entry.getValue());
}
}
if (data != null) {
int len = data.available();
urlConnection.addRequestProperty("Content-Length", String.valueOf(len));
urlConnection.setDoInput(true);
urlConnection.setDoOutput(true);
OutputStream outputStream = urlConnection.getOutputStream();
IOUtil.copyAndClose(data, outputStream);
}
urlConnection.setInstanceFollowRedirects( false );
InputStream responseInputStream = urlConnection.getInputStream();
int responseCode = urlConnection.getResponseCode();
ByteArrayOutputStream bos = new ByteArrayOutputStream(64);
IOUtil.copyAndClose(responseInputStream, bos);
Map> headers = new HashMap>(urlConnection.getHeaderFields());
String sessionId = "";
String cookieVal = "";
String key = null;
Map map=new HashMap();
for(int i = 1; (key = urlConnection.getHeaderFieldKey(i)) != null; i++){//获取cookies
if(key.equalsIgnoreCase("set-cookie")){
cookieVal = urlConnection.getHeaderField(i);
cookieVal = cookieVal.substring(0, cookieVal.indexOf(";"));
String s[]=cookieVal.split("=");
map.put(s[0],s[1]);
sessionId = sessionId + cookieVal + ";";
System.out.println("==="+cookieVal);
}
}
// System.out.println("session"+sessionId);
String redirect=urlConnection.getHeaderField( "location" );//获得302转发地址
//System.out.println("location:"+redirect);
/*if(redirect!=null){
doRequest(HttpMethod.GET,redirect,userHeaders,null,map);
}*/
return new HttpResponse(defaultCharset, responseCode, headers, bos.toByteArray(),map);
}
private HttpURLConnection openConnection(URL url) throws IOException {
if (isUseProxy()) {
return (HttpURLConnection)url.openConnection(new Proxy(Proxy.Type.HTTP, new InetSocketAddress(getHostname(), getPort())));
} else {
return (HttpURLConnection)url.openConnection();
}
}
private void prepareForHttps(HttpsURLConnection httpsURLConnection) {
try {
SSLContext sslContext = SSLContext.getInstance("SSL");
sslContext.init(null, new TrustManager[]{new AbstractHttpClient.TrustAnyTrustManager()}, secureRandom);
httpsURLConnection.setSSLSocketFactory(sslContext.getSocketFactory());
httpsURLConnection.setHostnameVerifier(new TrustAnyHostnameVerifier());
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException(e);
} catch (KeyManagementException e) {
throw new RuntimeException(e);
}
}
public String getDefaultCharset() {
return defaultCharset;
}
public void setDefaultCharset(String defaultCharset) {
this.defaultCharset = defaultCharset;
}
public int getTimeout() {
return timeout;
}
public void setTimeout(int timeout) {
this.timeout = timeout;
}
public boolean isUseProxy() {
return useProxy;
}
public void setUseProxy(boolean useProxy) {
this.useProxy = useProxy;
}
public String getHostname() {
return hostname;
}
public void setHostname(String hostname) {
this.hostname = hostname;
}
public int getPort() {
return port;
}
public void setPort(int port) {
this.port = port;
}
}
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
/**
 * Contract for the HTTP helper classes in this file: one general request method
 * plus GET/POST conveniences. {@code AbstractHttpClient} implements the
 * conveniences by delegating to {@link #doRequest}.
 *
 * @Author: liaogk
 * @Date: 2018-08-01 13:18
 */
public interface HttpClient {
/**
 * General entry point that the other methods are built on.
 * The maps are declared raw; presumably both are name-to-value String maps
 * (the visible implementation passes them on as header and cookie text) - TODO confirm.
 *
 * @param method      HTTP method to use
 * @param url         target URL
 * @param userHeaders extra request headers, may be null
 * @param data        request body, may be null
 * @param cookies     cookies to send, may be null
 * @throws IOException on I/O failure
 */
HttpResponse doRequest(HttpMethod method, String url, Map userHeaders, InputStream data,Map cookies)throws IOException;
/** GET without extra headers or cookies; returns only the raw response body bytes. */
byte[] doGet(String url) throws IOException;
/** GET with the given request headers and no cookies. */
HttpResponse doGet(String url, Map headers) throws IOException;
/** GET with the given request headers and cookies. */
HttpResponse doGet2(String url, Map headers,Map cookies) throws IOException;
/** POST sending {@code data} as the request body, with the given headers and cookies. */
HttpResponse dopost(String url, Map headers,InputStream data,Map cookies) throws IOException;
}
/**
 * HTTP request methods understood by {@link HttpClient}. The constant name is
 * what the client sends as the request verb.
 *
 * @Author: liaogk
 * @Date: 2018-08-01 13:22
 */
public enum HttpMethod {
    POST,
    GET,
    PUT,
    DELETE,
    /**
     * Kept only for source compatibility.
     *
     * @deprecated {@code HEADER} is not an HTTP verb; use {@link #HEAD} instead.
     */
    @Deprecated
    HEADER,
    OPTIONS,
    /** The HTTP {@code HEAD} verb; appended last so existing ordinals stay unchanged. */
    HEAD;
}
import java.io.UnsupportedEncodingException;
import java.util.List;
import java.util.Map;
/**
 * In-memory result of one HTTP exchange: status code, response headers, the raw
 * body bytes and the cookies extracted from the response.
 *
 * @Author: liaog
 * @Date: 2018-08-01 13:24
 */
public class HttpResponse {
    private int responseCode;
    private Map<String, List<String>> headers;
    private byte[] responseData;
    private String defaultCharset;
    private Map<String, String> cookies;

    /**
     * @param defaultCharset charset name used when the response does not declare one
     * @param responseCode   HTTP status code
     * @param headers        response headers (name to values); may be null
     * @param responseData   raw response body
     * @param cookies        cookies extracted from the response (name to value)
     */
    public HttpResponse(String defaultCharset, int responseCode, Map<String, List<String>> headers, byte[] responseData, Map<String, String> cookies) {
        this.defaultCharset = defaultCharset;
        this.responseCode = responseCode;
        this.headers = headers;
        this.responseData = responseData;
        this.cookies = cookies;
    }

    public int getResponseCode() {
        return responseCode;
    }

    public byte[] getResponseData() {
        return responseData;
    }

    /**
     * Decodes the body using {@link #getResponseCharset()}.
     *
     * @throws RuntimeException wrapping {@link UnsupportedEncodingException} when the
     *                          charset name is not supported by this JVM
     */
    public String getResponseString() {
        try {
            return new String(responseData, getResponseCharset());
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns all values of the named header, or {@code null} when it is absent.
     * An exact-name match is tried first; otherwise names are compared ignoring
     * case, because HTTP header names are case-insensitive.
     */
    public List<String> getHeaders(String name) {
        if (headers == null) {
            return null;
        }
        List<String> exact = headers.get(name);
        if (exact != null || name == null) {
            return exact;
        }
        for (Map.Entry<String, List<String>> header : headers.entrySet()) {
            // equalsIgnoreCase(null) is false, so a null key (status line) never matches.
            if (name.equalsIgnoreCase(header.getKey())) {
                return header.getValue();
            }
        }
        return null;
    }

    /** Returns the first value of the named header, or {@code null} when it is absent or empty. */
    public String getHeader(String name) {
        List<String> theHeaders = getHeaders(name);
        if (theHeaders == null || theHeaders.isEmpty()) {
            return null;
        }
        return theHeaders.get(0);
    }

    /**
     * Returns the charset named by the {@code charset} parameter of the
     * Content-Type header, or the default charset when none is declared.
     * The parameter name is matched ignoring case and surrounding double
     * quotes are removed from the value.
     */
    public String getResponseCharset() {
        String contentType = getHeader("Content-Type");
        if (contentType == null || contentType.length() == 0) {
            return defaultCharset;
        }
        for (String part : contentType.split(";")) {
            String[] kvParts = part.trim().split("=", 2);
            if (kvParts.length < 2) {
                continue;
            }
            if ("charset".equalsIgnoreCase(kvParts[0].trim())) {
                String value = stripQuotes(kvParts[1].trim());
                if (value.length() > 0) {
                    return value;
                }
            }
        }
        return defaultCharset;
    }

    /** Removes one pair of enclosing double quotes, if present. */
    private static String stripQuotes(String value) {
        int length = value.length();
        if (length >= 2 && value.charAt(0) == '"' && value.charAt(length - 1) == '"') {
            return value.substring(1, length - 1).trim();
        }
        return value;
    }

    public Map<String, String> getCookies() {
        return cookies;
    }

    public void setCookies(Map<String, String> cookies) {
        this.cookies = cookies;
    }
}
import java.io.*;
/**
 * Small stream and file helpers used by the HTTP client classes.
 *
 * @Author: liaog
 * @Date: 2018-08-01 13:39
 */
public class IOUtil {
    private static final int BUF_LEN = 1024 * 8;

    /**
     * Per-thread copy buffer cached in a ThreadLocal so that repeated copies do
     * not allocate a new array each time.
     */
    private static final ThreadLocal<byte[]> bufTl = new ThreadLocal<byte[]>() {
        @Override
        protected byte[] initialValue() {
            return new byte[BUF_LEN];
        }
    };

    private static byte[] getBuf() {
        return bufTl.get();
    }

    /**
     * Copies everything from {@code is} to {@code os}, then closes both streams.
     * Both streams are closed even when reading or writing fails.
     *
     * @throws IOException if reading, writing or the final flush fails
     */
    public static void copyAndClose(InputStream is, OutputStream os) throws IOException {
        try {
            byte[] buf = getBuf();
            int len;
            while ((len = is.read(buf)) >= 0) {
                os.write(buf, 0, len);
            }
            // Flush explicitly: close(..) below swallows exceptions, so a failed
            // write of buffered data would otherwise go unnoticed.
            os.flush();
        } finally {
            close(is);
            close(os);
        }
    }

    /** Reads the whole file into a byte array. */
    public static byte[] readAsBytes(File file) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream(BUF_LEN);
        copyAndClose(readAsStream(file), bos);
        return bos.toByteArray();
    }

    /** Writes {@code data} to {@code file}, replacing any existing content. */
    public static void writeBytesToFile(File file, byte[] data) throws IOException{
        FileOutputStream fos = new FileOutputStream(file);
        copyAndClose(new ByteArrayInputStream(data), fos);
    }

    /** Opens the file for reading; the caller is responsible for closing the stream. */
    public static InputStream readAsStream(File file) throws IOException {
        return new FileInputStream(file);
    }

    /** Closes {@code c} if it is non-null, deliberately ignoring any failure (best effort). */
    public static void close(Closeable c) {
        if (c != null) {
            try {
                c.close();
            } catch (Exception ignored) {
                // best-effort close: there is nothing useful the caller could do here
            }
        }
    }

    /**
     * Returns the part of {@code name} starting at its last {@code '.'} (dot
     * included), or {@code null} when {@code name} is null or contains no dot.
     */
    public static String getFileNameSuffix(String name) {
        if (name == null) {
            return null;
        }
        int pos = name.lastIndexOf('.');
        if (pos < 0) {
            return null;
        }
        return name.substring(pos);
    }
}
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.SSLSession;
import javax.net.ssl.X509TrustManager;
import java.io.IOException;
import java.io.InputStream;
import java.security.SecureRandom;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.Map;
/**
 * Base class that expresses the GET/POST convenience methods of
 * {@link HttpClient} in terms of the single abstract {@link #doRequest} call,
 * and supplies the permissive TLS helpers used by subclasses.
 *
 * @Author: liaogk
 * @Date: 2018-08-01 13:19
 */
public abstract class AbstractHttpClient implements HttpClient{

    /** Randomness source available to subclasses (e.g. when initialising an SSL context). */
    protected final SecureRandom secureRandom = new SecureRandom();

    /** The one operation subclasses must provide; every convenience method below ends up here. */
    @Override
    public abstract HttpResponse doRequest(HttpMethod method, String url, Map userHeaders, InputStream data ,Map cookies) throws IOException;

    /** GET without headers or cookies, returning only the body bytes. */
    @Override
    public byte[] doGet(String url) throws IOException {
        HttpResponse response = doGet(url, null);
        return response.getResponseData();
    }

    /** GET with request headers, no body and no cookies. */
    @Override
    public HttpResponse doGet(String url, Map headers) throws IOException {
        final InputStream noBody = null;
        final Map noCookies = null;
        return doRequest(HttpMethod.GET, url, headers, noBody, noCookies);
    }

    /** GET with request headers and cookies, no body. */
    @Override
    public HttpResponse doGet2(String url, Map headers,Map cookies) throws IOException {
        final InputStream noBody = null;
        return doRequest(HttpMethod.GET, url, headers, noBody, cookies);
    }

    /** POST with request headers, a body stream and cookies. */
    @Override
    public HttpResponse dopost(String url, Map headers,InputStream data, Map cookies) throws IOException {
        return doRequest(HttpMethod.POST, url, headers, data, cookies);
    }

    /**
     * Trust manager whose checks never throw, i.e. every certificate chain is accepted.
     * WARNING: using it disables certificate validation.
     */
    protected static class TrustAnyTrustManager implements X509TrustManager {
        @Override
        public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
            // intentionally empty: nothing is rejected
        }

        @Override
        public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
            // intentionally empty: nothing is rejected
        }

        @Override
        public X509Certificate[] getAcceptedIssuers() {
            X509Certificate[] none = new X509Certificate[0];
            return none;
        }
    }

    /**
     * Host name verifier that approves every host.
     * WARNING: using it disables host name validation.
     */
    protected static class TrustAnyHostnameVerifier implements HostnameVerifier {
        @Override
        public boolean verify(String host, SSLSession session) {
            return true;
        }
    }
}