HtmlUnit 获取不到某些页面的 form 表单，来讨论一下

下面的代码能获取到百度页面的 form 表单，但是获取不到另外一个网页的表单，此时该如何处理？

PS：把另外一个网页的源码保存为 HTML 文件后，用浏览器打开，能看到 form 表单。


package com.xttx.cn.fetchpro.fetchImp;

import java.io.IOException;
import java.net.URL;
import java.util.List;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.CollectingAlertHandler;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.HttpWebConnection;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebRequest;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.xttx.cn.fetchpro.exception.PageNotFoundException;

/**
 * Small HtmlUnit demo that loads two pages with a Firefox-like {@link WebClient}
 * and inspects the {@code <form>} elements found on each of them.
 *
 * <p>The client is kept in a static field, so every instance of this class
 * (and {@link #main(String[])}) shares the same {@link WebClient}.
 */
public class SimulationFetch {
	/**
	 * Upper bound, in milliseconds, for letting background JavaScript finish
	 * before the DOM is inspected.
	 */
	private static final long BACKGROUND_JS_WAIT_MILLIS = 10 * 1000L;

	/** Shared client used by every page load in this class. */
	protected static WebClient webClient = null;

	/** Not used by the code in this class; kept for existing callers. */
	static WebRequest request = null;

	/**
	 * Returns the shared client.
	 *
	 * @return the current {@link WebClient}, or {@code null} if none was set
	 */
	public WebClient getWebClient() {
		return webClient;
	}

	/**
	 * Replaces the shared client.
	 *
	 * @param webClient the client every subsequent page load should use
	 */
	public void setWebClient(WebClient webClient) {
		// The field is static: assign it through the class, not through `this`.
		SimulationFetch.webClient = webClient;
	}

	/**
	 * Loads the youzan login page and the Baidu home page and reports how many
	 * forms each one exposes.
	 *
	 * @param args ignored
	 * @throws Exception if a page cannot be fetched or returns a failing HTTP status
	 */
	public static void main(String[] args) throws Exception {
		webClient = createWebClient();
		try {
			List<HtmlForm> youzanForms =
					loadForms("https://login.youzan.com/sso/index?service=kdt&from_source=pzshouye");
			System.out.println("youzan forms: " + youzanForms.size());

			List<HtmlForm> baiduForms = loadForms("https://www.baidu.com/");
			System.out.println("baidu forms: " + baiduForms.size());

			// Guard the lookup: get(0) on an empty list throws IndexOutOfBoundsException.
			if (!baiduForms.isEmpty()) {
				HtmlForm firstBaiduForm = baiduForms.get(0);
				System.out.println("first baidu form: " + firstBaiduForm);
			}

			System.out.println("---");
		} finally {
			// Release windows and background JavaScript threads.
			// NOTE(review): newer HtmlUnit releases offer close() as the replacement — confirm
			// against the version on the classpath.
			webClient.closeAllWindows();
		}
	}

	/**
	 * Builds the Firefox-like client with JavaScript, CSS, cookies and redirects enabled.
	 *
	 * @return a freshly configured {@link WebClient}
	 */
	private static WebClient createWebClient() {
		// NOTE(review): these setters mutate the shared FIREFOX_24 constant, so any other
		// client created from it afterwards sees the same overrides.
		BrowserVersion.FIREFOX_24.setBrowserLanguage("zh-CN");
		BrowserVersion.FIREFOX_24.setSystemLanguage("zh-CN");
		BrowserVersion.FIREFOX_24.setUserLanguage("zh-CN");
		BrowserVersion.FIREFOX_24.setUserAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0");
		BrowserVersion.FIREFOX_24.setBrowserVersion(46.0f);
		BrowserVersion.FIREFOX_24.setCpuClass("x64");

		WebClient client = new WebClient(BrowserVersion.FIREFOX_24);
		client.setWebConnection(new HttpWebConnection(client));
		client.getCache().clear();
		client.getCookieManager().clearCookies();
		client.getOptions().setJavaScriptEnabled(true);
		client.setJavaScriptTimeout(60 * 1000);
		client.getOptions().setCssEnabled(true);
		client.getOptions().setActiveXNative(true);
		client.getOptions().setPopupBlockerEnabled(true);
		client.getOptions().setRedirectEnabled(true);
		client.getOptions().setTimeout(10000);
		client.getOptions().setDoNotTrackEnabled(true);
		client.getCookieManager().setCookiesEnabled(true);
		client.getOptions().setThrowExceptionOnFailingStatusCode(true);
		client.getOptions().setThrowExceptionOnScriptError(false);
		client.getOptions().setUseInsecureSSL(true);
		client.getOptions().setSSLInsecureProtocol("TLSv1.2");
		client.setAjaxController(new NicelyResynchronizingAjaxController());
		client.setAlertHandler(new CollectingAlertHandler());
		return client;
	}

	/**
	 * Fetches {@code url} with the shared client, gives background JavaScript time to
	 * run, and returns the forms present in the resulting DOM.
	 *
	 * @param url absolute URL of the page to load
	 * @return the page's forms; empty when the page has none
	 * @throws IOException if the URL is malformed or the page cannot be fetched
	 */
	private static List<HtmlForm> loadForms(String url) throws IOException {
		HtmlPage page = webClient.getPage(new WebRequest(new URL(url)));
		// Forms that scripts insert after the initial load are not in the DOM yet when
		// getPage() returns; wait for pending background jobs before reading it.
		// Presumably this is why the youzan login form was missing — verify against the site.
		webClient.waitForBackgroundJavaScript(BACKGROUND_JS_WAIT_MILLIS);
		return page.getForms();
	}
}

你可能感兴趣的:(JAVA)