【日常问题】jsoup爬虫代理报错java.net.UnknownHostException

问题:

java.net.UnknownHostException: www.cnblogs.com
	at java.net.PlainSocketImpl.connect(PlainSocketImpl.java:195)
	at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:366)
	at java.net.Socket.connect(Socket.java:529)
	at sun.net.NetworkClient.doConnect(NetworkClient.java:158)
	at sun.net.www.http.HttpClient.openServer(HttpClient.java:411)
	at sun.net.www.http.HttpClient.openServer(HttpClient.java:525)
	at sun.net.www.http.HttpClient.<init>(HttpClient.java:208)
	at sun.net.www.http.HttpClient.New(HttpClient.java:291)
	at sun.net.www.http.HttpClient.New(HttpClient.java:310)
	at sun.net.www.protocol.http.HttpURLConnection.getNewHttpClient(HttpURLConnection.java:987)
	at sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:923)
	at sun.net.www.protocol.http.HttpURLConnection.connect(HttpURLConnection.java:841)
	at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:425)
	at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:410)
	at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:164)
	at org.jsoup.helper.HttpConnection.get(HttpConnection.java:153)
	at jsoup.JavaInterviewQuestions.JavaInterviewTi.internetTest(JavaInterviewTi.java:42)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
	at java.lang.reflect.Method.invoke(Method.java:597)
	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
	at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:271)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:70)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:50)
	at org.junit.runners.ParentRunner$3.run(ParentRunner.java:238)
	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:63)
	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:236)
	at org.junit.runners.ParentRunner.access$000(ParentRunner.java:53)
	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:229)
	at org.junit.runners.ParentRunner.run(ParentRunner.java:309)
	at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:50)
	at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)
	at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:467)
	at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:683)
	at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:390)
	at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:197)

代码:
/**
 * Fetches a cnblogs page through a proxy with jsoup and prints the body HTML to stderr.
 *
 * <p>The JDK proxy system properties are per-protocol: {@code http.proxyHost} and
 * {@code http.proxyPort} are consulted for {@code http://} URLs, while the {@code https.*}
 * pair is consulted for {@code https://} URLs. Configuring only the {@code https.*} pair
 * while requesting an {@code http://} URL leaves that request without a proxy, so the JVM
 * tries to resolve the target host directly and fails with
 * {@code java.net.UnknownHostException} on a network that requires the proxy. The proxy is
 * therefore configured for both protocols here, which also covers a redirect from
 * {@code http://} to {@code https://}.
 *
 * @throws Exception if the page cannot be fetched
 */
@Test
public void  internetTest() throws Exception{
	final String proxyHost = "abc.com.cn";
	final String proxyPort = "3128"; // system property values must be Strings, including the port

	System.setProperty("http.maxRedirects", "50");
	// Legacy flag kept from the original snippet.
	// NOTE(review): current JDKs appear to ignore "proxySet" -- confirm before removing.
	System.setProperty("proxySet", "true");

	// The proxy protocol must match the protocol of the target URL (http:// below).
	System.setProperty("http.proxyHost", proxyHost);
	System.setProperty("http.proxyPort", proxyPort);
	// Also configure https so a redirect to an https:// URL still goes through the proxy.
	System.setProperty("https.proxyHost", proxyHost);
	System.setProperty("https.proxyPort", proxyPort);

	String  agent="Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0";
	Document doc = Jsoup.connect("http://www.cnblogs.com/mvc/blog/news.aspx?blogApp=kdy11")
	        .userAgent(agent)
	        .ignoreHttpErrors(true) // important: otherwise a 404 fails with "HTTP error fetching URL. Status=404"
	        .timeout(3000).get();
	if (doc!=null) {
		System.err.println(doc.body().html());
	}
}

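解决:设置代理的协议要和目标网址的协议一致。JDK 的代理系统属性是按协议区分的:目标网址以 http:// 开头时,需要设置 http.proxyHost 和 http.proxyPort;目标网址以 https:// 开头时,才使用 https.proxyHost 和 https.proxyPort。如果两者不一致,请求不会走代理,而是直接解析目标域名,在必须通过代理上网的环境中就会抛出 java.net.UnknownHostException。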


你可能感兴趣的:(jsoup,代理报错,爬虫,日常问题,爬虫)