提取meta中的charset值,得到正确的网页源码


import java.io.*;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.httpclient.*;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;

public class GetSample {

	public static void main(String[] args) {
		HttpClient httpClient = new HttpClient();
		GetMethod getMethod = new GetMethod("http://www.baidu.com");
		getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
				new DefaultHttpMethodRetryHandler());
		try {
			int statusCode = httpClient.executeMethod(getMethod);
			if (statusCode != HttpStatus.SC_OK) {
				System.err.println("Method failed: "
						+ getMethod.getStatusLine());
			}
			byte[] responseBody = getMethod.getResponseBody();
			byte[] btemp=new byte[200];
			for (int i = 0; i < btemp.length; i++) {
				btemp[i]=responseBody[i];
			}
			
			String temp=new String(btemp);
			int start =temp.indexOf("charset");
			System.out.println(start);
			int end =temp.indexOf("\"",start);
			System.out.println(end);
			String charset=temp.substring(start+8,end);
			System.out.println(charset);
			System.out.println(temp);
			
			String sresponseBody=new String(responseBody,charset);
			System.out.println(sresponseBody)
			// 处理内容
			System.out.println("OK!");
		} catch (HttpException e) {
			// 发生致命的异常,可能是协议不对或者返回的内容有问题
			System.out.println("Please check your provided http address!");
			e.printStackTrace();
		} catch (IOException e) {
			// 发生网络异常
			e.printStackTrace();
		} finally {
			// 释放连接
			getMethod.releaseConnection();
		}
	}
}




这样虽然能得到正确的网页源码,但是不是太麻烦了?大家能帮我改进一下代码、提高效率吗?或者有什么好用的 Java 工具类可以推荐吗?

你可能感兴趣的:(java,apache,网络协议)