本文采用 HttpClient 来模拟站点的登录发帖回复,介绍 HttpClient 的用法和常见问题的解决方案。
HttpClient 是 Apache Jakarta Commons 下的子项目,是一个支持 HTTP 协议的客户端编程工具包,可以用来模拟浏览器的行为。虽然它的大部分功能也可以使用较底层的 java.net.HttpURLConnection 来实现,但它提供了很多方法来简化网络的访问。例如:
- 实现了所有 HTTP 的方法( GET,POST 等)
- 支持 HTTPS 协议
- 支持代理服务器
- 自动维护 Cookies 等
我们知道, HTTP 协议是无状态的,要维持会话,现在基本上都是采用基于 Cookies 的方式( Session 机制通常也是通过 Cookies 实现的),所以 HttpClient 自动维护 Cookies 的功能对我们的登录发帖回复非常有用(一般网站都需要先登录再发帖回复)。
下面的例子都是采用 commons-httpclient-3.1.jar 包来实现的(虽然 HttpClient 4.0 已经发布,但是代码发生了较大的重构,调用方式也发生了很大的改变)。
下载 jar 包的路径为: http://hc.apache.org/downloads.cgi
由于 HttpClient 使用了 Apache Jakarta Commons 下的子项目 logging 和 codec ,所以也需要在 http://commons.apache.org/ 下载这两个包:
commons-logging.jar
commons-codec-1.3.jar
方法调用的时序图如下:
其中, BrowserContext 类代表浏览器上下文对象,维护 HttpClient 链接和 Cookies 。 KaixinSitePost 是负责实现开心网的具体登录发帖回复逻辑的类。
BrowserContext 的代码如下:
/**
 * Copyright (C): 2009
 * @author 陈新汉
 * Aug 24, 2009 3:09:00 PM
 */

7 /** */ /**
8 * 浏览器进程上下文
9 */
10 public class BrowserContext
11 {
12 private HttpClient client; // 注意:每个站点和每个用户,对应一个单独的BrowserContext对象
13 private Cookie[] cookies = new Cookie[ 0 ]; // 维护Cookies
14 private Proxyips proxyip = null ; // 当前的代理IP
15 private Siteusers user = null ; // 当前的登录用户
16
17 public Cookie[] getCookies() {
18 return cookies;
19 }
20
21 public void setCookies(Cookie[] cookies) {
22 this .cookies = cookies;
23 }
24
25 public void addCookie(Cookie c) {
26 if (cookies != null && cookies.length > 0 ) {
27 Cookie[] others = new Cookie[cookies.length + 1 ];
28 System.arraycopy(cookies, 0 , others, 0 , cookies.length);
29 others[others.length - 1 ] = c;
30 cookies = others;
31 } else {
32 cookies = new Cookie[ 1 ];
33 cookies[ 0 ] = c;
34 }
35 }
36
37 public Proxyips getProxyip() {
38 return proxyip;
39 }
40
41 public void setProxyip(Proxyips proxyip) {
42 this .proxyip = proxyip;
43 if ( this .proxyip != null ) {
44 client.getHostConfiguration().setProxy(proxyip.getIp(),proxyip.getPort());
45 client.getParams().setAuthenticationPreemptive( true );
46 // 如果代理需要密码验证,这里设置用户名密码
47 // client.getState().setProxyCredentials(AuthScope.ANY, new UsernamePasswordCredentials("",""));
48 }
49 }
50
51 public HttpClient getClient() {
52 return client;
53 }
54
55 public Siteusers getUser() {
56 return user;
57 }
58
59 public void setUser(Siteusers user) {
60 this .user = user;
61 }
62
63 private BrowserContext(Site site) {
64 super ();
65 Protocol myhttps = new Protocol( " https " , new MySecureProtocolSocketFactory(), 443 );
66 Protocol.registerProtocol( " https " , myhttps);
67 client = new HttpClient();
68 client.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
69 HttpConnectionManagerParams managerParams = client.getHttpConnectionManager().getParams();
70 // 设置连接超时时间(单位毫秒)
71 // managerParams.setConnectionTimeout(50000);
72 // 设置读数据超时时间(单位毫秒)
73 // managerParams.setSoTimeout(120000);
74 initForSiteVisit(site.getSite(),site.getPort(),site.getCharset());
75 }
76
77 public BrowserContext(Site site,Proxyips proxyip) {
78 this (site);
79 this .setProxyip(proxyip);
80 }
81
82 private void initForSiteVisit(String siteurl, int port,String charset) {
83 client.getHostConfiguration().setHost(siteurl, port, " http " );
84 // 解决中文乱码问题,和指定网站的页面编码一致
85 client.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, charset);
86 }
87
88 // 查看cookie信息
89 public void printCookies()
90 {
91 System.out.println( " ---------------Cookie---------------- " );
92 if (cookies != null ) {
93 for (Cookie c:cookies) {
94 System.out.println(c.getName() + " : " + c.getValue());
95 }
96 } else {
97 System.out.println( " 没有设置Cookies " );
98 }
99 System.out.println( " ---------------Cookie---------------- " );
100 }
101
102 public void setCommonMethodRequestHeaders(HttpMethodBase method)
103 {
104 method.setRequestHeader( " Accept " , " */* " );
105 // method.setRequestHeader("Accept-Language", "zh-cn");
106 // method.setRequestHeader("Accept-Encoding", "gzip,deflate");
107 method.setRequestHeader( " User-Agent " , " Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;) " );
108 // 设置非常重要
109 method.setRequestHeader( " Connection " , " Keep-Alive " );
110 }
111
112 public String redirectToURL(String url) throws IOException
113 {
114 if (url != null ) {
115 try {
116 System.out.println( " 页面重定向到: " + url);
117 String responseString = this .doCommonVisitWithURL(url);
118 // System.out.println(responseString);
119 return responseString;
120 } catch (IOException e) {
121 System.out.println( " 重定向: " + url + " 出错 " );
122 }
123 } else {
124 System.out.println( " redirect url is null " );
125 }
126 return null ;
127 }
128
129 public String doCommonVisitWithURL(String url) throws IOException {
130 GetMethod get = new GetMethod(url);
131 return this .doGet(get);
132 }
133
134 public String doPost(ExpectContinueMethod post) throws IOException
135 {
136 if (post == null )
137 return null ;
138 try
139 {
140 if (getCookies() != null ) {
141 // printCookies();
142 client.getState().addCookies(cookies);
143 post.addRequestHeader( " Cookie " ,getCookies().toString());
144 // System.out.println(post.getRequestHeader("Cookie").getValue());
145 }
146 setCommonMethodRequestHeaders(post);
147 int statusCode = client.executeMethod(post);
148 cookies = client.getState().getCookies();
149 System.out.println(statusCode);
150 // System.out.println(post.getResponseHeader("Location"));
151 String responseString = post.getResponseBodyAsString();
152 System.out.println(responseString);
153 printCookies();
154 post.releaseConnection();
155 if (statusCode == 301 || statusCode == 302 ) {
156 redirectToURL(post.getResponseHeader( " Location " ).getValue());
157 }
158 return responseString;
159 }
160 finally {
161 if (post != null )
162 post.releaseConnection();
163 }
164 }
165
166 public String doGet(GetMethod get) throws IOException
167 {
168 if (get == null )
169 return null ;
170 if (cookies != null ) {
171 // printCookies();
172 client.getState().addCookies(cookies);
173 get.addRequestHeader( " Cookie " ,cookies.toString());
174 }
175 try {
176 setCommonMethodRequestHeaders(get);
177 int statusCode = client.executeMethod(get);
178 cookies = client.getState().getCookies(); // 重新保存Cookies
179 printCookies();
180 System.out.println(statusCode);
181 if (statusCode == 301 || statusCode == 302 ) {
182 redirectToURL(get.getResponseHeader( " Location " ).getValue());
183 }
184 String responseString = get.getResponseBodyAsString();
185 // System.out.println(responseString);
186 return responseString;
187 }
188 finally {
189 if (get != null )
190 get.releaseConnection();
191 }
192 }
193
194 public String getRedirectURL(String content)
195 {
196 if (content != null && content.indexOf( " window.location=\ "" )!=-1){
197 int begin = content.indexOf( " window.location=\ "" );
198 int end = content.indexOf( " \ "" , begin+17);
199 return content.substring(begin + 17 ,end);
200 }
201 return null ;
202 }
203 }
/**
 * Copyright (C): 2009
 * @author 陈新汉
 * Aug 14, 2009 11:16:32 AM
 */

7 /** */ /**
8 * 模拟测试网站(不需要验证码)
9 * 开心网(www.kaixin.com)
10 */
11 public class KaixinSitePost implements ISitePost
12 {
13 private static final String LOGON_SITE = "www.kaixin.com";
14 private static final int LOGON_PORT = 80;
15 private static final String CHARSET="UTF-8";
16 private BrowserContext context=null;
17
18
19 //单个用户登录
20 public String login(Siteusers userinfo, Proxyips ip)
21 {
22 if(userinfo!=null)
23 {
24 SiteLogin login=new SiteLogin(context,"http://login.kaixin.com/Login.do");
25 if(ip!=null)
26 login.getContext().setProxyip(ip);
27 Map<String,String> params=new HashMap<String,String>();
28 params.put("ss", "10106");
29 params.put("loginregFrom", "index");
30 params.put("origURL", "http://www.kaixin.com/SysHome.do");
31 params.put("email", userinfo.getUsername());
32 params.put("password", userinfo.getUserpwd());
33 login.addRequestParameters(params);
34 return login.login(userinfo);
35 }
36 return null;
37 }
38
39 public List<Siteboards> parseBoard(Siteboards data) {
40
41 return null;
42 }
43
44 public String post(Postinfos postinfo,List<Siteboards> siteboards)
45 {
46 if(postinfo!=null && siteboards!=null){
47 SitePost sport=new SitePost(context);
48 context.getClient().getHostConfiguration().setHost("blog.kaixin.com");
49 Map<String,String> params=new HashMap<String,String>();
50 params.put("categoryId", "0");
51 params.put("blogControl", "1");
52 params.put("title", postinfo.getTitle());
53 params.put("body",postinfo.getContent());
54 sport.addRequestParameters(params);
55
56 for(Siteboards sb:siteboards){
57 sb.setPostUrl("http://blog.kaixin.com/NewEntry.do");
58 try{
59 sport.post(postinfo, sb);
60 }catch(IOException e){
61 e.printStackTrace();
62 }
63 }
64 }
65 return null;
66 }
67
68
69 public String reply(Postinfos postinfo,List<Articleinfos> arts)
70 {
71
72 return null;
73 }
74
75 /** *//**
76 * @param args
77 */
78 public static void main(String[] args)
79 {
80 try
81 {
82 Siteusers userinfo=new Siteusers();
83 userinfo.setUsername("xxxx");
84 userinfo.setUserpwd("xxxx");
85 Proxyips ips = new Proxyips();
86 ips.setIp("218.56.64.210");
87 ips.setPort(8080);
88 KaixinSitePost sp=new KaixinSitePost();
89 sp.login(userinfo,ips);
90 Postinfos post=new Postinfos();
91 post.setContent("<p>lllllllllllllllllllllll</p>");
92 post.setTitle("中文测试");
93 List<Siteboards> siteboards=new ArrayList<Siteboards>();
94 siteboards.add(new Siteboards());
95 siteboards.add(new Siteboards());
96 sp.post(post,siteboards);
97 }
98 catch(Exception e){
99 e.printStackTrace();
100 }
101 }
102}
2 * Copyright (C): 2009
3 * @author 陈新汉
4 * Aug 24, 2009 3:03:00 PM
5 */
6
7 /** */ /**
8 * 站点登录
9 */
10 public class SiteLogin extends AbstractMethodAdapter
11 {
12 private HttpMethodBase method;
13 private boolean ispost=true;
14 protected BrowserContext context; //当前的浏览器进程上下文
15
16 public BrowserContext getContext() {
17 return context;
18 }
19
20 /** *//**
21 * 构造函数
22 * @param context
23 * @param url
24 * @param ispost 设置是否POST方式提交,默认为POST
25 */
26 public SiteLogin(BrowserContext context,String url,boolean ispost) {
27 super();
28 this.context = context;
29 this.ispost=ispost;
30 method = this.ispost?new PostMethod(url):new GetMethod(url);
31 }
32
33 public SiteLogin(BrowserContext context,String url) {
34 this(context,url,true);
35 }
36
37 public String login(Siteusers user)
38 {
39 int statusCode=0;
40 if(this.ispost && this.hasRequestParameters()){
41 ((PostMethod)method).setRequestBody(this.getRequestParams());
42 }
43
44 if(this.hasExtraRequestHeaders()){
45 this.addExtraRequestHeaders(method,this.getExtraRequestHeaders());
46 }
47 context.setCommonMethodRequestHeaders(method);
48 try
49 {
50 if(context.getCookies()!=null){
51 //printCookies();
52 context.getClient().getState().addCookies(context.getCookies());
53 method.addRequestHeader("Cookie", context.getCookies().toString());
54 }
55 statusCode = context.getClient().executeMethod(method);
56 context.setCookies(context.getClient().getState().getCookies());
57 String responseString = method.getResponseBodyAsString();
58 //System.out.println(responseString);
59 method.releaseConnection();
60 if(statusCode==HttpStatus.SC_OK){
61 System.out.println("登录成功");
62 return responseString;
63 }
64 else if(statusCode==302 ||statusCode==301){
65 System.out.println("登录成功,页面重定向");
66 String url=method.getResponseHeader("Location").getValue();
67 return context.redirectToURL(url);
68 }
69 else{
70 System.out.println("登录失败,状态码:"+statusCode);
71 }
72 }catch(Exception e){
73 e.printStackTrace();
74 }finally{
75 if(method!=null)
76 method.releaseConnection();
77 }
78 return null;
79 }
80}

/**
 * Copyright (C): 2009
 * @author 陈新汉
 * Aug 24, 2009 5:05:55 PM
 */

7 /** */ /**
8 * 站点发帖新帖
9 */
10 public class SitePost extends CommonSitePost
11 {
12
13 public SitePost(BrowserContext context) {
14 super();
15 this.context=context;
16 }
17
18 public String post(Postinfos postinfo,Siteboards siteboard) throws IOException
19 {
20 if (postinfo != null && siteboard != null) {
21 if (StringUtils.isNotEmpty(siteboard.getPostUrl())) {
22 PostMethod post = new PostMethod(siteboard.getPostUrl());
23 if(this.hasRequestParameters()){
24 post.setRequestBody(this.getRequestParams());
25 }
26 if(this.hasExtraRequestHeaders()){
27 this.addExtraRequestHeaders(post,this.getExtraRequestHeaders());
28 }
29 context.setCommonMethodRequestHeaders(post);
30 this.context.doPost(post);
31 }else{
32 System.out.println("版面的新帖提交地址不能为空!");
33 }
34 }else{
35 System.out.println("帖子或者版面信息输入都不能为空");
36 }
37 return null;
38 }
39}
40