Java网络爬虫crawler4j学习笔记 AuthInfo类

源代码

package edu.uci.ics.crawler4j.crawler.authentication;

import javax.swing.text.html.FormSubmitEvent.MethodType;
import java.net.MalformedURLException;
import java.net.URL;

/**
 * Created by Avi Hayun on 11/23/2014.
 *
 * <p>Abstract class containing the authentication information needed to log in to a
 * user/password protected site. It should be extended by specific authentication types such as
 * form authentication and basic authentication.
 *
 * <p>This class contains the data that is common to all authentication types.
 */
public abstract class AuthInfo {

    /** The authentication mechanisms an {@code AuthInfo} can describe. */
    public enum AuthenticationType {
        BASIC_AUTHENTICATION,
        FORM_AUTHENTICATION
    }

    /** Authentication type. */
    protected AuthenticationType authenticationType;

    /** HTTP method used for the login request ({@code MethodType.GET} or {@code MethodType.POST}). */
    protected MethodType httpMethod;

    /** Protocol part of the login URL. */
    protected String protocol;

    /** Host (domain name) part of the login URL. */
    protected String host;

    /** File part of the login URL, i.e. whatever follows the host and port. */
    protected String loginTarget;

    /** Port of the login URL. */
    protected int port;

    protected String username;

    protected String password;

    /** Constructs a new AuthInfo with every field left at its Java default value. */
    public AuthInfo() {
    }

    /**
     * This constructor should only be used by extending classes.
     *
     * <p>The login URL is parsed once and split into protocol, host, port and login target.
     *
     * @param authenticationType Pick the one which matches your authentication
     * @param httpMethod Choose POST / GET
     * @param loginUrl Full URL of the login page
     * @param username Username for Authentication
     * @param password Password for Authentication
     *
     * @throws MalformedURLException Make sure your URL is valid
     */
    protected AuthInfo(AuthenticationType authenticationType, MethodType httpMethod, String loginUrl,
                       String username, String password) throws MalformedURLException {
        this.authenticationType = authenticationType;
        this.httpMethod = httpMethod;

        URL url = new URL(loginUrl);
        this.protocol = url.getProtocol();
        this.host = url.getHost();

        // URL.getPort() returns -1 when the URL carries no explicit port; only then fall back to
        // the protocol's default port. Using the default unconditionally would silently drop an
        // explicit port such as the 8080 in http://host:8080/login.
        int explicitPort = url.getPort();
        this.port = (explicitPort == -1) ? url.getDefaultPort() : explicitPort;

        this.loginTarget = url.getFile();

        this.username = username;
        this.password = password;
    }

    /**
     * @return Authentication type (BASIC, FORM)
     */
    public AuthenticationType getAuthenticationType() {
        return authenticationType;
    }

    /**
     * @param authenticationType Should be set only by extending classes (BASICAuthInfo, FORMAuthInfo)
     */
    public void setAuthenticationType(AuthenticationType authenticationType) {
        this.authenticationType = authenticationType;
    }

    /**
     * @return httpMethod (POST, GET)
     */
    public MethodType getHttpMethod() {
        return httpMethod;
    }

    /**
     * @param httpMethod Should be set by extending classes (POST, GET)
     */
    public void setHttpMethod(MethodType httpMethod) {
        this.httpMethod = httpMethod;
    }

    /**
     * @return protocol type (http, https)
     */
    public String getProtocol() {
        return protocol;
    }

    /**
     * @param protocol Don't set this one unless you know what you are doing (protocol: http, https)
     */
    public void setProtocol(String protocol) {
        this.protocol = protocol;
    }

    /**
     * @return host (www.sitename.com)
     */
    public String getHost() {
        return host;
    }

    /**
     * @param host Don't set this one unless you know what you are doing (sets the domain name)
     */
    public void setHost(String host) {
        this.host = host;
    }

    /**
     * @return file/path which is the rest of the url after the domain name (eg: /login.php)
     */
    public String getLoginTarget() {
        return loginTarget;
    }

    /**
     * @param loginTarget Don't set this one unless you know what you are doing (eg: /login.php)
     */
    public void setLoginTarget(String loginTarget) {
        this.loginTarget = loginTarget;
    }

    /**
     * @return port number (eg: 80, 443)
     */
    public int getPort() {
        return port;
    }

    /**
     * @param port Don't set this one unless you know what you are doing (eg: 80, 443)
     */
    public void setPort(int port) {
        this.port = port;
    }

    /**
     * @return username used for Authentication
     */
    public String getUsername() {
        return username;
    }

    /**
     * @param username username used for Authentication
     */
    public void setUsername(String username) {
        this.username = username;
    }

    /**
     * @return password used for Authentication
     */
    public String getPassword() {
        return password;
    }

    /**
     * @param password password used for Authentication
     */
    public void setPassword(String password) {
        this.password = password;
    }
}

分析

如果需要登录验证,可以继承AuthInfo类来实现自定义的爬虫登录功能。

你可能感兴趣的:(网络爬虫,crawler4j)