主要运用 HttpClient + HTMLParser 实现。
程序写得比较粗糙,以后抽空再进一步完善。
/**
 * Plain data holder for the details of one book as scraped from an online store.
 *
 * <p>All properties are free-form strings; each one is {@code null} until its
 * setter has been called. No validation or parsing is performed here.
 *
 * @author Tony Shen
 */
public class CompareBook {

    /** Title of the book. */
    private String bookName;

    /** Author of the book. */
    private String author;

    /** Publisher of the book. */
    private String publish;

    /** Price before discount, as text. */
    private String originalPrice;

    /** Current selling price, as text. */
    private String price;

    /** Short description of the book. */
    private String desc;

    /** Publication date, as text. */
    private String publishDate;

    /** @return the book title, or {@code null} if unset */
    public String getBookName() {
        return this.bookName;
    }

    /** @param bookName the book title to store */
    public void setBookName(String bookName) {
        this.bookName = bookName;
    }

    /** @return the author, or {@code null} if unset */
    public String getAuthor() {
        return this.author;
    }

    /** @param author the author to store */
    public void setAuthor(String author) {
        this.author = author;
    }

    /** @return the publisher, or {@code null} if unset */
    public String getPublish() {
        return this.publish;
    }

    /** @param publish the publisher to store */
    public void setPublish(String publish) {
        this.publish = publish;
    }

    /** @return the pre-discount price text, or {@code null} if unset */
    public String getOriginalPrice() {
        return this.originalPrice;
    }

    /** @param originalPrice the pre-discount price text to store */
    public void setOriginalPrice(String originalPrice) {
        this.originalPrice = originalPrice;
    }

    /** @return the current price text, or {@code null} if unset */
    public String getPrice() {
        return this.price;
    }

    /** @param price the current price text to store */
    public void setPrice(String price) {
        this.price = price;
    }

    /** @return the description, or {@code null} if unset */
    public String getDesc() {
        return this.desc;
    }

    /** @param desc the description to store */
    public void setDesc(String desc) {
        this.desc = desc;
    }

    /** @return the publication date text, or {@code null} if unset */
    public String getPublishDate() {
        return this.publishDate;
    }

    /** @param publishDate the publication date text to store */
    public void setPublishDate(String publishDate) {
        this.publishDate = publishDate;
    }
}
import java.net.URLEncoder;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpProtocolParams;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.Span;
import org.htmlparser.util.NodeList;
/**
 * Fetches book information from the Dangdang search result page.
 *
 * <p>Typical use: call {@link #getResponse()} to download the search page for
 * the configured book name, then pass the HTML to {@link #fetchData(String)}.
 *
 * @author Tony Shen
 */
public class FecthDangDang {
    /** Book name used as the search keyword. */
    private String bookName;

    /**
     * @param bookName the book name to search for
     */
    public FecthDangDang(String bookName) {
        this.bookName = bookName;
    }

    /**
     * Downloads the Dangdang search result page for the configured book name.
     *
     * <p>The keyword is URL-encoded as GBK, while the client's default content
     * charset is set to UTF-8.
     * NOTE(review): the two charsets differ on purpose in the original code —
     * confirm against what the site actually expects/returns.
     *
     * @return the response body as a string
     * @throws Exception if encoding the keyword or executing the request fails
     */
    public String getResponse() throws Exception {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            httpclient.getParams().setParameter(
                    HttpProtocolParams.HTTP_CONTENT_CHARSET, "UTF-8");
            String paramStr = URLEncoder.encode(bookName, "GBK");
            String url = "http://search.dangdang.com/search.php?catalog=&key="
                    + paramStr + "&SearchFromTop=1";
            HttpGet httpget = new HttpGet(url);
            ResponseHandler<String> responseHandler = new BasicResponseHandler();
            return httpclient.execute(httpget, responseHandler);
        } finally {
            // Release the connection even when the request throws.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Extracts the details of the first search hit from the given HTML.
     *
     * <p>Publication date, original price and price are read from the first
     * elements with class {@code list_r_list_h4_info3}, {@code gray del} and
     * {@code red} inside the {@code list_r_list} elements. Description, author
     * and publisher are read from the links at positions 1, 4 and 5 inside the
     * same elements; these positions are hard-coded and depend on the page layout.
     *
     * @param responseBody the HTML of the search result page
     * @return a book populated with the scraped values and the configured book name
     * @throws IllegalStateException if an expected element is missing from the page
     * @throws Exception if the HTML cannot be parsed
     */
    public CompareBook fetchData(String responseBody) throws Exception {
        CompareBook book = new CompareBook();
        book.setBookName(bookName);

        Parser parser = new Parser(responseBody);
        NodeList items = parser.extractAllNodesThatMatch(
                new HasAttributeFilter("class", "list_r_list"));

        book.setPublishDate(firstText(items, "list_r_list_h4_info3"));
        book.setOriginalPrice(firstText(items, "gray del"));
        book.setPrice(firstText(items, "red"));

        NodeList links = items.extractAllNodesThatMatch(
                new NodeClassFilter(LinkTag.class), true);
        book.setDesc(linkText(links, 1));
        book.setAuthor(linkText(links, 4));
        book.setPublish(linkText(links, 5));
        return book;
    }

    /**
     * Returns the nodes nested inside {@code nodelist} whose attribute
     * {@code a} has the value {@code b}.
     *
     * @param nodelist the nodes to search (recursively)
     * @param a the attribute name
     * @param b the attribute value
     * @return the matching nodes; empty if none match
     */
    public NodeList P(NodeList nodelist, String a, String b) {
        NodeFilter filter = new HasAttributeFilter(a, b);
        return nodelist.extractAllNodesThatMatch(filter, true);
    }

    /** Returns the plain text of the first node with the given class, failing clearly if absent. */
    private String firstText(NodeList scope, String cssClass) {
        NodeList matches = P(scope, "class", cssClass);
        if (matches.size() == 0) {
            throw new IllegalStateException(
                    "Dangdang result page has no element with class '" + cssClass + "'");
        }
        // Node itself exposes toPlainTextString(), so no cast to a concrete tag type is needed.
        Node first = matches.elementAt(0);
        return first.toPlainTextString();
    }

    /** Returns the text of the link at the given position, failing clearly if there are too few links. */
    private String linkText(NodeList links, int index) {
        if (index >= links.size()) {
            throw new IllegalStateException(
                    "Dangdang result page has only " + links.size()
                            + " link(s); expected one at position " + index);
        }
        // Safe cast: the list was produced by a NodeClassFilter for LinkTag.
        LinkTag link = (LinkTag) links.elementAt(index);
        return link.getLinkText();
    }

    /** @return the book name used as the search keyword */
    public String getBookName() {
        return bookName;
    }

    /** @param bookName the book name to search for */
    public void setBookName(String bookName) {
        this.bookName = bookName;
    }
}
import java.net.URLEncoder;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpProtocolParams;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.tags.Span;
import org.htmlparser.util.NodeList;
/**
 * Fetches book information from the Joyo/Amazon China (amazon.cn) search result page.
 *
 * <p>Typical use: call {@link #getResponse()} to download the search page for
 * the configured book name, then pass the HTML to {@link #fetchData(String)}.
 *
 * @author Tony Shen
 */
public class FetchZhuoYue {
    /** Book name used as the search keyword. */
    private String bookName;

    /**
     * @param bookName the book name to search for
     */
    public FetchZhuoYue(String bookName) {
        this.bookName = bookName;
    }

    /**
     * Downloads the amazon.cn book search result page for the configured book name.
     *
     * <p>The keyword is URL-encoded as GBK, while the client's default content
     * charset is set to UTF-8.
     * NOTE(review): confirm these charsets against what the site expects/returns.
     *
     * @return the response body as a string
     * @throws Exception if encoding the keyword or executing the request fails
     */
    public String getResponse() throws Exception {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            httpclient.getParams().setParameter(
                    HttpProtocolParams.HTTP_CONTENT_CHARSET, "UTF-8");
            String paramStr = URLEncoder.encode(bookName, "GBK");
            String url = "http://www.amazon.cn/s/ref=nb_ss?url=search-alias%3Dbooks&keywords="
                    + paramStr + "&Go.x=15&Go.y=13&searchKind=name";
            HttpGet httpget = new HttpGet(url);
            ResponseHandler<String> responseHandler = new BasicResponseHandler();
            return httpclient.execute(httpget, responseHandler);
        } finally {
            // Release the connection even when the request throws.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Extracts the price of the first search hit from the given HTML.
     *
     * <p>The price is the plain text of the first element with class
     * {@code saleprice} nested inside the elements with class {@code n2}.
     *
     * @param responseBody the HTML of the search result page
     * @return a book carrying the configured book name and the scraped price
     * @throws IllegalStateException if no {@code saleprice} element is found
     * @throws Exception if the HTML cannot be parsed
     */
    public CompareBook fetchData(String responseBody) throws Exception {
        CompareBook book = new CompareBook();
        book.setBookName(bookName);

        Parser parser = new Parser(responseBody);
        NodeList items = parser.extractAllNodesThatMatch(
                new HasAttributeFilter("class", "n2"));
        NodeList prices = P(items, "class", "saleprice");
        if (prices.size() == 0) {
            throw new IllegalStateException(
                    "amazon.cn result page has no element with class 'saleprice'");
        }
        // Node itself exposes toPlainTextString(), so no cast to a concrete tag type is needed.
        Node priceNode = prices.elementAt(0);
        book.setPrice(priceNode.toPlainTextString());
        return book;
    }

    /**
     * Returns the nodes nested inside {@code nodelist} whose attribute
     * {@code a} has the value {@code b}.
     *
     * @param nodelist the nodes to search (recursively)
     * @param a the attribute name
     * @param b the attribute value
     * @return the matching nodes; empty if none match
     */
    public NodeList P(NodeList nodelist, String a, String b) {
        NodeFilter filter = new HasAttributeFilter(a, b);
        return nodelist.extractAllNodesThatMatch(filter, true);
    }

    /** @return the book name used as the search keyword */
    public String getBookName() {
        return bookName;
    }

    /** @param bookName the book name to search for */
    public void setBookName(String bookName) {
        this.bookName = bookName;
    }
}
import java.net.URLEncoder;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpProtocolParams;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
/**
 * Fetches book information from the China-Pub search result page.
 *
 * <p>Typical use: call {@link #getResponse()} to download the search page for
 * the configured book name, then pass the HTML to {@link #fetchData(String)}.
 *
 * @author Tony Shen
 */
public class FecthChinaPub {
    /** Book name used as the search keyword. */
    private String bookName;

    /**
     * @param bookName the book name to search for
     */
    public FecthChinaPub(String bookName) {
        this.bookName = bookName;
    }

    /**
     * Downloads the China-Pub search result page for the configured book name.
     *
     * <p>The keyword is URL-encoded as GBK, while the client's default content
     * charset is set to UTF-8.
     * NOTE(review): confirm these charsets against what the site expects/returns.
     *
     * @return the response body as a string
     * @throws Exception if encoding the keyword or executing the request fails
     */
    public String getResponse() throws Exception {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            httpclient.getParams().setParameter(
                    HttpProtocolParams.HTTP_CONTENT_CHARSET, "UTF-8");
            String paramStr = URLEncoder.encode(bookName, "GBK");
            String url = "http://www.china-pub.com/s/?key1="
                    + paramStr + "&type=&pz=1";
            HttpGet httpget = new HttpGet(url);
            ResponseHandler<String> responseHandler = new BasicResponseHandler();
            return httpclient.execute(httpget, responseHandler);
        } finally {
            // Release the connection even when the request throws.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Extracts the price of the first search hit from the given HTML.
     *
     * <p>Takes the first {@code ul} nested inside the elements with class
     * {@code listview}, and uses the first whitespace-separated token of its
     * trimmed plain text as the price.
     *
     * @param responseBody the HTML of the search result page
     * @return a book carrying the configured book name and the scraped price
     * @throws IllegalStateException if no such {@code ul} element is found
     * @throws Exception if the HTML cannot be parsed
     */
    public CompareBook fetchData(String responseBody) throws Exception {
        CompareBook book = new CompareBook();
        book.setBookName(bookName);

        Parser parser = new Parser(responseBody);
        NodeList listViews = parser.extractAllNodesThatMatch(
                new HasAttributeFilter("class", "listview"));
        NodeList lists = listViews.extractAllNodesThatMatch(
                new TagNameFilter("ul"), true);
        if (lists.size() == 0) {
            throw new IllegalStateException(
                    "China-Pub result page has no 'ul' inside an element with class 'listview'");
        }
        Node descNode = lists.elementAt(0);
        // split() on a trimmed string always yields at least one token, so index 0 is safe.
        String[] tokens = descNode.toPlainTextString().trim().split("\\s+");
        book.setPrice(tokens[0]);
        return book;
    }

    /** @return the book name used as the search keyword */
    public String getBookName() {
        return bookName;
    }

    /** @param bookName the book name to search for */
    public void setBookName(String bookName) {
        this.bookName = bookName;
    }
}
/**
 * Command-line entry point that looks up one book at Dangdang, Joyo/Amazon China
 * and China-Pub and prints what each store returned.
 *
 * <p>Each store is queried independently: a failure at one store prints its
 * stack trace and does not prevent the remaining stores from being queried.
 *
 * @author Tony Shen
 */
public class ComparePrice {
    /** Book name searched at every store. */
    private static String bookName = "我的奋斗";

    /**
     * Queries the three stores in order (Dangdang, Joyo/Amazon China, China-Pub)
     * and prints the results to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        printDangDang();
        printZhuoYue();
        printChinaPub();
    }

    /** Fetches and prints the Dangdang result; errors are reported, not propagated. */
    private static void printDangDang() {
        FecthDangDang dd = new FecthDangDang(bookName);
        try {
            String responseDD = dd.getResponse();
            CompareBook book1 = dd.fetchData(responseDD);
            System.out.println("++++++当当抓取结果+++++");
            System.out.println("书名:"+book1.getBookName());
            System.out.println("作者:"+book1.getAuthor());
            System.out.println("出版社:"+book1.getPublish());
            System.out.println("原价:"+book1.getOriginalPrice());
            System.out.println("现价:"+book1.getPrice());
            System.out.println("描述:"+book1.getDesc());
            System.out.println(book1.getPublishDate());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Fetches and prints the Joyo/Amazon China result; errors are reported, not propagated. */
    private static void printZhuoYue() {
        FetchZhuoYue zy = new FetchZhuoYue(bookName);
        try {
            String responseZY = zy.getResponse();
            CompareBook book2 = zy.fetchData(responseZY);
            System.out.println("++++++卓越抓取结果+++++");
            System.out.println("书名:"+book2.getBookName());
            System.out.println("现价:"+book2.getPrice());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Fetches and prints the China-Pub result; errors are reported, not propagated. */
    private static void printChinaPub() {
        FecthChinaPub cp = new FecthChinaPub(bookName);
        try {
            String responseCP = cp.getResponse();
            CompareBook book3 = cp.fetchData(responseCP);
            System.out.println("++++++China-Pub抓取结果+++++");
            System.out.println("书名:"+book3.getBookName());
            System.out.println("现价:"+book3.getPrice());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
程序的运行结果