目标:TOP100榜,最受期待榜,热映口碑榜,国内票房榜,北美票房榜。
内容:图片,电影名,上映时间,主演人员,电影链接,电影评分,总点评人数,想看人数,已看人数。
用到的一些Maven依赖(格式为 groupId:artifactId:version):
com.alibaba:fastjson:1.2.58
org.apache.httpcomponents:httpclient:4.5.10
org.jsoup:jsoup:1.11.3
org.apache.poi:poi:3.16
/**
 * Plain value holder for one movie entry collected by the crawler.
 *
 * <p>All values are kept as the raw strings handed to the constructor; no
 * parsing or validation is performed.
 */
public class Mao {
    private String picLink;     // movie poster image link
    private String movie;       // movie title
    private String releaseTime; // release time
    private String star;        // cast members
    private String movieLink;   // link to the movie page
    private String score;       // movie rating
    private String snum;        // presumably the total number of reviewers - confirm against the data source
    private String watched;     // presumably the number of people who have watched the movie - confirm
    private String num;         // NOTE(review): the visible caller passes its "wish" count here - confirm

    /**
     * Creates an entry from already-extracted field values.
     *
     * @param picLink     movie poster image link
     * @param movie       movie title
     * @param releaseTime release time
     * @param star        cast members
     * @param movieLink   link to the movie page
     * @param score       movie rating
     * @param snum        value stored in {@code snum}
     * @param watched     value stored in {@code watched}
     * @param num         value stored in {@code num}
     */
    public Mao(String picLink, String movie, String releaseTime, String star, String movieLink,
               String score, String snum, String watched, String num) {
        this.picLink = picLink;
        this.movie = movie;
        this.releaseTime = releaseTime;
        this.star = star;
        this.movieLink = movieLink;
        this.score = score;
        this.snum = snum;
        this.watched = watched;
        this.num = num;
    }

    /** @return the movie poster image link */
    public String getPicLink() {
        return picLink;
    }

    /** @return the movie title */
    public String getMovie() {
        return movie;
    }

    /** @return the release time */
    public String getReleaseTime() {
        return releaseTime;
    }

    /** @return the cast members */
    public String getStar() {
        return star;
    }

    /** @return the link to the movie page */
    public String getMovieLink() {
        return movieLink;
    }

    /** @return the movie rating */
    public String getScore() {
        return score;
    }

    /** @return the value passed as {@code snum} */
    public String getSnum() {
        return snum;
    }

    /** @return the value passed as {@code watched} */
    public String getWatched() {
        return watched;
    }

    /** @return the value passed as {@code num} */
    public String getNum() {
        return num;
    }
}
/**
 * Fetches the audience counters of one movie from the mobile real-time review endpoint.
 *
 * <p>The movie id is everything after the last {@code '/'} of {@code movieLink}.
 *
 * @param movieLink link to the movie page; the trailing path segment is used as the movie id
 * @return a three-element list holding the {@code snum}, {@code watched} and {@code wish}
 *         values of the response, in that order; an empty list when the link is null or
 *         empty, the request fails, the status is not 200, or the response has no
 *         {@code data} object
 */
public List<String> getComment(String movieLink){
    List<String> list = new ArrayList<>(3);
    if(movieLink == null || movieLink.isEmpty()){
        // Nothing to look up; avoids sending a request with an empty movie id.
        return list;
    }
    String movieId = movieLink.substring(movieLink.lastIndexOf("/")+1);
    String request = "http://m.maoyan.com/asgard/asgardapi/review/realtime/data.json?movieId="+movieId;
    HttpClient client = new DefaultHttpClient();
    try{
        HttpResponse response = client.execute(new HttpGet(request));
        int status = response.getStatusLine().getStatusCode();
        if(status != 200){
            System.out.println("处理:"+request+"失败,返回状态码:"+status);
            return list;
        }
        HttpEntity entity = response.getEntity();
        if(entity == null){
            return list;
        }
        String body = EntityUtils.toString(entity,"UTF-8");
        JSONObject jsobject = JSON.parseObject(body);
        JSONObject data = jsobject == null ? null : jsobject.getJSONObject("data");
        if(data == null){
            System.out.println("处理:"+request+"失败:"+body);
            return list;
        }
        list.add(data.getString("snum"));    // total number of reviewers
        list.add(data.getString("watched")); // judging by the key name: people who have watched
        list.add(data.getString("wish"));    // judging by the key name: people who want to watch
    }catch(Exception e){
        // The response may not exist when execute() itself fails, so report the
        // exception instead of dereferencing the response here.
        System.out.println("处理:"+request+"失败:"+e);
    }finally{
        // Release the connection held by this one-shot client.
        client.getConnectionManager().shutdown();
    }
    return list;
}
在Opera浏览器中右键单击"检查元素",找到Audits下的Devices,将Desktop改为Mobile;或者在IE浏览器中按F12,将桌面切换为Windows Phone。然后刷新浏览器。
找到数据接口http://m.maoyan.com/asgard/asgardapi/review/realtime/data.json?movieId=1218029
/**
 * Crawl task for a single board page: extracts every movie entry found under
 * {@code .board-wrapper > dd}, looks up its audience counters, appends a {@code Mao}
 * to the shared {@code lists} collection and finally calls {@code writeToExcel()}.
 */
public class Spildermao implements Runnable{
    String request;

    /**
     * @param request URL of the board page to crawl
     */
    public Spildermao(String request){
        this.request = request;
    }

    /**
     * Downloads and parses the page. Any failure is reported on standard output
     * and ends this task without writing the spreadsheet.
     */
    @Override
    public void run(){
        try{
            Document doc = Jsoup.connect(request).get();
            Elements elements = doc.select(".board-wrapper > dd");
            for(int i = 0; i < elements.size(); i++){
                // Image link: drop the "@..." suffix when there is one.
                String src = elements.get(i).select(".board-img").attr("data-src");
                int at = src.lastIndexOf("@");
                String picLink = at >= 0 ? src.substring(0, at) : src;

                // Cast and release time: keep the text after the first ":".
                // NOTE(review): the live page may use a full-width colon here - confirm.
                String st = elements.get(i).select(".star").text();
                String star = st.substring(st.indexOf(":")+1);
                String re = elements.get(i).select(".releasetime").text();
                String releaseTime = re.substring(re.indexOf(":")+1);

                String movie = elements.get(i).select(".name").text();
                String movieLink = elements.get(i).select(".name > a").attr("abs:href");
                String score = elements.get(i).select(".score").text();

                // getComment returns fewer than three values when the lookup fails;
                // record the movie with blank counters instead of aborting the page.
                List<String> counts = getComment(movieLink);
                boolean complete = counts != null && counts.size() >= 3;
                String snum = complete ? counts.get(0) : "";
                String watched = complete ? counts.get(1) : "";
                String wish = complete ? counts.get(2) : "";

                lists.add(new Mao(picLink,movie,releaseTime,star,movieLink,score,snum,watched,wish));
            }
            writeToExcel();
        }catch(Exception e){
            // Keep the cause visible instead of dropping it.
            System.out.println("链接:"+request+",处理失败:"+e);
        }
    }
}
public void writeToExcel(){
FileOutputStream fos;
HSSFWorkbook wb = new HSSFWorkbook();
HSSFSheet sheet = wb.createSheet("zhangling");
HSSFRow row = sheet.createRow(0);
//设置列宽,POI中的字符宽度算法是:
//double 宽度 = (字符个数 * (字符宽度 - 1) + 5) / (字符宽度 - 1) * 256,然后四舍五入
sheet.setColumnWidth((short)0,(short)(20*256));
sheet.setColumnWidth((short)1,(short)(30*256));
sheet.setColumnWidth((short)2,(short)(30*256));
sheet.setColumnWidth((short)3,(short)(30*256));
sheet.setColumnWidth((short)4,(short)(30*256));
sheet.setColumnWidth((short)5,(short)(15*256));
sheet.setColumnWidth((short)6,(short)(15*256));
sheet.setColumnWidth((short)7,(short)(15*256));
sheet.setColumnWidth((short)8,(short)(15*256));
HSSFCellStyle style = wb.createCellStyle();
style.setAlignment(HSSFCellStyle.ALIGN_CENTER);//水平居中
HSSFCell cell1 = row.createCell(0);
cell1.setCellValue("picture");
cell1.setCellStyle(style);
HSSFCell cell2 = row.createCell(1);
cell2.setCellValue("movie");
cell2.setCellStyle(style);
HSSFCell cell3 = row.createCell(2);
cell3.setCellValue("movieLink");
cell3.setCellStyle(style);
HSSFCell cell4 = row.createCell(3);
cell4.setCellValue("star");
cell4.setCellStyle(style);
HSSFCell cell5 = row.createCell(4);
cell5.setCellValue("releaseTime");
cell5.setCellStyle(style);
HSSFCell cell6 = row.createCell(5);
cell6.setCellValue("score");
cell6.setCellStyle(style);
HSSFCell cell7 = row.createCell(6);
cell7.setCellValue("snum");
cell7.setCellStyle(style);
HSSFCell cell8 = row.createCell(7);
cell8.setCellValue("watched");
cell8.setCellStyle(style);
HSSFCell cell9 = row.createCell(8);
cell9.setCellValue("num");
cell9.setCellStyle(style);
if(!lists.isEmpty()){
HSSFPatriarch patriarch = sheet.createDrawingPatriarch();//只能申明一次
HSSFCellStyle style1 = wb.createCellStyle();
style1.setVerticalAlignment(HSSFCellStyle.VERTICAL_CENTER);//垂直居中
style1.setWrapText(true);//内容可换行
BufferedImage bufferImg;
for(int i = 0;i
源代码