获取豆瓣电影TOP250榜单中各电影的网址,然后逐个解析电影详情页,提取电影网址、短评、海报、上映日期、电影简介等信息,并将这些信息显示到UI界面上。此外还支持输入某个电影的网址并解析该页面,也可以直接根据电影名称等搜索电影,并把搜索到的信息显示到UI界面上。
整体代码链接
1. 自动获取指定数目的豆瓣榜单TOP250电影网站,并且自动对每个电影网站进行解析获得相应的电影名、海报、评论等信息。
2. 在UI界面搜索框中输入一个电影的网站即可解析该网站,并把解析的内容更新UI界面。
3. 下载搜索到的所有海报到工程根目录下的image文件夹,并自动分类。
4. 利用Selenium 插件自动控制Google Chrome 进行全网搜索,只需要输入不完整的电影名即可实现(要求必须有Google Chrome浏览器)。
豆瓣电影 Top 250
主要获取的电影信息有:网址、中文名、英文名、评分等。
package Courses.GrabData;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
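// Information that can be parsed from a movie detail page: movie id, Chinese title, aliases, Douban rating, number of users who watched it, number of users who want to watch it, number of ratings, 5 poster images, entry category, director, lead actors, screenwriter, official website, Douban mini-site, release date, year, language, runtime, genre.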
/**
* 爬取一个电影详情页面。支持多线程
* @author Administrator
*
*/
public class DianYingXiangQing implements Runnable{
public String wangzhan = ""; //URL of the movie detail page (opened by run())
public String zhongwenname = "";//Chinese title
public String yingwenname = "";//English title
public String daoyanname = ""; //director(s)*
public String bianjuname = "";//screenwriter(s)*
public String zhuyanname = ""; //lead actor(s)*
public String shangyingriqi = ""; //release date
public String pingjiarenshu = ""; //number of ratings
public String pingjiafenshu = ""; //rating score
public String leixing = ""; //genre(s)*
public String jianjie = ""; //synopsis
public String haibaourl = ""; //URL of the poster page
public String duanpingurl = ""; //URL of the page listing all short comments
public int duanpingcount = 0; //number of short comments
// Poster image URLs, appended by gethaibao.
public ArrayList<String> list_haibao = new ArrayList<>();
// Short comment texts, appended by getduanping.
public ArrayList<String> list_duanping = new ArrayList<>();
public boolean imagebaocun = false; //whether to save posters as local files; when saved, posters are read from disk instead of being downloaded again, which saves time but uses more disk space
/**
 * Stores the URL of the detail page to crawl. Crawling itself is not started here;
 * it happens when run() is invoked (directly or through a Thread).
 *
 * @param wz URL of the movie detail page
 */
public DianYingXiangQing(String wz){
wangzhan = wz;
//run();
}
@Override
/**
 * Opens the page at wangzhan as UTF-8 text and scans it line by line, filling this object's fields:
 * director / screenwriter / actor names, release date, rating count and score, posters (via gethaibao),
 * genres, synopsis and short comments (via getduanping).
 *
 * NOTE(review): several string literals in this method look damaged (empty patterns, unterminated
 * literals, undeclared flags) - presumably HTML fragments were lost when the code was published.
 * The code is reproduced unchanged; restore the literals from the original project before compiling.
 */
public void run(){
String lie = "";
Scanner input = null;
Pattern r;
Matcher m;
try {
URL url = new URL(wangzhan);
input = new Scanner(url.openStream(),"UTF-8");
}
catch (java.net.MalformedURLException ex){
System.out.println("Invalid URL"+ lie);
}
catch (java.io.IOException ex){
System.out.println("I/O Errors: no such file" + lie);
}
// NOTE(review): input is still null here when opening the URL failed above, so the loop condition would throw a NullPointerException.
while(input.hasNext()){
lie = input.nextLine();
// NOTE(review): an empty pattern matches every line, so as written this loop exits on the first line; the terminator literal was presumably lost.
r = Pattern.compile("");
m = r.matcher(lie);
if(m.find()) break;
// NOTE(review): daoyan, bianju and zhuyan are not declared anywhere in the visible source - presumably boolean "still to be parsed" flags.
if(daoyan || bianju || zhuyan) {
if(daoyan)
r = Pattern.compile("director");
else if(bianju)
r = Pattern.compile("author");
else
r = Pattern.compile("actor");
m = r.matcher(lie);
if(m.find()) {
// Number of names collected for the current section; decides whether a " / " separator is needed.
int geshu = 0;
while(input.hasNext()){
lie = input.nextLine();
// A line containing "]" ends the current list of names.
r = Pattern.compile("]");
m = r.matcher(lie);
if(m.find())
break;
r = Pattern.compile("\"name\": \"([^\"]*)");
m = r.matcher(lie);
if(m.find()) {
geshu++;
if(daoyan){
if(geshu>1)
daoyanname += " / " + m.group(1);
else
daoyanname += m.group(1);
}
else if(bianju){
if(geshu>1)
bianjuname += " / " + m.group(1);
else
bianjuname += m.group(1);
}
else if(zhuyan){
if(geshu>1)
zhuyanname += " / " + m.group(1);
else
zhuyanname += m.group(1);
}
}
}
// Clear the flag of the section that was just consumed so the next section is looked for.
if(daoyan)
daoyan = false;
else if(bianju)
bianju = false;
else if(zhuyan)
zhuyan = false;
}
}
else {
r = Pattern.compile("\"datePublished\": \"([^\"]*)");
m = r.matcher(lie);
if(m.find())
shangyingriqi = m.group(1);
// The rating count is only looked for once the release date has been found.
if(shangyingriqi.length()>0) {
r = Pattern.compile("\"ratingCount\": \"([^\"]*)\"");
m = r.matcher(lie);
if(m.find())
pingjiarenshu = m.group(1);
}
// The rating score is only looked for once the rating count has been found.
if(pingjiarenshu.length()>0) {
r = Pattern.compile("\"ratingValue\": \"([^\"]*)\"");
m = r.matcher(lie);
if(m.find())
pingjiafenshu = m.group(1);
}
}
}
// Posters. The original comment says 5 posters are enough; the call below asks gethaibao for up to 10.
while(input.hasNext()){
lie = input.nextLine();
// NOTE(review): unterminated string literal - the marker text was lost; this line does not compile as shown.
r = Pattern.compile(");
// Only the first 16 characters of the line are matched against the marker.
m = r.matcher(lie.length()>16?lie.substring(0,16):lie);
if(m.find()) {
lie = input.nextLine();
r = Pattern.compile("href=\"([^\"]*)");
m = r.matcher(lie);
if(m.find()) {
haibaourl = m.group(1);
gethaibao(haibaourl,10);
if(imagebaocun)
ImageDownload();
}
break;
}
}
// Genres: find the line starting with the genre label and join the values with "/".
while(input.hasNext()){
lie = input.nextLine();
r = Pattern.compile(" 类型:");
m = r.matcher(lie.length()>35?lie.substring(0,35):lie);
if(m.find()){
int fromIndex = 0;
while(true){
// NOTE(review): both indexOf calls search for an empty string, which looks like lost opening/closing tag literals; the +25 below presumably skipped the opening tag.
int count1 = lie.indexOf("", fromIndex);
if(count1 == -1)
break;
int count2 = lie.indexOf("", count1);
count1+=25;
if(fromIndex != 0)
leixing += "/" + lie.substring(count1, count2);
else
leixing += lie.substring(count1, count2);
fromIndex = count2;
}
break;
}
}
// Synopsis
while(input.hasNext()){
lie = input.nextLine();
// NOTE(review): unterminated string literal - the marker text was lost; this line does not compile as shown.
r = Pattern.compile(");
m = r.matcher(lie.length()>55?lie.substring(0,55):lie);
if(m.find()) {
// The synopsis is the next line plus the line after the following one; the line in between is skipped.
jianjie = input.nextLine();
input.nextLine();
jianjie += input.nextLine();
break;
}
}
// Skip ahead to a marker line before the comment link is searched for.
while(input.hasNext()){
lie = input.nextLine();
// NOTE(review): the marker is a single space here, which looks like a stripped literal.
r = Pattern.compile(" ");
m = r.matcher(lie.length()>31?lie.substring(0,31):lie);
if(m.find()) {
break;
}
}
// Fetch 100 comments (sorted by popularity, according to the original comment).
while(input.hasNext()){
lie = input.nextLine();
// NOTE(review): unterminated string literal - the pattern (which must define capture group 1 for the comment page URL) was lost.
r = Pattern.compile(");
m = r.matcher(lie);
if(m.find()) {
duanpingurl = m.group(1);
duanpingcount = 100;
getduanping(duanpingurl,100);
break;
}
}
}
/**
 * Collects short comments from a comment page into {@code list_duanping} and follows the
 * "后页" (next page) link until {@code shumu} comments have been gathered. Internal helper.
 *
 * If the page cannot be opened, a message is printed and the method returns without adding
 * anything (previously this path dereferenced a null Scanner). The Scanner is always closed.
 *
 * NOTE(review): the marker literals {@code " "} and the pattern {@code "([^<]*)"} look damaged
 * (HTML fragments were presumably lost when the code was published). They are kept byte-for-byte;
 * restore them from the original project.
 *
 * @param s     URL of the comment page to read
 * @param shumu number of comments still wanted; values &lt;= 0 make the call a no-op
 */
void getduanping(String s,int shumu) {
    if(shumu<=0) return;
    Scanner input;
    try {
        input = new Scanner(new URL(s).openStream(),"UTF-8");
    }
    catch (java.net.MalformedURLException ex){
        System.out.println("Invalid URL");
        return;
    }
    catch (java.io.IOException ex){
        System.out.println("I/O Errors: no such file");
        return;
    }
    // Patterns are loop-invariant, so compile them once.
    Pattern duanpingPattern = Pattern.compile("([^<]*)");
    Pattern houyePattern = Pattern.compile("后页");
    Pattern hrefPattern = Pattern.compile("href=\"([^\"]*)");
    int count_zhang = 0;
    String xiaye = null;
    try {
        String lie;
        // Skip everything up to the marker line that precedes the comment list.
        while(input.hasNext()) {
            lie = input.nextLine();
            if(lie.length() == 38 && lie.equals(" "))
                break;
        }
        // Collect comment texts until enough were found or the end marker is reached.
        while(input.hasNext()) {
            lie = input.nextLine();
            Matcher m = duanpingPattern.matcher(lie);
            if(m.find()) {
                list_duanping.add(m.group(1));
                count_zhang++;
                if(shumu == count_zhang)
                    return;
            }
            else if(lie.equals(" ")) {
                break;
            }
        }
        // Look for the "next page" link; its href is relative to the part of s before '?'.
        while(input.hasNext()) {
            lie = input.nextLine();
            if(houyePattern.matcher(lie).find()) {
                Matcher m = hrefPattern.matcher(lie);
                if(m.find()) {
                    int wenhao = s.indexOf('?');
                    String jichu = wenhao >= 0 ? s.substring(0, wenhao) : s;
                    // The href is HTML-escaped; turn "&amp;" back into "&".
                    xiaye = (jichu + m.group(1)).replace("&amp;", "&");
                }
                break;
            }
        }
    }
    finally {
        input.close();
    }
    // Recurse only after this page's stream has been released.
    if(xiaye != null)
        getduanping(xiaye,shumu-count_zhang);
}
/**
 * Collects poster image URLs from a poster listing page into {@code list_haibao} and follows
 * the "后页" (next page) link until {@code shumu} URLs have been gathered. Internal helper.
 *
 * If the page cannot be opened, a message is printed and the method returns without adding
 * anything (previously this path dereferenced a null Scanner). The page is decoded as UTF-8 so
 * that the Chinese "后页" marker is matched regardless of the platform default charset, and the
 * Scanner is always closed.
 *
 * NOTE(review): the marker literals {@code " "} and {@code ""} look damaged (HTML fragments were
 * presumably lost when the code was published). They are kept byte-for-byte.
 *
 * @param s     URL of the poster listing page
 * @param shumu number of poster URLs still wanted; values &lt;= 0 make the call a no-op
 */
void gethaibao(String s,int shumu) {
    if(shumu<=0) return;
    Scanner input;
    try {
        input = new Scanner(new URL(s).openStream(),"UTF-8");
    }
    catch (java.net.MalformedURLException ex){
        System.out.println("Invalid URL");
        return;
    }
    catch (java.io.IOException ex){
        System.out.println("I/O Errors: no such file");
        return;
    }
    // Patterns are loop-invariant, so compile them once.
    Pattern srcPattern = Pattern.compile("src=\"([^\"]*)\" />");
    Pattern houyePattern = Pattern.compile("后页");
    Pattern hrefPattern = Pattern.compile("href=\"([^\"]*)");
    int count_zhang = 0;
    String xiaye = null;
    try {
        String lie;
        // Skip everything up to the marker line that precedes the poster list.
        while(input.hasNext()) {
            lie = input.nextLine();
            if(lie.length() == 22&&lie.equals(" "))
                break;
        }
        // Collect image URLs until enough were found or the end marker is reached.
        while(input.hasNext()) {
            lie = input.nextLine();
            Matcher m = srcPattern.matcher(lie);
            if(m.find()) {
                String src = m.group(1);
                // Replace the last four characters with ".jpg"; very short values are kept
                // as-is instead of triggering a StringIndexOutOfBoundsException.
                String haibaos = src.length() >= 4 ? src.substring(0,src.length()-4)+".jpg" : src;
                list_haibao.add(haibaos);
                count_zhang++;
                if(count_zhang == shumu)
                    return;
            }
            else if(lie.equals("")) {
                break;
            }
        }
        // Look for the "next page" link; its href is used as the next URL unchanged.
        while(input.hasNext()) {
            lie = input.nextLine();
            if(houyePattern.matcher(lie).find()) {
                Matcher m = hrefPattern.matcher(lie);
                if(m.find())
                    xiaye = m.group(1);
                break;
            }
        }
    }
    finally {
        input.close();
    }
    // Recurse only after this page's stream has been released.
    if(xiaye != null)
        gethaibao(xiaye,shumu-count_zhang);
}
/**
 * Writes the tab-separated record produced by getshuju() to standard output, followed by a line break.
 */
public void printall(){
    String shuju = getshuju();
    System.out.println(shuju);
}
/**
 * Downloads every poster listed in {@code list_haibao} into the folder
 * {@code image/<zhongwenname>/}, naming the files {@code <zhongwenname><i>.jpg} with i starting at 1.
 * Internal helper; takes no parameters.
 *
 * If the folder already exists nothing is downloaded (the posters are assumed to be present).
 * The folder is created together with any missing parent ({@code image/}). Each poster is
 * streamed straight to its file and both streams are closed even when a transfer fails;
 * a failed poster is reported and the remaining ones are still attempted.
 */
void ImageDownload() {
    String filePar = "image/"+zhongwenname +"/";// folder path
    File myPath = new File( filePar );
    if (myPath.exists()) {
        return; // already downloaded earlier, no need to repeat
    }
    // mkdirs (not mkdir) so the call also works when "image" itself does not exist yet.
    if (!myPath.mkdirs()) {
        System.out.println("I/O Errors: cannot create directory " + filePar);
        return;
    }
    byte[] buffer = new byte[1024];
    for(int i=1;i<=list_haibao.size();i++) {
        File mubiao = new File(filePar+ zhongwenname + i +".jpg");
        try (DataInputStream dataInputStream = new DataInputStream(new URL(list_haibao.get(i-1)).openStream());
             FileOutputStream fileOutputStream = new FileOutputStream(mubiao)) {
            int length;
            while ((length = dataInputStream.read(buffer)) != -1) {
                fileOutputStream.write(buffer, 0, length);
            }
        } catch (IOException e) {
            // Covers MalformedURLException as well; keep going with the next poster.
            e.printStackTrace();
        }
    }
}
/**
 * Builds one tab-separated record of this movie's fields in the order: URL, Chinese title,
 * English title, director, screenwriter, actors, release date, rating count, rating score,
 * genres, synopsis, poster page URL, comment page URL, comment count.
 *
 * @return the fields joined by tab characters
 */
public String getshuju() {
    return String.join("\t",
            wangzhan, zhongwenname, yingwenname, daoyanname, bianjuname,
            zhuyanname, shangyingriqi, pingjiarenshu, pingjiafenshu,
            leixing, jianjie, haibaourl, duanpingurl, String.valueOf(duanpingcount));
}
}
2.设计一个类爬取 豆瓣电影 Top 250上的电影网站。
但不爬取其详细信息,然后把这个网站传给上面的类爬取每个电影的详情。
package Courses.GrabData;
import java.io.File;
import java.io.PrintWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* 获取豆瓣top250页面的电影及其详情
* @author Administrator
*
*/
public class DouBan{
// Entries read from the Top-250 listing pages.
private ArrayList<BangDan> list_bangdan = new ArrayList<>();
// One detail crawler per listed movie, in listing order.
public ArrayList<DianYingXiangQing> list_xiangqing = new ArrayList<>();
// How many movies to crawl (always within 1..250 after construction).
public int number = 0;
/**
 * Crawls all 250 movies of the list.
 */
public DouBan(){
    this(250);
}
/**
 * Crawls the first n movies of the list and their detail pages.
 *
 * @param n number of movies wanted; values below 1 are raised to 1, values above 250 are lowered to 250
 */
public DouBan(int n){
    number = Math.max(1, Math.min(250, n));
    getbangdan();
    getxiangqing();
}
/**
 * Starts one daemon thread per entry of {@code list_bangdan}, each running a
 * {@code DianYingXiangQing} for that entry's URL, registers the crawlers in
 * {@code list_xiangqing}, and blocks until every thread has finished.
 *
 * Waiting uses {@link Thread#join()} instead of polling {@code isAlive()} in a tight loop,
 * so the calling thread no longer burns a CPU core while the pages are fetched.
 * If the calling thread is interrupted while waiting, its interrupt flag is restored and the
 * method returns early; crawlers still running keep filling their objects in the background.
 */
public void getxiangqing() {
    ArrayList<Thread> xiancheng = new ArrayList<>(list_bangdan.size());
    for(BangDan bd: list_bangdan) {
        DianYingXiangQing xq = new DianYingXiangQing(bd.wangzhan);
        Thread t = new Thread(xq);
        t.setDaemon(true);
        t.setName(bd.zhongwenname);
        t.start();
        list_xiangqing.add(xq);
        xiancheng.add(t);
    }
    for(Thread t: xiancheng) {
        try {
            t.join();
        }
        catch (InterruptedException ex) {
            Thread.currentThread().interrupt();
            return;
        }
    }
}
/**
 * Writes the details of every crawled movie to a text file in the working directory.
 *
 * The file is named "电影TOP250详情.txt"; when that name is taken, the first free name of the
 * form "电影TOP250详情&lt;i&gt;.txt" (i = 1, 2, ...) is used, so existing files are never overwritten.
 * Per movie three lines are written: running number plus getshuju(), the poster URLs and the
 * short comments (the latter two tab-terminated).
 *
 * The writer is closed even if writing fails, and write errors - which PrintWriter does not
 * throw - are detected through checkError() and reported. Failures are reported on standard
 * output; no exception is propagated to the caller.
 */
public void printwxiangqing() {
    File file = new File("电影TOP250详情.txt");
    for(int i=1;file.exists();i++) {
        file = new File("电影TOP250详情"+ i+".txt");
    }
    try (PrintWriter output = new PrintWriter(file)) {
        int i=1;
        for(DianYingXiangQing xq: list_xiangqing) {
            output.println(i + "\t" + xq.getshuju());
            for(String s :xq.list_haibao)
                output.print(s + "\t");
            output.println();
            for(String s :xq.list_duanping)
                output.print(s + "\t");
            output.println();
            i++;
        }
        // PrintWriter swallows IOExceptions; checkError() flushes and exposes them.
        if(output.checkError())
            System.out.println("Error: failed to write " + file.getName());
    }
    catch (Exception ex) {
        // Best-effort export: report and carry on, as before.
        System.out.println("Error: " + ex.getMessage());
    }
}
/**
 * Writes one getshuju() line per entry of {@code list_bangdan} to "电影TOP250.txt" in the
 * working directory.
 *
 * If that file already exists an error is printed and the JVM is terminated with exit code 1
 * (unchanged from the original behaviour).
 * NOTE(review): terminating the JVM from here also kills the UI; consider returning instead.
 *
 * The writer is closed even if writing fails, and write errors - which PrintWriter does not
 * throw - are detected through checkError() and reported on standard output.
 */
public void printwbangdan() {
    File file = new File("电影TOP250.txt");
    if(file.exists()) {
        System.out.println("Error:\"电影TOP250.txt\"文件创建失败,检查该文件是否已存在?");
        System.exit(1);
    }
    try (PrintWriter output = new PrintWriter(file)) {
        for(BangDan b: list_bangdan) {
            output.println(b.getshuju());
        }
        // PrintWriter swallows IOExceptions; checkError() flushes and exposes them.
        if(output.checkError())
            System.out.println("Error: failed to write " + file.getName());
    }
    catch (Exception ex) {
        // Best-effort export: report and carry on, as before.
        System.out.println("Error: " + ex.getMessage());
    }
}
/**
 * Reads one Top-250 listing page (25 movies per page) and appends a BangDan entry per movie to
 * list_bangdan, then calls itself for the next page until list_bangdan holds "number" entries.
 * The page offset is derived from how many entries have been collected so far.
 *
 * NOTE(review): several pattern literals below are empty or truncated, and one literal is split
 * across two lines - presumably HTML fragments were lost when the code was published. The code is
 * reproduced unchanged; restore the literals from the original project before compiling.
 */
public void getbangdan() {
// start = number of entries already collected, rounded down to a multiple of 25.
String URLString = "https://movie.douban.com/top250?start=" + (list_bangdan.size()/25*25);
try {
URL url = new URL(URLString);
Scanner input = new Scanner(url.openStream(),"UTF-8");
String lie = "";
Pattern r;
Matcher m;
// Skip ahead to the start of the movie list.
// NOTE(review): an empty pattern matches every line, so as written this loop stops after one line.
do {
lie = input.nextLine();
r = Pattern.compile("");
m = r.matcher(lie);
}while(input.hasNext() && !m.find());
for(int i=0;i<25;i++) {
BangDan dianying = new BangDan();
// Skip ahead to the start of the next movie entry (pattern literal empty, see note above).
do {
lie = input.nextLine();
r = Pattern.compile("");
m = r.matcher(lie);
}while(input.hasNext() && !m.find());
lie = input.nextLine();
// Rank: the first run of digits on the line.
r = Pattern.compile("(\\D*)(\\d*)");
m = r.matcher(lie);
if(m.find()) {
dianying.paiming = Integer.parseInt(m.group(2));
}
lie = input.nextLine();
// Detail page URL.
// NOTE(review): "]*)" is not a valid regular expression (unbalanced parenthesis); the beginning of the literal was lost.
r = Pattern.compile("]*)");
m = r.matcher(lie);
if(m.find()) {
dianying.wangzhan = (m.group(1));
}
// Movie id: the first run of digits in the detail page URL.
r = Pattern.compile("(\\D*)(\\d*)");
m = r.matcher(dianying.wangzhan);
if(m.find()) {
dianying.id = (m.group(2));
}
lie = input.nextLine();
// Chinese title (group 2) and cover image URL (group 3).
r = Pattern.compile("([[^[\u4e00-\u9fa5]]]+)([^\"]+)\" src=\"([^\">]*)");
m = r.matcher(lie);
if(m.find()) {
dianying.zhongwenname = (m.group(2));
dianying.fengmian = m.group(3);
}
// Skip ahead to the line preceding the English title (pattern literal empty, see note above).
do {
lie = input.nextLine();
r = Pattern.compile("");
m = r.matcher(lie);
}while(input.hasNext() && !m.find());
lie = input.nextLine();
// English title: the text after "/ " up to the next '<'.
r = Pattern.compile("/ ([^<]+)");
m = r.matcher(lie);
if(m.find()) {
dianying.yingwenname = (m.group(1));
}
// Director and lead actors
do {
lie = input.nextLine();
r = Pattern.compile("导演");
m = r.matcher(lie);
}while(input.hasNext() && !m.find());
// NOTE(review): the following literal is broken across two lines and does not compile as shown.
r = Pattern.compile(" 导演: (.*) 主演: (.*)...
");
m = r.matcher(lie);
if(m.find()) {
dianying.daoyan = (m.group(1));
dianying.zhuyan = (m.group(2));
}
else {
// Fallback when the line has no actor part: take the director only.
r = Pattern.compile(" 导演: (.*) &");
m = r.matcher(lie);
if(m.find()) {
dianying.daoyan = (m.group(1));
}
}
// Summary: scan until either of two markers is found (both literals look stripped).
Matcher m2;
do {
lie = input.nextLine();
r = Pattern.compile("");
m = r.matcher(lie);
Pattern r2 = Pattern.compile(" ");;
m2 = r2.matcher(lie);;
}while(input.hasNext() && !m.find() && !m2.find());
r = Pattern.compile("([^<]+)");
m = r.matcher(lie);
if(m.find()) {
dianying.zhaiyao = (m.group(1));
}
//dianying.printall();
list_bangdan.add(dianying);
// Stop as soon as the requested number of movies has been collected.
if(list_bangdan.size() == number)
return;
}
// This page is exhausted; continue with the next one.
getbangdan();
}
catch (java.net.MalformedURLException ex){
System.out.println("Invalid URL");
}
catch (java.io.IOException ex){
System.out.println("I/O Errors: no such file");
}
catch (java.lang.NumberFormatException ex){
// Thrown by Integer.parseInt above when no digits were captured for the rank.
System.out.println("null");
}
}
}
/**
 * One entry of the Top-250 listing: rank, detail page URL, Chinese title, English title,
 * movie id, director, lead actors, cover image URL and summary.
 */
class BangDan {
    public int paiming;
    public String wangzhan = "";
    public String zhongwenname = "";
    public String yingwenname = "";
    public String id = "";
    public String daoyan = "";
    public String zhuyan = "";
    public String fengmian = "";
    public String zhaiyao = "";

    /**
     * Returns all fields, in declaration order, joined by tab characters.
     */
    String getshuju() {
        return lianjie("\t");
    }

    /**
     * Prints all fields, in declaration order, joined by commas, followed by a line break.
     */
    void printall(){
        System.out.println(lianjie(","));
    }

    /** Joins the fields in declaration order using the given separator. */
    private String lianjie(String fengefu) {
        return String.join(fengefu,
                String.valueOf(paiming), wangzhan, zhongwenname, yingwenname,
                id, daoyan, zhuyan, fengmian, zhaiyao);
    }
}
3.设计一个UI界面,把爬取的信息显示出来。
UI界面显示爬取的信息,除此之外也是程序开始的地方。
package Courses.UI;
import Courses.GrabData.DianYingXiangQing;
import Courses.GrabData.DouBan;
import Selenium.HelloSelenium;
import javafx.application.Application;
import javafx.geometry.Orientation;
import javafx.scene.Scene;
import javafx.scene.control.Button;
import javafx.scene.control.Label;
import javafx.scene.control.TextField;
import javafx.scene.image.Image;
import javafx.scene.image.ImageView;
import javafx.scene.layout.FlowPane;
import javafx.scene.layout.Pane;
import javafx.scene.text.*;
import javafx.stage.Stage;
import javafx.scene.paint.Color;
import javafx.scene.control.TextArea;
/**
* UI界面,也是程序开始的地方
* @author Administrator
* 2019年12月8日
*/
public class WebCrawlerUI extends Application {
static double imageHeight = 550; //poster height
static double imagelayouH = 450; //vertical distance of the poster from the top
int count_num = 20; //how many movies to fetch
static String dianyingwangzhan = ""; //movie URL typed into the search box
// Movie currently shown in the UI.
DianYingXiangQing dianying;
int num = 0; //index of the currently shown movie
int haibao_count = 1; //poster counter (1-based)
int duanping_count = 0; //comment counter (index of the first comment shown)
int xianshipinglun = 5; //how many comments are shown per page
DouBan douban;
// UI nodes that are updated whenever another movie, poster or comment page is shown.
ImageView image;
TextArea textjianjie;
Text textname;
Text texturl;
Text textriqi;
Text textlx;
Text textfs;
Text text_pinglunyema;
Text text_haibaoyema;
Text text_dianyingyema;
TextArea []pinglun = new TextArea[5];
/**
 * Program entry point; hands control to the JavaFX launcher.
 */
public static void main(String[] args) {
launch();
}
/**
 * Crawls count_num movies through DouBan (blocking until that has finished), then builds the whole
 * window: search box, movie navigation, synopsis, title / URL / genre / release date / score labels,
 * five comment boxes with paging buttons, and the poster view with paging buttons.
 *
 * @param primaryStage the stage supplied by JavaFX; shown maximized
 */
public void start (Stage primaryStage) {
Pane pane = new Pane();
System.out.println("正在加载豆瓣电影TOP250!!!");
System.out.println("共加载" + count_num + "个电影详情");
douban = new DouBan(count_num);
// NOTE(review): get(num) throws IndexOutOfBoundsException if nothing could be crawled.
dianying = douban.list_xiangqing.get(num);
// Poster image view
image = new ImageView();
image.setFitHeight(imageHeight);
image.setPreserveRatio(true);//keep the aspect ratio
image.setSmooth(true);//smooth scaling
image.setCache(true);//cache
image.setLayoutY(imagelayouH);
pane.getChildren().add(image);
// Search box in the top-left corner
FlowPane flow = new FlowPane();
Text text1 = new Text(" 请在下框中输入豆瓣电影网址:");
text1.setScaleX(1.5);
text1.setScaleY(1.5);
flow.getChildren().add(new Text());
flow.getChildren().add(text1);
TextField dyurl = new TextField("例:https://movie.douban.com/subject/26794435/");
dyurl.setMinWidth(500);
flow.getChildren().add(dyurl);
// Top-250 hint text in the top-left corner
flow.getChildren().add(new Label(""));
Text text2 = new Text(" 点击下面按钮显示豆瓣评分TOP250:");
text2.setScaleX(1.5);
text2.setScaleY(1.5);
text2.setFill(Color.BLUE);
flow.getChildren().add(text2);
// Top-left corner finished
flow.setVgap(10);
flow.setHgap(4);
flow.setOrientation(Orientation.VERTICAL);
pane.getChildren().add(flow);
// Confirm button: takes the text of the search box and refreshes the UI
Button butten = new Button("确认");
butten.setLayoutX(520);
butten.setLayoutY(60);
butten.setOnMouseClicked( e -> {
dianyingwangzhan = dyurl.getText();
UIUpdate(0);
});
pane.getChildren().add(butten);
// Previous-movie button
Button buttenUp = new Button("上一页");
buttenUp.setLayoutX(50);
buttenUp.setLayoutY(180);
buttenUp.setOnMouseClicked( e -> {
UIUpdate(-1);
});
pane.getChildren().add(buttenUp);
// Next-movie button
Button buttenDown = new Button("下一页");
buttenDown.setLayoutX(300);
buttenDown.setLayoutY(180);
buttenDown.setOnMouseClicked( e -> {
UIUpdate(1);
});
pane.getChildren().add(buttenDown);
// Movie page indicator
text_dianyingyema = new Text("第" + (num + 1) + "页/共" + douban.list_xiangqing.size() + "页");
text_dianyingyema.setLayoutX(160);
text_dianyingyema.setLayoutY(200);
text_dianyingyema.setScaleX(1.5);
text_dianyingyema.setScaleY(1.5);
pane.getChildren().add(text_dianyingyema);
// Synopsis
Text text3 = new Text (" 简介:");
text3.setLayoutY(250);
text3.setFill(Color.RED);
text3.setScaleX(1.5);
text3.setScaleY(1.5);
pane.getChildren().add(text3);
textjianjie = new TextArea (dianying.jianjie);
textjianjie.setLayoutY(270);
textjianjie.setEditable(false);
//textjianjie.setTextFill(Color.RED);
textjianjie.setWrapText(true);
pane.getChildren().add(textjianjie);
// Movie title
Text text4 = new Text (" 影名:");
text4.setLayoutY(50);
text4.setLayoutX(800);
text4.setFill(Color.RED);
text4.setScaleX(1.5);
text4.setScaleY(1.5);
pane.getChildren().add(text4);
textname = new Text (dianying.zhongwenname);
textname.setLayoutY(50);
textname.setLayoutX(900);
textname.setScaleX(1.5);
textname.setScaleY(1.5);
textname.setFill(Color.RED);
pane.getChildren().add(textname);
// Movie URL
Text text5 = new Text ("豆瓣网址:");
text5.setLayoutY(50);
text5.setLayoutX(1200);
text5.setScaleX(1.5);
text5.setScaleY(1.5);
pane.getChildren().add(text5);
texturl = new Text (dianying.wangzhan);
texturl.setLayoutY(50);
texturl.setLayoutX(1400);
texturl.setScaleX(1.5);
texturl.setScaleY(1.5);
pane.getChildren().add(texturl);
// Genre
Text text6 = new Text ("类型:");
text6.setLayoutY(100);
text6.setLayoutX(820);
text6.setScaleX(1.5);
text6.setScaleY(1.5);
pane.getChildren().add(text6);
textlx = new Text (dianying.leixing);
textlx.setLayoutY(100);
textlx.setLayoutX(900);
textlx.setScaleX(1.5);
textlx.setScaleY(1.5);
pane.getChildren().add(textlx);
// Release date
Text text7 = new Text ("上映日期:");
text7.setLayoutY(100);
text7.setLayoutX(1200);
text7.setScaleX(1.5);
text7.setScaleY(1.5);
pane.getChildren().add(text7);
textriqi = new Text (dianying.shangyingriqi);
textriqi.setLayoutY(100);
textriqi.setLayoutX(1350);
textriqi.setScaleX(1.5);
textriqi.setScaleY(1.5);
pane.getChildren().add(textriqi);
// Rating score
Text text8 = new Text ("评分:");
text8.setLayoutY(150);
text8.setLayoutX(820);
text8.setScaleX(1.5);
text8.setScaleY(1.5);
pane.getChildren().add(text8);
textfs = new Text (dianying.pingjiafenshu);
textfs.setLayoutY(150);
textfs.setLayoutX(900);
textfs.setScaleX(1.5);
textfs.setScaleY(1.5);
pane.getChildren().add(textfs);
// Short comments
Text text9 = new Text ("短评:");
text9.setLayoutY(200);
text9.setLayoutX(820);
text9.setScaleX(1.5);
text9.setScaleY(1.5);
pane.getChildren().add(text9);
// Comment page indicator: current page / total pages (total derived from duanpingcount).
text_pinglunyema = new Text("第" +(duanping_count/xianshipinglun + 1)+ "页/" + "共" + dianying.duanpingcount/xianshipinglun + "页");
text_pinglunyema.setLayoutX(1050);
text_pinglunyema.setLayoutY(820);
text_pinglunyema.setScaleX(1.5);
text_pinglunyema.setScaleY(1.5);
pane.getChildren().add(text_pinglunyema);
// Five read-only comment boxes stacked vertically, 100 px apart.
for(int i=0;i<5;i++) {
int jianju = 100;
// NOTE(review): get(...) throws IndexOutOfBoundsException if fewer than five comments were crawled.
pinglun[i] = new TextArea (dianying.list_duanping.get(i + duanping_count));
pinglun[i].setLayoutY(250 + i*jianju );
pinglun[i].setLayoutX(820);
pinglun[i].setMaxHeight(100);
pinglun[i].setWrapText(true);
pinglun[i].setEditable(false);
pane.getChildren().add(pinglun[i]);
}
// Next-page button for the short comments
Button butten_duanping_down = new Button("下一页短评");
butten_duanping_down.setLayoutX(1200);
butten_duanping_down.setLayoutY(800);
butten_duanping_down.setOnMouseClicked( e -> {
// Already on the last page (based on duanpingcount): do nothing.
if(duanping_count>=dianying.duanpingcount - xianshipinglun)
return;
duanping_count += xianshipinglun;
setDuanping();
});
pane.getChildren().add(butten_duanping_down);
// Previous-page button for the short comments
Button butten_duanping_up = new Button("上一页短评");
butten_duanping_up.setLayoutX(900);
butten_duanping_up.setLayoutY(800);
butten_duanping_up.setOnMouseClicked( e -> {
// Already on the first page: do nothing.
if(duanping_count<xianshipinglun)
return;
duanping_count -= xianshipinglun;
setDuanping();
});
pane.getChildren().add(butten_duanping_up);
// Next-poster button
double x = 450;
Button butten_haibao_down = new Button("下一个海报");
butten_haibao_down.setLayoutX(x);
butten_haibao_down.setLayoutY(620);
butten_haibao_down.setOnMouseClicked( e -> {
// Already on the last poster: do nothing.
if(haibao_count>=dianying.list_haibao.size())
return;
haibao_count++;
setImage(haibao_count);
});
pane.getChildren().add(butten_haibao_down);
// Previous-poster button
Button butten_haibao_up = new Button("上一个海报");
butten_haibao_up.setLayoutX(x);
butten_haibao_up.setLayoutY(540);
butten_haibao_up.setOnMouseClicked( e -> {
// Already on the first poster: do nothing.
if(haibao_count<=1)
return;
haibao_count--;
setImage(haibao_count);
});
pane.getChildren().add(butten_haibao_up);
// Poster page indicator
text_haibaoyema = new Text("第" +haibao_count+ "页/" + "共" + dianying.list_haibao.size() + "页");
text_haibaoyema.setLayoutX(x);
text_haibaoyema.setLayoutY(600);
text_haibaoyema.setScaleX(1.5);
text_haibaoyema.setScaleY(1.5);
pane.getChildren().add(text_haibaoyema);
// Show the first poster; must come after text_haibaoyema is created because setImage updates it.
setImage(1);
Scene scene = new Scene(pane);
primaryStage.setMaximized(true); //maximize the window
primaryStage.setTitle("网页爬虫");
primaryStage.setScene(scene);
primaryStage.show();
}
/**
 * Switches the UI to another movie and refreshes poster, summary and comments.
 *
 * @param annv 0 = the confirm button was pressed: {@code dianyingwangzhan} is either a Douban
 *             movie URL (cut down to {@code https://movie.douban.com/subject/<id>/}) or free
 *             text that is resolved to a URL through {@code HelloSelenium.getSearchUrl};
 *             a movie already in the list is reused, otherwise it is crawled and appended.
 *             -1 = previous movie, any other value = next movie; both wrap around.
 *
 * When the search yields no URL, or when there is no movie to navigate to, the UI is left
 * unchanged instead of crawling an empty URL or indexing an empty list.
 */
void UIUpdate(int annv) {
    if(annv==0) {
        final String qianzhui = "https://movie.douban.com/subject/";
        String shuru = dianyingwangzhan == null ? "" : dianyingwangzhan;
        int index1 = shuru.indexOf(qianzhui);
        // End of the id segment: the first '/' after the prefix.
        int index2 = index1 < 0 ? -1 : shuru.indexOf("/",index1 + qianzhui.length());
        if(index1<0 || index2<0)
        {
            // Not a movie URL: treat the input as a search term.
            dianyingwangzhan = HelloSelenium.getSearchUrl(shuru);
            System.out.println("正在搜索电影网站电影网站" + dianyingwangzhan);
            if(dianyingwangzhan == null || dianyingwangzhan.isEmpty()) {
                dianyingwangzhan = "";
                return; // nothing found, keep showing the current movie
            }
        }
        else dianyingwangzhan = shuru.substring(index1, index2+1);
        boolean baohan = false;
        for(int i=0;i<douban.list_xiangqing.size();i++){
            if(dianyingwangzhan.equals(douban.list_xiangqing.get(i).wangzhan)) {
                baohan = true;
                num = i;
                break;
            }
        }
        if(baohan)
            dianying = douban.list_xiangqing.get(num);
        else {
            // Unknown movie: crawl it synchronously and append it as the last page.
            dianying = new DianYingXiangQing(dianyingwangzhan);
            dianying.run();
            num = douban.list_xiangqing.size();
            douban.list_xiangqing.add(dianying);
        }
    }
    else {
        int zongshu = douban.list_xiangqing.size();
        if(zongshu == 0)
            return; // no movie to navigate to
        if(annv==-1)
            --num;
        else
            ++num;
        // Wrap around at both ends.
        if(num >= zongshu) {
            num = 0;
        }
        if(num<0)
            num = zongshu-1;
        dianying = douban.list_xiangqing.get(num);
    }
    haibao_count = 1;
    setImage(haibao_count);
    SetSummary();
}
/**
 * Refreshes synopsis, title, URL, genre, release date, score and the movie page indicator
 * from the current movie, then resets the comment view to its first page.
 */
void SetSummary(){
    DianYingXiangQing dangqian = dianying;
    textjianjie.setText(dangqian.jianjie);
    textname.setText(dangqian.zhongwenname);
    texturl.setText(dangqian.wangzhan);
    textlx.setText(dangqian.leixing);
    textriqi.setText(dangqian.shangyingriqi);
    textfs.setText(dangqian.pingjiafenshu);
    duanping_count = 0;
    String yema = "第" + (num + 1) + "页/共" + douban.list_xiangqing.size() + "页";
    text_dianyingyema.setText(yema);
    setDuanping();
}
/**
 * Shows the current page of short comments: updates the page indicator and fills the comment
 * boxes with the comments starting at index {@code duanping_count}.
 *
 * Boxes for which no comment exists (fewer comments crawled than expected) are cleared instead
 * of raising an IndexOutOfBoundsException.
 */
void setDuanping() {
    text_pinglunyema.setText("第" +(duanping_count/xianshipinglun + 1)+ "页/" + "共" + dianying.duanpingcount/xianshipinglun + "页");
    int zongshu = dianying.list_duanping.size();
    // Never touch more boxes than actually exist.
    int hangshu = Math.min(xianshipinglun, pinglun.length);
    for(int i=0;i<hangshu;i++) {
        int xiabiao = duanping_count + i;
        boolean youxiao = xiabiao >= 0 && xiabiao < zongshu;
        pinglun[i].setText(youxiao ? dianying.list_duanping.get(xiabiao) : "");
    }
}
/**
 * Shows poster number {@code n} of the current movie and updates the poster page indicator.
 * The poster is read from the local {@code image/<title>/} folder when the movie was crawled
 * with {@code imagebaocun} set, otherwise it is loaded from its URL.
 *
 * When {@code n} does not refer to an existing poster (for example because no poster was
 * crawled) the image view is cleared instead of raising an IndexOutOfBoundsException.
 *
 * @param n 1-based poster number
 */
void setImage(int n){
    text_haibaoyema.setText("第" + haibao_count+ "页/" + "共" + dianying.list_haibao.size() + "页");
    if(n < 1 || n > dianying.list_haibao.size()) {
        image.setImage(null);
        return;
    }
    String name = dianying.zhongwenname;
    if(dianying.imagebaocun)
        image.setImage(new Image("file:image/" + name+"/" + name+ n+ ".jpg"));
    else
        image.setImage(new Image(dianying.list_haibao.get(n-1)));
}
}
4.Selenium类爬取网页,可以实现全网搜索。
需要支持Selenium驱动支持和安装Chrome浏览器。
如果不需要本功能可把UI类中的关于Selenium的两行代码都删除,位置大概是第7行和342行。
程序就可以保持其他功能不受影响。
package Selenium;
import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
/**
 * Drives a real Chrome browser through Selenium to run a Douban movie search.
 * According to the original author, a plain URL fetch cannot be used because Douban hides the
 * search results from it.
 *
 * Requires {@code chromedriver.exe} in the working directory and an installed Chrome browser.
 *
 * @author Administrator
 */
public class HelloSelenium {
    public static void main(String[] args) {
        getSearchUrl("小丑");
    }

    /**
     * Searches Douban for {@code name} and returns the link of the first result whose
     * {@code href} contains "subject".
     *
     * The search text is URL-encoded (UTF-8) before it is put into the query string, elements
     * without an {@code href} are skipped, and the browser is always closed, even when Selenium
     * throws.
     *
     * @param name search text, e.g. a (partial) movie title; {@code null} is treated as empty
     * @return the URL of the first matching result, or an empty string when nothing matched
     */
    public static String getSearchUrl(String name)
    {
        System.setProperty("webdriver.chrome.driver","chromedriver.exe");// location of the Chrome driver executable
        String bianma;
        try {
            bianma = java.net.URLEncoder.encode(name == null ? "" : name, "UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            // UTF-8 is mandatory on every Java platform, so this cannot happen in practice.
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
        WebDriver driver = new ChromeDriver();
        String url = "";
        try {
            driver.navigate().to("https://search.douban.com/movie/subject_search?search_text=" + bianma + "&cat=1002");
            try {
                // Give the page time to render the results.
                Thread.sleep(20000);
            } catch (InterruptedException e) {
                // Keep the interrupt visible to callers and read whatever is rendered so far.
                Thread.currentThread().interrupt();
            }
            List<WebElement> signOuts = driver.findElements(By.className("title-text"));
            for(WebElement signOut:signOuts){
                String href = signOut.getAttribute("href");
                if(href != null && href.indexOf("subject") != -1){
                    url = href;
                    break;
                }
            }
        } finally {
            driver.quit(); // close the browser
        }
        System.out.println("url:" + url);
        return url;
    }
}