Java爬虫:从腾讯视频播放界面爬取视频并存到本地

源码如下:

package com.example.demo.test.db;

import org.apache.commons.lang.RandomStringUtils;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

/**
 * Batch downloader for Tencent Video clips.
 *
 * <p>Reads player-page URLs from a text file, converts each one to the
 * {@code getinfo} endpoint URL, fetches that endpoint's JSON response, extracts
 * the file name and access key from it, builds the direct download URL and hands
 * it to {@link CatchIMG#getImg(String, String)} to save the video locally.
 *
 * @author Administrator
 */
public class CatchVedio {

    /** Text file listing one player-page URL per line. */
    private static final String PAGE_LIST_FILE = "D:/worm/vedioURL.txt";
    /** Text file that {@link #toDownloadURL(String)} appends download URLs to. */
    private static final String DOWNLOAD_URL_FILE = "D:/worm/downloadURL.txt";
    /** Directory the downloaded videos are written into. */
    private static final String VIDEO_DIR = "D:\\worm\\video";

    private static final String INFO_URL_HEAD = "http://vv.video.qq.com/getinfo?vids=";
    private static final String INFO_URL_TAIL = "&platform=101001&charge=0&otype=json&defn=shd";
    private static final String DOWNLOAD_URL_HEAD = "http://ugcws.video.gtimg.com/";

    /**
     * Downloads every video listed in the page-list file.
     *
     * <p>A failure on one video is reported on {@code System.err} and the loop
     * moves on to the next entry, so a single bad page does not abort the batch.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        CatchVedio cv = new CatchVedio();

        String[] infoUrls;
        try {
            infoUrls = cv.get_VedioURL();
        } catch (IOException e) {
            System.err.println("Cannot read page list " + PAGE_LIST_FILE);
            e.printStackTrace();
            return;
        }

        for (String infoUrl : infoUrls) { // each entry is the getinfo URL of one video
            String filename = RandomStringUtils.randomAlphanumeric(10);
            String target = VIDEO_DIR + File.separator + filename + ".mp4";
            try {
                CatchIMG.getImg(cv.analyse(cv.get_Json(infoUrl)), target);
            } catch (IOException | IllegalArgumentException e) {
                // Keep going: the remaining videos are independent of this one.
                System.err.println("Skipping " + infoUrl + ": " + e);
                e.printStackTrace();
            }
        }
    }

    /**
     * Appends one download URL, followed by CRLF, to the download-URL file.
     *
     * <p>The writer is opened and closed on every call so no file handle outlives
     * the method.
     *
     * @param real_url the download URL to record
     * @throws IOException if the file cannot be opened or written
     */
    void toDownloadURL(String real_url) throws IOException {
        try (BufferedWriter out = new BufferedWriter(new FileWriter(DOWNLOAD_URL_FILE, true))) {
            out.write(real_url + "\r\n");
        }
    }

    /**
     * Builds the direct download URL from a {@code getinfo} response.
     *
     * <p>The response is scanned as plain text for the string fields
     * {@code "fvkey"} and {@code "fn"}; the result has the form
     * {@code http://ugcws.video.gtimg.com/<fn>?vkey=<fvkey>}.
     *
     * @param json the response text returned by {@link #get_Json(String)}
     * @return the complete download URL
     * @throws IllegalArgumentException if {@code json} is null or either field is missing
     */
    String analyse(String json) {
        String fvkey = extractStringField(json, "fvkey");
        String fn = extractStringField(json, "fn");
        return DOWNLOAD_URL_HEAD + fn + "?vkey=" + fvkey;
    }

    /**
     * Returns the text between {@code "key":"} and the next double quote.
     * Fails loudly instead of slicing at a bogus offset when the key is absent.
     */
    private static String extractStringField(String json, String key) {
        if (json == null) {
            throw new IllegalArgumentException("video info response is null");
        }
        String marker = "\"" + key + "\":\"";
        int markerAt = json.indexOf(marker);
        if (markerAt < 0) {
            throw new IllegalArgumentException(
                    "field \"" + key + "\" not found in video info response");
        }
        int start = markerAt + marker.length();
        int end = json.indexOf('"', start);
        if (end < 0) {
            throw new IllegalArgumentException(
                    "field \"" + key + "\" is not terminated in video info response");
        }
        return json.substring(start, end);
    }

    /**
     * Fetches the body of the given URL as UTF-8 text with line breaks removed.
     *
     * @param url the {@code getinfo} endpoint URL
     * @return the response body, or an empty string when the status is not 200
     * @throws UnsupportedEncodingException declared for compatibility with existing callers
     * @throws IOException if the connection or the read fails
     */
    String get_Json(String url) throws UnsupportedEncodingException, IOException {
        HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
        try {
            if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) {
                return "";
            }
            StringBuilder body = new StringBuilder();
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
                String line;
                while ((line = in.readLine()) != null) {
                    body.append(line);
                }
            }
            return body.toString();
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Reads the page-list file and converts every player-page URL in it to the
     * matching {@code getinfo} endpoint URL.
     *
     * <p>Blank lines are ignored. Lines that are not recognisable player-page URLs
     * are reported on {@code System.err} and skipped.
     *
     * @return the endpoint URLs, in file order
     * @throws IOException if the file cannot be opened or read
     */
    String[] get_VedioURL() throws IOException {
        List<String> container = new ArrayList<String>();
        try (BufferedReader in = new BufferedReader(new FileReader(new File(PAGE_LIST_FILE)))) {
            String line;
            while ((line = in.readLine()) != null) {
                String pageUrl = line.trim();
                if (pageUrl.isEmpty()) {
                    continue;
                }
                try {
                    container.add(this.change(pageUrl));
                } catch (IllegalArgumentException e) {
                    System.err.println("Ignoring line in " + PAGE_LIST_FILE + ": " + e.getMessage());
                }
            }
        }
        return container.toArray(new String[0]);
    }

    /**
     * Converts a player-page URL into the {@code getinfo} endpoint URL for the same video.
     *
     * <p>The video id is the text between {@code page/} and the following {@code .html}.
     * Example page URLs:
     * <pre>
     * https://v.qq.com/x/page/f08302y6rof.html
     * https://v.qq.com/x/page/y083158hphd.html
     * https://v.qq.com/x/page/c08503oe58c.html
     * </pre>
     * Resulting endpoint format:
     * <pre>
     * http://vv.video.qq.com/getinfo?vids=x0164ytbgov&amp;platform=101001&amp;charge=0&amp;otype=json&amp;defn=shd
     * </pre>
     *
     * @param str the player-page URL
     * @return the endpoint URL
     * @throws IllegalArgumentException if {@code str} is null or does not contain
     *         {@code page/<vid>.html}
     */
    String change(String str) {
        if (str == null) {
            throw new IllegalArgumentException("page URL is null");
        }
        int pageAt = str.indexOf("page/");
        if (pageAt < 0) {
            throw new IllegalArgumentException("not a player page URL (no \"page/\"): " + str);
        }
        int vidStart = pageAt + "page/".length();
        // Search for the suffix after the id start so an earlier ".html" cannot invert the range.
        int vidEnd = str.indexOf(".html", vidStart);
        if (vidEnd < 0) {
            throw new IllegalArgumentException("not a player page URL (no \".html\"): " + str);
        }
        return INFO_URL_HEAD + str.substring(vidStart, vidEnd) + INFO_URL_TAIL;
    }
}

package com.example.demo.test.db;

import org.apache.commons.lang.RandomStringUtils;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;

/**
 * Downloads the resource at a given HTTP(S) address and saves it to a local file.
 *
 * <p>Despite the name, nothing here is image-specific: the response body is
 * copied byte for byte.
 *
 * @author penny
 * @date 2017-12-03 21:00:05
 */
public class CatchIMG {

    /** Browser-like User-Agent sent with the request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36";

    /** Size of the copy buffer in bytes (10 KiB). */
    private static final int BUFFER_SIZE = 1024 * 10;

    /**
     * Fetches {@code url} and writes the response body to the file {@code img}.
     *
     * <p>The file is only created when the server answers with status 200. Any
     * other status is reported on {@code System.err} and the method returns
     * without writing anything. Both streams and the connection are released
     * even when the copy fails part-way.
     *
     * @param url address of the resource; surrounding whitespace is trimmed
     * @param img path of the local file to write
     * @throws IOException if the URL is malformed, the connection fails, or the
     *         file cannot be written
     */
    public static void getImg(String url, String img) throws IOException {
        URL imgURL = new URL(url.trim());
        HttpURLConnection urlConn = (HttpURLConnection) imgURL.openConnection();
        urlConn.setRequestProperty("User-Agent", USER_AGENT);
        try {
            urlConn.connect();
            System.out.println(CatchIMG.class.toString()+":获取连接="+urlConn.getResponseMessage());

            int status = urlConn.getResponseCode();
            if (status != HttpURLConnection.HTTP_OK) {
                System.err.println(CatchIMG.class.toString() + ": skipped " + url.trim()
                        + " (HTTP status " + status + ")");
                return;
            }

            // Wrap the network stream exactly once; a fresh wrapper per read could
            // strand bytes in a discarded internal buffer.
            try (InputStream ins = new BufferedInputStream(urlConn.getInputStream());
                 OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(img)))) {
                byte[] buff = new byte[BUFFER_SIZE];
                int len;
                while ((len = ins.read(buff)) != -1) {
                    out.write(buff, 0, len);
                }
            }
        } finally {
            // Runs after the streams are closed on the success path.
            urlConn.disconnect();
        }
    }

}

你可能感兴趣的:(java爬虫_从腾讯视频播放界面爬取视频并存到本地)