JAVA 爬虫 抖音视频

代码还有很多不足之处,如果发现问题,请告知一声,万分感谢!

终态ID:这只是我自己起的名字,理解意思即可。

根据抖音用户的终态ID进入用户的主页,进而下载其视频。终态ID获取方式:手机端-->用户主页-->分享名片-->复制链接,分享链接中 /share/user/ 后面的那串数字就是终态ID。

例:https://www.douyin.com/share/user/59021821479/?share_type=link  ID:59021821479

下方是代码 

package dou_yin;

import java.util.ArrayList;
import java.util.List;

import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;

/**
 * Collects the video ids shown on a Douyin user's share page, resolves each
 * one to a downloadable link through a third-party converter page, and hands
 * every resolved link to {@link DownloadFile} for download.
 *
 * @author lenovo
 *
 */
public class DownloadUrl {
	// Not used inside this class; kept because the fields are package-visible.
	String URl_Id = null;
	int count = 0;// counter

	/**
	 * Opens the given user page in Chrome, gathers one share URL per video
	 * module found on it, converts each share URL into a direct link and
	 * downloads the results.
	 *
	 * @param url address of the user's share page
	 * @throws InterruptedException if the thread is interrupted during one of
	 *                              the fixed waits for page loading
	 */
	public void DownHtml(String url) throws InterruptedException {
		List<String> alURl = new ArrayList<>();// share URLs, one per video id
		List<String> alMP4 = new ArrayList<>();// resolved download URLs
		System.setProperty("webdriver.chrome.driver", "E:/chromedriver/chromedriver_win32/chromedriver.exe");
		// Start a browser instance
		WebDriver driver = new ChromeDriver();
		try {
			driver.get(url);
			Thread.sleep(8000);// fixed wait for the page to load
			// NOTE(review): the comma makes this a selector list ("li.item" OR
			// a <goWork> element); "li.item.goWork" may have been intended - confirm.
			List<WebElement> elements = driver.findElements(By.cssSelector("li.item,goWork"));
			System.out.println(elements.size());
			// Build one share URL per video module from its data-id attribute
			for (WebElement we : elements) {
				String ids = we.getAttribute("data-id");
				if (ids == null || ids.isEmpty()) {
					continue;// element carries no id, nothing to download
				}
				alURl.add("https://www.iesdouyin.com/share/video/" + ids);
			}
			// Page that converts a video share link into a downloadable link
			driver.get("http://douyin.iiilab.com/");
			Thread.sleep(8000);// fixed wait for the page to load
			for (String shareUrl : alURl) {
				WebElement input = driver.findElement(By.cssSelector("input.form-control.link-input"));
				input.clear();// empty the input box
				input.sendKeys(shareUrl);// enter the link to convert
				driver.findElement(By.cssSelector("button.btn.btn-default")).click();// start parsing
				Thread.sleep(4000);// fixed wait for the result
				String href = driver.findElement(By.cssSelector("a.btn.btn-success")).getAttribute("href");
				if (href != null) {
					alMP4.add(href);
				}
			}
		} finally {
			// Always end the browser session, even when a lookup above throws,
			// so no Chrome/chromedriver process is left behind.
			driver.quit();
		}
		// Download every resolved link
		for (String mp4Url : alMP4) {
			DownloadFile df = new DownloadFile();
			df.run(mp4Url);
		}
	}


	/**
	 * Entry point: downloads the videos of the hard-coded user id.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		DownloadUrl dl = new DownloadUrl();
		String ID = "80602533314";// user id
		try {
			dl.DownHtml("https://www.douyin.com/share/user/" + ID + "/?share_type=link");
		} catch (InterruptedException e) {
			// Restore the interrupt flag so the interruption is not lost
			Thread.currentThread().interrupt();
			e.printStackTrace();
		}
	}



}

package dou_yin;

import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;

/**
 * Downloads a file from an HTTP(S) URL and stores it on disk; {@link #run}
 * saves it with an {@code .mp4} extension.
 *
 * @author lenovo
 *
 */
public class DownloadFile {
	private static final int CONNECT_TIMEOUT_MS = 3000;
	private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)";
	private static final int BUFFER_SIZE = 8192;

	/**
	 * Downloads {@code urlStr} and writes the response body to
	 * {@code savePath/fileName}, creating {@code savePath} (including missing
	 * parent directories) when necessary.
	 *
	 * @param urlStr   address to download from
	 * @param fileName name of the file to create inside {@code savePath}
	 * @param savePath directory the file is written to
	 * @throws IOException if the connection fails, the directory cannot be
	 *                     created, or reading/writing fails; an incomplete
	 *                     file is removed before the exception propagates
	 */
	public  void downLoadFromUrl(String urlStr, String fileName, String savePath) throws IOException {
		URL url = new URL(urlStr);
		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
		conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
		conn.setRequestProperty("User-Agent", USER_AGENT);
		try (InputStream inputStream = conn.getInputStream()) {
			java.io.File saveDir = new java.io.File(savePath);
			// mkdirs() also creates missing parents; re-check isDirectory() in
			// case the directory appeared between the two calls.
			if (!saveDir.isDirectory() && !saveDir.mkdirs() && !saveDir.isDirectory()) {
				throw new IOException("Could not create directory: " + saveDir);
			}
			java.io.File file = new java.io.File(saveDir, fileName);
			boolean complete = false;
			try (FileOutputStream fos = new FileOutputStream(file)) {
				// Stream straight to disk instead of buffering the whole body in memory
				copy(inputStream, fos);
				complete = true;
			} finally {
				// The output stream is already closed here; drop a truncated file
				if (!complete) {
					file.delete();
				}
			}
		} finally {
			conn.disconnect();
		}
	}

	/**
	 * Reads the stream until end-of-stream and returns everything read.
	 * The stream is not closed by this method.
	 *
	 * @param inputStream stream to drain
	 * @return all bytes read from the stream (empty array for an empty stream)
	 * @throws IOException if reading fails
	 */
	public  byte[] readInputStream(InputStream inputStream) throws IOException {
		ByteArrayOutputStream bos = new ByteArrayOutputStream();
		copy(inputStream, bos);
		return bos.toByteArray();
	}

	/** Copies all remaining bytes of {@code in} to {@code out}; closes neither. */
	private static void copy(InputStream in, java.io.OutputStream out) throws IOException {
		byte[] buffer = new byte[BUFFER_SIZE];
		int len;
		while ((len = in.read(buffer)) != -1) {
			out.write(buffer, 0, len);
		}
	}


	/**
	 * Downloads the given URL into the fixed target directory, naming the file
	 * after the current time in milliseconds plus {@code .mp4}. I/O failures
	 * are printed and not propagated.
	 *
	 * @param urlStr address of the video to download
	 */
	public void run(String urlStr) {
		long imageTitile = System.currentTimeMillis();
		String fileName = imageTitile + "." + "mp4";
		String savePath = "G:\\VidioVidioVidioVidioVidioVidio";
		try {
			downLoadFromUrl(urlStr, fileName, savePath);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}


你可能感兴趣的:(爬虫)