批量下载魔兽replays录像文件

     自己是个war3爱好者,也经常去浩方或者VS平台打几局,技术不怎么样,但也偶尔去 http://w3g.replays.net 下载些replays学习学习。不过 replays.net 上的录像下载确实麻烦,每个链接都得单独点进去才能下载。看到有人开发过mp3的批量下载器,所以就想自己开发一个类似的、用来批量下载魔兽replays录像文件的工具,方便自己使用,也方便和我一样的war3爱好者。实现思路很简单,参考了一些批量下载的程序,技术上没什么新东西,实用就行了。

 

   已经修改为Form版本(有需要的留言):

批量下载魔兽replays录像文件

 

     主要代码如下:

     War3Replays.java:

 

package com.moonights.war3replays;


/**
 * Value holder describing one replay file that is to be downloaded: its
 * name, size, extension, source URL and the directory it is saved to.
 * 
 * @author moonights
 */
public class War3Replays {

	/** Name of the replay file (without extension). */
	private String fileName;

	/** Size of the replay file, kept as text. */
	private String fileSize;

	/** Extension of the replay file; {@code ".w3g"} unless overridden. */
	private String fileFormat = ".w3g";

	/** URL the replay is downloaded from. */
	private String fileUrl;

	/** Directory the replay is saved to. */
	private String savePath;

	// ---- file name ----

	/** @return the replay file name */
	public String getFileName() {
		return this.fileName;
	}

	/** @param fileName the replay file name */
	public void setFileName(String fileName) {
		this.fileName = fileName;
	}

	// ---- file size ----

	/** @return the replay file size */
	public String getFileSize() {
		return this.fileSize;
	}

	/** @param fileSize the replay file size */
	public void setFileSize(String fileSize) {
		this.fileSize = fileSize;
	}

	// ---- file format ----

	/** @return the replay file extension */
	public String getFileFormat() {
		return this.fileFormat;
	}

	/** @param fileFormat the replay file extension */
	public void setFileFormat(String fileFormat) {
		this.fileFormat = fileFormat;
	}

	// ---- download URL ----

	/** @return the URL the replay is downloaded from */
	public String getFileUrl() {
		return this.fileUrl;
	}

	/** @param fileUrl the URL the replay is downloaded from */
	public void setFileUrl(String fileUrl) {
		this.fileUrl = fileUrl;
	}

	// ---- save path ----

	/** @return the directory the replay is saved to */
	public String getSavePath() {
		return this.savePath;
	}

	/** @param savePath the directory the replay is saved to */
	public void setSavePath(String savePath) {
		this.savePath = savePath;
	}
}

 

War3ReplaysDowner.java:

 

package com.moonights.war3replays;

import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;

import com.moonights.utils.Configuration;
import com.moonights.utils.HttpGet;

/**
 * Batch downloader for Warcraft III replay files listed on
 * http://w3g.replays.net.
 * <p>
 * Only two levels are parsed: the list page configured as
 * {@code DOWNLOAD_MAIN_URL} and the detail pages it links to. Deeper levels
 * would need a recursive crawl, which is not implemented.
 * 
 * @author moonights
 */
public class War3ReplaysDowner {
	private static Configuration config = new Configuration("config/war3.properties");

	// Settings are trimmed: java.util.Properties keeps trailing whitespace in
	// values, which would otherwise end up inside URLs and file names.
	private static final String DOWNLOAD_INDEX_URL = setting("DOWNLOAD_INDEX_URL");

	private static final String DOWNLOAD_MAIN_URL = setting("DOWNLOAD_MAIN_URL");// e.g. "http://w3g.replays.net/Default.aspx?PageNo=2"

	private static final String FILE_SAVE_PATH = setting("FILE_SAVE_PATH");// e.g. "E:\\temp"

	private static final String FILE_SAVE_TYPE = setting("FILE_SAVE_TYPE");// e.g. ".w3g"

	/**
	 * Matches only anchors of the form
	 * {@code <a href="http://w3g.replays.net/doc/cn/2010-1-25/12644174774532572561.html" target="_blank">};
	 * group 1 is the absolute URL.
	 */
	private static final String url_regexp_resources = "<a href=\"(\\w+://[^/:]+[^#\\s]*)\" target=\"_blank\">";

	/** Matches the "Download REP" anchor of a detail page; group 1 is its href. */
	private static final String url_regexp_downloadFile = "<a href=\"(.*?)\">Download REP</a>";

	/** Matches the title heading of a detail page; group 1 is the title text. */
	private static final String url_regexp_downloadFileName = "<h3><span id=\"ctl00_Content_labTitle\">(.*?)</span></h3>";

	/** Matches the File query parameter of a download URL; group 1 is the name without ".w3g". */
	private static final String url_regexp_fileParam = "&File=(.*?)\\.w3g";

	// The patterns are compiled once instead of once per page.
	private static final Pattern RESOURCE_LINK = Pattern.compile(url_regexp_resources, Pattern.CASE_INSENSITIVE);

	private static final Pattern DOWNLOAD_LINK = Pattern.compile(url_regexp_downloadFile, Pattern.CASE_INSENSITIVE);

	private static final Pattern PAGE_TITLE = Pattern.compile(url_regexp_downloadFileName, Pattern.CASE_INSENSITIVE);

	private static final Pattern FILE_PARAM = Pattern.compile(url_regexp_fileParam, Pattern.CASE_INSENSITIVE);

	/** Extracts the charset parameter of a Content-Type header value. */
	private static final Pattern CHARSET_PARAM = Pattern.compile("charset\\s*=\\s*\"?([^\\s;\"]+)", Pattern.CASE_INSENSITIVE);

	/** Reads one setting from the configuration; never returns null. */
	private static String setting(String key) {
		String value = config.getValue(key);
		return value == null ? "" : value.trim();
	}

	/**
	 * Reads the HTML source of the page behind a URL.
	 * 
	 * @param url
	 *            address of the page
	 * @return the page source with lines terminated by "\n", or an empty
	 *         string ("") when the page cannot be read
	 */
	public String getHtmlCode(String url) {
		try {
			return fetchPage(url);
		} catch (IOException e) {
			return "";
		}
	}

	/**
	 * Reads the HTML source of the page behind a URL.
	 * 
	 * @param urlString
	 *            address of the page
	 * @return the page source with lines terminated by "\n", or
	 *         {@code null} (after printing the stack trace) when the page
	 *         cannot be read
	 */
	public static String getHtml(String urlString) {
		try {
			return fetchPage(urlString);
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		}
	}

	/** Downloads a page as text; the reader is always closed. */
	private static String fetchPage(String address) throws IOException {
		URLConnection connection = new URL(address).openConnection();
		connection.setAllowUserInteraction(false);
		BufferedReader reader = new BufferedReader(openReader(connection));
		try {
			StringBuilder html = new StringBuilder();
			String line;
			while ((line = reader.readLine()) != null) {
				html.append(line).append("\n");
			}
			return html.toString();
		} finally {
			try {
				reader.close();
			} catch (IOException ignored) {
				// the content has already been read (or the read failure is
				// already propagating); a failing close adds nothing
			}
		}
	}

	/**
	 * Opens a reader using the charset announced in the Content-Type header,
	 * falling back to the platform default when none is announced or the
	 * announced one is unknown.
	 */
	private static Reader openReader(URLConnection connection) throws IOException {
		InputStream in = connection.getInputStream();
		String contentType = connection.getContentType();
		if (contentType != null) {
			Matcher charset = CHARSET_PARAM.matcher(contentType);
			if (charset.find()) {
				try {
					return new InputStreamReader(in, charset.group(1));
				} catch (UnsupportedEncodingException ignored) {
					// unknown charset name: use the platform default below
				}
			}
		}
		return new InputStreamReader(in);
	}

	/**
	 * Collects the detail-page links found on a list page. Only anchors of
	 * the form
	 * {@code <a href="http://w3g.replays.net/doc/cn/2010-1-25/12644174774532572561.html" target="_blank">}
	 * are recognised.
	 * 
	 * @param url
	 *            address of the list page
	 * @return the distinct link targets in order of first appearance; empty
	 *         when the page cannot be read or contains no such link
	 */
	public List<String> getUrlByURLPage(String url) {
		// LinkedHashSet removes duplicates while keeping page order.
		Set<String> links = new LinkedHashSet<String>();
		Matcher matcher = RESOURCE_LINK.matcher(getHtmlCode(url));
		while (matcher.find()) {
			links.add(matcher.group(1));
		}
		return new ArrayList<String>(links);
	}

	/**
	 * Visits every detail page and builds one download entry per page, named
	 * after the page title (for example "[SW]GaB.RohJinWook vs ieS.Check #1").
	 * Pages without a download link or without a title are skipped.
	 * Characters that are not allowed in file names are replaced by "_".
	 * <p>
	 * After every tenth page the method pauses for two seconds so the
	 * crawl is not mistaken for an attack. If the thread is interrupted
	 * during a pause, the interrupt flag is restored and the entries
	 * collected so far are returned.
	 * 
	 * @param url_list
	 *            detail-page URLs; each element is converted with toString()
	 * @return the download entries, in page order
	 */
	public Vector<War3Replays> getWar3ReplaysByTitle(List<?> url_list) {
		Vector<War3Replays> replays = new Vector<War3Replays>();
		for (int i = 0; i < url_list.size(); i++) {
			String pageHtml = getHtmlCode(url_list.get(i).toString());
			String href = lastMatch(DOWNLOAD_LINK, pageHtml, 1);
			String title = lastMatch(PAGE_TITLE, pageHtml, 1);
			if (!href.equals("") && !title.equals("")) {
				// A title is free text; \ / : * ? " < > | cannot be part of a
				// file name, so they are neutralised here.
				String fileName = title.replaceAll("[\\\\/:*?\"<>|]", "_");
				replays.add(newReplay(toAbsoluteUrl(href), fileName));
			}
			if (!pauseAfterTenPages(i)) {
				break;
			}
		}
		return replays;
	}

	/**
	 * Visits every detail page and builds one download entry per page, named
	 * after the File parameter of the download URL (for example
	 * "_ReplayFile_2010-1-24_100124__NE_mTw.DIDI8_VS__HM_VeryB1gman._AncientIsles_RN").
	 * Pages without a download link are skipped.
	 * <p>
	 * Pausing and interruption behave as in
	 * {@link #getWar3ReplaysByTitle(List)}.
	 * 
	 * @param url_list
	 *            detail-page URLs; each element is converted with toString()
	 * @return the download entries, in page order
	 */
	public Vector<War3Replays> getWar3ReplaysByUrl(List<?> url_list) {
		Vector<War3Replays> replays = new Vector<War3Replays>();
		for (int i = 0; i < url_list.size(); i++) {
			String pageHtml = getHtmlCode(url_list.get(i).toString());
			String href = lastMatch(DOWNLOAD_LINK, pageHtml, 1);
			if (!href.equals("")) {
				String fileUrl = toAbsoluteUrl(href);
				// Escaped "/", "[" and "]" become "_"; the escapes are matched
				// regardless of the case of their hex digits.
				String fileName = lastMatch(FILE_PARAM, fileUrl, 1)
						.replaceAll("(?i)(%2f|%5b|%5d)", "_");
				replays.add(newReplay(fileUrl, fileName));
			}
			if (!pauseAfterTenPages(i)) {
				break;
			}
		}
		return replays;
	}

	/**
	 * Returns the requested group of the LAST match of a case-insensitive
	 * regular expression.
	 * 
	 * @param regex
	 *            regular expression
	 * @param source
	 *            text to search; {@code null} is treated as no match
	 * @param group
	 *            index of the capturing group to return
	 * @return the group of the last match, or "" when nothing matches
	 */
	public static String getMatcher(String regex, String source, int group) {
		return lastMatch(Pattern.compile(regex, Pattern.CASE_INSENSITIVE), source, group);
	}

	/** Same as {@link #getMatcher(String, String, int)} for a compiled pattern. */
	private static String lastMatch(Pattern pattern, String source, int group) {
		String result = "";
		if (source == null) {
			return result;
		}
		Matcher matcher = pattern.matcher(source);
		while (matcher.find()) {
			result = matcher.group(group);
		}
		return result;
	}

	/** Builds a download entry that uses the configured directory and extension. */
	private static War3Replays newReplay(String fileUrl, String fileName) {
		War3Replays replay = new War3Replays();
		replay.setFileName(fileName);
		replay.setFileUrl(fileUrl);
		replay.setSavePath(FILE_SAVE_PATH);
		replay.setFileFormat(FILE_SAVE_TYPE);
		return replay;
	}

	/**
	 * Resolves a link found on a page against the configured site root, so
	 * both relative and already-absolute links yield a usable URL. Falls back
	 * to plain concatenation when the site root is not a valid URL.
	 */
	private static String toAbsoluteUrl(String href) {
		try {
			return new URL(new URL(DOWNLOAD_INDEX_URL), href).toString();
		} catch (MalformedURLException e) {
			return DOWNLOAD_INDEX_URL + href;
		}
	}

	/**
	 * Pauses for two seconds after every tenth page.
	 * 
	 * @return {@code false} when the thread was interrupted while pausing
	 *         (the interrupt flag is restored), {@code true} otherwise
	 */
	private static boolean pauseAfterTenPages(int index) {
		if (index > 0 && index % 10 == 0) {
			System.out
					.println(">>>>>>>>>>>>>>>>>>>暂停页面抓取,2秒后继续<<<<<<<<<<<<<<<<<<<<<<");
			try {
				Thread.sleep(2000);
			} catch (InterruptedException e) {
				Thread.currentThread().interrupt();
				return false;
			}
		}
		return true;
	}

	/** Queues every entry in an HttpGet and downloads the whole queue. */
	private void downloadAll(List<War3Replays> replays) {
		if (replays.isEmpty()) {
			return;
		}
		HttpGet downer = new HttpGet();
		for (War3Replays replay : replays) {
			// File joins directory and name with the platform separator
			// instead of a hard-coded backslash.
			File target = new File(replay.getSavePath(), replay.getFileName()
					+ replay.getFileFormat());
			downer.addItem(replay.getFileUrl(), target.getPath());
		}
		System.out.println("开始下载.");
		downer.downLoadByList();
		System.out.println("下载完毕.");
	}

	/**
	 * Downloads the replays of the configured list page, naming the files
	 * after the download URL (strategy one).
	 */
	public void downWar3Replays_WAY1() {
		List<String> url_list = this.getUrlByURLPage(DOWNLOAD_MAIN_URL);
		downloadAll(this.getWar3ReplaysByUrl(url_list));
	}

	/**
	 * Downloads the replays of the configured list page, naming the files
	 * after the page title (strategy two).
	 */
	public void downWar3Replays_WAY2() {
		List<String> url_list = this.getUrlByURLPage(DOWNLOAD_MAIN_URL);
		downloadAll(this.getWar3ReplaysByTitle(url_list));
	}
}

 

HttpGet.java:

 

package com.moonights.utils;

import java.io.*;
import java.net.*;
import java.util.*;

/**
 * Saves resources addressed by URL to local files, either one at a time
 * ({@link #saveToFile(String, String)}) or as a queued batch
 * ({@link #addItem(String, String)} + {@link #downLoadByList()}).
 */
public class HttpGet {

	/** When true, progress and failure messages are printed to System.out. */
	public final static boolean DEBUG = true;

	/** Size in bytes of the copy buffer. */
	private static final int BUFFER_SIZE = 8096;

	/** Queued source URLs; index i belongs to index i of vFileList. */
	private Vector<String> vDownLoad = new Vector<String>();

	/** Queued target file names; index i belongs to index i of vDownLoad. */
	private Vector<String> vFileList = new Vector<String>();

	/**
	 * Creates a downloader with an empty queue.
	 */
	public HttpGet() {

	}

	/**
	 * Empties the download queue.
	 */
	public void resetList() {
		vDownLoad.clear();
		vFileList.clear();
	}

	/**
	 * Appends one download to the queue.
	 * 
	 * @param url
	 *            address of the resource
	 * @param filename
	 *            path of the file the resource is saved to
	 */
	public void addItem(String url, String filename) {
		vDownLoad.add(url);
		vFileList.add(filename);
	}

	/**
	 * Downloads every queued item in queue order. A failing item is reported
	 * (when DEBUG is on) and does not stop the remaining downloads.
	 */
	public void downLoadByList() {
		for (int i = 0; i < vDownLoad.size(); i++) {
			String url = vDownLoad.get(i);
			String filename = vFileList.get(i);

			try {
				saveToFile(url, filename);
			} catch (IOException err) {
				if (DEBUG) {
					// the cause is appended so a failure can be diagnosed
					System.out.println("资源[" + url + "]下载失败!!! (" + err + ")");
				}
			}
		}

		if (DEBUG) {
			System.out.println("下载完成!!!");
		}
	}

	/**
	 * Copies the resource behind a URL into a file. An existing file is
	 * overwritten; the target directory must already exist.
	 * <p>
	 * Any URL scheme supported by the JVM is accepted, not just HTTP. Both
	 * streams are closed and an HTTP connection is disconnected even when
	 * the transfer fails.
	 * 
	 * @param destUrl
	 *            address of the resource
	 * @param fileName
	 *            path of the file to write
	 * @throws IOException
	 *             if the URL is malformed, the resource cannot be read or the
	 *             file cannot be written
	 */
	public void saveToFile(String destUrl, String fileName) throws IOException {
		URL url = new URL(destUrl);
		URLConnection connection = url.openConnection();
		InputStream in = null;
		try {
			// The remote side is opened first so a missing resource does not
			// leave an empty file behind.
			in = new BufferedInputStream(connection.getInputStream());
			FileOutputStream out = new FileOutputStream(fileName);
			try {
				if (DEBUG)
					System.out.println("正在获取链接[" + destUrl + "]的内容...\n将其保存为文件["
							+ fileName + "]");

				byte[] buf = new byte[BUFFER_SIZE];
				int size;
				while ((size = in.read(buf)) != -1)
					out.write(buf, 0, size);
			} finally {
				out.close();
			}
		} finally {
			if (in != null) {
				try {
					in.close();
				} catch (IOException ignored) {
					// nothing useful can be done about a failing close of
					// the source stream
				}
			}
			if (connection instanceof HttpURLConnection) {
				((HttpURLConnection) connection).disconnect();
			}
		}
	}

	/**
	 * Sets the JVM-wide proxy system properties ("proxySet", "proxyHost",
	 * "proxyPort").
	 * 
	 * @param proxy
	 *            proxy host
	 * @param proxyPort
	 *            proxy port
	 */
	public void setProxyServer(String proxy, String proxyPort) {
		System.setProperty("proxySet", "true");
		System.setProperty("proxyHost", proxy);
		System.setProperty("proxyPort", proxyPort);
	}

	// Setting a user name / password (Authenticator.setDefault) is not
	// implemented.

	/**
	 * Manual test: downloads one hard-coded file.
	 * 
	 * @param argv
	 *            unused
	 */
	public static void main(String argv[]) {

		HttpGet oInstance = new HttpGet();
		try {
			// queue the download(s); replace with your own list as needed
			oInstance.addItem("http://xiazai.xiazaiba.com/0905/0504/2k3IIS6_XiaZaiBa.rar","e:\\temp\\iis6_2.rar");
			// start downloading
			oInstance.downLoadByList();
		} catch (Exception err) {
			System.out.println(err.getMessage());
		}

	}

}

 

   Configuration.java: 

 

package com.moonights.utils;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

/**
 * Small wrapper around {@link Properties}: loads key/value settings from a
 * classpath resource or a file, exposes them as strings and can write them
 * back to a file.
 * 
 * @author moonights
 */
public class Configuration
{
    private final Properties propertie;

    /**
     * Creates an empty configuration.
     */
    public Configuration(){
        propertie = new Properties();
    }

    /**
     * Creates a configuration from a classpath resource. When the resource is
     * missing or unreadable a message is printed and the configuration stays
     * empty; no exception is thrown.
     * 
     * @param filePath classpath-relative path of the properties resource
     */
    public Configuration(String filePath){
        propertie = new Properties();
        InputStream in = null;
        if (filePath != null) {
            in = this.getClass().getClassLoader().getResourceAsStream(filePath);
        }
        if (in == null) {
            // getResourceAsStream signals "not found" with null, not with an
            // exception; loading null would throw a NullPointerException.
            System.out.println("读取属性文件--->失败!- 原因:文件路径错误或者文件不存在");
            return;
        }
        try {
            propertie.load(in);
        } catch (IOException ex) {
            System.out.println("装载文件--->失败!");
            ex.printStackTrace();
        } finally {
            try {
                in.close();
            } catch (IOException ignored) {
                // the settings are already loaded (or the failure is already
                // reported); a failing close adds nothing
            }
        }
    }

    /**
     * Returns the value stored under a key.
     * 
     * @param key key to look up
     * @return the value, or "" when the key is absent (or null)
     */
    public String getValue(String key){
        String value = (key == null) ? null : propertie.getProperty(key);
        return (value == null) ? "" : value;
    }

    /**
     * Loads a properties file into this configuration (merging with, and
     * overriding, what is already held) and returns the value stored under a
     * key.
     * 
     * @param fileName path of the properties file
     * @param key key to look up
     * @return the value, or "" when the key is absent or the file cannot be
     *         read
     */
    public String getValue(String fileName, String key){
        FileInputStream in = null;
        try {
            in = new FileInputStream(fileName);
            propertie.load(in);
            return getValue(key);
        } catch (Exception ex) {
            ex.printStackTrace();
            return "";
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // nothing useful can be done about a failing close
                }
            }
        }
    }

    /**
     * Removes every key and value held in memory (files are not touched).
     */
    public void clear(){
        propertie.clear();
    }

    /**
     * Stores a value under a key, replacing the previous value if the key
     * already exists.
     * 
     * @param key key to store under
     * @param value value to store
     */
    public void setValue(String key, String value){
        propertie.setProperty(key, value);
    }

    /**
     * Writes the current settings to a file; the file is created if it does
     * not exist. Failures are printed, not thrown.
     * 
     * @param fileName path of the file to write
     * @param description comment written at the top of the file
     */
    public void saveFile(String fileName, String description){
        FileOutputStream out = null;
        try {
            out = new FileOutputStream(fileName);
            propertie.store(out, description);
        } catch (IOException ioe){
            ioe.printStackTrace();
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException ioe) {
                    ioe.printStackTrace();
                }
            }
        }
    }
}

 

 Main.java:程序入口

package com.moonights.war3replays;

/**
 * 程序入口。
 * @author moonights
 *
 */
public class Main {
	public static void main(String[] args) {
		War3ReplaysDowner war3_Downer = new War3ReplaysDowner();
		//war3_Downer.downWar3Replays_WAY1();
		war3_Downer.downWar3Replays_WAY2();
	}
}

 

war3.properties:配置文件

## Site root; it is prepended to the download links found on the detail pages
DOWNLOAD_INDEX_URL = http://w3g.replays.net
## List page to crawl (change this value to download another page)
DOWNLOAD_MAIN_URL = http://w3g.replays.net/ReplayList.aspx?GameRace=3&PageNo=6
## Downloading from several list pages in one run (not implemented yet)
PageNo=5
MAX_PageNo=50
## Directory the files are saved to
FILE_SAVE_PATH=F:\\1.22\\hum
## File extension
FILE_SAVE_TYPE=.w3g  

 

    程序还有挺多需要改进的地方:文件保存目录必须事先存在;由于时间关系,很多细节没有做过多的考虑。希望有哪位提提意见,帮忙优化一下。如果有人能改成带界面的版本,那就更好了。

 

你可能感兴趣的:(thread,html,游戏,.net,qq)