[共享]RFC中文文档

今天在搜索TCP/IP协议的资料时,无意中找到一些RFC文档的翻译。写了个程序抓取下来以便随时查看。

package canghailan.downloader;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads every document linked from a fixed list of index pages whose
 * href contains {@code "/rfc/"} and stores each one as a UTF-8 text file
 * under {@link #DOWNLOAD_DIR}, named after the link text.
 */
public class RFCDownload {
	private static final String DOWNLOAD_DIR = "D:\\rfc_cn";
	private static final String[] URLS = {
			"http://www.cnpaf.net/Class/RFC/200411/1047.html",
			"http://www.cnpaf.net/Class/RFC/200411/1048.html",
			"http://www.cnpaf.net/Class/RFC/200411/1049.html",
			"http://www.cnpaf.net/Class/RFC/200411/1050.html",
			"http://www.cnpaf.net/Class/RFC/200411/1051.html",
			"http://www.cnpaf.net/Class/RFC/200411/1052.html",
			"http://www.cnpaf.net/Class/RFC/200411/1053.html",
			"http://www.cnpaf.net/Class/RFC/200411/1054.html" };
	/** Charset assumed when the response does not name a usable one. */
	private static final String DEFAULT_CHARSET = "gbk";
	/** Matches an anchor tag; group 1 is the href, group 2 the link text. */
	private static final Pattern A_PATTERN = Pattern
			.compile("<[Aa][^>]+href=['\"]?([^\\s'\"]*)['\"]?[^>]*>([^<]*)</\\s*[Aa]\\s*>");
	/** Extracts the charset parameter of a Content-Type header value. */
	private static final Pattern CHARSET_PATTERN = Pattern.compile(
			"charset\\s*=\\s*[\"']?([A-Za-z0-9][\\w.:+-]*)",
			Pattern.CASE_INSENSITIVE);
	/** Path separators, characters Windows forbids in names, and control characters. */
	private static final Pattern UNSAFE_NAME_CHARS = Pattern
			.compile("[\\\\/:*?\"<>|\\p{Cntrl}]");
	/** Initial buffer size used when the content length is unknown. */
	private static final int FALLBACK_CAPACITY = 16 * 1024;
	/** Upper bound on pre-allocation so a bogus Content-Length cannot exhaust memory. */
	private static final int MAX_INITIAL_CAPACITY = 1024 * 1024;

	/**
	 * Picks the charset to decode a response with.
	 *
	 * @param contentType
	 *            value of the Content-Type header, may be {@code null}
	 * @return the charset named by the header if this JVM supports it,
	 *         otherwise {@link #DEFAULT_CHARSET}
	 */
	static String charsetOf(String contentType) {
		if (contentType != null) {
			Matcher matcher = CHARSET_PATTERN.matcher(contentType);
			if (matcher.find()) {
				String name = matcher.group(1);
				try {
					if (Charset.isSupported(name)) {
						return name;
					}
				} catch (IllegalArgumentException ignored) {
					// Syntactically illegal charset name: use the default.
				}
			}
		}
		return DEFAULT_CHARSET;
	}

	/**
	 * Fetches a URL and returns its body decoded as text.
	 *
	 * @param url
	 *            absolute URL to fetch
	 * @return the decoded response body
	 * @throws IOException
	 *             if the URL is malformed or the transfer fails
	 */
	public static String loadURL(String url) throws IOException {
		URLConnection conn = new URL(url).openConnection();
		conn.connect();

		// The charset is a parameter of Content-Type. Content-Encoding names
		// a transfer coding such as gzip and must not be used as a charset.
		String charset = charsetOf(conn.getContentType());

		// getContentLength() is -1 when unknown; it counts bytes, so it is
		// only a sizing hint for the character buffer.
		int length = conn.getContentLength();
		int capacity = length > 0 ? Math.min(length, MAX_INITIAL_CAPACITY)
				: FALLBACK_CAPACITY;

		InputStream in = conn.getInputStream();
		try {
			Reader reader = new InputStreamReader(in, charset);
			StringBuilder buffer = new StringBuilder(capacity);
			char[] chunk = new char[8192];
			int read;
			while ((read = reader.read(chunk)) != -1) {
				buffer.append(chunk, 0, read);
			}
			return buffer.toString();
		} finally {
			// Closing the raw stream also covers a failure while building the reader.
			in.close();
		}
	}

	/**
	 * Writes text to a file as UTF-8, creating missing parent directories.
	 *
	 * @param file
	 *            path of the file to (over)write
	 * @param content
	 *            text to store
	 * @throws IOException
	 *             if the directory cannot be created or the write fails
	 */
	public static void saveFile(String file, String content) throws IOException {
		File parent = new File(file).getAbsoluteFile().getParentFile();
		// The second isDirectory() check tolerates another process creating it first.
		if (parent != null && !parent.isDirectory() && !parent.mkdirs()
				&& !parent.isDirectory()) {
			throw new IOException("Cannot create directory " + parent);
		}

		Writer writer = new BufferedWriter(new OutputStreamWriter(
				new FileOutputStream(file), "utf-8"));
		try {
			writer.write(content);
		} finally {
			writer.close(); // close() flushes the buffered content
		}
	}

	/**
	 * Downloads a URL and saves it under the name derived from the link text.
	 *
	 * @param url
	 *            absolute URL of the document
	 * @param text
	 *            link text used to build the file name
	 * @throws IOException
	 *             if downloading or saving fails
	 */
	public static void loadAndSave(String url, String text) throws IOException {
		saveFile(getFile(text), loadURL(url));
	}

	/**
	 * Builds the target path for a link text: surrounding whitespace is
	 * dropped, "RFC" is lower-cased, and every character that could act as a
	 * path separator or is not allowed in a Windows file name is replaced by
	 * "-". The link text comes from a web page, so this also keeps the result
	 * inside {@link #DOWNLOAD_DIR}.
	 *
	 * @param text
	 *            link text
	 * @return path of the ".txt" file inside the download directory
	 */
	public static String getFile(String text) {
		String name = text.trim().replace("RFC", "rfc");
		name = UNSAFE_NAME_CHARS.matcher(name).replaceAll("-");
		return DOWNLOAD_DIR + File.separator + name + ".txt";
	}

	/**
	 * Scans every index page and downloads each link whose href contains
	 * "/rfc/". The first failing download aborts the run.
	 *
	 * @param args
	 *            unused
	 * @throws IOException
	 *             if a page or document cannot be fetched or saved
	 */
	public static void main(String[] args) throws IOException {
		for (String pageURL : URLS) {
			URL base = new URL(pageURL);
			String page = loadURL(pageURL);
			Matcher matcher = A_PATTERN.matcher(page);
			while (matcher.find()) {
				String url = matcher.group(1);
				String text = matcher.group(2);
				if (url.contains("/rfc/")) {
					System.out.println(url + "(" + text + ")");
					// Resolve against the page so relative hrefs work too;
					// absolute hrefs are returned unchanged by the resolution.
					loadAndSave(new URL(base, url).toString(), text);
				}
			}
		}
	}
}

package canghailan.downloader;

import java.io.File;
import java.io.FilenameFilter;
import java.util.regex.Pattern;

/**
 * Renames files whose names look like {@code rfc<digits>_...} by replacing
 * the first underscore with a space.
 */
public class RFCRename {
	/** Directory processed when none is given on the command line. */
	private static final String DIR = "D:\\rfc_cn";
	/** Names that still carry an underscore right after the RFC number. */
	private static final Pattern BAD_FILE = Pattern.compile("rfc\\d+_.*");

	/**
	 * Renames every matching entry in the target directory. Entries that
	 * cannot be renamed are reported on standard error and skipped.
	 *
	 * @param args
	 *            optional: {@code args[0]} is the directory to process;
	 *            defaults to {@link #DIR}
	 */
	public static void main(String[] args) {
		File dir = new File(args != null && args.length > 0 ? args[0] : DIR);

		String[] badFiles = dir.list(new FilenameFilter() {
			@Override
			public boolean accept(File parent, String name) {
				return BAD_FILE.matcher(name).matches();
			}
		});
		// File.list returns null when the path is not a directory or cannot be read.
		if (badFiles == null) {
			System.err.println("Cannot list directory: " + dir);
			return;
		}

		for (String badFile : badFiles) {
			String name = badFile.replaceFirst("_", " ");
			File source = new File(dir, badFile);
			File target = new File(dir, name);
			// renameTo signals failure only through its return value.
			if (!source.renameTo(target)) {
				System.err.println("Could not rename " + source + " to "
						+ target);
			}
		}
	}
}

 

 

 

 

你可能感兴趣的:(rfc)