监控火车票小程序 ( by quqi99 )
作者:张华 发表于:2010-02-04
版权声明:可以任意转载,转载时请务必以超链接形式标明文章原始出处和作者信息及本版权声明
最近为了买到回家的火车票,写了个小程序,自动监控北京到武汉的火车票,及时发送我的邮箱。
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.mail.Address;
import javax.mail.Authenticator;
import javax.mail.BodyPart;
import javax.mail.Message;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.PasswordAuthentication;
import javax.mail.Session;
import javax.mail.Transport;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeBodyPart;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeMultipart;
/**
* @version 0.10 2010-2-4
* @author Zhang Hua
* @descrip 从网页抽取到火车票信息
*/
public class GetHuochepiao {
// 邮件信息
private static String password = "****";
// 买票日期区间
private static Set<String> sets = new HashSet<String>();
static {
// sets.add("2010-02-09");
// sets.add("2010-2-9");
// sets.add("2010-02-9");
// sets.add("2010-2-09");
sets.add("2010-2-10");
sets.add("2010-02-10");
sets.add("2010-2-11");
sets.add("2010-02-11");
}
// 要监控的网址
private static Set<String> urls = new HashSet<String>();
static {
// 谷歌春运
urls.add("http://shenghuo.google.cn/shenghuo/search?a_y0=9&view=Table&a_n0=%E7%81%AB%E8%BD%A6%E7%A5%A8&a_n1=%E5%A7%8B%E5%8F%91%E7%AB%99&a_y1=1&a_o1=0&a_n2=%E5%88%B0%E8%BE%BE%E7%AB%99&a_y2=1&a_o2=0&a_n3=%E8%BD%A6%E6%AC%A1&a_y3=1&a_o3=0&a_v1=%E5%8C%97%E4%BA%AC&a_v2=%E6%AD%A6%E6%B1%89#a_y0%3D9%26a_n0%3D!5E7!581!5AB!5E8!5BD!5A6!5E7!5A5!5A8%26view%3DTable%26scoring%3Dad_!5E6!597!5A5!5E6!59C!59F!53A7%26a_n1%3D!5E5!59F!58E!5E5!5B8!582%26a_y1%3D1%26a_o1%3D0%26a_v1%3D!5E5!58C!597!5E4!5BA!5AC%26a_n2%3D!5E5!58F!591!5E8!5BD!5A6!5E6!597!5B6!5E9!597!5B4%26a_y2%3D7%26a_o2%3D3%26a_f2%3D10!52F2!52F2010%26a_t2%3D10!52F2!52F2010%26a_n3%3D!5E6!597!5A5!5E6!59C!59F%26a_y3%3D7%26a_o3%3D3%26a_f3%3D4!52F2!52F2010%26a_t3%3D4!52F2!52F2010%26a_n4%3D!5E5!5A7!58B!5E5!58F!591!5E7!5AB!599%26a_y4%3D1%26a_o4%3D0%26a_v4%3D!5E5!58C!597!5E4!5BA!5AC%26a_n5%3D!5E5!588!5B0!5E8!5BE!5BE!5E7!5AB!599%26a_y5%3D1%26a_o5%3D0%26a_v5%3D!5E6!5AD!5A6!5E6!5B1!589");
// 酷讯火车票
// urls.add("http://piao.kuxun.cn/beijing-wuhan/");
//火车票网
urls.add("http://www1.huochepiao.com/search.asp?chufa=%B1%B1%BE%A9&daoda=%CE%E4%BA%BA&piaoyuan=%BB%F0%B3%B5%C6%B1%D4%B4%B2%E9%D1%AF");
// 赶集网
urls.add("http://bj.ganji.com/piao/zz_北京-武昌/");
urls.add("http://bj.ganji.com/piao/zz_北京-汉口/");
}
private static final String year = "2010";
private static final String regularExpression = "<[aA][^>]+(href=[^>]+)>(.*?)</[aA]>";
private static Pattern pattern = Pattern.compile(regularExpression,Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
private static final String metaRegularExpression = "<meta[^>]+charset=([^>]+)/>";
private static Pattern metaPattern = Pattern.compile(metaRegularExpression,Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
// public static final SimpleDateFormat dateFormat=new SimpleDateFormat
// ("yyyy-MM-dd");
private static Map<String, String> map = new HashMap<String, String>();
private static Set<String> hasCrawledUrls = new HashSet<String>();
public static void main(String[] args) {
MailSenderInfo mailInfo = new MailSenderInfo();
mailInfo.setMailServerHost("mail.**.com");
mailInfo.setMailServerPort("25");
mailInfo.setValidate(true);
mailInfo.setUserName("***@**.com");
mailInfo.setPassword(password); // 您的邮箱密码
mailInfo.setFromAddress("***@**.com");
mailInfo.setToAddress("****@**.com");
// mailInfo.setSubject("火车票信息:" );
// mailInfo.setContent("<a href=/"" + "/">" + "</a>/n" );
// sendHtmlMail(mailInfo);// 发送html格式 邮件
int count = 0;
while(true){
count++;
for (String urlString : urls) {
try {
System.out.println("处理种子:" + urlString);
// 获取网页内容
String content = GetContent(urlString);
// 解析相关元数据
Matcher matcher = pattern.matcher(content);
while (matcher != null && matcher.find()) {
String url = matcher.group(1);
String[] arr = url.split(" ");
if (arr != null && arr.length > 1)
url = arr[0];
url = url.replaceAll("/'", ""); // 去掉单引号
url = url.replaceAll("/"", ""); // 去掉双引号
url = url.replaceAll("href=", "");
url = url.trim();
URL tempURL;
if(!url.toLowerCase().startsWith("http") && !url.toLowerCase().startsWith("javascript")){
try{
tempURL = new URL(new URL(urlString),url);
url = tempURL.toString();
}catch(Exception e){
e.printStackTrace();
}
}
url=java.net.URLDecoder.decode(url,"UTF-8"); //对URL进行解码
if(hasCrawledUrls.contains(url)){
// System.out.println(url);
continue; //已经处理过了,不再处理
}
hasCrawledUrls.add(url);
String anchor = matcher.group(2);
int pos = anchor.indexOf("发车日期");
if (anchor != null && pos != -1 && anchor.indexOf("1张")==-1
&& anchor.indexOf("5张")==-1 && anchor.indexOf("4张")==-1 && anchor.indexOf("L638")==-1) {
// 解析发车日期
String date = anchor.substring(pos + 4);
date = date.replaceAll(":", "");
date = date.trim();
if (date != null) {
if (date.length() >= 3 && date.length() <= 5) {
date = year + "-" + date;
}
}
if (date != null && sets.contains(date)) {
// 放入缓存
if (count <= 1) {
// 第一轮不记
System.out.println("Initializ ... " + anchor + " " + url);
map.put(url, anchor);
} else {
// 第二轮,好像直接用map.containsKey对有汉字的URL无法比较
if (map.keySet().contains(url)) {
// 旧消息,什么也不做
System.out.println("running ...");
} else {
// 有新消息了,发邮件,并放入缓存
mailInfo.setSubject("火车票信息:" + anchor);
mailInfo.setContent("<a href=/"" + url + "/">" + anchor + "</a>/n" + url);
if(!url.startsWith("http://piao.kuxun.cn")) //酷讯的URL老是变的
sendHtmlMail(mailInfo);// 发送html格式 邮件
System.err.println("Send Mail : " + anchor + " " + url);
map.put(url, anchor);
}
}
}
}
}
} catch (Throwable exception) {
exception.printStackTrace();
}
}
}
}
public static String GetContent(String urlString) {
String content;
BufferedInputStream bis = null;
ByteArrayOutputStream bos = null;
try {
bos = new ByteArrayOutputStream();
URL url = new URL(urlString);
URLConnection conn = url.openConnection();
bis = new BufferedInputStream(conn.getInputStream());
byte[] buff = new byte[2048];
int len = bis.read(buff);
while (len != -1){
bos.write(buff, 0, len);
len = bis.read(buff);
}
//获取字符编码
String encoding = conn.getHeaderField("Content-Type");
encoding = "utf-8"; //default
if("text/html".equals(encoding)){
Matcher matcher = metaPattern.matcher(new String(bos.toByteArray()));
if (matcher != null && matcher.find()) {
encoding = matcher.group(1);
encoding = encoding.replaceAll("/"", "");
encoding = encoding.trim();
}
}
content = new String(bos.toByteArray(),encoding);
return content;
} catch (Exception e) {
e.printStackTrace();
return null;
}finally{
try{
if(bis!=null)
bis.close();
if(bos!=null)
bos.close();
}catch(Exception e){
}
}
}
/**
* 以HTML格式发送邮件
*
* @param mailInfo
* 待发送的邮件信息
*/
public static boolean sendHtmlMail(MailSenderInfo mailInfo) {
// 判断是否需要身份认证
MyAuthenticator authenticator = null;
Properties pro = mailInfo.getProperties();
// 如果需要身份认证,则创建一个密码验证器
if (mailInfo.isValidate()) {
authenticator = new MyAuthenticator(mailInfo.getUserName(),mailInfo.getPassword());
}
// 根据邮件会话属性和密码验证器构造一个发送邮件的session
Session sendMailSession = Session.getDefaultInstance(pro, authenticator);
try {
// 根据session创建一个邮件消息
Message mailMessage = new MimeMessage(sendMailSession);
// 创建邮件发送者地址
Address from = new InternetAddress(mailInfo.getFromAddress());
// 设置邮件消息的发送者
mailMessage.setFrom(from);
// 创建邮件的接收者地址,并设置到邮件消息中
Address to = new InternetAddress(mailInfo.getToAddress());
// Message.RecipientType.TO属性表示接收者的类型为TO
mailMessage.setRecipient(Message.RecipientType.TO, to);
// 设置邮件消息的主题
mailMessage.setSubject(mailInfo.getSubject());
// 设置邮件消息发送的时间
mailMessage.setSentDate(new Date());
// MiniMultipart类是一个容器类,包含MimeBodyPart类型的对象
Multipart mainPart = new MimeMultipart();
// 创建一个包含HTML内容的MimeBodyPart
BodyPart html = new MimeBodyPart();
// 设置HTML内容
html.setContent(mailInfo.getContent(), "text/html; charset=utf-8");
mainPart.addBodyPart(html);
// 将MiniMultipart对象设置为邮件内容
mailMessage.setContent(mainPart);
// 发送邮件
Transport.send(mailMessage);
return true;
} catch (MessagingException ex) {
ex.printStackTrace();
}
return false;
}
}
class MyAuthenticator extends Authenticator {
String userName = null;
String password = null;
public MyAuthenticator() {
}
public MyAuthenticator(String username, String password) {
this.userName = username;
this.password = password;
}
protected PasswordAuthentication getPasswordAuthentication() {
return new PasswordAuthentication(userName, password);
}
}
/**
 * Plain mutable holder for everything needed to send one mail: SMTP server,
 * credentials, addresses, subject, body and attachment file names.
 */
class MailSenderInfo {
    // Host and port of the outgoing mail server.
    private String mailServerHost;
    private String mailServerPort = "25";
    // Sender address.
    private String fromAddress;
    // Recipient address.
    private String toAddress;
    // User name and password used to log in to the mail server.
    private String userName;
    private String password;
    // Whether the server requires authentication.
    private boolean validate = false;
    // Mail subject.
    private String subject;
    // Mail body text.
    private String content;
    // File names of the attachments.
    private String[] attachFileNames;

    /**
     * Builds the mail session properties from the current settings.
     *
     * <p>{@code mail.smtp.host} and {@code mail.smtp.port} are only included
     * when the corresponding value is set: {@link Properties} rejects null
     * values with a {@link NullPointerException}, so an unset host or port is
     * left out instead of making this method fail.
     *
     * @return a new Properties object holding {@code mail.smtp.auth} and,
     *         when set, {@code mail.smtp.host} and {@code mail.smtp.port}
     */
    public Properties getProperties() {
        Properties p = new Properties();
        if (this.mailServerHost != null) {
            p.put("mail.smtp.host", this.mailServerHost);
        }
        if (this.mailServerPort != null) {
            p.put("mail.smtp.port", this.mailServerPort);
        }
        p.put("mail.smtp.auth", validate ? "true" : "false");
        return p;
    }

    /** @return the mail server host, or null when not set */
    public String getMailServerHost() {
        return mailServerHost;
    }

    /** @param mailServerHost the mail server host */
    public void setMailServerHost(String mailServerHost) {
        this.mailServerHost = mailServerHost;
    }

    /** @return the mail server port as text; "25" unless changed */
    public String getMailServerPort() {
        return mailServerPort;
    }

    /** @param mailServerPort the mail server port as text */
    public void setMailServerPort(String mailServerPort) {
        this.mailServerPort = mailServerPort;
    }

    /** @return true when the server requires authentication */
    public boolean isValidate() {
        return validate;
    }

    /** @param validate true when the server requires authentication */
    public void setValidate(boolean validate) {
        this.validate = validate;
    }

    /** @return the stored array of attachment file names (not a copy), or null */
    public String[] getAttachFileNames() {
        return attachFileNames;
    }

    /** @param fileNames attachment file names; the array is stored as given */
    public void setAttachFileNames(String[] fileNames) {
        this.attachFileNames = fileNames;
    }

    /** @return the sender address */
    public String getFromAddress() {
        return fromAddress;
    }

    /** @param fromAddress the sender address */
    public void setFromAddress(String fromAddress) {
        this.fromAddress = fromAddress;
    }

    /** @return the login password */
    public String getPassword() {
        return password;
    }

    /** @param password the login password */
    public void setPassword(String password) {
        this.password = password;
    }

    /** @return the recipient address */
    public String getToAddress() {
        return toAddress;
    }

    /** @param toAddress the recipient address */
    public void setToAddress(String toAddress) {
        this.toAddress = toAddress;
    }

    /** @return the login user name */
    public String getUserName() {
        return userName;
    }

    /** @param userName the login user name */
    public void setUserName(String userName) {
        this.userName = userName;
    }

    /** @return the mail subject */
    public String getSubject() {
        return subject;
    }

    /** @param subject the mail subject */
    public void setSubject(String subject) {
        this.subject = subject;
    }

    /** @return the mail body */
    public String getContent() {
        return content;
    }

    /** @param textContent the mail body */
    public void setContent(String textContent) {
        this.content = textContent;
    }
}