public class Job51Senderimpl implements ISender{
protected final Logger logger = Logger.getLogger(Job51Senderimpl.class);
private Document document;
public Job51Senderimpl() throws IOException, DocumentException{
File configFile = new File(getClass().getClassLoader().getResource("").getPath()+"sysconfig.xml");
String xml = IOUtils.toString(FileUtils.openInputStream(configFile));
document = DocumentHelper.parseText(xml);
}
/**
*
* <pre>
* 创建人: 王涛
* 创建于: 2009-6-26
* 描 述:
* 根据关键字和所在城市,进行职位查询
* </pre>
* @param url
*/
@SuppressWarnings({"unchecked"})
public void searchResultByKeyWord(String url){
List<Element> citys = document.selectNodes("ROOT/city/value");
this.logger.debug(((Element)citys.get(0)).getText());
String jobarea = "";
for(Element e : citys){
jobarea += City.CITY_LIST_51JOB.get(e.getText()) + ",";
}
String html = null;
try {
html = getHTMLByUrl(url+"?fromJs=1" +
"&jobarea="+URLEncoder.encode(jobarea.substring(0, jobarea.length()-1), "UTF-8")+"" +
"&funtype=0000&industrytype=00" +
"&keyword="+URLEncoder.encode(document.selectSingleNode("//ROOT/keyword").getText(),"UTF-8")+
"&keywordtype=2&lang=c&stype=1&postchannel=0000&fromType=1");
//this.logger.debug(html);
this.getJobCorpInfoByHTML(html);
}catch (FileNotFoundException e) {
e.printStackTrace();
logger.error("提交查询条件时发生错误,原因[可能是网站查询地址改变或错误]");
}catch (Exception e) {
e.printStackTrace();
}
}
/**
*
* <pre>
* 创建人: 王涛
* 创建于: 2009-6-26
* 描 述:
* 核心方法,首先获取公司名称和职位链接,并对于资源文件中的不包括的公司名称进行过滤,在获得邮箱地址之后,向邮箱地址发送邮件.
* 执行完本页的职位列表之后,获取下一页链接如果有下一页链接,将递归调用此方法.
* </pre>
* @param html
* @throws Exception
*/
@SuppressWarnings("unchecked")
public void getJobCorpInfoByHTML(String html) throws Exception{
String theHtml = html;
Matcher matcherJobURL = Pattern.compile("<a href=\"(.+?)\" onclick=\"zzSearch.acStatRecJob").matcher(theHtml);
Matcher matcherCorpName = Pattern.compile("<a href=\".+?\" class=coname target=\"_blank\" >(.+?)</a></td>").matcher(theHtml);
while(matcherJobURL.find() && matcherCorpName.find()){
for(Element exclud : (List<Element>)document.selectNodes("ROOT/exclud/value")){
if(matcherCorpName.group(1).indexOf(exclud.getText()) == -1){
//logger.info("准备打开职位详细页面 : " + matcherJobURL.group(1));
try {
//打开职位详细页面
Email email = buildEmailDetailByHTML(getHTMLByUrl(matcherJobURL.group(1)));//获取邮箱信息
if(email != null){
MailSender.sendHTML(email);//发送邮件
logger.error("发送邮件: "+email.getSendTo());
}
} catch (Exception e) {
logger.error("打开职位详细页面时发生错误");
e.printStackTrace();
}
}else{logger.info("过滤掉公司名称为" + exclud.getText() + "的公司");}
}
}
String nextUrl = getNextUrlsByHtml(theHtml);
if(nextUrl != null){
this.getJobCorpInfoByHTML(SenderDispatch.HTTP.doGet(nextUrl).toString("GB2312"));
}
}
/**
*
* <pre>
* 创建人: 王涛
* 创建于: 2009-6-26
* 描 述:
* 根据html中的内容找到包含的Email地址
* </pre>
* @param html
* @return
*/
public Email buildEmailDetailByHTML(String html){
//logger.info("获取职位名称和邮箱地址信息");
Email emailDetail = null;
Matcher jobName = Pattern.compile("<td class=\"sr_bt\" colspan=\"2\">(.+?)</td>").matcher(html);
Matcher mailAddress = Pattern.compile("<a href=\" mailto:.+?\ " class=\"orange\">(.+?)</a>").matcher(html);
if(jobName.find() && mailAddress.find()){
emailDetail = new Email(document.selectSingleNode("ROOT/emaildetail/emailusername").getText(),
document.selectSingleNode("ROOT/emaildetail/emailpassword").getText());
emailDetail.setSendTo(mailAddress.group(1));
//emailDetail.setSendTo("
[email protected] ");
emailDetail.setSubject(document.selectSingleNode("ROOT/emaildetail/mailsubject").getText() + " : " + jobName.group(1));
}
return emailDetail;
}
/**
*
* <pre>
* 创建人: 王涛
* 创建于: 2009-6-26
* 描 述:
* 获得下一页的连接
* </pre>
* @param html
* @return 如果没有匹配到下一页的连接,返回null
*/
public String getNextUrlsByHtml(String html){
//logger.debug(html);
String url = "";
Matcher matcher = Pattern.compile("</td><td><a href=\"(.+?)\" .+?").matcher(html);
if(!matcher.find()) //需要先find才能取
return null;
else{
url = matcher.group(1);
}
logger.debug("下一页链接 : " + url);
return url;
}
/**
*
* <pre>
* 创建人: 王涛
* 创建于: 2009-6-24
* 描 述:
* 获取网站内容
* </pre>
* @param url
*/
public String getHTMLByUrl(String url){
String html = null;
try {
html = SenderDispatch.HTTP.doGet(url.toString()).toString("GB2312");
//logger.debug(html);
}
catch (IOException e) {
e.printStackTrace();
logger.error("创建连接时发生错误,原因[可能是网络连接错误或对方服务器无法访问]");
} catch (Exception e) {
e.printStackTrace();
logger.error("试图连接时发生错误,原因[可能是网络地址填写错误]");
}
return html;
}
}