java定时任务管理实现

阅读更多
代码清单: 
==============================SQL==================================== 
计划任务表 
==============================定时任务模块类==================================== 
计划管理DAO接口 CmsSchedulerDao.java 
计划管理DAO接口实现类 CmsSchedulerDaoImpl.java 
计划任务管理服务接口 CmsSchedulerMng.java 
计划任务管理服务接口实现类 CmsSchedulerMngImpl.java 
定时任务管理接口 SchedulerTaskManageSvc.java 
定时任务管理接口实现类 SchedulerTaskManageSvcImpl.java 
定时任务接口 SchedulerTaskSvc.java 
定时任务抽象实现类 AbstractSchedulerTaskSvc.java 
定时任务接口-采集器实现类-多线程版 SchedulerAcquisitionSvcImpl.java 
定时服务关联任务bean SchedulerTaskBean.java 
计划任务Controller CmsSchedulerAct.java 
持久对象基类 BaseCmsScheduler.java 
持久对象 CmsScheduler.java 
HBM文件 CmsScheduler.hbm.xml 
==============================定时任务模块相关辅助类==================================== 
计划框架 
计划框架-任务调度 Scheduler.java 
计划框架-时间生成器接口 ScheduleIterator.java 
计划任务抽象类 SchedulerTask.java 
计划框架-时间生成器接口实现类 SimpleScheduleIterator.java 
时间计划参数bean ScheduleParamBean.java 
采集相关 
HTML解析工具类接口 ParseHtmlTool.java 
HTML解析工具,HtmlParser实现类 HtmlParserImpl.java 
采集参数封装bean ParamBean.java 
队列 Queue.java 
URL队列 UrlQueue.java 
接下来是XML配置 
==============================定时任务模块XML配置==================================== 
dao配置 
 
manage配置 
 
SERVICE配置 
 
 
接下来是messages_zh_CN.properties 添加了常量 
==============================messages_zh_CN.properties==================================== 
messages_zh_CN.properties 
接下来是模板 
==============================模板==================================== 
generate_left.html 有修改 
scheduler/add.html 
scheduler/edit.html 
scheduler/list.html 

具体代码如下:
==============================SQL==================================== 
1:计划任务表 
/* 
MySQL Data Transfer 
Source Host: localhost 
Source Database: jeecms 
Target Host: localhost 
Target Database: jeecms 
Date: 2011-11-8 11:36:55 
*/ 

SET FOREIGN_KEY_CHECKS=0; 
-- ---------------------------- 
-- Table structure for jc_scheduler 
-- One row per scheduled task.
-- `module_type` is the key used to look up the task service that runs the row
-- (the sample rows below use 'schedulerAcquisitionSvc').
-- `expression` is a comma-separated list of exactly six fields, read in the order
-- weekOfMonth,dayOfWeek,dayOfMonth,hourOfDay,minute,second; a field that is not
-- numeric (for example '*') is left unset by the parser.
-- ---------------------------- 
CREATE TABLE `jc_scheduler` ( 
  `scheduler_id` int(11) NOT NULL AUTO_INCREMENT COMMENT '任务主键', 
  `site_id` int(11) DEFAULT NULL, 
  `associate_id` int(11) DEFAULT NULL COMMENT '相关ID', 
  `module_type` varchar(100) DEFAULT NULL COMMENT '模块类型', 
  `name` varchar(100) DEFAULT NULL COMMENT '任务名称', 
  `start_time` datetime DEFAULT NULL COMMENT '开始时间', 
  `end_time` datetime DEFAULT NULL COMMENT '结束时间', 
  `status` int(1) NOT NULL DEFAULT '0' COMMENT '当前状态(0:静止;1:采集)', 
  `expression` varchar(50) NOT NULL COMMENT '计划表达式', 
  PRIMARY KEY (`scheduler_id`) 
) ENGINE=InnoDB AUTO_INCREMENT=10 DEFAULT CHARSET=utf8; 

-- ---------------------------- 
-- Records  
-- Sample rows; each expression sets only the minute and second fields.
-- ---------------------------- 
INSERT INTO `jc_scheduler` VALUES ('4', '1', '1', 'schedulerAcquisitionSvc', '测试', '2011-11-07 18:02:30', '2011-11-07 18:04:00', '0', '*,*,*,*,3,0'); 
INSERT INTO `jc_scheduler` VALUES ('8', '1', '5', 'schedulerAcquisitionSvc', '测试采集java', '2011-11-08 10:25:15', '2011-11-08 10:27:04', '0', '*,*,*,*,26,0'); 
INSERT INTO `jc_scheduler` VALUES ('9', '1', '1', 'schedulerAcquisitionSvc', '测试采集新闻', '2011-11-08 10:37:58', '2011-11-08 10:38:11', '0', '*,*,*,*,38,0'); 

==============================定时任务模块类==================================== 
计划管理DAO接口 CmsSchedulerDao.java 
package com.jeecms.cms.dao.assist; 

import java.util.List; 

import com.jeecms.cms.entity.assist.CmsScheduler; 
import com.jeecms.common.hibernate3.Updater; 
/**
 * Data-access contract for {@link CmsScheduler} (scheduled task) records.
 *
 * <p>Collection and updater types are parameterised so callers can use the
 * results without casts. NOTE(review): the type arguments look like they were
 * stripped when this listing was published; confirm {@code Updater} is generic
 * in the project before compiling.
 *
 * @author javacoo
 * @since 2011-11-07
 */
public interface CmsSchedulerDao {
    /**
     * @return all scheduled tasks
     */
    List<CmsScheduler> getList();

    /**
     * Lists scheduled tasks filtered by the module type and site carried by {@code bean}.
     *
     * @param bean carrier for the filter values (module type and site)
     * @return the matching scheduled tasks
     */
    List<CmsScheduler> getListBy(CmsScheduler bean);

    /**
     * @param id primary key
     * @return the scheduled task with that id
     */
    CmsScheduler findById(Integer id);

    /**
     * Persists a new scheduled task.
     *
     * @param bean the task to store
     * @return the stored task
     */
    CmsScheduler save(CmsScheduler bean);

    /**
     * Applies the changes described by {@code updater} to the stored task.
     *
     * @param updater update descriptor wrapping the modified task
     * @return the updated task
     */
    CmsScheduler updateByUpdater(Updater<CmsScheduler> updater);

    /**
     * Deletes the scheduled task with the given id.
     *
     * @param id primary key
     * @return the deleted task
     */
    CmsScheduler deleteById(Integer id);
}

计划管理DAO接口实现类 CmsSchedulerDaoImpl.java 
package com.jeecms.cms.dao.assist.impl; 

import java.util.List; 

import org.apache.commons.lang.StringUtils; 
import org.springframework.stereotype.Repository; 

import com.jeecms.cms.dao.assist.CmsSchedulerDao; 
import com.jeecms.cms.entity.assist.CmsScheduler; 
import com.jeecms.common.hibernate3.Finder; 
import com.jeecms.common.hibernate3.HibernateBaseDao; 

/**
 * Hibernate implementation of {@link CmsSchedulerDao}.
 *
 * <p>NOTE(review): the superclass type arguments are restored here because the
 * published listing had them stripped ({@code CmsScheduler entity = get(id)}
 * cannot compile against a raw base class). Confirm the parameter order
 * {@code <entity, id>} against {@code HibernateBaseDao}.
 */
@Repository
public class CmsSchedulerDaoImpl extends HibernateBaseDao<CmsScheduler, Integer>
        implements CmsSchedulerDao {

    /**
     * @return all scheduled tasks ordered by id ascending
     */
    @SuppressWarnings("unchecked")
    public List<CmsScheduler> getList() {
        Finder f = Finder.create("from CmsScheduler bean order by bean.id asc");
        return find(f);
    }

    /**
     * Lists scheduled tasks ordered by id ascending. The module-type/site filter
     * is applied only when the bean carries a non-empty module type and a site
     * with a non-null id; otherwise every task is returned.
     *
     * @param bean carrier for the filter values
     * @return the matching scheduled tasks
     */
    @SuppressWarnings("unchecked")
    public List<CmsScheduler> getListBy(CmsScheduler bean) {
        Finder f = Finder.create("from CmsScheduler bean");
        // Guard against a missing site so an incomplete filter does not end in a NullPointerException.
        if (StringUtils.isNotEmpty(bean.getModuleType())
                && bean.getSite() != null
                && bean.getSite().getId() != null) {
            f.append(" where bean.moduleType=:moduleType and bean.site.id=:siteId");
            f.setParam("moduleType", bean.getModuleType());
            f.setParam("siteId", bean.getSite().getId());
        }
        f.append(" order by bean.id asc");
        return find(f);
    }

    /**
     * @param id primary key
     * @return the entity returned by the base DAO's {@code get(id)}
     */
    public CmsScheduler findById(Integer id) {
        return get(id);
    }

    /**
     * Saves the bean through the current Hibernate session.
     *
     * @param bean the task to store
     * @return the same bean instance
     */
    public CmsScheduler save(CmsScheduler bean) {
        getSession().save(bean);
        return bean;
    }

    /**
     * Deletes the entity with the given id if it exists.
     *
     * @param id primary key
     * @return the deleted entity, or {@code null} when none was found
     */
    public CmsScheduler deleteById(Integer id) {
        CmsScheduler entity = super.get(id);
        if (entity != null) {
            getSession().delete(entity);
        }
        return entity;
    }

    @Override
    protected Class<CmsScheduler> getEntityClass() {
        return CmsScheduler.class;
    }
}

计划任务管理服务接口 CmsSchedulerMng.java 
package com.jeecms.cms.manager.assist; 

import java.util.List; 

import com.jeecms.cms.entity.assist.CmsScheduler; 
/**
 * Management service for scheduled tasks ({@link CmsScheduler}).
 *
 * @author javacoo
 * @since 2011-11-07
 * @version 1.0
 */
public interface CmsSchedulerMng {
    /**
     * @return all scheduled tasks
     */
    List<CmsScheduler> getList();

    /**
     * Lists the scheduled tasks of one module type on one site.
     *
     * @param bean carrier for the filter values (module type and site)
     * @return the matching scheduled tasks
     */
    List<CmsScheduler> getListBy(CmsScheduler bean);

    /**
     * @param id scheduled task id
     * @return the scheduled task with that id
     */
    CmsScheduler findById(Integer id);

    /**
     * Marks a currently started task as stopped; the recorded end time is not touched.
     *
     * @param id scheduled task id
     */
    void stop(Integer id);

    /**
     * Marks the task as started and records the start time.
     *
     * @param id scheduled task id
     * @return the updated task, or {@code null} when the id is unknown
     */
    CmsScheduler start(Integer id);

    /**
     * Marks the task as stopped and records the end time.
     *
     * @param id scheduled task id
     */
    void end(Integer id);

    /**
     * Stores a new scheduled task.
     *
     * @param bean the task to store
     * @return the stored task
     */
    CmsScheduler save(CmsScheduler bean);

    /**
     * Updates an existing scheduled task.
     *
     * @param bean the task carrying the new values
     * @return the updated task
     */
    CmsScheduler update(CmsScheduler bean);

    /**
     * Deletes one scheduled task.
     *
     * @param id scheduled task id
     * @return the deleted task
     */
    CmsScheduler deleteById(Integer id);

    /**
     * Deletes several scheduled tasks.
     *
     * @param ids scheduled task ids
     * @return the deleted tasks, in the same order as {@code ids}
     */
    CmsScheduler[] deleteByIds(Integer[] ids);
}

计划任务管理服务接口实现类 CmsSchedulerMngImpl.java 
package com.jeecms.cms.manager.assist.impl; 

import java.util.Date; 
import java.util.List; 

import org.springframework.beans.factory.annotation.Autowired; 
import org.springframework.stereotype.Service; 
import org.springframework.transaction.annotation.Transactional; 

import com.jeecms.cms.dao.assist.CmsSchedulerDao; 
import com.jeecms.cms.entity.assist.CmsAcquisition; 
import com.jeecms.cms.entity.assist.CmsScheduler; 
import com.jeecms.cms.manager.assist.CmsSchedulerMng; 
import com.jeecms.common.hibernate3.Updater; 
/**
 * Transactional implementation of {@link CmsSchedulerMng}.
 *
 * <p>The state-changing methods only mutate the loaded entity; they rely on
 * the surrounding transaction to flush the change.
 *
 * @author javacoo
 * @since 2011-11-07
 * @version 1.0
 */
@Service
@Transactional
public class CmsSchedulerMngImpl implements CmsSchedulerMng {
    private CmsSchedulerDao dao;

    @Transactional(readOnly = true)
    public List<CmsScheduler> getList() {
        return dao.getList();
    }

    @Transactional(readOnly = true)
    public List<CmsScheduler> getListBy(CmsScheduler bean) {
        return dao.getListBy(bean);
    }

    @Transactional(readOnly = true)
    public CmsScheduler findById(Integer id) {
        return dao.findById(id);
    }

    /**
     * Sets the status to STOP when the task is currently in status START.
     * Unknown ids are ignored.
     */
    public void stop(Integer id) {
        CmsScheduler scheduler = findById(id);
        if (scheduler == null) {
            return;
        }
        if (scheduler.getStatus() == CmsScheduler.START) {
            scheduler.setStatus(CmsScheduler.STOP);
        }
    }

    /**
     * Sets the status to START, stamps the start time with "now" and clears
     * the end time.
     *
     * @return the updated task, or {@code null} when the id is unknown
     */
    public CmsScheduler start(Integer id) {
        CmsScheduler scheduler = findById(id);
        if (scheduler == null) {
            return scheduler;
        }
        // Use the scheduler's own constants, as stop() does, rather than CmsAcquisition's.
        scheduler.setStatus(CmsScheduler.START);
        scheduler.setStartTime(new Date());
        scheduler.setEndTime(null);
        return scheduler;
    }

    /**
     * Sets the status to STOP and stamps the end time with "now".
     * Unknown ids are ignored.
     */
    public void end(Integer id) {
        CmsScheduler scheduler = findById(id);
        if (scheduler == null) {
            return;
        }
        scheduler.setStatus(CmsScheduler.STOP);
        scheduler.setEndTime(new Date());
    }

    /**
     * Calls {@code bean.init()} and then stores the bean.
     */
    public CmsScheduler save(CmsScheduler bean) {
        bean.init();
        dao.save(bean);
        return bean;
    }

    public CmsScheduler update(CmsScheduler bean) {
        Updater<CmsScheduler> updater = new Updater<CmsScheduler>(bean);
        return dao.updateByUpdater(updater);
    }

    public CmsScheduler deleteById(Integer id) {
        return dao.deleteById(id);
    }

    /**
     * Deletes each id in turn.
     *
     * @return the deleted entities in the order of {@code ids}; an element is
     *         {@code null} when the DAO found nothing for that id
     */
    public CmsScheduler[] deleteByIds(Integer[] ids) {
        CmsScheduler[] beans = new CmsScheduler[ids.length];
        for (int i = 0, len = ids.length; i < len; i++) {
            beans[i] = deleteById(ids[i]);
        }
        return beans;
    }

    @Autowired
    public void setDao(CmsSchedulerDao dao) {
        this.dao = dao;
    }
}

定时任务管理接口 SchedulerTaskManageSvc.java 
package com.jeecms.cms.service.scheduler; 

import java.util.List; 

import com.jeecms.cms.entity.assist.CmsScheduler; 


/**
 * Entry point for starting and stopping the in-memory timers that back
 * {@link CmsScheduler} records.
 *
 * @author javacoo
 * @since 2011-11-07
 */
public interface SchedulerTaskManageSvc {
    /**
     * Starts the timer for a scheduled task.
     *
     * @param scheduler the scheduled task
     * @return true/false
     */
    boolean start(CmsScheduler scheduler);

    /**
     * Stops the timer of a scheduled task.
     *
     * @param scheduler the scheduled task
     * @return true/false
     */
    boolean stop(CmsScheduler scheduler);

    /**
     * Lists the tasks the scheduler entry can be associated with, as reported
     * by the task service for the scheduler's module type.
     *
     * @param scheduler the scheduled task (supplies module type and site)
     * @return the candidate tasks (a list, not a map)
     */
    List<SchedulerTaskBean> associateTaskList(CmsScheduler scheduler);
}


定时任务管理接口实现类 SchedulerTaskManageSvcImpl.java 
package com.jeecms.cms.service.scheduler; 

import java.util.List; 
import java.util.Map; 
import java.util.concurrent.ConcurrentHashMap; 

import org.apache.commons.lang.StringUtils; 
import org.springframework.beans.factory.annotation.Autowired; 
import org.springframework.stereotype.Service; 

import com.jeecms.cms.entity.assist.CmsScheduler; 
import com.jeecms.common.scheduling.core.Scheduler; 
import com.jeecms.common.scheduling.core.SchedulerTask; 
import com.jeecms.common.scheduling.impl.ScheduleParamBean; 
import com.jeecms.common.scheduling.impl.SimpleScheduleIterator; 
/**
 * Keeps one in-memory timer per started {@link CmsScheduler} and delegates the
 * actual work to the {@link SchedulerTaskSvc} registered for the scheduler's
 * module type.
 *
 * @author javacoo
 * @since 2011-11-07
 */
@Service
public class SchedulerTaskManageSvcImpl implements SchedulerTaskManageSvc {
    /** Running tasks keyed by scheduler id; static, so shared by all instances of this service. */
    private static final Map<Integer, TaskManage> taskManageMap =
            new ConcurrentHashMap<Integer, TaskManage>();

    /**
     * Task services looked up by {@code CmsScheduler.getModuleType()}.
     * NOTE(review): Spring fills an autowired {@code Map<String, T>} with bean
     * names as keys, so module types presumably have to equal the bean names
     * of the task services; confirm against the XML configuration.
     */
    @Autowired
    private Map<String, SchedulerTaskSvc> schedulerTaskSvcMap;

    /**
     * Couples one timer ({@link Scheduler}) with the task service and the
     * scheduler record it fires for.
     *
     * @author javacoo
     * @since 2011-11-07
     */
    private static class TaskManage {
        /** Timer that fires the task. */
        private final Scheduler scheduler = new Scheduler();
        /** Service that performs the work. */
        private final SchedulerTaskSvc schedulerTaskSvc;
        /** Record the timer was started for. */
        private final CmsScheduler cmsScheduler;
        /** Parsed form of the record's expression; set by a successful parse. */
        private ScheduleParamBean scheduleParamBean;

        TaskManage(SchedulerTaskSvc schedulerSvc, CmsScheduler cmsScheduler) {
            this.schedulerTaskSvc = schedulerSvc;
            this.cmsScheduler = cmsScheduler;
        }

        /**
         * Parses the six comma-separated fields of the expression in the order
         * weekOfMonth, dayOfWeek, dayOfMonth, hourOfDay, minute, second.
         * Fields that are not plain digits (such as {@code *}) are left unset.
         *
         * @return false when the expression is empty or does not have exactly six fields
         */
        private boolean parseSchedulerParam() {
            System.out.println("计划表达式:" + cmsScheduler.getExpression());
            String expression = cmsScheduler.getExpression();
            if (StringUtils.isEmpty(expression) || !expression.contains(",")) {
                return false;
            }
            String[] fields = expression.split(",");
            if (fields.length != 6) {
                return false;
            }
            ScheduleParamBean parsed = new ScheduleParamBean();
            if (isDigits(fields[0])) {
                parsed.setWeekOfMonth(Integer.valueOf(fields[0]));
            }
            if (isDigits(fields[1])) {
                parsed.setDayOfWeek(Integer.valueOf(fields[1]));
            }
            if (isDigits(fields[2])) {
                parsed.setDayOfMonth(Integer.valueOf(fields[2]));
            }
            if (isDigits(fields[3])) {
                parsed.setHourOfDay(Integer.valueOf(fields[3]));
            }
            if (isDigits(fields[4])) {
                parsed.setMinute(Integer.valueOf(fields[4]));
            }
            if (isDigits(fields[5])) {
                parsed.setSecond(Integer.valueOf(fields[5]));
            }
            scheduleParamBean = parsed;
            return true;
        }

        /**
         * Schedules the task when the expression can be parsed.
         *
         * @return true when the timer was scheduled
         */
        boolean start() {
            if (!parseSchedulerParam()) {
                return false;
            }
            scheduler.schedule(new SchedulerTask() {
                public void run() {
                    System.out.println("============开始执行计划任务=================");
                    schedulerTaskSvc.start(cmsScheduler);
                }
            }, new SimpleScheduleIterator(scheduleParamBean));
            return true;
        }

        /** Asks the task service to stop and cancels the timer. */
        void cancel() {
            schedulerTaskSvc.stop(cmsScheduler);
            scheduler.cancel();
        }

        /** Cancels only the timer, leaving the task service untouched. */
        void cancelTimer() {
            scheduler.cancel();
        }
    }

    /**
     * Tells whether a field is a non-empty run of digits. The emptiness check
     * matters because commons-lang 2.x {@code StringUtils.isNumeric("")} is
     * true, which would let {@code Integer.valueOf("")} throw.
     */
    private static boolean isDigits(String field) {
        return StringUtils.isNotEmpty(field) && StringUtils.isNumeric(field);
    }

    /**
     * Starts a timer for the scheduler record.
     *
     * @param scheduler the scheduled task
     * @return false when no task service is registered for the module type or
     *         the expression is invalid; true once the timer is running
     */
    public boolean start(CmsScheduler scheduler) {
        SchedulerTaskSvc schedulerSvc = getSchedulerTaskSvcByModuleType(scheduler.getModuleType());
        if (schedulerSvc == null) {
            return false;
        }
        TaskManage taskManage = new TaskManage(schedulerSvc, scheduler);
        if (!taskManage.start()) {
            return false;
        }
        TaskManage previous = taskManageMap.put(scheduler.getId(), taskManage);
        if (previous != null) {
            // A restart would otherwise leave the old timer firing with no way to cancel it.
            previous.cancelTimer();
        }
        return true;
    }

    /**
     * Stops and forgets the timer of the scheduler record.
     *
     * @param scheduler the scheduled task
     * @return false when no timer is registered for the record's id
     */
    public boolean stop(CmsScheduler scheduler) {
        TaskManage taskManage = taskManageMap.remove(scheduler.getId());
        if (taskManage == null) {
            return false;
        }
        taskManage.cancel();
        return true;
    }

    /**
     * Lists the tasks the scheduler can be associated with.
     *
     * @param scheduler the scheduled task (supplies the module type)
     * @return the task service's answer, or an empty list when no task service
     *         is registered for the module type
     */
    public List<SchedulerTaskBean> associateTaskList(CmsScheduler scheduler) {
        SchedulerTaskSvc schedulerSvc = getSchedulerTaskSvcByModuleType(scheduler.getModuleType());
        if (schedulerSvc == null) {
            return java.util.Collections.<SchedulerTaskBean>emptyList();
        }
        return schedulerSvc.associateTaskList(scheduler);
    }

    /**
     * @param moduleType module type of a scheduler record
     * @return the task service stored under that key, or {@code null}
     */
    private SchedulerTaskSvc getSchedulerTaskSvcByModuleType(String moduleType) {
        return schedulerTaskSvcMap.get(moduleType);
    }
}

定时任务接口 SchedulerTaskSvc.java 
package com.jeecms.cms.service.scheduler; 

import java.util.List; 

import com.jeecms.cms.entity.assist.CmsScheduler; 

/**
 * A unit of work that can be driven by a scheduler record.
 *
 * @author javacoo
 * @since 2011-11-04
 */
public interface SchedulerTaskSvc {
    /**
     * Runs the task for the scheduler record.
     *
     * @param cmsScheduler the scheduled task
     * @return true/false
     */
    boolean start(CmsScheduler cmsScheduler);

    /**
     * Stops the task for the scheduler record.
     *
     * @param cmsScheduler the scheduled task
     * @return true/false
     */
    boolean stop(CmsScheduler cmsScheduler);

    /**
     * Lists the tasks a scheduler record of this kind can be associated with.
     *
     * @param cmsScheduler the scheduled task
     * @return the candidate tasks (a list, not a map)
     */
    List<SchedulerTaskBean> associateTaskList(CmsScheduler cmsScheduler);
}

定时任务抽象实现类 AbstractSchedulerTaskSvc.java 
package com.jeecms.cms.service.scheduler; 

import java.util.List; 

import com.jeecms.cms.entity.assist.CmsScheduler; 
/**
 * Skeleton {@link SchedulerTaskSvc}: subclasses supply {@link #execute(CmsScheduler)}
 * and may override the defaults for stopping and for listing associated tasks.
 *
 * @author javacoo
 * @since 2011-11-08
 */
public abstract class AbstractSchedulerTaskSvc implements SchedulerTaskSvc {

    /**
     * Starts the task by delegating to {@link #execute(CmsScheduler)}.
     *
     * @return the result of {@code execute}
     */
    public boolean start(CmsScheduler scheduler) {
        return execute(scheduler);
    }

    /**
     * Stops the task. The default does nothing.
     *
     * @return always true
     */
    public boolean stop(CmsScheduler scheduler) {
        return true;
    }

    /**
     * Lists the tasks that can be associated with the scheduler record.
     *
     * @return an empty, immutable list by default (rather than {@code null}),
     *         so callers can iterate the result without a null check
     */
    public List<SchedulerTaskBean> associateTaskList(CmsScheduler scheduler) {
        return java.util.Collections.emptyList();
    }

    /**
     * Performs the work of the task.
     *
     * @param scheduler the scheduled task being fired
     * @return true/false
     */
    protected abstract boolean execute(CmsScheduler scheduler);
}


定时任务接口-采集器实现类-多线程版 SchedulerAcquisitionSvcImpl.java 
package com.jeecms.cms.service.scheduler; 

import java.io.IOException; 
import java.net.URI; 
import java.net.URISyntaxException; 
import java.util.ArrayList; 
import java.util.HashMap; 
import java.util.List; 
import java.util.Map; 
import java.util.concurrent.CountDownLatch; 
import java.util.concurrent.ExecutorService; 
import java.util.concurrent.Executors; 

import org.apache.commons.lang.StringUtils; 
import org.apache.http.HttpEntity; 
import org.apache.http.HttpHost; 
import org.apache.http.HttpResponse; 
import org.apache.http.StatusLine; 
import org.apache.http.client.ClientProtocolException; 
import org.apache.http.client.HttpClient; 
import org.apache.http.client.HttpResponseException; 
import org.apache.http.client.ResponseHandler; 
import org.apache.http.client.methods.HttpGet; 
import org.apache.http.conn.params.ConnRoutePNames; 
import org.apache.http.impl.client.DefaultHttpClient; 
import org.apache.http.util.EntityUtils; 
import org.slf4j.Logger; 
import org.slf4j.LoggerFactory; 
import org.springframework.beans.factory.annotation.Autowired; 
import org.springframework.stereotype.Service; 

import com.jeecms.cms.entity.assist.CmsAcquisition; 
import com.jeecms.cms.entity.assist.CmsScheduler; 
import com.jeecms.cms.entity.main.Content; 
import com.jeecms.cms.manager.assist.CmsAcquisitionMng; 
import com.jeecms.common.crawler.UrlQueue; 
import com.jeecms.common.crawler.util.HtmlParserImpl; 
import com.jeecms.common.crawler.util.ParseHtmlTool; 
/** 
 * 计划任务接口-采集器实现类-多线程版 
 * @author javacoo 
 * @since 2011-11-02 
 * @version 1.0  
 */ 
@Service 
public class SchedulerAcquisitionSvcImpl extends AbstractSchedulerTaskSvc { 
/** Logger shared by the service and its worker classes. */
private static final Logger log = LoggerFactory.getLogger(SchedulerAcquisitionSvcImpl.class);
/** Number of threads per run; used to size the run's CountDownLatch. */
private static final int THREAD_NUM = 2;
/** Pause, in milliseconds, a worker makes after each queue entry. */
private static final int SLEEP_TIME = 100;
/** Map key under which an entry's link (URL) is stored. */
private static final String LINK_KEY = "linkKey";
/** Map key under which an entry's title is stored. */
private static final String TITLE_KEY = "titleKey";
/** Acquisition manager, injected by Spring. */
private CmsAcquisitionMng cmsAcquisitionMng;
/** Per-thread HttpClient. */
private static final ThreadLocal<HttpClient> httpClientThreadLocal = new ThreadLocal<HttpClient>();
/** Per-thread HTML parsing tool. */
private static final ThreadLocal<ParseHtmlTool> parseHtmlToolThreadLocal = new ThreadLocal<ParseHtmlTool>();
/** Per-thread queue of content URLs. */
private static final ThreadLocal<UrlQueue> urlQueueThreadLocal = new ThreadLocal<UrlQueue>();
/** Per-thread queue of plan (listing page) URLs. */
private static final ThreadLocal<UrlQueue> planUrlQueueThreadLocal = new ThreadLocal<UrlQueue>();

/**
 * Injects the acquisition manager.
 *
 * @param cmsAcquisitionMng manager used to load acquisition jobs
 */
@Autowired
public void setCmsAcquisitionMng(CmsAcquisitionMng cmsAcquisitionMng) {
    this.cmsAcquisitionMng = cmsAcquisitionMng;
}
/**
 * Launches the acquisition job referenced by the scheduler record on a new thread.
 *
 * @param scheduler scheduler record whose associate id names the acquisition job
 * @return false when no acquisition job exists for the associate id, true once
 *         the worker thread has been started
 */
@Override
protected boolean execute(CmsScheduler scheduler) {
    final CmsAcquisition acquisition = cmsAcquisitionMng.findById(scheduler.getAssociateId());
    final boolean found = acquisition != null;
    if (found) {
        System.out.println("===============开始执行采集任务");
        Runnable job = new MainThreadProcesser(this, acquisition);
        Thread worker = new Thread(job);
        worker.start();
    }
    return found;
}
 /**
  * Lists the acquisition jobs of the scheduler's site as id/name pairs, so a
  * scheduler record can be linked to one of them.
  *
  * @param scheduler scheduler record; its site id selects the acquisition jobs
  * @return one {@link SchedulerTaskBean} per acquisition job (a list, not a map)
  */
@Override
public List<SchedulerTaskBean> associateTaskList(CmsScheduler scheduler) {
    // Typed element list: iterating a raw List with a CmsAcquisition loop variable does not compile.
    List<CmsAcquisition> acquisitions = cmsAcquisitionMng.getList(scheduler.getSite().getId());
    List<SchedulerTaskBean> resultList = new ArrayList<SchedulerTaskBean>();
    for (CmsAcquisition acquisition : acquisitions) {
        SchedulerTaskBean schedulerTaskBean = new SchedulerTaskBean();
        schedulerTaskBean.setId(acquisition.getId());
        schedulerTaskBean.setName(acquisition.getName());
        resultList.add(schedulerTaskBean);
    }
    return resultList;
}

/** 
 * Coordinator for one acquisition run: queues the job's plan URLs, then hands
 * a FetchUrlThread to a cached thread pool.
 * NOTE(review): this listing is damaged - see the note inside run(). The end of
 * this class and the head of FetchUrlThread are missing from the text.
 * @author javacoo 
 * @since 2011-11-02 
 */ 
private class MainThreadProcesser implements Runnable { 
private CmsAcquisition acqu; 
private SchedulerTaskSvc schedulerAcquisitionSvc; 
public MainThreadProcesser(SchedulerTaskSvc schedulerAcquisitionSvc,CmsAcquisition acqu) { 
this.acqu = acqu; 
this.schedulerAcquisitionSvc = schedulerAcquisitionSvc; 
} 
// Monitor object passed to the worker threads; they wait()/notifyAll() on it.
Object threadLock = new Object();  
public void run() { 
long tStart = System.currentTimeMillis(); 
System.out.println("主线程:"+Thread.currentThread().getName() + "开始..."); 
try { 
CountDownLatch latch = new CountDownLatch(THREAD_NUM); 
ExecutorService exec = Executors.newCachedThreadPool(); 
// NOTE(review): hard-coded HTTP proxy address and port; presumably specific to the author's network - make configurable or remove.
getHttpClient().getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY,new HttpHost("128.160.64.5", 1235)); 
CharsetHandler handler = new CharsetHandler(acqu.getPageEncoding());  
// Queue every plan URL of the current acquisition job.
getAllPlans(acqu,getPlanUrlQueue()); 
// Start one thread that collects the URLs found on the plan pages.
Thread thread = new Thread(new FetchUrlThread(schedulerAcquisitionSvc,latch,getHttpClient(),getPlanUrlQueue(),getUrlQueue(),getParseHtmlTool(acqu),handler,threadLock)); 
exec.execute(thread); 
// Start the configured number of threads that fetch the content.
// NOTE(review): the next line is truncated - everything between "i<" and a later ">"
// was lost when the article was published (the rest of this loop, the end of
// MainThreadProcesser and the declaration of FetchUrlThread). The statements from
// here to the end of this block appear to be the tail of FetchUrlThread.run();
// restore the missing part from the original project before compiling.
for(int i=0;i urlMap = null; 
while(!urlAndTitleMapIsEmpty(planUrlQueue)) { 
urlMap = getUrlAndTitleMap(planUrlQueue); 
getAllUrls(httpClient,parseHtmlTool,handler,urlQueue,urlMap); 
Thread.sleep(SLEEP_TIME); 
} 
} catch (ClientProtocolException e) { 
e.printStackTrace(); 
} catch (URISyntaxException e) { 
e.printStackTrace(); 
} catch (IOException e) { 
e.printStackTrace(); 
} catch (InterruptedException e) { 
// TODO Auto-generated catch block 
e.printStackTrace(); 
}finally { 
System.out.println("======================采集URL子线程:"+Thread.currentThread().getName() + "结束."); 
// Wake the content threads that are waiting on threadLock.
synchronized(threadLock) {  
threadLock.notifyAll(); 
} 
latch.countDown(); 

} 
} 
} 

/**
 * Worker that waits for the URL-collecting thread to signal completion and
 * then fetches the page behind every entry of the URL queue.
 *
 * @author javacoo
 * @since 2011-11-02
 */
private class FetchContentThread implements Runnable {
    private final SchedulerTaskSvc acquisitionSvc;
    private final CmsAcquisition acqu;
    private final CountDownLatch latch;
    private final UrlQueue urlQueue;
    private final HttpClient httpClient;
    private final ParseHtmlTool parseHtmlTool;
    private final CharsetHandler handler;
    private final Object threadLock;

    public FetchContentThread(SchedulerTaskSvc acquisitionSvc, CmsAcquisition acqu, CountDownLatch latch,
            HttpClient httpClient, UrlQueue urlQueue, ParseHtmlTool parseHtmlTool, CharsetHandler handler,
            Object threadLock) {
        this.acquisitionSvc = acquisitionSvc;
        this.acqu = acqu;
        this.latch = latch;
        this.urlQueue = urlQueue;
        this.httpClient = httpClient;
        this.parseHtmlTool = parseHtmlTool;
        this.handler = handler;
        this.threadLock = threadLock;
    }

    /**
     * Blocks on {@code threadLock} until notified, then drains the URL queue,
     * pausing {@code SLEEP_TIME} milliseconds after each entry. The latch is
     * counted down in all cases.
     */
    public void run() {
        System.out.println("======================采集内容子线程:" + Thread.currentThread().getName() + "开始...");
        try {
            // Wait for the URL-collecting thread to finish.
            // NOTE(review): wait() is not guarded by a condition, so a notifyAll() issued
            // before this thread reaches wait() is lost and the thread blocks forever.
            // Fixing that needs a flag shared with the notifier, which is outside this class.
            synchronized (threadLock) {
                threadLock.wait();
            }
            while (!urlAndTitleMapIsEmpty(urlQueue)) {
                Map<String, String> urlMap = getUrlAndTitleMap(urlQueue);
                saveContent(acqu, httpClient, parseHtmlTool, handler, urlMap);
                Thread.sleep(SLEEP_TIME);
            }
        } catch (InterruptedException e) {
            // Keep the interrupt visible to whoever owns this thread instead of swallowing it.
            Thread.currentThread().interrupt();
            log.warn("Acquisition#" + acqu.getId() + " content thread interrupted", e);
        } catch (Exception e) {
            log.warn("Acquisition#" + acqu.getId() + " content thread failed", e);
        } finally {
            System.out.println("======================采集内容子线程:" + Thread.currentThread().getName() + "结束.");
            log.info("Acquisition#{} complete", acqu.getId());
            latch.countDown();
        }
    }
}


/**
 * Returns the HttpClient bound to the calling thread, creating a
 * DefaultHttpClient on first use.
 *
 * @return the calling thread's HttpClient
 */
private static HttpClient getHttpClient() {
    // The cast keeps this method compilable whether or not the ThreadLocal field carries a type argument.
    HttpClient client = (HttpClient) httpClientThreadLocal.get();
    if (client == null) {
        client = new DefaultHttpClient();
        httpClientThreadLocal.set(client);
    }
    return client;
}
/**
 * Returns the content-URL queue bound to the calling thread, creating it on
 * first use.
 *
 * @return the calling thread's UrlQueue
 */
private static UrlQueue getUrlQueue() {
    // The cast keeps this method compilable whether or not the ThreadLocal field carries a type argument.
    UrlQueue urlQueue = (UrlQueue) urlQueueThreadLocal.get();
    if (urlQueue == null) {
        urlQueue = new UrlQueue();
        urlQueueThreadLocal.set(urlQueue);
    }
    return urlQueue;
}
/**
 * Returns the plan-URL queue bound to the calling thread, creating it on
 * first use.
 *
 * @return the calling thread's plan UrlQueue
 */
private static UrlQueue getPlanUrlQueue() {
    // The cast keeps this method compilable whether or not the ThreadLocal field carries a type argument.
    UrlQueue planQueue = (UrlQueue) planUrlQueueThreadLocal.get();
    if (planQueue == null) {
        planQueue = new UrlQueue();
        planUrlQueueThreadLocal.set(planQueue);
    }
    return planQueue;
}
/**
 * Returns the ParseHtmlTool bound to the calling thread, creating an
 * HtmlParserImpl for {@code acqu} on first use.
 *
 * <p>Once a tool exists for the thread it is returned as is; {@code acqu} is
 * only consulted when the tool is created.
 *
 * @param acqu acquisition settings used to build the tool on first use
 * @return the calling thread's ParseHtmlTool
 */
private static ParseHtmlTool getParseHtmlTool(CmsAcquisition acqu) {
    // The cast keeps this method compilable whether or not the ThreadLocal field carries a type argument.
    ParseHtmlTool parseHtmlTool = (ParseHtmlTool) parseHtmlToolThreadLocal.get();
    if (parseHtmlTool == null) {
        parseHtmlTool = new HtmlParserImpl(acqu);
        parseHtmlToolThreadLocal.set(parseHtmlTool);
    }
    return parseHtmlTool;
}
/**
 * Appends a link/title entry to the queue's unvisited URLs. Synchronized on
 * the enclosing service instance.
 *
 * @param map link/title entry to enqueue
 * @param urlQueue queue that receives the entry
 */
private synchronized void addUrlAndTitleMap(Map map, UrlQueue urlQueue) {
    final String threadName = Thread.currentThread().getName();
    System.out.println("====线程:" + threadName + ",添加  urlQueue:" + urlQueue);
    urlQueue.addUnVisitedUrl(map);
}
/**
 * Takes the next link/title entry off the queue's unvisited URLs.
 * Synchronized on the enclosing service instance.
 *
 * @param urlQueue queue to take from
 * @return the dequeued link/title entry
 */
private synchronized Map getUrlAndTitleMap(UrlQueue urlQueue) {
    final String threadName = Thread.currentThread().getName();
    System.out.println("====线程:" + threadName + ",取得 urlQueue:" + urlQueue);
    Map next = urlQueue.unVisitedUrlDeQueue();
    return next;
}
/**
 * Reports whether the queue is empty. Synchronized on the enclosing service
 * instance.
 *
 * @param urlQueue queue to inspect
 * @return the result of {@code urlQueue.isEmpty()}
 */
private synchronized boolean urlAndTitleMapIsEmpty(UrlQueue urlQueue) {
    final String threadName = Thread.currentThread().getName();
    System.out.println("====线程:" + threadName + ",判断 urlQueue:" + urlQueue);
    boolean empty = urlQueue.isEmpty();
    return empty;
}
/**
 * Queues every plan URL of the acquisition job, last plan first. Each entry
 * carries the plan URL as its link and the job's name as its title.
 *
 * @param acqu acquisition job supplying the plan URLs and the name
 * @param urlQueue queue that receives one entry per plan
 */
private void getAllPlans(CmsAcquisition acqu, UrlQueue urlQueue) {
    String[] plans = acqu.getAllPlans();
    for (int i = plans.length - 1; i >= 0; i--) {
        // A fresh map per plan: reusing one instance would make every queued entry
        // alias the same object, so they would all end up holding the last plan's URL.
        Map<String, String> planMap = new HashMap<String, String>();
        planMap.put(LINK_KEY, plans[i]);
        planMap.put(TITLE_KEY, acqu.getName());
        addUrlAndTitleMap(planMap, urlQueue);
    }
    System.out.println("=======当前线程:" + Thread.currentThread().getName() + "计划URL连接数:" + urlQueue.getUnVisitedUrlNum());
}
/**
 * Downloads one plan page and queues every link/title entry the parser finds on it.
 *
 * @param httpClient client used for the request
 * @param parseHtmlTool parser that extracts link/title entries from the page
 * @param handler response handler that turns the response into a string
 * @param urlQueue queue that receives the extracted entries
 * @param map plan entry; its {@code LINK_KEY} value is the page URL
 * @throws URISyntaxException when the plan URL is not a valid URI
 * @throws ClientProtocolException on an HTTP protocol error
 * @throws IOException on a connection problem
 */
private void getAllUrls(HttpClient httpClient, ParseHtmlTool parseHtmlTool, CharsetHandler handler,
        UrlQueue urlQueue, Map<String, String> map)
        throws URISyntaxException, ClientProtocolException, IOException {
    HttpGet httpGet = new HttpGet(new URI(map.get(LINK_KEY).trim()));
    // The cast keeps this line compilable whether or not CharsetHandler declares its result type.
    String html = (String) httpClient.execute(httpGet, handler);
    for (Map planMap : parseHtmlTool.getUrlAndTitleMap(html)) {
        addUrlAndTitleMap(planMap, urlQueue);
    }
    System.out.println("=======当前线程:" + Thread.currentThread().getName() + "URL连接数:" + urlQueue.getUnVisitedUrlNum());
}
/**
 * Downloads the page behind one link/title entry and runs it through the
 * HTML parser. Synchronized on the enclosing service instance.
 *
 * <p>NOTE(review): persisting the result is disabled (the call to
 * {@code cmsAcquisitionMng.saveContent(map.get(TITLE_KEY), txt, acqu.getId())}
 * is commented out in the original), so this method currently always returns
 * {@code null}.
 *
 * @param acqu acquisition job the entry belongs to
 * @param httpClient client used for the request
 * @param parseHtmlTool parser that extracts the content from the page
 * @param handler response handler that turns the response into a string
 * @param map link/title entry; its {@code LINK_KEY} value is the page URL
 * @return always {@code null} while saving is disabled; failures are logged
 *         and also yield {@code null}
 */
private synchronized Content saveContent(CmsAcquisition acqu, HttpClient httpClient,
        ParseHtmlTool parseHtmlTool, CharsetHandler handler, Map<String, String> map) {
    try {
        String link = map.get(LINK_KEY).trim();
        // Decide by the scheme prefix, not by a substring match: "contains" treated https
        // links as relative and relative links with an embedded "http://" as absolute.
        boolean absolute = link.startsWith("http://") || link.startsWith("https://");
        // NOTE(review): hard-coded base for relative links; presumably the author's test site.
        String target = absolute ? link : "http://localhost/v7/" + link;
        HttpGet httpGet = new HttpGet(new URI(target));
        // The cast keeps this line compilable whether or not CharsetHandler declares its result type.
        String html = (String) httpClient.execute(httpGet, handler);
        System.out.println("=============================子线程:" + Thread.currentThread().getName() + "执行");
        String txt = parseHtmlTool.getHtml(html);
        //return cmsAcquisitionMng.saveContent(map.get(TITLE_KEY), txt,acqu.getId());
        return null;
    } catch (Exception e) {
        log.warn("Failed to fetch content for acquisition#" + acqu.getId(), e);
        return null;
    }
}
/** 
 * 字符集帮助类 
 * @author Administrator 
 * 
 */ 
private class CharsetHandler implements ResponseHandler { 
private String charset; 

public CharsetHandler(String charset) { 
this.charset = charset; 
} 

public String handleResponse(HttpResponse response) 
throws ClientProtocolException, IOException { 
StatusLine statusLine = response.getStatusLine(); 
if (statusLine.getStatusCode() >= 300) { 
throw new HttpResponseException(statusLine.getStatusCode(), 
statusLine.getReasonPhrase()); 
} 
HttpEntity entity = response.getEntity(); 
if (entity != null) { 
if (!StringUtils.isBlank(charset)) { 
return EntityUtils.toString(entity, charset); 
} else { 
return EntityUtils.toString(entity); 
} 
} else { 
return null; 
} 
} 
} 





} 


定时服务关联任务bean SchedulerTaskBean.java 
package com.jeecms.cms.service.scheduler; 
/**
 * Id/name pair describing a task that a scheduler record can be associated with.
 *
 * @author javacoo
 * @since 2011-11-07
 */
public class SchedulerTaskBean {
    /** Primary key of the task. */
    private Integer id;
    /** Name of the task. */
    private String name;

    /** @return the task's primary key */
    public Integer getId() {
        return this.id;
    }

    /** @return the task's name */
    public String getName() {
        return this.name;
    }

    /** @param id the task's primary key */
    public void setId(Integer id) {
        this.id = id;
    }

    /** @param name the task's name */
    public void setName(String name) {
        this.name = name;
    }
}

计划任务Controller CmsSchedulerAct.java 
package com.jeecms.cms.action.admin.assist; 

import java.util.List; 

import javax.servlet.http.HttpServletRequest; 
import javax.servlet.http.HttpServletResponse; 

import org.slf4j.Logger; 
import org.slf4j.LoggerFactory; 
import org.springframework.beans.factory.annotation.Autowired; 
import org.springframework.stereotype.Controller; 
import org.springframework.ui.ModelMap; 
import org.springframework.web.bind.annotation.RequestMapping; 

import com.jeecms.cms.entity.assist.CmsAcquisition; 
import com.jeecms.cms.entity.assist.CmsScheduler; 
import com.jeecms.cms.entity.main.CmsSite; 
import com.jeecms.cms.manager.assist.CmsSchedulerMng; 
import com.jeecms.cms.manager.main.CmsLogMng; 
import com.jeecms.cms.service.scheduler.SchedulerTaskManageSvc; 
import com.jeecms.cms.service.scheduler.SchedulerTaskBean; 
import com.jeecms.cms.web.CmsUtils; 
import com.jeecms.cms.web.WebErrors; 
/** 
 * 计划任务Controller 
 * @author javacoo 
 * @since 2011-11-7 
 */ 
@Controller 
public class CmsSchedulerAct { 
private static final Logger log = LoggerFactory 
.getLogger(CmsSchedulerAct.class); 
/**日志服务*/ 
@Autowired 
private CmsLogMng cmsLogMng; 
/**计划管理服务*/ 
@Autowired 
private CmsSchedulerMng manager; 
/**计划任务管理服务*/ 
@Autowired 
private SchedulerTaskManageSvc schedulerTaskManageSvc; 

@RequestMapping("/scheduler/v_list.do") 
public String list(HttpServletRequest request, ModelMap model) { 
List list = manager.getList(); 
model.addAttribute("list", list); 
return "scheduler/list"; 
} 
@RequestMapping("/scheduler/v_listBy.do") 
public String listBy(String moduleType,HttpServletRequest request, ModelMap model) { 
CmsSite site = CmsUtils.getSite(request); 
CmsScheduler scheduler = new CmsScheduler(); 
scheduler.setModuleType(moduleType); 
scheduler.setSite(site); 
List list = manager.getListBy(scheduler); 
model.addAttribute("list", list); 
model.addAttribute("moduleType", moduleType); 
return "scheduler/list"; 
} 

@RequestMapping("/scheduler/v_add.do") 
public String add(String moduleType,HttpServletRequest request, ModelMap model) { 
CmsSite site = CmsUtils.getSite(request); 
CmsScheduler scheduler = new CmsScheduler(); 
scheduler.setModuleType(moduleType); 
scheduler.setSite(site); 
List schedulerTaskList = schedulerTaskManageSvc.associateTaskList(scheduler); 
model.addAttribute("schedulerTaskList", schedulerTaskList); 
model.addAttribute("moduleType", moduleType); 
return "scheduler/add"; 
} 

@RequestMapping("/scheduler/v_edit.do") 
public String edit(Integer id, HttpServletRequest request, ModelMap model) { 
WebErrors errors = validateEdit(id, request); 
if (errors.hasErrors()) { 
return errors.showErrorPage(model); 
} 
CmsSite site = CmsUtils.getSite(request); 
CmsScheduler scheduler = manager.findById(id); 
scheduler.setSite(site); 
List schedulerTaskList = schedulerTaskManageSvc.associateTaskList(scheduler); 
model.addAttribute("schedulerTaskList", schedulerTaskList); 
model.addAttribute("cmsScheduler", scheduler); 
return "scheduler/edit"; 
} 

@RequestMapping("/scheduler/o_save.do") 
public String save(CmsScheduler bean,HttpServletRequest request, ModelMap model) { 
CmsSite site = CmsUtils.getSite(request); 
bean.setSite(site); 
bean = manager.save(bean); 
model.addAttribute("moduleType", bean.getModuleType()); 
log.info("save CmsScheduler id={}", bean.getId()); 
cmsLogMng.operating(request, "cmsAcquisition.log.save", "id=" 
+ bean.getId() + ";name=" + bean.getName()); 
return "redirect:v_listBy.do"; 
} 

@RequestMapping("/scheduler/o_update.do") 
public String update(CmsScheduler bean, HttpServletRequest request, ModelMap model) { 
WebErrors errors = validateUpdate(bean.getId(), request); 
if (errors.hasErrors()) { 
return errors.showErrorPage(model); 
} 
bean = manager.update(bean); 
log.info("update CmsAcquisition id={}.", bean.getId()); 
cmsLogMng.operating(request, "cmsAcquisition.log.update", "id=" 
+ bean.getId() + ";name=" + bean.getName()); 
return listBy(bean.getModuleType(),request, model); 
} 

@RequestMapping("/scheduler/o_delete.do") 
public String delete(String moduleType,Integer[] ids, HttpServletRequest request, 
ModelMap model) { 
WebErrors errors = validateDelete(ids, request); 
if (errors.hasErrors()) { 
return errors.showErrorPage(model); 
} 
CmsScheduler[] beans = manager.deleteByIds(ids); 
for (CmsScheduler bean : beans) { 
log.info("delete CmsAcquisition id={}", bean.getId()); 
cmsLogMng.operating(request, "cmsScheduler.log.delete", "id=" 
+ bean.getId() + ";name=" + bean.getName()); 
} 
return listBy(moduleType,request, model); 
} 

/**
 * Starts the task associated with a scheduler and marks the scheduler as
 * started, then redirects to the list view.
 *
 * @param id       id of the scheduler to start
 * @param request  current HTTP request
 * @param response current HTTP response (unused)
 * @param model    receives the {@code moduleType} attribute for the redirect
 * @return the error page when the id is missing or unknown, otherwise a
 *         redirect to {@code v_listBy.do}
 */
@RequestMapping("/scheduler/o_start.do")
public String start(Integer id, HttpServletRequest request,
        HttpServletResponse response, ModelMap model) {
    // Reject a null/unknown id up front instead of failing with a
    // NullPointerException on the looked-up entity below.
    WebErrors errors = validateEdit(id, request);
    if (errors.hasErrors()) {
        return errors.showErrorPage(model);
    }
    CmsScheduler scheduler = manager.findById(id);
    schedulerTaskManageSvc.start(scheduler);
    manager.start(id);
    model.addAttribute("moduleType", scheduler.getModuleType());
    log.info("start CmsScheduler id={}", id);
    return "redirect:v_listBy.do";
}

/**
 * Marks a scheduler as ended and stops its associated task, then redirects
 * to the list view.
 *
 * @param id       id of the scheduler to stop
 * @param request  current HTTP request
 * @param response current HTTP response (unused)
 * @param model    receives the {@code moduleType} attribute for the redirect
 * @return the error page when the id is missing or unknown, otherwise a
 *         redirect to {@code v_listBy.do}
 */
@RequestMapping("/scheduler/o_end.do")
public String end(Integer id, HttpServletRequest request,
        HttpServletResponse response, ModelMap model) {
    // Reject a null/unknown id up front instead of failing with a
    // NullPointerException on the looked-up entity below.
    WebErrors errors = validateEdit(id, request);
    if (errors.hasErrors()) {
        return errors.showErrorPage(model);
    }
    manager.end(id);
    CmsScheduler scheduler = manager.findById(id);
    schedulerTaskManageSvc.stop(scheduler);
    model.addAttribute("moduleType", scheduler.getModuleType());
    log.info("end CmsScheduler id={}", id);
    return "redirect:v_listBy.do";
}




/**
 * Validates the id passed to the edit view.
 *
 * @param id      scheduler id to check
 * @param request current HTTP request, used to create the error holder and
 *                resolve the current site
 * @return the error holder; it carries an error when the existence check fails
 */
private WebErrors validateEdit(Integer id, HttpServletRequest request) {
    WebErrors result = WebErrors.create(request);
    CmsSite currentSite = CmsUtils.getSite(request);
    // The existence check records its own errors; its boolean result is not needed here.
    vldExist(id, currentSite.getId(), result);
    return result;
}

/**
 * Validates the id of a scheduler that is about to be updated.
 *
 * @param id      scheduler id to check
 * @param request current HTTP request, used to create the error holder and
 *                resolve the current site
 * @return the error holder; it carries an error when the existence check fails
 */
private WebErrors validateUpdate(Integer id, HttpServletRequest request) {
    WebErrors result = WebErrors.create(request);
    CmsSite currentSite = CmsUtils.getSite(request);
    // The existence check records its own errors; its boolean result is not needed here.
    vldExist(id, currentSite.getId(), result);
    return result;
}

/**
 * Validates the ids of the schedulers that are about to be deleted.
 *
 * @param ids     scheduler ids to check
 * @param request current HTTP request, used to create the error holder and
 *                resolve the current site
 * @return the error holder; it carries an error when {@code ids} is empty or
 *         when an existence check fails
 */
private WebErrors validateDelete(Integer[] ids, HttpServletRequest request) {
    WebErrors result = WebErrors.create(request);
    CmsSite currentSite = CmsUtils.getSite(request);
    if (!result.ifEmpty(ids, "ids")) {
        // Every id is checked so that all problems are collected in one pass.
        for (int i = 0; i < ids.length; i++) {
            vldExist(ids[i], currentSite.getId(), result);
        }
    }
    return result;
}

/**
 * Checks that {@code id} is present and refers to a stored scheduler,
 * recording an error otherwise.
 *
 * @param id     scheduler id to check
 * @param siteId id of the current site
 * @param errors error holder that receives any problem found
 * @return {@code true} when an error was recorded, {@code false} when the
 *         scheduler exists
 */
private boolean vldExist(Integer id, Integer siteId, WebErrors errors) {
    if (errors.ifNull(id, "id")) {
        return true;
    }
    CmsScheduler entity = manager.findById(id);
    // Report the missing record against the entity that was actually looked up;
    // this used to name CmsAcquisition (copy-paste from the acquisition controller).
    if (errors.ifNotExist(entity, CmsScheduler.class, id)) {
        return true;
    }
    // NOTE(review): siteId is accepted but never compared with the entity's
    // site, so a scheduler of another site passes this check — confirm intent.
    return false;
}



} 

持久对象基类 BaseCmsScheduler.java 
package com.jeecms.cms.entity.assist.base; 

import java.io.Serializable; 
import java.util.Date; 


/**
 * Base class holding the persistent state of a scheduler record: identity,
 * name, start/end time, status, the id of the associated task, the module
 * type, the schedule expression and the owning site.
 *
 * <p>Subclasses may override {@link #initialize()}, which is invoked by the
 * no-argument and the id-only constructors.
 */
public abstract class BaseCmsScheduler implements Serializable {

    // Entity and property names. Declared final so that shared constants
    // cannot be reassigned at runtime.
    public static final String REF = "CmsScheduler";
    public static final String PROP_ID = "id";
    public static final String PROP_SITE = "site";
    public static final String PROP_ASSOCIATE_ID = "associateId";
    public static final String PROP_MODULE_TYPE = "moduleType";
    public static final String PROP_NAME = "name";
    public static final String PROP_START_TIME = "startTime";
    public static final String PROP_END_TIME = "endTime";
    public static final String PROP_STATUS = "status";
    public static final String PROP_EXPRESSION = "expression";

    // Plain bean property exposed through getHashCode()/setHashCode();
    // this class does not override Object.hashCode() or equals().
    private int hashCode = Integer.MIN_VALUE;

    // primary key
    private Integer id;

    // fields
    private String name;
    private Date startTime;
    private Date endTime;
    private Integer status;
    private Integer associateId;
    private String moduleType;
    private String expression;
    private com.jeecms.cms.entity.main.CmsSite site;

    /**
     * Creates an empty instance and runs {@link #initialize()}.
     */
    public BaseCmsScheduler() {
        initialize();
    }

    /**
     * Constructor for primary key.
     *
     * @param id primary key of the record
     */
    public BaseCmsScheduler(Integer id) {
        this.setId(id);
        initialize();
    }

    /**
     * Creates a fully populated instance. Unlike the other constructors this
     * one does not call {@link #initialize()}.
     *
     * @param id          primary key
     * @param name        scheduler name
     * @param startTime   start time
     * @param endTime     end time
     * @param status      status value
     * @param associateId id of the associated task
     * @param moduleType  module type
     * @param expression  schedule expression
     * @param site        owning site
     */
    public BaseCmsScheduler(Integer id, String name, Date startTime, Date endTime,
            Integer status, Integer associateId, String moduleType, String expression,
            com.jeecms.cms.entity.main.CmsSite site) {
        this.id = id;
        this.name = name;
        this.startTime = startTime;
        this.endTime = endTime;
        this.status = status;
        this.associateId = associateId;
        this.moduleType = moduleType;
        this.expression = expression;
        this.site = site;
    }

    /**
     * Hook for subclasses; does nothing by default.
     */
    protected void initialize() {
    }

    public int getHashCode() {
        return hashCode;
    }

    public void setHashCode(int hashCode) {
        this.hashCode = hashCode;
    }

    public Integer getId() {
        return id;
    }

    public void setId(Integer id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Date getStartTime() {
        return startTime;
    }

    public void setStartTime(Date startTime) {
        this.startTime = startTime;
    }

    public Date getEndTime() {
        return endTime;
    }

    public void setEndTime(Date endTime) {
        this.endTime = endTime;
    }

    public Integer getStatus() {
        return status;
    }

    public void setStatus(Integer status) {
        this.status = status;
    }

    public Integer getAssociateId() {
        return associateId;
    }

    public void setAssociateId(Integer associateId) {
        this.associateId = associateId;
    }

    public String getModuleType() {
        return moduleType;
    }

    public void setModuleType(String moduleType) {
        this.moduleType = moduleType;
    }

    public String getExpression() {
        return expression;
    }

    public void setExpression(String expression) {
        this.expression = expression;
    }

    public com.jeecms.cms.entity.main.CmsSite getSite() {
        return site;
    }

    public void setSite(com.jeecms.cms.entity.main.CmsSite site) {
        this.site = site;
    }
}
持久对象 CmsScheduler.java 
package com.jeecms.cms.entity.assist; 



import java.util.Date; 

import com.jeecms.cms.entity.assist.base.BaseCmsScheduler; 

/**
 * Persistent scheduler entity.
 *
 * @author javacoo
 * @since 2011-11-07
 */
public class CmsScheduler extends BaseCmsScheduler {
    private static final long serialVersionUID = 1L;

    /** Status value: stopped. */
    public static final int STOP = 0;

    /** Status value: started (running). */
    public static final int START = 1;

    public CmsScheduler() {
        super();
    }

    public CmsScheduler(java.lang.Integer id) {
        super(id);
    }

    public CmsScheduler(Integer id, String name, Date startTime, Date endTime,
            Integer status, Integer associateId, String moduleType, String expression,
            com.jeecms.cms.entity.main.CmsSite site) {
        super(id, name, startTime, endTime, status, associateId, moduleType, expression, site);
    }

    /**
     * Tells whether the scheduler is stopped.
     *
     * @return {@code true} when the status is {@link #STOP}, or when no status
     *         has been set yet (the same default that {@link #init()} applies)
     */
    public boolean isStop() {
        // Keep the boxed value: unboxing a null status would throw a NullPointerException.
        Integer status = getStatus();
        return status == null || status.intValue() == STOP;
    }

    /**
     * Applies defaults: a missing status becomes {@link #STOP}.
     */
    public void init() {
        if (getStatus() == null) {
            setStatus(STOP);
        }
    }
}

HBM文件 CmsScheduler.hbm.xml 
 
 
 
 
false 
 
 
 
 
 
 
 
 
 
 
 

==============================定时任务模块相关互助类==================================== 
计划框架 
计划框架-任务调度 Scheduler.java 
package com.jeecms.common.scheduling.core; 

import java.util.Date; 
import java.util.Timer; 
import java.util.TimerTask; 
/**
 * Scheduling framework - task scheduler.
 *
 * <p>Each {@code Scheduler} owns one {@link Timer}. A recurring schedule is
 * built by chaining one-shot timer tasks: after every run the next execution
 * time is asked from the {@link ScheduleIterator} and a new one-shot task is
 * registered for it.
 *
 * @author javacoo
 * @since 2011-11-02
 */
public class Scheduler {

    /** Timer that performs the underlying one-shot scheduling. */
    private final Timer timer = new Timer();

    /**
     * One-shot timer task that runs the wrapped {@link SchedulerTask} and then
     * asks the enclosing scheduler to plan the following execution.
     *
     * @author javacoo
     * @since 2011-11-02
     */
    class SchedulerTimerTask extends TimerTask {
        private final SchedulerTask schedulerTask;
        private final ScheduleIterator iterator;

        public SchedulerTimerTask(SchedulerTask schedulerTask, ScheduleIterator iterator) {
            this.schedulerTask = schedulerTask;
            this.iterator = iterator;
        }

        public void run() {
            schedulerTask.run();
            reschedule(schedulerTask, iterator);
        }
    }

    public Scheduler() {
    }

    /**
     * Cancels the underlying timer.
     */
    public void cancel() {
        timer.cancel();
    }

    /**
     * Entry point: plans the first execution of a task.
     *
     * <p>The first execution time is taken from {@code iterator.next()}; a
     * {@code null} time cancels the task instead of scheduling it.
     *
     * @param schedulerTask task to run; must not have been scheduled before
     * @param iterator      source of execution times
     * @throws IllegalStateException when the task has already been scheduled or cancelled
     */
    public void schedule(SchedulerTask schedulerTask, ScheduleIterator iterator) {
        final Date firstRun = iterator.next();
        if (firstRun == null) {
            schedulerTask.cancel();
            return;
        }
        synchronized (schedulerTask.lock) {
            if (schedulerTask.state != SchedulerTask.VIRGIN) {
                throw new IllegalStateException("任务已经执行/取消");
            }
            schedulerTask.state = SchedulerTask.SCHEDULED;
            schedulerTask.timerTask = new SchedulerTimerTask(schedulerTask, iterator);
            timer.schedule(schedulerTask.timerTask, firstRun);
        }
    }

    /**
     * Plans the next execution of a task that has just run.
     *
     * @param schedulerTask task to run again
     * @param iterator      source of execution times
     */
    private void reschedule(SchedulerTask schedulerTask, ScheduleIterator iterator) {
        final Date nextRun = iterator.next();
        if (nextRun == null) {
            schedulerTask.cancel();
            return;
        }
        synchronized (schedulerTask.lock) {
            // A task cancelled in the meantime must not be planned again.
            if (schedulerTask.state == SchedulerTask.CANCELLED) {
                return;
            }
            schedulerTask.timerTask = new SchedulerTimerTask(schedulerTask, iterator);
            timer.schedule(schedulerTask.timerTask, nextRun);
        }
    }
}

    计划框架-时间生成器接口 ScheduleIterator.java 
    package com.jeecms.common.scheduling.core; 
    
    import java.util.Date; 
    /**
     * Scheduling framework - execution-time generator.
     *
     * <p>Describes the planned execution times of a {@code SchedulerTask} as a
     * sequence of {@link Date} objects handed out one at a time by
     * {@link #next()}. A {@code null} result cancels the task, i.e. it never
     * runs again.
     *
     * @author javacoo
     * @since 2011-11-02
     */
    public interface ScheduleIterator {

        /**
         * Returns the next planned execution time.
         *
         * @return the next execution time, or {@code null} to cancel the task
         */
        Date next();
    }

    计划任务抽象类 SchedulerTask.java 
    package com.jeecms.common.scheduling.core; 
    
    import java.util.TimerTask; 
    /**
     * Base class for tasks run by the scheduling framework.
     *
     * <p>A task starts in state {@link #VIRGIN} (never scheduled), becomes
     * {@link #SCHEDULED} once planned and {@link #CANCELLED} after
     * {@link #cancel()}. Code that may change the state synchronizes on
     * {@link #lock}.
     *
     * @author javacoo
     * @since 2011-11-02
     */
    public abstract class SchedulerTask implements Runnable {

        /** Initial state: never scheduled. */
        static final int VIRGIN = 0;
        /** The task has been scheduled. */
        static final int SCHEDULED = 1;
        /** The task has been cancelled. */
        static final int CANCELLED = 2;

        /** Lock guarding {@link #state} and {@link #timerTask}. */
        final Object lock = new Object();
        /** Current life-cycle state. */
        int state = VIRGIN;
        /** Timer task currently registered for this task, if any. */
        TimerTask timerTask;

        protected SchedulerTask() {
        }

        /** The work to perform; implemented by subclasses. */
        public abstract void run();

        /**
         * Cancels the task so that it is not started again; the registered
         * timer task, if any, is cancelled as well.
         *
         * @return {@code true} when the task was in state {@link #SCHEDULED}
         */
        public boolean cancel() {
            synchronized (lock) {
                final boolean wasScheduled = SCHEDULED == state;
                if (null != timerTask) {
                    timerTask.cancel();
                }
                state = CANCELLED;
                return wasScheduled;
            }
        }

        /**
         * Returns the scheduled execution time reported by the registered
         * timer task.
         *
         * @return that time, or {@code 0} when no timer task is registered
         */
        public long scheduledExecutionTime() {
            synchronized (lock) {
                if (null == timerTask) {
                    return 0;
                }
                return timerTask.scheduledExecutionTime();
            }
        }
    }

    计划框架-时间生成器接口实现类 SimpleScheduleIterator.java 
    package com.jeecms.common.scheduling.impl; 
    import java.util.Calendar; 
    import java.util.Date; 
    import java.util.GregorianCalendar; 
    
    import com.jeecms.common.scheduling.core.ScheduleIterator; 
    /** 
     * 计划框架-时间生成器接口实现类 
     * 
  • 返回 月/周/天/小时/分钟/秒 计划的下一次执行时间
  • *
  • 约定:参数以逗号分隔,*号表示无值
  • *
  • 参数解释: *
    第一位:每个月的第几周
    *
    第二位:每周的第几天
    *
    第三位:天(几号)
    *
    第四位:小时(24小时制)
    *
    第五位:分钟
    *
    第六位:秒
    *
  • *
  • 参数样例: *
    1,6,4,15,20,30 表示 从今天的15:20:30开始,每隔一个月执行一次,即下次执行时间是 下个月的第一周的第6天的15:20:30
    *
    *,6,4,15,20,30 表示 从今天的15:20:30开始,每隔一周执行一次,即下次执行时间是 下一周的第6天的15:20:30
    *
    *,*,4,15,20,30 表示 从今天的15:20:30开始,每隔一天执行一次,即下次执行时间是 下一天的15:20:30
    *
    *,*,*,15,20,30 表示 从今天的15:20:30开始,每隔一小时执行一次,即下次执行时间是 16:20:30
    *
    *,*,*,*,20,30 表示 从这个小时的20:30开始,每隔一分钟执行一次,即下次执行时间是 *:21:30
    *
    *,*,*,*,*,30 表示 从当前时间的30秒开始,每隔一秒执行一次,即下次执行时间是 *:*:31
    *
  • * @author javacoo * @since 2011-11-03 */ public class SimpleScheduleIterator implements ScheduleIterator { private final ScheduleParamBean scheduleParamBean; private final Calendar calendar = Calendar.getInstance(); private final Calendar orginCalendar = Calendar.getInstance(); public SimpleScheduleIterator(final ScheduleParamBean scheduleParamBean) { this(scheduleParamBean, new Date()); } public SimpleScheduleIterator(final ScheduleParamBean scheduleParamBean, Date date) { this.scheduleParamBean = scheduleParamBean; orginCalendar.setTime(date); calendar.setTime(date); if(null != scheduleParamBean.getWeekOfMonth()){ calendar.set(Calendar.WEEK_OF_MONTH, scheduleParamBean.getWeekOfMonth()); } //如果设置了每周的第几天和一个月的第几天,则忽略一个月的第几天 if(null != scheduleParamBean.getDayOfWeek()){ calendar.set(Calendar.DAY_OF_WEEK, scheduleParamBean.getDayOfWeek()); }else if(null != scheduleParamBean.getDayOfMonth()){ calendar.set(Calendar.DAY_OF_MONTH, scheduleParamBean.getDayOfMonth()); } if(null != scheduleParamBean.getHourOfDay()){ calendar.set(Calendar.HOUR_OF_DAY, scheduleParamBean.getHourOfDay()); } if(null != scheduleParamBean.getMinute()){ calendar.set(Calendar.MINUTE, scheduleParamBean.getMinute()); } if(null != scheduleParamBean.getSecond()){ calendar.set(Calendar.SECOND, scheduleParamBean.getSecond()); } calendar.set(Calendar.MILLISECOND, 0); //如果设置时间 大于当前时间 if (!calendar.getTime().before(date)) { System.out.println(calendar.getTime() +"大于当前时间:"+date); if(null != scheduleParamBean.getWeekOfMonth()){ calendar.add(Calendar.MONTH, -1); }else if(null != scheduleParamBean.getDayOfWeek()){ calendar.add(Calendar.DAY_OF_WEEK, -6); }else if(null != scheduleParamBean.getDayOfMonth()){ calendar.add(Calendar.DAY_OF_MONTH, -1); }else if(null != scheduleParamBean.getHourOfDay()){ calendar.add(Calendar.HOUR_OF_DAY, -1); }else if(null != scheduleParamBean.getMinute()){ calendar.add(Calendar.MINUTE, -1); }else if(null != scheduleParamBean.getSecond()){ calendar.add(Calendar.SECOND, -1); } 
}else{//如果小于,则会一下执行多次,所以在天,小时,分钟,秒 都加上相应时间差 System.out.println(calendar.getTime() +"小于当前时间:"+date); if(null != scheduleParamBean.getDayOfMonth()){ calendar.add(Calendar.DAY_OF_MONTH, orginCalendar.get(Calendar.DAY_OF_MONTH) - scheduleParamBean.getDayOfMonth()); }else if(null != scheduleParamBean.getHourOfDay()){ calendar.add(Calendar.HOUR_OF_DAY, orginCalendar.get(Calendar.HOUR_OF_DAY) - scheduleParamBean.getHourOfDay()); }else if(null != scheduleParamBean.getMinute()){ calendar.add(Calendar.MINUTE, orginCalendar.get(Calendar.MINUTE) - scheduleParamBean.getMinute()); }else if(null != scheduleParamBean.getSecond()){ calendar.add(Calendar.SECOND, orginCalendar.get(Calendar.SECOND) - scheduleParamBean.getSecond()); } } } public Date next() { if(null != scheduleParamBean.getWeekOfMonth()){ calendar.add(Calendar.MONTH, 1); }else if(null != scheduleParamBean.getDayOfWeek()){ calendar.add(Calendar.DAY_OF_WEEK, 6); }else if(null != scheduleParamBean.getDayOfMonth()){ calendar.add(Calendar.DAY_OF_MONTH, 1); }else if(null != scheduleParamBean.getHourOfDay()){ calendar.add(Calendar.HOUR_OF_DAY, 1); }else if(null != scheduleParamBean.getMinute()){ calendar.add(Calendar.MINUTE, 1); }else if(null != scheduleParamBean.getSecond()){ calendar.add(Calendar.SECOND, 1); } System.out.println("下次执行时间:"+calendar.getTime()); return calendar.getTime(); } } 时间计划参数bean ScheduleParamBean.java package com.jeecms.common.scheduling.impl; /** * 时间计划参数bean * @author javacoo * @since 2011-11-04 */ public class ScheduleParamBean { /**每个月的第几周,每周的第几天,每个月的第几天,小时(24小时制),分钟,秒*/ private Integer weekOfMonth,dayOfWeek,dayOfMonth,hourOfDay, minute, second; public ScheduleParamBean(){ } public ScheduleParamBean(Integer weekOfMonth, Integer dayOfWeek, Integer dayOfMonth, Integer hourOfDay, Integer minute, Integer second) { super(); this.weekOfMonth = weekOfMonth; this.dayOfWeek = dayOfWeek; this.dayOfMonth = dayOfMonth; this.hourOfDay = hourOfDay; this.minute = minute; this.second = second; } public Integer 
getWeekOfMonth() { return weekOfMonth; } public void setWeekOfMonth(Integer weekOfMonth) { this.weekOfMonth = weekOfMonth; } public Integer getDayOfWeek() { return dayOfWeek; } public void setDayOfWeek(Integer dayOfWeek) { this.dayOfWeek = dayOfWeek; } public Integer getDayOfMonth() { return dayOfMonth; } public void setDayOfMonth(Integer dayOfMonth) { this.dayOfMonth = dayOfMonth; } public Integer getHourOfDay() { return hourOfDay; } public void setHourOfDay(Integer hourOfDay) { this.hourOfDay = hourOfDay; } public Integer getMinute() { return minute; } public void setMinute(Integer minute) { this.minute = minute; } public Integer getSecond() { return second; } public void setSecond(Integer second) { this.second = second; } @Override public String toString() { return "ScheduleParamBean [dayOfMonth=" + dayOfMonth + ", dayOfWeek=" + dayOfWeek + ", hourOfDay=" + hourOfDay + ", minute=" + minute + ", second=" + second + ", weekOfMonth=" + weekOfMonth + "]"; } }

    采集相关 
    HTML解析工具类接口 ParseHtmlTool.java 
    package com.jeecms.common.crawler.util; 
    
    import java.util.List; 
    import java.util.Map; 
    
    /**
     * Contract for the HTML parsing helper used by the crawler.
     *
     * @author javacoo
     * @since 2011-10-31
     */
    public interface ParseHtmlTool {

        /**
         * Extracts the link URLs.
         *
         * @param orginHtml raw HTML
         * @return the link URLs
         */
        List<String> getUrlList(String orginHtml);

        /**
         * Extracts the link titles.
         *
         * @param orginHtml raw HTML
         * @return the link titles
         */
        List<String> getTitleList(String orginHtml);

        /**
         * Extracts the HTML of the configured content area.
         *
         * @param orginHtml raw HTML
         * @return the HTML of the configured area
         */
        String getHtml(String orginHtml);

        /**
         * Extracts the links together with their titles.
         *
         * @param orginHtml raw HTML
         * @return one map per link, holding its URL and its title
         */
        List<Map<String, String>> getUrlAndTitleMap(String orginHtml);
    }
    

    HTML解析工具,HtmlParser实现类 HtmlParserImpl.java 
    package com.jeecms.common.crawler.util; 
    
    import java.io.BufferedReader; 
    import java.io.File; 
    import java.io.FileInputStream; 
    import java.io.IOException; 
    import java.io.InputStreamReader; 
    import java.net.URISyntaxException; 
    import java.util.ArrayList; 
    import java.util.HashMap; 
    import java.util.Iterator; 
    import java.util.List; 
    import java.util.Map; 
    import java.util.regex.Matcher; 
    import java.util.regex.Pattern; 
    
    import org.apache.commons.lang.StringUtils; 
    import org.htmlparser.Node; 
    import org.htmlparser.NodeFilter; 
    import org.htmlparser.Parser; 
    import org.htmlparser.filters.HasAttributeFilter; 
    import org.htmlparser.filters.NodeClassFilter; 
    import org.htmlparser.filters.TagNameFilter; 
    import org.htmlparser.nodes.RemarkNode; 
    import org.htmlparser.util.NodeList; 
    import org.htmlparser.util.ParserException; 
    
    import com.jeecms.cms.entity.assist.CmsAcquisition; 
    import com.jeecms.common.crawler.ParamBean; 
    /** 
     * HTML解析工具,HtmlParser实现类 
     * @author javacoo 
     * @since 2011-10-31 
     */ 
    public class HtmlParserImpl implements ParseHtmlTool{ 
    /**连接集合标志*/ 
        private static String LINK_KEY = "linkKey"; 
        /**标题集合标志*/ 
    private static String TITLE_KEY = "titleKey"; 
    /**单标签标志*/ 
    private static String SINGLE_TAG = "singleTag"; 
    /**连接正则表达式*/ 
    private static String LINK_REGX = "(.*?)"; 
    /**正则表达式对象*/ 
    private Pattern pt = Pattern.compile(LINK_REGX); 
    /**采集参数bean*/ 
    private ParamBean paramBean; 
    
    public HtmlParserImpl(CmsAcquisition acqu){ 
    parseRequestParam(acqu); 
    } 
    
    /** 
     * 取得标题集合 
     * @param orginHtml 原始HTML 
     * @return 标题集合 
     */ 
    public List getTitleList(String orginHtml) { 
    orginHtml = getHtmlByFilter(paramBean.getLinksetStartMap(), paramBean.getLinksetEndMap(),orginHtml); 
    if (StringUtils.isNotEmpty(orginHtml)) { 
    return getUrlOrTitleListByType(orginHtml,TITLE_KEY); 
    } 
    return null; 
    } 
    
    /** 
     * 取得连接集合 
     * @param orginHtml 原始HTML 
     * @return 连接集合 
     */ 
    public List getUrlList(String orginHtml) { 
    orginHtml = getHtmlByFilter(paramBean.getLinksetStartMap(), paramBean.getLinksetEndMap(),orginHtml); 
    if (StringUtils.isNotEmpty(orginHtml)) { 
    return getUrlOrTitleListByType(orginHtml,LINK_KEY); 
    } 
    return null; 
    } 
    /** 
         * 取得指定区域的HTML内容 
         * @param orginHtml 原始HTML 
         * @return 指定区域的HTML内容 
         * @throws ParserException 
         */ 
    public String getHtml(String orginHtml) { 
    orginHtml = getHtmlByFilter(paramBean.getContentStartMap(), paramBean.getContentEndMap(),orginHtml); 
    return orginHtml; 
    } 
    /** 
     * 取得连接标题Map 
     * @param orginHtml 原始HTML 
     * @return 连接标题Map 
     */ 
    public List> getUrlAndTitleMap(String orginHtml){ 
    return getUrlAandTitleMap(orginHtml); 
    } 
    /** 
     * 解析采集参数,并封装到ParamBean 
     * @param acqu 原始采集参数 
     * @return 采集参数封装bean 
     */ 
    private void parseRequestParam(CmsAcquisition acqu){ 
    paramBean = new ParamBean(); 
    if(!StringUtils.isEmpty(acqu.getLinksetStart())){ 
    paramBean.setLinksetStartMap(populateParamMap(acqu.getLinksetStart())); 
    } 
    if(!StringUtils.isEmpty(acqu.getLinksetEnd())){ 
    paramBean.setLinksetEndMap(populateParamMap(acqu.getLinksetEnd())); 
    } 
    if(!StringUtils.isEmpty(acqu.getContentStart())){ 
    paramBean.setContentStartMap(populateParamMap(acqu.getContentStart())); 
    } 
    if(!StringUtils.isEmpty(acqu.getContentEnd())){ 
    paramBean.setContentEndMap(populateParamMap(acqu.getContentEnd())); 
    } 
    } 
    /** 
     * 得到连接标题MAP 
     * @param html html内容 
     * @return 连接或者标题集合 
     */ 
    private List> getUrlAandTitleMap(String html) { 
    html = getHtmlByFilter(paramBean.getLinksetStartMap(), paramBean.getLinksetEndMap(),html); 
    List> resultMapList = new ArrayList>(); 
    Map resultMap = null; 
    Matcher m = pt.matcher(html); 
    while (m.find()) { 
    if(StringUtils.isNotEmpty(m.group(1)) && StringUtils.isNotEmpty(m.group(2))){ 
    resultMap = new HashMap(); 
    resultMap.put(LINK_KEY, m.group(1)); 
    resultMap.put(TITLE_KEY, m.group(2)); 
    resultMapList.add(resultMap); 
    } 
    } 
    return resultMapList; 
    } 
    /** 
     * 得到地址集 
     * @param html html内容 
     * @param type 1 :取得连接集合,2:取得标题集合 
     * @return 连接或者标题集合 
     */ 
    private List getUrlOrTitleListByType(String html, String type) { 
    List resultList = new ArrayList(); 
    Matcher m = pt.matcher(html); 
    String result = ""; 
    int pos = 1; 
    if(TITLE_KEY.equals(type)){ 
    pos = 2; 
    } 
    while (m.find()) { 
    result = m.group(pos); 
    resultList.add(result); 
    } 
    return resultList; 
    } 
    /** 
         * 取得指定区域的HTML内容 
         * @param tagMap 标签MAP 
         * @param removeTagMap 要过滤的标签MAP 
         * @param orginHtml 原始HTML 
         * @return 指定区域的HTML内容 
         * @throws ParserException 
         */ 
    private String getHtmlByFilter(Map tagMap, 
    Map removeTagMap, String orginHtml) { 
    try { 
    Parser parser = new Parser(); 
    parser.setInputHTML(orginHtml); 
    // 第一步取得指定属性/标签内容 
    String tempKey = null; 
    String tempValue = null; 
    String[] tempValueArr = null; 
    StringBuilder sb = new StringBuilder(); 
    NodeFilter filter = null; 
    for(Iterator it = tagMap.keySet().iterator(); it.hasNext();){ 
    tempKey = it.next(); 
    tempValue = tagMap.get(tempKey); 
    if(tempValue.contains("|")){ 
    tempValueArr = tempValue.split("\\|"); 
    }else{ 
    tempValueArr = new String[]{tempValue}; 
    } 
    for(String value : tempValueArr){ 
    filter = populateFilter(tempKey,value); 
    appendHtmlByFilter(parser, filter, sb); 
    } 
    } 
    // 第二步过滤指定属性/标签内容 
    String contentHtml = sb.toString(); 
    for (Iterator it = removeTagMap.keySet().iterator(); it 
    .hasNext();) { 
    tempKey = it.next(); 
    tempValue = removeTagMap.get(tempKey); 
    if(tempValue.contains("|")){ 
    tempValueArr = tempValue.split("\\|"); 
    }else{ 
    tempValueArr = new String[]{tempValue}; 
    } 
    for(String value : tempValueArr){ 
    filter = populateFilter(tempKey,value); 
    contentHtml = removeHtmlByFilter(parser, filter, contentHtml); 
    } 
    } 
    //第三步过滤注释 
    filter = new NodeClassFilter(RemarkNode.class); 
    contentHtml = removeHtmlByFilter(parser, filter, contentHtml); 
    System.out.println("=================================结果======================================="); 
    System.out.println(contentHtml); 
    return contentHtml; 
    } catch (ParserException e) { 
    // TODO Auto-generated catch block 
    e.printStackTrace(); 
    } 
    return ""; 
    } 
    
    /** 
     * 解析并组装采集参数,支持标签属性/值形式和标签名称形式,可混合使用 
     * 
  • 约定采集参数格式如下
  • *
  • 1,标签属性/值形式,如:class=articleList|tips,id=fxwb|fxMSN|fxMSN
  • *
  • 2,标签名称形式,如:div,p,span
  • *
  • 3,混合形式,如:class=articleList|tips,id=fxwb|fxMSN|fxMSN,div,p,span
  • * @param paramStr 参数字符串 */ private Map populateParamMap(String paramStr) { Map paramMap = new HashMap(); String[] paramStrArr = paramStr.split(","); String[] tempStrArr = null; StringBuilder sb = new StringBuilder(); for(String temp : paramStrArr){ if(temp.contains("=")){ tempStrArr = temp.split("="); paramMap.put(tempStrArr[0], tempStrArr[1]); }else{ if(StringUtils.isNotEmpty(temp)){ sb.append(temp).append("|"); } } } if(StringUtils.isNotEmpty(sb.toString())){ paramMap.put(SINGLE_TAG, sb.substring(0, sb.length() - 1)); } return paramMap; } /** * 组装过滤器 * @param key 键 * @param value 值 * @return 过滤器 */ private NodeFilter populateFilter(String key,String value) { NodeFilter filter; if(SINGLE_TAG.equals(key)){ filter = new TagNameFilter(value); }else{ filter = new HasAttributeFilter(key,value); } return filter; } /** * 过滤指定属性标签HTML * @param parser 解析器 * @param filter 属性过滤器 * @param orginHtml 原始HTML * @return 过滤后HTML * @throws ParserException */ private String removeHtmlByFilter(Parser parser, NodeFilter filter,String orginHtml) throws ParserException { parser.setInputHTML(orginHtml); NodeList nodes = parser.extractAllNodesThatMatch(filter); for (int i = 0; i < nodes.size(); i++) { Node textnode = (Node) nodes.elementAt(i); orginHtml = StringUtils.remove(orginHtml, textnode.toHtml()); } return orginHtml; } /** * 取得所有指定属性/标签的HTML * @param parser 解析器 * @param filter 过滤器 * @param sb * @throws ParserException */ private void appendHtmlByFilter(Parser parser, NodeFilter filter, StringBuilder sb) throws ParserException { NodeList nodes = parser.extractAllNodesThatMatch(filter); for (int i = 0; i < nodes.size(); i++) { Node textnode = (Node) nodes.elementAt(i); sb.append(textnode.toHtml()); } } /** * 解析并组装采集参数,支持标签属性/值形式和标签名称形式,可混合使用 *
  • 约定采集参数格式如下
  • *
  • 1,标签属性/值形式,如:class=articleList|tips,id=fxwb|fxMSN|fxMSN
  • *
  • 2,标签名称形式,如:div,p,span
  • *
  • 3,混合形式,如:class=articleList|tips,id=fxwb|fxMSN|fxMSN,div,p,span
  • * @param paramMap 参数map * @param str 参数字符串 */ private void populateParamMap(Map paramMap,String paramStr) { String[] paramStrArr = paramStr.split(","); String[] tempStrArr = null; StringBuilder sb = new StringBuilder(); for(String temp : paramStrArr){ if(temp.contains("=")){ tempStrArr = temp.split("="); paramMap.put(tempStrArr[0], tempStrArr[1]); }else{ if(StringUtils.isNotEmpty(temp)){ sb.append(temp).append("|"); } } } if(StringUtils.isNotEmpty(sb.toString())){ paramMap.put(SINGLE_TAG, sb.substring(0, sb.length() - 1)); } } /** * 测试方法-打开文件并返回内容 * @param szFileName 文件绝对地址 * @param charset 字符集 * @return 内容 */ public static String openFile(String szFileName,String charset) { try { BufferedReader bis = new BufferedReader(new InputStreamReader( new FileInputStream(new File(szFileName)), charset)); StringBuilder szContent = new StringBuilder(); String szTemp; while ((szTemp = bis.readLine()) != null) { szContent.append(szTemp).append("\n"); } bis.close(); return szContent.toString(); } catch (Exception e) { return ""; } } /** * 测试取得连接地址和标题 * @throws ParserException */ public void testFetchLinkAndTitle() throws ParserException{ String html = openFile("F:\\4.htm","UTF-8"); String result = ""; Map map = new HashMap(); map.put("class", "m_list"); Map notMap = new HashMap(); //notMap.put("class", "atc_ic_f"); result = getHtmlByFilter(map,notMap,html); System.out.println("=============================result============================"); System.out.println(result); System.out.println("=========================================================="); Pattern pt = Pattern.compile("(.*?)"); Matcher m = pt.matcher(result); String link = null; String title = null; while (m.find()) { link = m.group(1); title = m.group(2); if (StringUtils.isNotEmpty(link)) { System.out.println("url : " + link); System.out.println("title : " + title); } } } /** * 测试取得内容 * @throws ParserException */ public void testFetchContent() throws ParserException{ String html = 
openFile("F:\\6.shtml","GB2312"); Map map = new HashMap(); map.put("id", "artibody"); Map notMap = new HashMap(); notMap.put(SINGLE_TAG, "style|script"); notMap.put("type", "text/javascript"); notMap.put("class", "icon_fx|blkComment otherContent_01"); notMap.put("style", "text-align: right;padding-right:10px;|margin-top:6px;|font-size: 12px ! important;|font-size:12px"); notMap.put("id", "fxwb|fxMSN|fxMSN|comment_t_show_top"); getHtmlByFilter(map,notMap,html); } /** * 测试解析参数 */ public void testParseParam(){ Map map = new HashMap(); populateParamMap(map,"class=articleList|tips,p,div"); String tempKey = null; String tempValue = null; String[] tempValueArr = null; for (Iterator it = map.keySet().iterator(); it.hasNext();) { tempKey = it.next(); tempValue = map.get(tempKey); if(tempValue.contains("|")){ tempValueArr = tempValue.split("\\|"); }else{ tempValueArr = new String[]{tempValue}; } for(String value : tempValueArr){ System.out.println("tempKey:" + tempKey); System.out.println("value:" + value); } } } /** * 测试过滤标签 * @throws ParserException */ public void testRemarkFilter() throws ParserException{ String html = openFile("F:\\6.shtml","GB2312"); System.out.println("=========================过滤注释前HTML=================================="); System.out.println(html); NodeFilter filter = new NodeClassFilter(RemarkNode.class); html = removeHtmlByFilter(new Parser(), filter, html); System.out.println("=========================过滤注释后HTML=================================="); System.out.println(html); } public static void main(String[] args) throws ParserException, URISyntaxException, IOException { HtmlParserImpl parseHtmlTool = new HtmlParserImpl(new CmsAcquisition()); //parseHtmlTool.testParseParam(); //parseHtmlTool.testFetchLinkAndTitle(); //parseHtmlTool.testFetchContent(); //parseHtmlTool.testRemarkFilter(); } }

    采集参数封装bean ParamBean.java 
    package com.jeecms.common.crawler; 
    
    import java.util.HashMap; 
    import java.util.Map; 
    /**
     * Holder for the parsed acquisition settings: four selector maps that are
     * empty until replaced through the setters.
     *
     * @author javacoo
     * @since 2011-10-31
     */
    public class ParamBean {
        /** Selectors of the area that holds the links to collect. */
        private Map<String, String> linksetStartMap = new HashMap<String, String>();
        /** Selectors of the parts to strip from the link area. */
        private Map<String, String> linksetEndMap = new HashMap<String, String>();
        /** Selectors of the area that holds the content to collect. */
        private Map<String, String> contentStartMap = new HashMap<String, String>();
        /** Selectors of the parts to strip from the content area. */
        private Map<String, String> contentEndMap = new HashMap<String, String>();

        public Map<String, String> getLinksetStartMap() {
            return linksetStartMap;
        }

        public void setLinksetStartMap(Map<String, String> linksetStartMap) {
            this.linksetStartMap = linksetStartMap;
        }

        public Map<String, String> getLinksetEndMap() {
            return linksetEndMap;
        }

        public void setLinksetEndMap(Map<String, String> linksetEndMap) {
            this.linksetEndMap = linksetEndMap;
        }

        public Map<String, String> getContentStartMap() {
            return contentStartMap;
        }

        public void setContentStartMap(Map<String, String> contentStartMap) {
            this.contentStartMap = contentStartMap;
        }

        public Map<String, String> getContentEndMap() {
            return contentEndMap;
        }

        public void setContentEndMap(Map<String, String> contentEndMap) {
            this.contentEndMap = contentEndMap;
        }
    }
    

    队列 Queue.java 
    package com.jeecms.common.crawler; 
    
    import java.util.LinkedList; 
    /**
     * Simple FIFO queue backed by a {@link LinkedList}.
     *
     * @author javacoo
     * @since 2011-11-01
     * @param <T> element type
     */
    public class Queue<T> {
        private LinkedList<T> queue = new LinkedList<T>();

        /**
         * Appends an element at the tail.
         *
         * @param t element to add
         */
        public void enQueue(T t){
            queue.addLast(t);
        }

        /**
         * Removes and returns the head element.
         *
         * @return the head element
         * @throws java.util.NoSuchElementException when the queue is empty
         */
        public T deQueue(){
            return queue.removeFirst();
        }

        /**
         * @return {@code true} when the queue holds no elements
         */
        public boolean isEmpty(){
            return queue.isEmpty();
        }

        /**
         * @param t element to look for
         * @return {@code true} when the queue contains {@code t}
         */
        public boolean contains(T t){
            return queue.contains(t);
        }

        /**
         * @return the number of queued elements
         */
        public int getSize(){
            return queue.size();
        }
    }
    

    URL队列 UrlQueue.java 
    package com.jeecms.common.crawler; 
    
    import java.util.HashSet; 
    import java.util.Map; 
    import java.util.Set; 
    
    import org.springframework.util.CollectionUtils; 
    
    /** 
     * URL队列 
     * @author javacoo 
     * @since 2011-11-01 
     * @param > 
     */ 
    public class UrlQueue { 
    /**已访问URL集合*/ 
    private Set> visitedUrl = new HashSet>(); 
    /**待访问URL集合*/ 
    private Queue> unVisitedUrl = new Queue>(); 
    
    /** 
     * 获得 URL 队列 
     * @return 
     */ 
    public Queue> getUnVisitedUrl() { 
    return unVisitedUrl; 
    } 
    /** 
     * 未访问的 URL 出队列 
     * @return 
     */ 
    public Map unVisitedUrlDeQueue() { 
    return unVisitedUrl.deQueue(); 
    } 
    /** 
     * 保证每个 URL 只被访问一次 
     * @param url 
     */ 
    public void addUnVisitedUrl(Map urlMap) { 
    if (!CollectionUtils.isEmpty(urlMap) && !unVisitedUrl.contains(urlMap) && !visitedUrl.contains(urlMap)){ 
    unVisitedUrl.enQueue(urlMap); 
    } 
    } 
    /** 
     * 判断是否为空 
     * @return 
     */ 
    public boolean isEmpty(){ 
    return unVisitedUrl.isEmpty(); 
    } 
    /** 
     * 未访问URL数量 
     * @return 
     */ 
    public int getUnVisitedUrlNum(){ 
    return unVisitedUrl.getSize(); 
    } 
    /** 
     * 添加到访问过的URL队列中 
     * @param urlMap 
     */ 
    public void addVisitedUrl(Map urlMap){ 
    visitedUrl.add(urlMap); 
    } 
    /** 
     * 删除访问过的URL 
     * @param urlMap 
     */ 
    public void removeVisitedUrl(Map urlMap){ 
    visitedUrl.remove(urlMap); 
    } 
    /** 
     * 已访问URL数量 
     * @return 
     */ 
    public int getVisitedUrlNum(){ 
    return visitedUrl.size(); 
    } 
    
    } 
    

    接下来是XML配置 
    ==============================定时任务模块XML配置==================================== 
    dao配置 
     
    manage配置 
     
    SERVICE配置 
     
     
    

    接下来是messages_zh_CN.properties 添加了常量 
    ==============================messages_zh_CN.properties==================================== 
    
    cmsScheduler.acquisition.function=\u91C7\u96C6\u4EFB\u52A1\u7BA1\u7406 
    cmsScheduler.name=\u4EFB\u52A1\u540D\u79F0 
    cmsScheduler.expression=\u8BA1\u5212\u8868\u8FBE\u5F0F 
    cmsScheduler.expression.help=\u53C2\u6570\u4EE5\u9017\u53F7\u5206\u9694,*\u53F7\u8868\u793A\u65E0\u503C,\u51716\u4F4D\:\u6BCF\u4E2A\u6708\u7684\u7B2C\u51E0\u5468,\u6BCF\u5468\u7684\u7B2C\u51E0\u5929,\u5929(\u51E0\u53F7),\u5C0F\u65F6(24\u5C0F\u65F6\u5236),\u5206\u949F,\u79D2\u3002\u5982\uFF1A1,6,4,15,20,30       \u8868\u793A \u4ECE\u4ECA\u5929\u768415\:20\:30\u5F00\u59CB\uFF0C\u6BCF\u9694\u4E00\u4E2A\u6708\u6267\u884C\u4E00\u6B21,\u5373\u4E0B\u6B21\u6267\u884C\u65F6\u95F4\u662F  \u4E0B\u4E2A\u6708\u7684\u7B2C\u4E00\u5468\u7684\u7B2C6\u5929\u768415\:20\:30 
    cmsScheduler.associate=\u5173\u8054\u4EFB\u52A1 
    cmsScheduler.status.0=\u505C\u6B62 
    cmsScheduler.status.1=\u8FD0\u884C 
    cmsScheduler.opt.start=\u5F00\u59CB 
    cmsScheduler.opt.end=\u505C\u6B62 
    cmsScheduler.status=\u72B6\u6001 
    cmsScheduler.startTime=\u5F00\u59CB\u65F6\u95F4 
    cmsScheduler.endTime=\u7ED3\u675F\u65F6\u95F4 
    cmsScheduler.log.delete=\u5220\u9664\u4EFB\u52A1 
    

    ==============================模板==================================== 
    scheduler/add.html 
     
     
     
     
     
    <#include "/jeecms_sys/head.html"/> 
     
     
     
     
    
    <@s.m "global.position"/>: <@s.m "cmsScheduler.acquisition.function"/> - <@s.m "global.add"/>
    " onclick="this.form.action='v_listBy.do';"/>
    <@p.form id="jvForm" action="o_save.do" labelWidth="12"> <@p.text colspan="1" width="50" label="cmsScheduler.name" name="name" required="true" class="required" maxlength="50"/> <@p.td colspan="1" width="50" label="cmsScheduler.associate" required="true"> <@p.select list=schedulerTaskList name="associateId" listKey="id" listValue="name"/> <@p.tr/> <@p.textarea colspan="2" label="cmsScheduler.expression" name="expression" help="cmsScheduler.expression.help" helpPosition="3" rows="1" cols="70" required="true" class="required" /><@p.tr/> <@p.td colspan="2"><@p.submit code="global.submit"/>   <@p.reset code="global.reset"/>
    scheduler/edit.html <#include "/jeecms_sys/head.html"/>
    <@s.m "global.position"/>: <@s.m "cmsScheduler.acquisition.function"/> - <@s.m "global.edit"/>
    " onclick="history.back();"/>
    <@p.form id="jvForm" action="o_update.do" labelWidth="12"> <@p.text colspan="1" width="50" label="cmsScheduler.name" name="name" value=cmsScheduler.name required="true" class="required" maxlength="50"/> <@p.td colspan="1" width="50" label="cmsScheduler.associate" required="true"> <@p.select list=schedulerTaskList name="associateId" value=cmsScheduler.associateId listKey="id" listValue="name"/> <@p.tr/> <@p.textarea colspan="2" label="cmsScheduler.expression" name="expression" rows="1" help="cmsScheduler.expression.help" helpPosition="3" value=cmsScheduler.expression required="true" class="required" cols="70" /><@p.tr/> <@p.td colspan="2"> <@p.hidden name="id" value=cmsScheduler.id/> <@p.submit code="global.submit"/>   <@p.reset code="global.reset"/>
    scheduler/list.html <#include "/jeecms_sys/head.html"/>
    <@s.m "global.position"/>: <@s.m "cmsScheduler.acquisition.function"/> - <@s.m "global.list"/>
    " onclick="this.form.action='v_add.do';"/>
    <@p.table value=list;cmsScheduler,i,has_next><#rt/> <@p.column title="" width="20"> <#t/> <#t/> <@p.column title="ID">${cmsScheduler.id}<#t/> <@p.column code="cmsScheduler.name">${cmsScheduler.name}<#t/> <@p.column code="cmsScheduler.status" align="center"><#if cmsScheduler.status==1><@s.m "cmsScheduler.status."+cmsScheduler.status/><#if cmsScheduler.status==1><#t/> <@p.column code="cmsScheduler.startTime" align="center">${(cmsScheduler.startTime?string('yyyy-MM-dd HH:mm:ss'))!}<#t/> <@p.column code="cmsScheduler.endTime" align="center">${(cmsScheduler.endTime?string('yyyy-MM-dd HH:mm:ss'))!}<#t/> <@p.column code="global.operate" align="center"> <#if cmsScheduler.status==0> <@s.m "cmsScheduler.opt.start"/> | <#rt/> <#else> <@s.m "cmsScheduler.opt.start"/> | <#rt/> <#if cmsScheduler.status==1 || cmsScheduler.status==1> <@s.m "cmsScheduler.opt.end"/> | <#rt/> <#else> <@s.m "cmsScheduler.opt.end"/> | <#rt/> <@s.m "global.edit"/> | <#rt/> ')) {return false;}"><@s.m "global.delete"/><#t/> <#t/>
    " onclick="optDelete();"/>
    <#include "/common/alert_message.html"/>

    generate_left.html 有修改 
    加上 
    <@cms_perm url="/scheduler/v_listBy.do"> 
    
  • <@s.m "cmsScheduler.acquisition.function"/>
  • 你可能感兴趣的:(定时任务管理)