自己封装的一个Solr Data Import Request Handler Scheduler

经过将近一天的努力,终于搞定了Solr的 Data Import Request Handler Scheduler。

Scheduler主要解决两个问题:

1.定时增量更新索引。

2.定时重做索引。

经过测试,Scheduler已经可以实现完全基于配置,无需开发功能,无需人工干预的情况下实现以上两个功能(结合 Solr 的 Data Import Request Handler前提下)。

为了方便以后使用,我将代码放到http://code.google.com上,地址是:http://code.google.com/p/solr-dataimport-scheduler/ 

这里贴出一下主要的代码备忘:

SolrDataImportProperties.java 配置文件读取:

View Code

BaseTimerTask.java  TimerTask基类,封装了一些基本的属性读取、请求发送方法:

View Code
  1 package org.apache.solr.handler.dataimport.scheduler;

  2 

  3 import java.io.IOException;

  4 import java.net.HttpURLConnection;

  5 import java.net.MalformedURLException;

  6 import java.net.URL;

  7 import java.text.DateFormat;

  8 import java.text.ParseException;

  9 import java.text.SimpleDateFormat;

 10 import java.util.Date;

 11 import java.util.Timer;

 12 import java.util.TimerTask;

 13 

 14 import org.slf4j.Logger;

 15 import org.slf4j.LoggerFactory;

 16 

 17 public abstract class BaseTimerTask extends TimerTask {

 18     protected String syncEnabled;

 19     protected String[] syncCores;

 20     protected String server;

 21     protected String port;

 22     protected String webapp;

 23     protected String params;

 24     protected String interval;

 25     protected String cores;

 26     protected SolrDataImportProperties p;

 27     protected boolean singleCore;

 28 

 29     protected String reBuildIndexParams;

 30     protected String reBuildIndexBeginTime;

 31     protected String reBuildIndexInterval;

 32 

 33     protected static final Logger logger = LoggerFactory

 34             .getLogger(BaseTimerTask.class);

 35 

 36     public BaseTimerTask(String webAppName, Timer t) throws Exception {

 37         // load properties from global dataimport.properties

 38         p = new SolrDataImportProperties();

 39         reloadParams();

 40         fixParams(webAppName);

 41 

 42         if (!syncEnabled.equals("1"))

 43             throw new Exception("Schedule disabled");

 44 

 45         if (syncCores == null

 46                 || (syncCores.length == 1 && syncCores[0].isEmpty())) {

 47             singleCore = true;

 48             logger.info("<index update process> Single core identified in dataimport.properties");

 49         } else {

 50             singleCore = false;

 51             logger.info("<index update process> Multiple cores identified in dataimport.properties. Sync active for: "

 52                     + cores);

 53         }

 54     }

 55 

 56     protected void reloadParams() {

 57         p.loadProperties(true);

 58         syncEnabled = p.getProperty(SolrDataImportProperties.SYNC_ENABLED);

 59         cores = p.getProperty(SolrDataImportProperties.SYNC_CORES);

 60         server = p.getProperty(SolrDataImportProperties.SERVER);

 61         port = p.getProperty(SolrDataImportProperties.PORT);

 62         webapp = p.getProperty(SolrDataImportProperties.WEBAPP);

 63         params = p.getProperty(SolrDataImportProperties.PARAMS);

 64         interval = p.getProperty(SolrDataImportProperties.INTERVAL);

 65         syncCores = cores != null ? cores.split(",") : null;

 66 

 67         reBuildIndexParams = p

 68                 .getProperty(SolrDataImportProperties.REBUILDINDEXPARAMS);

 69         reBuildIndexBeginTime = p

 70                 .getProperty(SolrDataImportProperties.REBUILDINDEXBEGINTIME);

 71         reBuildIndexInterval = p

 72                 .getProperty(SolrDataImportProperties.REBUILDINDEXINTERVAL);

 73 

 74     }

 75 

 76     protected void fixParams(String webAppName) {

 77         if (server == null || server.isEmpty())

 78             server = "localhost";

 79         if (port == null || port.isEmpty())

 80             port = "8080";

 81         if (webapp == null || webapp.isEmpty())

 82             webapp = webAppName;

 83         if (interval == null || interval.isEmpty() || getIntervalInt() <= 0)

 84             interval = "30";

 85         if (reBuildIndexBeginTime == null || reBuildIndexBeginTime.isEmpty())

 86             interval = "00:00:00";

 87         if (reBuildIndexInterval == null || reBuildIndexInterval.isEmpty()

 88                 || getReBuildIndexIntervalInt() <= 0)

 89             reBuildIndexInterval = "0";

 90     }

 91 

 92     protected void prepUrlSendHttpPost(String params) {

 93         String coreUrl = "http://" + server + ":" + port + "/" + webapp

 94                 + params;

 95         sendHttpPost(coreUrl, null);

 96     }

 97 

 98     protected void prepUrlSendHttpPost(String coreName, String params) {

 99         String coreUrl = "http://" + server + ":" + port + "/" + webapp + "/"

100                 + coreName + params;

101         sendHttpPost(coreUrl, coreName);

102     }

103 

104     protected void sendHttpPost(String completeUrl, String coreName) {

105         DateFormat df = new SimpleDateFormat("dd.MM.yyyy HH:mm:ss SSS");

106         Date startTime = new Date();

107 

108         // prepare the core var

109         String core = coreName == null ? "" : "[" + coreName + "] ";

110 

111         logger.info(core

112                 + "<index update process> Process started at .............. "

113                 + df.format(startTime));

114 

115         try {

116 

117             URL url = new URL(completeUrl);

118             HttpURLConnection conn = (HttpURLConnection) url.openConnection();

119 

120             conn.setRequestMethod("POST");

121             conn.setRequestProperty("type", "submit");

122             conn.setDoOutput(true);

123 

124             // Send HTTP POST

125             conn.connect();

126 

127             logger.info(core + "<index update process> Full URL\t\t\t\t"

128                     + conn.getURL());

129             logger.info(core + "<index update process> Response message\t\t\t"

130                     + conn.getResponseMessage());

131             logger.info(core + "<index update process> Response code\t\t\t"

132                     + conn.getResponseCode());

133 

134             // listen for change in properties file if an error occurs

135             if (conn.getResponseCode() != 200) {

136                 reloadParams();

137             }

138 

139             conn.disconnect();

140             logger.info(core

141                     + "<index update process> Disconnected from server\t\t"

142                     + server);

143             Date endTime = new Date();

144             logger.info(core

145                     + "<index update process> Process ended at ................ "

146                     + df.format(endTime));

147         } catch (MalformedURLException mue) {

148             logger.error("Failed to assemble URL for HTTP POST", mue);

149         } catch (IOException ioe) {

150             logger.error(

151                     "Failed to connect to the specified URL while trying to send HTTP POST",

152                     ioe);

153         } catch (Exception e) {

154             logger.error("Failed to send HTTP POST", e);

155         }

156     }

157 

158     public int getIntervalInt() {

159         try {

160             return Integer.parseInt(interval);

161         } catch (NumberFormatException e) {

162             logger.warn(

163                     "Unable to convert 'interval' to number. Using default value (30) instead",

164                     e);

165             return 30; // return default in case of error

166         }

167     }

168 

169     public int getReBuildIndexIntervalInt() {

170         try {

171             return Integer.parseInt(reBuildIndexInterval);

172         } catch (NumberFormatException e) {

173             logger.info(

174                     "Unable to convert 'reBuildIndexInterval' to number. do't rebuild index.",

175                     e);

176             return 0; // return default in case of error

177         }

178     }

179 

180     public Date getReBuildIndexBeginTime() {

181         Date beginDate = null;

182         try {

183             SimpleDateFormat sdfDate = new SimpleDateFormat("yyyy-MM-dd");

184             String dateStr = sdfDate.format(new Date());

185             beginDate = sdfDate.parse(dateStr);

186             if (reBuildIndexBeginTime == null

187                     || reBuildIndexBeginTime.isEmpty()) {

188                 return beginDate;

189             }

190             if (reBuildIndexBeginTime.matches("\\d{2}:\\d{2}:\\d{2}")) {

191                 SimpleDateFormat sdf = new SimpleDateFormat(

192                         "yyyy-MM-dd HH:mm:ss");

193                 beginDate = sdf.parse(dateStr + " " + reBuildIndexBeginTime);

194             } else if (reBuildIndexBeginTime

195                     .matches("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}")) {

196                 SimpleDateFormat sdf = new SimpleDateFormat(

197                         "yyyy-MM-dd HH:mm:ss");

198                 beginDate = sdf.parse(reBuildIndexBeginTime);

199             }

200             return beginDate;

201         } catch (ParseException e) {

202             logger.warn(

203                     "Unable to convert 'reBuildIndexBeginTime' to date. use now time.",

204                     e);

205             return beginDate;

206         }

207     }

208 

209 }

DeltaImportHTTPPostScheduler.java 增量索引更新任务计划:

View Code
 1 package org.apache.solr.handler.dataimport.scheduler;

 2 

 3 import java.util.Timer;

 4 

 5 import org.slf4j.Logger;

 6 import org.slf4j.LoggerFactory;

 7 

 8 /**

 9  * 增量更新索引的任务

10  * @author zhangliang

11  *

12  */

13 public class DeltaImportHTTPPostScheduler extends BaseTimerTask {

14 

15     private static final Logger logger = LoggerFactory

16             .getLogger(DeltaImportHTTPPostScheduler.class);

17 

18     public DeltaImportHTTPPostScheduler(String webAppName, Timer t)

19             throws Exception {

20         super(webAppName, t);

21         logger.info("<index update process> DeltaImportHTTPPostScheduler init");

22     }

23 

24     public void run() {

25         try {

26             // check mandatory params

27             if (server.isEmpty() || webapp.isEmpty() || params == null

28                     || params.isEmpty()) {

29                 logger.warn("<index update process> Insuficient info provided for data import");

30                 logger.info("<index update process> Reloading global dataimport.properties");

31                 reloadParams();

32                 // single-core

33             } else if (singleCore) {

34                 prepUrlSendHttpPost(params);

35 

36                 // multi-core

37             } else if (syncCores.length == 0

38                     || (syncCores.length == 1 && syncCores[0].isEmpty())) {

39                 logger.warn("<index update process> No cores scheduled for data import");

40                 logger.info("<index update process> Reloading global dataimport.properties");

41                 reloadParams();

42 

43             } else {

44                 for (String core : syncCores) {

45                     prepUrlSendHttpPost(core, params);

46                 }

47             }

48         } catch (Exception e) {

49             logger.error("Failed to prepare for sendHttpPost", e);

50             reloadParams();

51         }

52     }

53 }

FullImportHTTPPostScheduler.java 重做索引任务计划:

View Code
 1 package org.apache.solr.handler.dataimport.scheduler;

 2 

 3 import java.util.Timer;

 4 

 5 import org.slf4j.Logger;

 6 import org.slf4j.LoggerFactory;

 7 

 8 /**

 9  * 重做索引的任务

10  * @author zhangliang

11  *

12  */

13 public class FullImportHTTPPostScheduler extends BaseTimerTask {

14 

15     private static final Logger logger = LoggerFactory

16             .getLogger(FullImportHTTPPostScheduler.class);

17 

18     public FullImportHTTPPostScheduler(String webAppName, Timer t)

19             throws Exception {

20         super(webAppName, t);

21         logger.info("<index update process> DeltaImportHTTPPostScheduler init");

22     }

23 

24     public void run() {

25         try {

26             // check mandatory params

27             if (server.isEmpty() || webapp.isEmpty()

28                     || reBuildIndexParams == null

29                     || reBuildIndexParams.isEmpty()) {

30                 logger.warn("<index update process> Insuficient info provided for data import, reBuildIndexParams is null");

31                 logger.info("<index update process> Reloading global dataimport.properties");

32                 reloadParams();

33                 // single-core

34             } else if (singleCore) {

35                 prepUrlSendHttpPost(reBuildIndexParams);

36 

37                 // multi-core

38             } else if (syncCores.length == 0

39                     || (syncCores.length == 1 && syncCores[0].isEmpty())) {

40                 logger.warn("<index update process> No cores scheduled for data import");

41                 logger.info("<index update process> Reloading global dataimport.properties");

42                 reloadParams();

43 

44             } else {

45                 for (String core : syncCores) {

46                     prepUrlSendHttpPost(core, reBuildIndexParams);

47                 }

48             }

49         } catch (Exception e) {

50             logger.error("Failed to prepare for sendHttpPost", e);

51             reloadParams();

52         }

53     }

54 }

ApplicationListener.java 调用任务计划的Listener:

View Code
  1 package org.apache.solr.handler.dataimport.scheduler;

  2 

  3 import java.util.Calendar;

  4 import java.util.Date;

  5 import java.util.Timer;

  6 

  7 import javax.servlet.ServletContext;

  8 import javax.servlet.ServletContextEvent;

  9 import javax.servlet.ServletContextListener;

 10 

 11 import org.slf4j.Logger;

 12 import org.slf4j.LoggerFactory;

 13 

 14 public class ApplicationListener implements ServletContextListener {

 15 

 16     private static final Logger logger = LoggerFactory

 17             .getLogger(ApplicationListener.class);

 18 

 19     @Override

 20     public void contextDestroyed(ServletContextEvent servletContextEvent) {

 21         ServletContext servletContext = servletContextEvent.getServletContext();

 22 

 23         // get our timer from the context

 24         Timer timer = (Timer) servletContext.getAttribute("timer");

 25         Timer fullImportTimer = (Timer) servletContext

 26                 .getAttribute("fullImportTimer");

 27 

 28         // cancel all active tasks in the timers queue

 29         if (timer != null)

 30             timer.cancel();

 31         if (fullImportTimer != null)

 32             fullImportTimer.cancel();

 33 

 34         // remove the timer from the context

 35         servletContext.removeAttribute("timer");

 36         servletContext.removeAttribute("fullImportTimer");

 37 

 38     }

 39 

 40     @Override

 41     public void contextInitialized(ServletContextEvent servletContextEvent) {

 42         ServletContext servletContext = servletContextEvent.getServletContext();

 43         try {

 44             // 增量更新任务计划

 45             // create the timer and timer task objects

 46             Timer timer = new Timer();

 47             DeltaImportHTTPPostScheduler task = new DeltaImportHTTPPostScheduler(

 48                     servletContext.getServletContextName(), timer);

 49 

 50             // get our interval from HTTPPostScheduler

 51             int interval = task.getIntervalInt();

 52 

 53             // get a calendar to set the start time (first run)

 54             Calendar calendar = Calendar.getInstance();

 55 

 56             // set the first run to now + interval (to avoid fireing while the

 57             // app/server is starting)

 58             calendar.add(Calendar.MINUTE, interval);

 59             Date startTime = calendar.getTime();

 60 

 61             // schedule the task

 62             timer.scheduleAtFixedRate(task, startTime, 1000 * 60 * interval);

 63 

 64             // save the timer in context

 65             servletContext.setAttribute("timer", timer);

 66 

 67             // 重做索引任务计划

 68             Timer fullImportTimer = new Timer();

 69             FullImportHTTPPostScheduler fullImportTask = new FullImportHTTPPostScheduler(

 70                     servletContext.getServletContextName(), fullImportTimer);

 71 

 72             int reBuildIndexInterval = fullImportTask

 73                     .getReBuildIndexIntervalInt();

 74             if (reBuildIndexInterval <= 0) {

 75                 logger.warn("Full Import Schedule disabled");

 76                 return;

 77             }

 78 

 79             Calendar fullImportCalendar = Calendar.getInstance();

 80             Date beginDate = fullImportTask.getReBuildIndexBeginTime();

 81             fullImportCalendar.setTime(beginDate);

 82             fullImportCalendar.add(Calendar.MINUTE, reBuildIndexInterval);

 83             Date fullImportStartTime = fullImportCalendar.getTime();

 84 

 85             // schedule the task

 86             fullImportTimer.scheduleAtFixedRate(fullImportTask,

 87                     fullImportStartTime, 1000 * 60 * reBuildIndexInterval);

 88 

 89             // save the timer in context

 90             servletContext.setAttribute("fullImportTimer", fullImportTimer);

 91 

 92         } catch (Exception e) {

 93             if (e.getMessage().endsWith("disabled")) {

 94                 logger.warn("Schedule disabled");

 95             } else {

 96                 logger.error("Problem initializing the scheduled task: ", e);

 97             }

 98         }

 99 

100     }

101 

102 }

 

使用说明
1.将上面的编译文件打包成 apache-solr-dataimportscheduler-1.0.jar, 然后和solr自带的 apache-solr-dataimporthandler-*.jar, apache-solr-dataimporthandler-extras-*.jar 放到solr.war的lib目录下面
2.修改solr.war中WEB-INF/web.xml, 在servlet节点前面增加:

<listener>

<listener-class>

org.apache.solr.handler.dataimport.scheduler.ApplicationListener

</listener-class>

</listener>

3.将apache-solr-dataimportscheduler-1.0.jar 中 dataimport.properties 取出并根据实际情况修改,然后放到 solr.home/conf (不是solr.home/core/conf) 目录下面
4.重启tomcat或者jboss 即可

dataimport.properties 配置项说明

#################################################

# #

# dataimport scheduler properties #

# #

#################################################



# to sync or not to sync

# 1 - active; anything else - inactive

syncEnabled=1



# which cores to schedule

# in a multi-core environment you can decide which cores you want synchronized

# leave empty or comment it out if using single-core deployment

syncCores=core1,core2



# solr server name or IP address

# [defaults to localhost if empty]

server=localhost



# solr server port

# [defaults to 8080 if empty]

port=8080



# application name/context

# [defaults to current ServletContextListener's context (app) name]

webapp=solr



# URL params [mandatory]

# remainder of URL

params=/dataimport?command=delta-import&clean=false&commit=true



# schedule interval

# number of minutes between two runs

# [defaults to 30 if empty]

interval=1



# 重做索引的时间间隔,单位分钟;示例值7200,即5天(未配置时代码默认为0);

# 为空,为0,或者注释掉:表示永不重做索引

reBuildIndexInterval=7200



# 重做索引的参数

reBuildIndexParams=/dataimport?command=full-import&clean=true&commit=true



# 重做索引时间间隔的计时开始时间,第一次真正执行的时间=reBuildIndexBeginTime+reBuildIndexInterval*60*1000;

# 两种格式:2012-04-11 03:10:00 或者 03:10:00,后一种会自动补全日期部分为服务启动时的日期

reBuildIndexBeginTime=03:10:00

 

 

你可能感兴趣的:(scheduler)