Solrj是Solr搜索服务器的一个比较基础的客户端工具,可以非常方便地与Solr搜索服务器进行交互,最基本的功能就是管理Solr索引,包括添加、更新、删除和查询等。对于一些比较基础的应用,使用Solrj基本够用,通过Solrj的API可以非常容易地与Solr搜索服务器进行交互,实现对Solr的基本管理功能。如果你的应用比较复杂,可以扩展Solrj来满足需要。
下面是一个使用Solrj的API实现与Solr服务器交互的工具类SolrPostServer,能够实现索引的添加、更新、删除和查询功能。SolrPostServer类中的两个内部类分别是访问MongoDB所用的配置类(MongoConfig)和工具类(MongoHelper)。
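在实际应用中,对于是否进行commit,可以有两种方式:一种是在客户端进行控制,即在索引时累计已添加的文档数,达到设定的数量后由客户端手动执行commit;另一种是在Solr搜索服务器端配置自动commit策略,例如在solrconfig.xml的requestHandler中进行如下配置: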
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler">
    <maxPendingDeletes>10000</maxPendingDeletes>
    <autoCommit>
        <maxDocs>20</maxDocs>
        <maxTime>86000</maxTime>
    </autoCommit>
</requestHandler>
上面autoCommit中的maxDocs指定了pending多少个文档后执行一次commit,而maxTime指定了每隔多长时间进行一次commit,一般这两个选项只需要配置一个即可满足需要。另外,每次commit都会使最近的更新生效,但是如果一次commit操作尚未完成,又到了下一次commit的时刻,就会严重影响索引的吞吐量。
实现代码如下所示:
package org.shirdrn.solr.solrj; import java.io.IOException; import java.io.Serializable; import java.net.MalformedURLException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.commons.httpclient.HttpClient; import org.apache.log4j.Logger; import org.apache.lucene.document.Document; import org.apache.solr.client.solrj.ResponseParser; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; import org.apache.solr.client.solrj.impl.XMLResponseParser; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.SolrParams; import com.mongodb.BasicDBObject; import com.mongodb.DBCollection; import com.mongodb.DBCursor; import com.mongodb.DBObject; import com.mongodb.Mongo; import com.mongodb.MongoException; /** * Solr server for indexes operations. * * @author shirdrn * @date 2011-12-20 */ public class SolrPostServer { private static final Logger LOG = Logger.getLogger(SolrPostServer.class); private CommonsHttpSolrServer server; private ResponseParser responseParser; private MongoConfig mongoConfig; private String[] collectionNames; private int maxCommitCount = 100; private boolean manualOptimize = true; private boolean manualCommit = false; private Collection<SolrInputDocument> docContainer = new ArrayList<SolrInputDocument>(); private static int totalCount = 0; public SolrPostServer(String url, HttpClient httpClient, MongoConfig mongoConfig) { try { if(httpClient==null) { server = new CommonsHttpSolrServer(url); server.setSoTimeout(500000); // socket read timeout server.setConnectionTimeout(5000); server.setDefaultMaxConnectionsPerHost(10); server.setMaxTotalConnections(100); server.setAllowCompression(true); server.setMaxRetries(1); // defaults to 0. 
> 1 not recommended. } else { server = new CommonsHttpSolrServer(url, httpClient); } } catch (MalformedURLException e) { e.printStackTrace(); } this.mongoConfig = mongoConfig; initialize(); } /** * Initialize the {@link CommonsHttpSolrServer}'s basic parameters. */ private void initialize() { if(responseParser!=null) { server.setParser(responseParser); } else { server.setParser(new XMLResponseParser()); } } @SuppressWarnings("unchecked") public void postUpdate() { DBCursor cursor = null; try { for (String c : collectionNames) { LOG.info("MongoDB collection name: " + c); DBCollection collection = MongoHelper.newHelper(mongoConfig).getCollection(c); DBObject q = new BasicDBObject(); cursor = collection.find(q); while(cursor.hasNext()) { try { Map<Object, Object> m = cursor.next().toMap(); if(manualCommit) { add(m, true); } else { add(m, false); } ++totalCount; LOG.info("Add fragment: _id = " + m.get("_id").toString()); } catch (IOException e) { e.printStackTrace(); } } cursor.close(); } LOG.info("Add totalCount: " + totalCount); finallyCommit(); optimize(manualOptimize); } catch (MongoException e) { e.printStackTrace(); } catch (SolrServerException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } /** * Detele lucene {@link Document} by IDs. * @param strings */ public void deleteById(List<String> strings) { try { server.deleteById(strings); } catch (SolrServerException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } /** * Detele lucene {@link Document} by query. * @param query */ public void deleteByQuery(String query) { try { server.deleteByQuery(query); } catch (SolrServerException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } /** * Query. 
* @param params * @param fields * @return */ public List<Map<String, Object>> query(SolrParams params, String[] fields) { List<Map<String, Object>> results = new ArrayList<Map<String, Object>>(); try { SolrDocumentList documents = server.query(params).getResults(); Iterator<SolrDocument> iter = documents.iterator(); while(iter.hasNext()) { SolrDocument doc = iter.next(); Map<String, Object> map = new HashMap<String, Object>(); for(String field : fields) { map.put(field, doc.getFieldValue(field)); } results.add(map); } } catch (SolrServerException e) { e.printStackTrace(); } return results; } /** * When controlling the committing action at client side, finally execute committing. * @throws SolrServerException * @throws IOException */ private void finallyCommit() throws SolrServerException, IOException { if(!docContainer.isEmpty()) { server.add(docContainer); commit(false, false); } } /** * Commit. * @param waitFlush * @param waitSearcher * @throws SolrServerException * @throws IOException */ public void commit(boolean waitFlush, boolean waitSearcher) { try { server.commit(waitFlush, waitSearcher); } catch (SolrServerException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } /** * When controlling the optimizing action at client side, finally execute optimizing. * @param waitFlush * @param waitSearcher * @throws SolrServerException * @throws IOException */ public void optimize(boolean waitFlush, boolean waitSearcher) { try { server.optimize(waitFlush, waitSearcher); commit(waitFlush, waitSearcher); } catch (Exception e) { LOG.error("Encounter error when optimizing.", e); try { server.rollback(); } catch (SolrServerException e1) { e1.printStackTrace(); } catch (IOException e1) { e1.printStackTrace(); } } } /** * Optimize. 
* @param optimize * @throws SolrServerException * @throws IOException */ private void optimize(boolean optimize) { if(optimize) { optimize(true, true); } } /** * Add a {@link SolrInputDocument} or collect object and add to the a collection for batch updating * from a mongodb's recored, a Map object. * @param m * @param oneByOne * @throws SolrServerException * @throws IOException */ private void add(Map<Object, Object> m, boolean oneByOne) throws SolrServerException, IOException { SolrInputDocument doc = createDocument(m); if(oneByOne) { server.add(doc); } else { docContainer.add(doc); if(docContainer.size()>maxCommitCount) { server.add(docContainer); server.commit(false, false); docContainer = new ArrayList<SolrInputDocument>(); } } } /** * Create a {@link SolrInputDocument} object. * @param record * @return */ private SolrInputDocument createDocument(Map<Object, Object> record) { String id = record.get("_id").toString(); String articleId = (String) record.get("articleId"); String title = (String) record.get("title"); String url = (String) record.get("url"); String spiderName = (String) record.get("spiderName"); String fragment = makeFragment((BasicDBObject) record.get("fragment")); String word = (String) record.get("word"); int pictureCount = (Integer) record.get("pictureCount"); int selectedCount = (Integer) record.get("selectedCount"); int fragmentSize = (Integer) record.get("fragmentSize"); SolrInputDocument doc = new SolrInputDocument(); doc.addField("_id", id, 1.0f); doc.addField("articleId", articleId, 1.0f); doc.addField("title", title, 1.0f); doc.addField("url", url, 1.0f); doc.addField("spiderName", spiderName, 1.0f); doc.addField("fragment", fragment, 1.0f); doc.addField("word", word, 1.0f); // Additional processing for lucene payload metadata. 
doc.addField("pictureCount", word + "|" + pictureCount); doc.addField("coverage", word + "|" + (float)selectedCount/fragmentSize); return doc; } @SuppressWarnings("unchecked") private String makeFragment(BasicDBObject fragment) { StringBuilder builder = new StringBuilder(); Iterator<Map.Entry<Integer, String>> iter = fragment.toMap().entrySet().iterator(); while(iter.hasNext()) { Map.Entry<Integer, String> entry = iter.next(); builder.append(entry.getValue().trim()).append("<br>"); } return builder.toString(); } /** * Set {@link ResponseParser}, default value is {@link XMLResponseParser}. * @param responseParser */ public void setResponseParser(ResponseParser responseParser) { this.responseParser = responseParser; } /** * Pulling document resource from multiple collections of MongoDB. * @param collectionNames */ public void setCollectionNames(String[] collectionNames) { this.collectionNames = collectionNames; } public void setMaxCommitCount(int maxCommitCount) { this.maxCommitCount = maxCommitCount; } public void setManualCommit(boolean manualCommit) { this.manualCommit = manualCommit; } public void setManualOptimize(boolean manualOptimize) { this.manualOptimize = manualOptimize; } /** * Mongo database configuration. * * @author shirdrn * @date 2011-12-20 */ public static class MongoConfig implements Serializable { private static final long serialVersionUID = -3028092758346115702L; private String host; private int port; private String dbname; private String collectionName; public MongoConfig(String host, int port, String dbname, String collectionName) { super(); this.host = host; this.port = port; this.dbname = dbname; this.collectionName = collectionName; } @Override public boolean equals(Object obj) { MongoConfig other = (MongoConfig) obj; return host.equals(other.host) && port==other.port && dbname.equals(other.dbname) && collectionName.equals(other.collectionName); } } /** * Mongo database utility. 
* * @author shirdrn * @date 2011-12-20 */ static class MongoHelper { private static Mongo mongo; private static MongoHelper helper; private MongoConfig mongoConfig; private MongoHelper(MongoConfig mongoConfig) { super(); this.mongoConfig = mongoConfig; } public synchronized static MongoHelper newHelper(MongoConfig mongoConfig) { try { if(helper==null) { helper = new MongoHelper(mongoConfig); mongo = new Mongo(mongoConfig.host, mongoConfig.port); Runtime.getRuntime().addShutdownHook(new Thread() { @Override public void run() { if(mongo!=null) { mongo.close(); } } }); } } catch (Exception e) { e.printStackTrace(); } return helper; } public DBCollection getCollection(String collectionName) { DBCollection c = null; try { c = mongo.getDB(mongoConfig.dbname).getCollection(collectionName); } catch (Exception e) { e.printStackTrace(); } return c; } } }
下面,我们可以通过写一个测试用例测试一下。
首先,我的Solr搜索服务器已经部署好并启动成功,对应的url为http://192.168.0.197:8080/server/fragment/。测试用例如下所示:
package org.shirdrn.solr.solrj; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import junit.framework.TestCase; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.request.MapSolrParams; import org.shirdrn.solr.solrj.SolrPostServer.MongoConfig; @SuppressWarnings("deprecation") public class TestSolrPostServer extends TestCase { SolrPostServer myServer; MongoConfig config; String url; String[] collectionNames; @Override protected void setUp() throws Exception { url = "http://192.168.0.197:8080/server/fragment/"; config = new MongoConfig("192.168.0.184", 27017, "fragment", ""); myServer = new SolrPostServer(url, null, config); myServer.setMaxCommitCount(100); } @Override protected void tearDown() throws Exception { super.tearDown(); } public void testPostUpdate() { collectionNames = new String[] { "sina", "lvping", "daodao", "go2eu", "mafengwo", "lotour", "17u", "sohu", "baseSe", "bytravel" }; myServer.setCollectionNames(collectionNames); myServer.setManualCommit(true); myServer.setManualOptimize(false); myServer.postUpdate(); } public void testPostDelete() { List<String> strings = new ArrayList<String>(); strings.add("4ef051342c4117a38f63ee97"); strings.add("4ef051322c4117a38f63ee36"); strings.add("4ef051a42c4117a38f63fb51"); strings.add("4ef050d92c4117a38f63dda4"); strings.add("4ef051fe2c4117a38f640bc8"); strings.add("4ef048ef2c4117a38f6207ce"); strings.add("4ef049062c4117a38f620e13"); strings.add("4ef046f12c4117a38f6185c0"); myServer.deleteById(strings); myServer.commit(false, false); myServer.optimize(true, false); } @SuppressWarnings({ "rawtypes", "unchecked" }) public void testQuery() { Map map = new HashMap(); map.put(CommonParams.Q, "法国"); map.put(CommonParams.START, "0"); map.put(CommonParams.ROWS, "10"); map.put(CommonParams.FQ, "word:卢浮宫"); SolrParams params = new MapSolrParams(map); List<Map<String, Object>> results = 
myServer.query(params, new String[] {"_id", "title", "url"}); assertEquals(10, results.size()); } }
在实际开发的过程中,使用Solrj客户端可以非常容易地为测试做一些基本操作,如创建索引、测试Solr基本参数,以及开发定制Solr相关接口(Retrieval、Highlighting、Faceted Search、Clustering等等)。