For plugin installation, see Hongzhi's blog post:
http://blog.csdn.net/laigood12345/article/details/7691068
The commands to install the current latest versions of the plugins are:
1. ./plugin -install elasticsearch/elasticsearch-mapper-attachments/1.6.0
2. ./plugin -install richardwilly98/elasticsearch-river-mongodb/1.4.0
When running the ES MongoDB river against a MongoDB instance that holds a lot of data, the river can run the ES node out of memory (OOM).
The reason is that the river pulls data out of MongoDB at roughly 100 MB/s, while ES indexes at only about 5,000-10,000 docs/s.
Details: https://github.com/richardwilly98/elasticsearch-river-mongodb/issues/30
Fetching therefore far outpaces indexing; the fetched documents pile up in the river's in-memory queue, which eventually exhausts the heap and triggers the OOM.
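The fix below comes down to back-pressure: the hand-off queue between the fetching thread (Slurper) and the indexing thread (Indexer) has to be bounded, and inserts have to block when the queue is full. A minimal, self-contained sketch of that idea follows; the class name, loop counts, queue capacity and sleep are made-up illustration values, not anything taken from the river itself:

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

public class BackpressureSketch {
    public static void main(String[] args) throws InterruptedException {
        // Bounded queue: at most 500 pending documents live in memory at once.
        final BlockingQueue<Map<String, Object>> stream =
                new ArrayBlockingQueue<Map<String, Object>>(500);

        // Producer (plays the role of the Slurper): put() blocks when the
        // queue is full, so the fast fetcher is throttled to the consumer's pace.
        Thread fetcher = new Thread(new Runnable() {
            public void run() {
                for (int i = 0; i < 10000; i++) {
                    Map<String, Object> doc = new HashMap<String, Object>();
                    doc.put("_id", i);
                    try {
                        stream.put(doc); // waits for free space
                    } catch (InterruptedException e) {
                        return;
                    }
                }
            }
        });

        // Consumer (plays the role of the Indexer): drains slowly,
        // simulating bulk indexing into ES.
        Thread indexer = new Thread(new Runnable() {
            public void run() {
                try {
                    for (int i = 0; i < 10000; i++) {
                        stream.take();   // blocks when the queue is empty
                        Thread.sleep(1); // simulate slow indexing
                    }
                } catch (InterruptedException e) {
                    return;
                }
            }
        });

        fetcher.start();
        indexer.start();
        fetcher.join();
        indexer.join();
    }
}

With an unbounded queue and add() (the original behaviour), nothing ever blocks, so pending documents simply accumulate on the heap until the JVM throws the OOM.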
Solution:
Modify the mongo river source code.
Only the modified parts of org.elasticsearch.river.mongodb.MongoDBRiver are shown here; the rest of the file (license header, imports and the remaining logic) is left exactly as in the plugin source. Added or changed lines are marked with comments.

// Relevant imports (already present in the file):
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import org.elasticsearch.common.util.concurrent.jsr166y.LinkedTransferQueue;

public class MongoDBRiver extends AbstractRiverComponent implements River {

    public final static String BULK_SIZE_FIELD = "bulk_size";
    public final static String THROTTLE_SIZE_FIELD = "throttle_size"; // added: new river setting
    public final static String BULK_TIMEOUT_FIELD = "bulk_timeout";

    protected final int bulkSize;
    protected final int throttleSize;                                 // added

    // changed: the queue is now created in the constructor (bounded by default)
    private final BlockingQueue<Map<String, Object>> stream;

    @SuppressWarnings("unchecked")
    @Inject
    public MongoDBRiver(final RiverName riverName, final RiverSettings settings,
            @RiverIndexName final String riverIndexName, final Client client,
            final ScriptService scriptService) {
        // ... (unchanged) ...
        if (settings.settings().containsKey(INDEX_OBJECT)) {
            // ... (other index settings unchanged) ...
            bulkSize = XContentMapValues.nodeIntegerValue(
                    indexSettings.get(BULK_SIZE_FIELD), 100);
            // added: read throttle_size from the river settings, default bulk_size * 5
            throttleSize = XContentMapValues.nodeIntegerValue(
                    indexSettings.get(THROTTLE_SIZE_FIELD), bulkSize * 5);
        } else {
            indexName = mongoDb;
            typeName = mongoDb;
            bulkSize = 100;
            bulkTimeout = TimeValue.timeValueMillis(10);
            throttleSize = bulkSize * 5;                              // added
        }

        // added: initialize throttleSize and stream in the constructor;
        // -1 keeps the old unbounded behaviour, otherwise the queue is bounded
        if (throttleSize == -1) {
            stream = new LinkedTransferQueue<Map<String, Object>>();
        } else {
            stream = new ArrayBlockingQueue<Map<String, Object>>(throttleSize);
        }
    }

    // ... (start(), close(), Indexer etc. unchanged) ...

    private class Slurper implements Runnable {
        // ... (unchanged) ...
        private void addToStream(final String operation,
                final BSONTimestamp currentTimestamp,
                final Map<String, Object> data) {
            data.put(OPLOG_TIMESTAMP, currentTimestamp);
            data.put(OPLOG_OPERATION, operation);
            try {
                // changed add() to put(): put() waits until the queue has free
                // space, so the slurper is throttled instead of filling the heap
                stream.put(data);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
}
The key changes are shown above. Strictly speaking, changing only the stream.add(data); call to stream.put(data); would probably be enough.
The advantage of also adding throttle_size is that, like bulk_size, the fetch throttle can then be configured per river when the river is created.
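For reference, a sketch of creating such a river with throttle_size set, using the ES Java client (the field names match the constants in the source above; the river name mongodb_river and the db/collection/index/type names are made-up placeholders, and the same _meta document can just as well be PUT with curl):

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentBuilder;

public class CreateMongoRiver {
    // client: an already-connected ES Client (e.g. a TransportClient)
    public static void createRiver(Client client) throws Exception {
        XContentBuilder meta = jsonBuilder()
            .startObject()
                .field("type", "mongodb")                // RIVER_TYPE
                .startObject("mongodb")
                    .field("db", "mydb")                 // DB_FIELD (placeholder)
                    .field("collection", "mycollection") // COLLECTION_FIELD (placeholder)
                .endObject()
                .startObject("index")                    // INDEX_OBJECT
                    .field("name", "myindex")            // NAME_FIELD (placeholder)
                    .field("type", "mytype")             // TYPE_FIELD (placeholder)
                    .field("bulk_size", 500)             // BULK_SIZE_FIELD
                    .field("bulk_timeout", "10ms")       // BULK_TIMEOUT_FIELD
                    .field("throttle_size", 2000)        // THROTTLE_SIZE_FIELD: queue bound, -1 = unbounded
                .endObject()
            .endObject();

        // A river is created by indexing its _meta document into the _river index;
        // the type ("mongodb_river" here) is the river's name.
        client.prepareIndex("_river", "mongodb_river", "_meta")
              .setSource(meta)
              .execute()
              .actionGet();
    }
}

throttle_size bounds the in-memory stream queue built in the constructor above; -1 falls back to the unbounded LinkedTransferQueue, and the default is bulk_size * 5.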
It took me a whole morning to sort this out, so I am leaving a note here.
If anyone has a better solution, please leave a comment. The attachment is the mongo river plugin jar I recompiled; just replace the corresponding jar under plugins\river-mongodb in the ES directory.
Note: the author has fixed this issue in mongo-river 1.6 and later, so those versions can be used as-is.