This article walks through the flow of the sharding setup commands. Before analyzing that flow, let us first look at the role of each collection in the config database on the config server.
version: stores the config server's current version information, which changes as mongodb is upgraded.
settings: stores sharding system settings, such as the chunk size and the balancer configuration.
shards: stores the configuration of each shard, including each mongod's shard id (e.g. shard0000) and its address.
databases: stores the sharding information for each database in the cluster: whether it is sharded, and the id of its primary shard.
collections: stores the sharded collections.
locks: the state of the distributed locks; state=0 means unlocked, 1 means preparing to lock, 2 means the lock is held.
lockpings: ping information from the servers in the cluster, kept for the distributed lock; it records each server's last ping time, and once a server times out, other servers may take over its lock.
chunks: stores the chunk information of each sharded collection: every chunk's data range, its collection name, and the id of the shard it lives on.
mongos: ping information for the mongos processes, used to confirm that each mongos is reachable.
tags: apparently similar in purpose to tags in a replica set; it pins certain chunks to certain classes of servers.
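To make these collections concrete, here is roughly what a few of the documents look like when queried through mongos. This is a sketch: the field values are illustrative, in the style of the 2.x config schema.

mongos> use config
mongos> db.shards.findOne()
{ "_id" : "shard0000", "host" : "127.0.0.1:27040" }
mongos> db.databases.findOne()
{ "_id" : "fool", "partitioned" : true, "primary" : "shard0000" }
mongos> db.chunks.findOne()
{
    "_id" : "fool.coll-_id_MinKey",
    "ns" : "fool.coll",
    "min" : { "_id" : { "$minKey" : 1 } },
    "max" : { "_id" : { "$maxKey" : 1 } },
    "shard" : "shard0000",
    "lastmod" : Timestamp(1000, 0)
}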
In a sharded cluster, every mongod server is a shard. If no name is given, an internal name is generated automatically, numbered shard0000, shard0001, and so on. Every database, when it first joins the cluster, is assigned a primary shard, which records where the database originally lives. For a newly added database with no location specified, its initial location is the shard currently holding the least data.
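If the automatically chosen primary shard is not the desired one, it can be changed afterwards with the movePrimary command. A minimal sketch, assuming a database named fool and a target shard named shard0001:

mongos> use admin
mongos> db.runCommand({ movePrimary: "fool", to: "shard0001" })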
A sharded collection is managed by a ChunkManager. Sharding of a collection is done in units of chunks; each chunk is 64MB by default, a size that can be changed by command, and once a chunk reaches that limit it is split. The ChunkManager records the splits and the key range of every chunk. Subsequent data modifications first consult the ChunkManager, which tells mongos which server each request should be sent to.
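The usual way to change the default chunk size is to write the chunksize document into the settings collection mentioned above (the value is in MB). A sketch, connecting through mongos:

mongos> use config
mongos> db.settings.save({ _id: "chunksize", value: 128 })   // future splits happen at about 128MB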
Now let us turn to the code. First, adding a server to the sharded cluster: db.runCommand({addshard: "127.0.0.1:27040"}), which executes the addshard command.
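Besides the address, the handler below also reads two optional fields, name and maxSize (in MB). A sketch with made-up values:

mongos> use admin
mongos> db.runCommand({ addshard: "127.0.0.1:27040", name: "shard_a", maxSize: 2048 })
{ "shardAdded" : "shard_a", "ok" : 1 }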
bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
    if ( !ClientBasic::getCurrent()->getAuthenticationInfo()->isAuthorized("admin") ) { // only admin may run this command
        errmsg = "unauthorized. Need admin authentication to add a shard ";
        return false;
    }
    // get replica set component hosts
    ConnectionString servers = ConnectionString::parse( cmdObj.firstElement().valuestrsafe() , errmsg );
    // using localhost in server names implies every other process must use localhost addresses too
    vector<HostAndPort> serverAddrs = servers.getServers(); // addresses of the servers being added
    for ( size_t i = 0 ; i < serverAddrs.size() ; i++ ) {
        if ( serverAddrs[i].isLocalHost() != grid.allowLocalHost() ) {
            return false;
        }
        // it's fine if mongods of a set all use default port
        if ( ! serverAddrs[i].hasPort() ) { // no port given, fill in the default port
            serverAddrs[i].setPort( CmdLine::ShardServerPort );
        }
    }
    // name is optional; addShard will provide one if needed
    string name = "";
    if ( cmdObj["name"].type() == String ) { // shard name; if unset, one such as shard0000 is generated automatically
        name = cmdObj["name"].valuestrsafe();
    }
    // maxSize is the space usage cap in a shard in MBs
    long long maxSize = 0;
    if ( cmdObj[ ShardFields::maxSize.name() ].isNumber() ) {
        maxSize = cmdObj[ ShardFields::maxSize.name() ].numberLong();
    }
    if ( ! grid.addShard( &name , servers , maxSize , errmsg ) ) { // the actual add
        return false;
    }
    result << "shardAdded" << name;
    return true;
}
run->addShard (a large part of the validation code has been removed from the listing below).
bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ) {
    // name can be NULL, so provide a dummy one here to avoid testing it elsewhere
    string nameInternal;
    ReplicaSetMonitorPtr rsMonitor;
    // Check whether the host (or set) exists and run several sanity checks on this request.
    // There are two set of sanity checks: making sure adding this particular shard is consistent
    // with the replica set state (if it exists) and making sure this shards databases can be
    // brought into the grid without conflict.
    vector<string> dbNames;
    {
        scoped_ptr<ScopedDbConnection> newShardConnPtr( // connect to the server that is to become a shard
            ScopedDbConnection::getInternalScopedDbConnection( servers.toString() ) );
        ScopedDbConnection& newShardConn = *newShardConnPtr;
        BSONObj resIsMongos;
        BSONObj resIsMaster;
        bool ok = newShardConn->runCommand( "admin" , BSON( "isMaster" << 1 ) , resIsMaster );
        // if the shard has only one host, make sure it is not part of a replica set
        string setName = resIsMaster["setName"].str();
        string commandSetName = servers.getSetName();
        // if the shard is part of a replica set, make sure all the hosts mentioned in 'servers' are part of
        // the set. It is fine if not all members of the set are present in 'servers'.
        bool foundAll = true;
        string offendingHost;
        // shard name defaults to the name of the replica set
        if ( name->empty() && ! setName.empty() )
            *name = setName;
        // In order to be accepted as a new shard, that mongod must not have any database name that exists already
        // in any other shards. If that test passes, the new shard's databases are going to be entered as
        // non-sharded db's whose primary is the newly added shard.
        BSONObj resListDB; // list all of the new server's databases so they can be brought into the cluster
        ok = newShardConn->runCommand( "admin" , BSON( "listDatabases" << 1 ) , resListDB );
        BSONObjIterator i( resListDB["databases"].Obj() );
        while ( i.more() ) {
            BSONObj dbEntry = i.next().Obj();
            const string& dbName = dbEntry["name"].String();
            if ( _isSpecialLocalDB( dbName ) ) {
                // 'local', 'admin', and 'config' are system DBs and should be excluded here
                continue;
            }
            else {
                dbNames.push_back( dbName );
            }
        }
        if ( newShardConn->type() == ConnectionString::SET )
            rsMonitor = ReplicaSetMonitor::get( setName );
        newShardConn.done();
    }
    // check that none of the existing shard candidate's db's exist elsewhere
    // (each lookup here must come back empty, i.e. the database is unknown to the cluster, otherwise the add fails)
    for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
        DBConfigPtr config = getDBConfig( *it , false ); // look up the database's configuration
    }
    // if a name for a shard wasn't provided, pick one.
    if ( name->empty() && ! _getNewShardName( name ) ) { // generate a new shard id for the newly added server
        errMsg = "error generating new shard name";
        return false;
    }
    // build the ConfigDB shard document
    BSONObjBuilder b;
    b.append( "_id" , *name );
    b.append( "host" , rsMonitor ? rsMonitor->getServerAddress() : servers.toString() );
    if ( maxSize > 0 ) {
        b.append( ShardFields::maxSize.name() , maxSize );
    }
    BSONObj shardDoc = b.obj();
    {
        scoped_ptr<ScopedDbConnection> conn(
            ScopedDbConnection::getInternalScopedDbConnection(
                configServer.getPrimary().getConnString() ) );
        // check whether the set of hosts (or single host) is not an already a known shard
        BSONObj old = conn->get()->findOne( ShardNS::shard , BSON( "host" << servers.toString() ) );
        conn->get()->insert( ShardNS::shard , shardDoc ); // save the configuration into the shards collection on the config server
        conn->done();
    }
    Shard::reloadShardInfo(); // a new shard was configured, so reload the shard information
    // add all databases of the new shard
    for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
        DBConfigPtr config = getDBConfig( *it , true , *name ); // build the configuration for each newly added database; true means create it
    }
    return true;
}
Let us continue with Shard::reloadShardInfo.
void Shard::reloadShardInfo() { staticShardInfo.reload(); }

void reload() {
    list<BSONObj> all;
    {
        scoped_ptr<ScopedDbConnection> conn( // read every configuration document from the shards collection
            ScopedDbConnection::getInternalScopedDbConnection(
                configServer.getPrimary().getConnString() ) );
        auto_ptr<DBClientCursor> c = conn->get()->query( ShardNS::shard , Query() );
        while ( c->more() ) {
            all.push_back( c->next().getOwned() );
        }
        conn->done();
    }
    scoped_lock lk( _mutex );
    // We use the _lookup table for all shards and for the primary config DB. The config DB info,
    // however, does not come from the ShardNS::shard. So when cleaning the _lookup table we leave
    // the config state intact. The rationale is that this way we could drop shards that
    // were removed without reinitializing the config DB information.
    ShardMap::iterator i = _lookup.find( "config" ); // the config database entry is preserved
    if ( i != _lookup.end() ) {
        ShardPtr config = i->second;
        _lookup.clear();
        _lookup[ "config" ] = config;
    }
    else {
        _lookup.clear();
    }
    _rsLookup.clear(); // rebuild the shard state from the documents in the shards collection
    for ( list<BSONObj>::iterator i=all.begin(); i!=all.end(); ++i ) {
        BSONObj o = *i;
        string name = o["_id"].String();
        string host = o["host"].String();
        long long maxSize = 0;
        BSONElement maxSizeElem = o[ ShardFields::maxSize.name() ];
        if ( ! maxSizeElem.eoo() )
            maxSize = maxSizeElem.numberLong();
        bool isDraining = false;
        BSONElement isDrainingElem = o[ ShardFields::draining.name() ];
        if ( ! isDrainingElem.eoo() )
            isDraining = isDrainingElem.Bool();
        ShardPtr s( new Shard( name , host , maxSize , isDraining ) ); // build the Shard structure
        if ( o["tags"].type() == Array ) { // attach the tag information
            vector<BSONElement> v = o["tags"].Array();
            for ( unsigned j=0; j<v.size(); j++ ) {
                s->addTag( v[j].String() );
            }
        }
        _lookup[name] = s;
        _installHost( host , s );
    }
}
run->addShard->getDBConfig
DBConfigPtr Grid::getDBConfig( string database , bool create , const string& shardNameHint ) {
    string::size_type i = database.find( "." );
    if ( i != string::npos )
        database = database.substr( 0 , i );
    if ( database == "config" ) // the config database never needs to be created
        return configServerPtr;
    scoped_lock l( _lock );
    DBConfigPtr& dbConfig = _databases[database]; // if not yet in _databases, build a new DBConfig
    if( ! dbConfig ){
        dbConfig.reset(new DBConfig( database ));
        // Protect initial load from connectivity errors
        bool loaded = false;
        loaded = dbConfig->load(); // load this database's configuration from the databases collection,
                                   // then load its collections' configuration from the collections collection
        if( ! loaded ){ // on first contact the database is not yet saved in the databases collection, so loaded=false
            if( create ){
                try{
                    Shard primary;
                    if ( database == "admin" ) {
                        primary = configServer.getPrimary();
                    }
                    else if ( shardNameHint.empty() ) {
                        primary = Shard::pick(); // no shard hint: pick the shard currently holding the least data as the primary
                    }
                    else {
                        // use the shard name if provided
                        Shard shard;
                        shard.reset( shardNameHint ); // resolve the shard named by shardNameHint
                        primary = shard;
                    }
                    if ( primary.ok() ) { // record the primary shard's name (resolved back to a Shard internally);
                                          // this also persists the DBConfig to the databases and collections collections
                        dbConfig->setPrimary( primary.getName() ); // saves 'cc' to configDB
                    }
                }
                catch ( ... ) {
                    // error handling elided in this excerpt
                }
            }
            else {
                dbConfig.reset();
            }
        }
    }
    return dbConfig;
}
run->addShard->getDBConfig->DBConfig::load
bool DBConfig::load() {
    scoped_lock lk( _lock );
    return _load();
}

bool DBConfig::_load() {
    scoped_ptr<ScopedDbConnection> conn(
        ScopedDbConnection::getInternalScopedDbConnection( configServer.modelServer(), 30.0 ) );
    BSONObj o = conn->get()->findOne( ShardNS::database , BSON( "_id" << _name ) );
    unserialize( o ); // set the database's primary shard, and whether sharding is enabled for it
    BSONObjBuilder b;
    b.appendRegex( "_id" , (string)"^" + pcrecpp::RE::QuoteMeta( _name ) + "\\." );
    int numCollsErased = 0;
    int numCollsSharded = 0;
    auto_ptr<DBClientCursor> cursor = conn->get()->query( ShardNS::collection, b.obj() );
    while ( cursor->more() ) { // load the collection information belonging to this database
        BSONObj o = cursor->next();
        string collName = o["_id"].String();
        if( o["dropped"].trueValue() ){
            _collections.erase( collName );
            numCollsErased++;
        }
        else{
            _collections[ collName ] = CollectionInfo( o );
            if( _collections[ collName ].isSharded() )
                numCollsSharded++;
        }
    }
    conn->done();
    return true;
}
Next, let us look at how the DBConfig is saved.
run->addShard->getDBConfig->DBConfig::setPrimary
void DBConfig::setPrimary( string s ) {
    scoped_lock lk( _lock );
    _primary.reset( s );
    _save();
}

void DBConfig::_save( bool db, bool coll ) {
    scoped_ptr<ScopedDbConnection> conn(
        ScopedDbConnection::getInternalScopedDbConnection( configServer.modelServer(), 30.0 ) );
    if( db ){
        BSONObj n;
        {
            BSONObjBuilder b;
            serialize(b); // serialize this DBConfig
            n = b.obj();
        }
        // upsert the document into the databases collection
        conn->get()->update( ShardNS::database , BSON( "_id" << _name ) , n , true );
    }
    if( coll ){ // save the collection info if present; on the first load there is none, so this part is skipped
        for ( Collections::iterator i=_collections.begin(); i!=_collections.end(); ++i ) {
            if ( ! i->second.isDirty() )
                continue;
            i->second.save( i->first , conn->get() );
        }
    }
    conn->done();
}
This completes the addShard flow. Next comes enablesharding: db.runCommand({enablesharding: "fool"}), implemented in mongo\s\commands_admin.cpp.
bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
    string dbname = cmdObj.firstElement().valuestrsafe();
    if ( dbname.size() == 0 ) { // various sanity checks
        errmsg = "no db";
        return false;
    }
    if ( dbname == "admin" ) {
        errmsg = "can't shard the admin db";
        return false;
    }
    if ( dbname == "local" ) {
        errmsg = "can't shard the local db";
        return false;
    }
    DBConfigPtr config = grid.getDBConfig( dbname ); // fetch the database's configuration, analyzed above
    if ( config->isShardingEnabled() ) {
        errmsg = "already enabled";
        return false;
    }
    if ( ! okForConfigChanges( errmsg ) ) // configuration may only change while all config servers are up
        return false;
    config->enableSharding(); // the actual switch
    return true;
}
void DBConfig::enableSharding( bool save ) { // save defaults to true
    if ( _shardingEnabled )
        return;
    scoped_lock lk( _lock ); // set shardingEnabled=true and persist the configuration to config.databases
    _shardingEnabled = true;
    if( save )
        _save();
}
Next, sharding a collection. The command is db.runCommand({shardcollection: "fool.coll", key: {_id: 1}}); its source is in mongo\s\commands_admin.cpp. It contains so much error-handling code that most of it had to be cut here, so the checks it performs are summarized in words:
1. The collection name exists.
2. Sharding is enabled on the collection's database.
3. The collection itself has not already been sharded.
4. Every field in the shard key pattern must have the value 1.
5. All config servers are reachable.
6. System collections cannot be sharded.
7. Capped collections cannot be sharded.
The shard key requirements, from 10gen's comments in the source (see the example after this list):
1. Every unique index, except those that start with _id, must include the shard key as a prefix.
2. If the collection is non-empty, there must be at least one index prefixed by the shard key; that index cannot be sparse, cannot contain null values, and cannot be a multikey index (the multikey restriction may be lifted later).
3. If the shard key is declared unique, there must be a corresponding unique index that matches it exactly, not merely as a prefix.
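As an example of requirement 1, suppose fool.coll already has a unique index that does not start with the proposed shard key; sharding is then refused. A sketch (the collection, field names, and exact error text are illustrative):

mongos> use fool
mongos> db.coll.ensureIndex({ email: 1 }, { unique: true })
mongos> use admin
mongos> db.runCommand({ shardcollection: "fool.coll", key: { name: 1 } })
{ "ok" : 0, "errmsg" : "can't shard collection 'fool.coll' with unique index on { email: 1.0 } ..." }
mongos> db.runCommand({ shardcollection: "fool.coll", key: { email: 1 } })   // the shard key is a prefix of the unique index, so this is accepted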
bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
    DBConfigPtr config = grid.getDBConfig( ns );
    BSONObj proposedKey = cmdObj.getObjectField( "key" );
    // the rest of the checks require a connection to the primary db
    scoped_ptr<ScopedDbConnection> conn(
        ScopedDbConnection::getScopedDbConnection( config->getPrimary().getConnString() ) );
    // check that collection is not capped
    BSONObj res = conn->get()->findOne( config->getName() + ".system.namespaces",
                                        BSON( "name" << ns ) );
    // The proposed shard key must be validated against the set of existing indexes.
    // In particular, we must ensure the following constraints
    //
    // 1. All existing unique indexes, except those which start with the _id index,
    //    must contain the proposed key as a prefix (uniqueness of the _id index is
    //    ensured by the _id generation process or guaranteed by the user).
    //
    // 2. If the collection is not empty, there must exist at least one index that
    //    is "useful" for the proposed key. A "useful" index is defined as follows
    //    Useful Index:
    //         i. contains proposedKey as a prefix
    //        ii. is not sparse
    //       iii. contains no null values
    //        iv. is not multikey (maybe lift this restriction later)
    //
    // 3. If the proposed shard key is specified as unique, there must exist a useful,
    //    unique index exactly equal to the proposedKey (not just a prefix).
    //
    // After validating these constraint:
    //
    // 4. If there is no useful index, and the collection is non-empty, we
    //    must fail.
    //
    // 5. If the collection is empty, and it's still possible to create an index
    //    on the proposed key, we go ahead and do so.
    string indexNS = config->getName() + ".system.indexes";
    // 1. Verify consistency with existing unique indexes
    BSONObj uniqueQuery = BSON( "ns" << ns << "unique" << true );
    auto_ptr<DBClientCursor> uniqueQueryResult = conn->get()->query( indexNS , uniqueQuery );
    while ( uniqueQueryResult->more() ) {
        BSONObj idx = uniqueQueryResult->next();
        BSONObj currentKey = idx["key"].embeddedObject();
        bool isCurrentID = str::equals( currentKey.firstElementFieldName() , "_id" );
        if( ! isCurrentID && ! proposedKey.isPrefixOf( currentKey ) ) {
            errmsg = str::stream() << "can't shard collection '" << ns << "' "
                                   << "with unique index on " << currentKey << " "
                                   << "and proposed shard key " << proposedKey << ". "
                                   << "Uniqueness can't be maintained unless "
                                   << "shard key is a prefix";
            conn->done();
            return false;
        }
    }
    // 2. Check for a useful index
    bool hasUsefulIndexForKey = false;
    BSONObj allQuery = BSON( "ns" << ns );
    auto_ptr<DBClientCursor> allQueryResult = conn->get()->query( indexNS , allQuery );
    BSONArrayBuilder allIndexes;
    while ( allQueryResult->more() ) {
        BSONObj idx = allQueryResult->next();
        allIndexes.append( idx );
        BSONObj currentKey = idx["key"].embeddedObject();
        // Check 2.i. and 2.ii.
        if ( ! idx["sparse"].trueValue() && proposedKey.isPrefixOf( currentKey ) ) {
            hasUsefulIndexForKey = true;
        }
    }
    // 3. If proposed key is required to be unique, additionally check for exact match.
    bool careAboutUnique = cmdObj["unique"].trueValue();
    if ( hasUsefulIndexForKey && careAboutUnique ) {
        BSONObj eqQuery = BSON( "ns" << ns << "key" << proposedKey );
        BSONObj eqQueryResult = conn->get()->findOne( indexNS, eqQuery );
        if ( eqQueryResult.isEmpty() ) {
            hasUsefulIndexForKey = false; // if no exact match, index not useful,
                                          // but still possible to create one later
        }
        else {
            bool isExplicitlyUnique = eqQueryResult["unique"].trueValue();
            BSONObj currKey = eqQueryResult["key"].embeddedObject();
            bool isCurrentID = str::equals( currKey.firstElementFieldName() , "_id" );
            if ( ! isExplicitlyUnique && ! isCurrentID ) {
                errmsg = str::stream() << "can't shard collection " << ns << ", "
                                       << proposedKey << " index not unique, "
                                       << "and unique index explicitly specified";
                conn->done();
                return false;
            }
        }
    }
    if ( hasUsefulIndexForKey ) {
        // Check 2.iii and 2.iv. Make sure no null entries in the sharding index
        // and that there is a useful, non-multikey index available
        BSONObjBuilder cmd;
        cmd.append( "checkShardingIndex" , ns );
        cmd.append( "keyPattern" , proposedKey );
        BSONObj cmdObj = cmd.obj();
        if ( ! conn->get()->runCommand( "admin" , cmdObj , res ) ) {
            errmsg = res["errmsg"].str();
            conn->done();
            return false;
        }
    }
    // 4. if no useful index, and collection is non-empty, fail
    else if ( conn->get()->count( ns ) != 0 ) {
        errmsg = str::stream() << "please create an index that starts with the "
                               << "shard key before sharding.";
        result.append( "proposedKey" , proposedKey );
        result.appendArray( "curIndexes" , allIndexes.done() );
        conn->done();
        return false;
    }
    // 5. If no useful index exists, and collection empty, create one on proposedKey.
    //    Only need to call ensureIndex on primary shard, since indexes get copied to
    //    receiving shard whenever a migrate occurs.
    else { // for an empty collection, create the index
        // call ensureIndex with cache=false, see SERVER-1691
        bool ensureSuccess = conn->get()->ensureIndex( ns , proposedKey , careAboutUnique , "" , false );
    }
    conn->done();
    // the actual shardCollection action
    config->shardCollection( ns , proposedKey , careAboutUnique );
    return true;
}
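The checkShardingIndex round in step 2 above is itself a command executed on the primary shard's mongod, and it can be issued by hand to see what it validates. A sketch, run directly against that mongod:

> db.runCommand({ checkShardingIndex: "fool.coll", keyPattern: { _id: 1 } })
{ "ok" : 1 }   // or ok:0 with an errmsg about null values or multikey entries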
run->shardCollection
ChunkManagerPtr DBConfig::shardCollection( const string& ns , ShardKeyPattern fieldsAndOrder , bool unique ,
                                           vector<BSONObj>* initPoints, vector<Shard>* initShards ) {
    ChunkManagerPtr manager; // every sharded collection is managed by a ChunkManager, which locates each chunk
    {                        // and decides which shard each piece of data should be sent to
        scoped_lock lk( _lock );
        CollectionInfo& ci = _collections[ns];
        ChunkManager* cm = new ChunkManager( ns, fieldsAndOrder, unique );
        cm->createFirstChunks( configServer.getPrimary().getConnString(), // this first chunk creation may already split;
                               getPrimary(), initPoints, initShards );    // the collection's sharding information is saved into the chunks collection
        ci.shard( cm ); // read everything back from the chunks collection, initializing the ChunkManager and the chunk ranges
        _save();        // as analyzed above, persist the changed configuration to the databases and collections collections
        // Save the initial chunk manager for later, no need to reload if we're in this lock
        manager = ci.getCM();
    }
    // Tell the primary mongod to refresh it's data
    // TODO: Think the real fix here is for mongos to just assume all collections sharded, when we get there
    for( int i = 0; i < 4; i++ ){
        {
            ShardConnection conn( getPrimary(), ns );
            conn.setVersion(); // tell the shard its version has changed, so the configuration must be reloaded
            conn.done();
            break;
        }
        sleepsecs( i );
    }
    return manager;
}
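Once shardCollection returns, its effect is visible in the config database: the collection is registered in config.collections, and an initial chunk covering the whole key range appears in config.chunks. Illustrative output:

mongos> use config
mongos> db.collections.findOne({ _id: "fool.coll" })
{ "_id" : "fool.coll", "lastmod" : ISODate("..."), "dropped" : false, "key" : { "_id" : 1 }, "unique" : false }
mongos> db.chunks.count({ ns: "fool.coll" })
1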
run->shardCollection->setVersion
bool setVersion() {
    _finishInit();
    return _setVersion;
}

void ShardConnection::_finishInit() {
    if ( _finishedInit )
        return;
    _finishedInit = true;
    if ( _ns.size() && versionManager.isVersionableCB( _conn ) ) {
        // Make sure we specified a manager for the correct namespace
        if( _manager )
            verify( _manager->getns() == _ns );
        _setVersion = versionManager.checkShardVersionCB( this , false , 1 );
    }
    else {
        // Make sure we didn't specify a manager for an empty namespace
        _setVersion = false;
    }
}

bool VersionManager::checkShardVersionCB( ShardConnection* conn_in , bool authoritative , int tryNumber ) {
    return checkShardVersion( conn_in->get(), conn_in->getNS(), conn_in->getManager(), authoritative, tryNumber );
}
run->shardCollection->setVersion->_finishInit->checkShardVersion
bool checkShardVersion( DBClientBase * conn_in , const string& ns , ChunkManagerPtr refManager,
                        bool authoritative , int tryNumber ) {
    // Initialize the writeback listener. If, say, data originally destined for shard0 was moved
    // elsewhere by the balancer, the write is collected here and re-sent to the correct location.
    WriteBackListener::init( *conn_in );
    DBConfigPtr conf = grid.getDBConfig( ns );
    DBClientBase* conn = getVersionable( conn_in );
    unsigned long long officialSequenceNumber = 0;
    ChunkManagerPtr manager;
    const bool isSharded = conf->isSharded( ns );
    if ( isSharded ) {
        manager = conf->getChunkManagerIfExists( ns , authoritative );
        // It's possible the chunk manager was reset since we checked whether sharded was true,
        // so must check this here.
        if( manager )
            officialSequenceNumber = manager->getSequenceNumber();
    }
    // Check this manager against the reference manager
    if( isSharded && manager ){
        Shard shard = Shard::make( conn->getServerAddress() );
        if( refManager && ! refManager->compatibleWith( manager, shard ) ){
            // compare versions; if they do not match (e.g. a split or a balance happened), throw
            throw SendStaleConfigException( ns, str::stream()
                << "manager (" << manager->getVersion( shard ).toString()
                << " : " << manager->getSequenceNumber() << ") "
                << "not compatible with reference manager ("
                << refManager->getVersion( shard ).toString()
                << " : " << refManager->getSequenceNumber() << ") "
                << "on shard " << shard.getName()
                << " (" << shard.getAddress().toString() << ")",
                refManager->getVersion( shard ), manager->getVersion( shard ) );
        }
    }
    else if( refManager ){
        Shard shard = Shard::make( conn->getServerAddress() );
        string msg( str::stream() << "not sharded ("
            << ( (manager.get() == 0) ? string( "<none>" ) :
                    str::stream() << manager->getSequenceNumber() )
            << ") but has reference manager ("
            << refManager->getSequenceNumber() << ") "
            << "on conn " << conn->getServerAddress() << " ("
            << conn_in->getServerAddress() << ")" );
        throw SendStaleConfigException( ns, msg,
            refManager->getVersion( shard ), ShardChunkVersion( 0, OID() ));
    }
    // has the ChunkManager been reloaded since the last time we updated the connection-level version?
    // (ie., last time we issued the setShardVersions below)
    unsigned long long sequenceNumber = connectionShardStatus.getSequence(conn,ns);
    if ( sequenceNumber == officialSequenceNumber ) {
        return false;
    }
    ShardChunkVersion version = ShardChunkVersion( 0, OID() );
    if ( isSharded && manager ) {
        version = manager->getVersion( Shard::make( conn->getServerAddress() ) );
    }
    BSONObj result;
    if ( setShardVersion( *conn , ns , version , authoritative , result ) ) { // notify the remote shard that the version has changed
        // success!
        connectionShardStatus.setSequence( conn , ns , officialSequenceNumber );
        return true;
    }
    // the remainder retries checkShardVersion, for various reasons,
    // to notify the remote end that the version has changed
    if ( result["need_authoritative"].trueValue() )
        massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative );
    if ( ! authoritative ) {
        checkShardVersion( conn , ns , refManager, 1 , tryNumber + 1 );
        return true;
    }
    if ( result["reloadConfig"].trueValue() ) {
        if( result["version"].timestampTime() == 0 ){
            conf->reload();
        }
        else {
            // reload config
            conf->getChunkManager( ns , true );
        }
    }
    const int maxNumTries = 7;
    if ( tryNumber < maxNumTries ) {
        sleepmillis( 10 * tryNumber );
        checkShardVersion( conn , ns , refManager, true , tryNumber + 1 );
        return true;
    }
    massert( 10429 , errmsg , 0 ); // errmsg is built earlier in the full source, elided here
    return true;
}
run->shardCollection->setVersion->_finishInit->checkShardVersion->setShardVersion
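The function quoted below assembles a command document roughly of the following shape and runs it against the shard's admin database. All values here are illustrative, and the exact encoding of the version field depends on ShardChunkVersion::addToBSON:

{
    setShardVersion: "fool.coll",
    configdb: "127.0.0.1:29000",       // configServer.modelServer()
    version: Timestamp(2000, 0),       // the chunk version mongos believes this shard should have
    serverID: ObjectId("..."),         // identifies this mongos instance
    authoritative: true,               // only present when authoritative was requested
    shard: "shard0000",
    shardHost: "127.0.0.1:27040"
}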
bool setShardVersion( DBClientBase & conn , const string& ns , ShardChunkVersion version ,
                      bool authoritative , BSONObj& result ) {
    BSONObjBuilder cmdBuilder; // send a command to the remote mongod telling it the shard version
                               // has changed, so that it reloads its configuration
    cmdBuilder.append( "setShardVersion" , ns.c_str() );
    cmdBuilder.append( "configdb" , configServer.modelServer() );
    version.addToBSON( cmdBuilder );
    cmdBuilder.appendOID( "serverID" , &serverID );
    if ( authoritative )
        cmdBuilder.appendBool( "authoritative" , 1 );
    Shard s = Shard::make( conn.getServerAddress() );
    cmdBuilder.append( "shard" , s.getName() );
    cmdBuilder.append( "shardHost" , s.getConnString() );
    BSONObj cmd = cmdBuilder.obj();
    // We do not go deeper here. In short, after various checks the remote shard, because the shard
    // version changed, reloads the corresponding ChunkManager from the config server; this reload
    // only happens when chunks have actually been moved.
    return conn.runCommand( "admin", cmd, result, 0,
                            &AuthenticationTable::getInternalSecurityAuthenticationTable() );
}
This concludes the analysis of adding a server to the sharded cluster, enabling sharding on a database, and sharding a collection. This part is the foundation for what follows: once it is clear, the later analysis of queries, updates, and chunk migration becomes straightforward.
Original post: mongodb源码分析(二十)mongos分片的配置 (MongoDB source code analysis, part 20: mongos sharding configuration)
Author: yhjj0108, Yang Hao.