MongoDB 复制集

技能目标

  • 理解MongoDB复制集概念
  • 学会部署MongoDB复制集
  • 理解MongoDB选举过程
  • 学会MongoDB复制集管理和维护

    复制集概述

    复制集的优势如下

  • 让数据更安全
  • 提高数据可用性(24*7)
  • 灾难恢复
  • 无停机维护(如:索引重建,备份,故障转移)
  • 读缩放(额外的副本读取)
  • 副本集对应用程序是透明的

复制集的工作原理

MongoDB的复制集至少需要两个节点。其中一个是主节点(Primary),负责处理客户端请求;其余都是从节点(Secondary),负责复制主节点上的数据。

MongoDB各个节点的搭配方式为:一主一从或一主多从。主节点将其上的所有操作记录到oplog中,从节点定期轮询主节点获取这些操作,然后对自己的数据副本执行这些操作,从而保证从节点的数据与主节点的数据一致。

MongoDB复制结构如图

复制集特点如下

  • N个节点的群集
  • 任何节点可作为主节点
  • 所有写入操作都在主节点上
  • 自动故障转移
  • 自动恢复

MongoDB复制集部署

首先创建数据文件夹和日志文件夹

# 数据存放文件夹
[root@localhost ~]# cd /data/mongodb/
[root@localhost mongodb]# ls
mongodb1  mongodb2  mongodb3  mongodb4
# 日志存放文件夹
[root@localhost ~]# cd /data/logs/
[root@localhost logs]# touch mongodb{2,3,4}.log 
[root@localhost logs]# ls
mongodb1.log  mongodb2.log  mongodb3.log  mongodb4.log
[root@localhost logs]# chmod 777 ./*.log #赋予当前文件夹下所有以.log结尾的文件最大权限
[root@localhost logs]# ls
mongodb1.log  mongodb2.log  mongodb3.log  mongodb4.log
[root@localhost logs]# 

复制/etc/mongod.conf配置文件并开启复制集:将配置文件中"#replication:"前面的#号去掉,并在下一行加入"replSetName: kgcrs"(复制集名称为kgcrs)

[root@localhost logs]# cp -p /etc/mongod.conf /etc/mongod1.conf  
cp:是否覆盖"/etc/mongod1.conf"? y
[root@localhost logs]# vim /etc/mongod1.conf 
#修改配置文件里面的数据文件位置,日志文件位置和端口号
#复制出三个配置文件并修改文件位置,端口号
[root@localhost logs]# cp -p /etc/mongod1.conf /etc/mongod2.conf 
[root@localhost logs]# cp -p /etc/mongod1.conf /etc/mongod3.conf 
[root@localhost logs]# cp -p /etc/mongod1.conf /etc/mongod4.conf 
[root@localhost logs]# vim /etc/mongod2.conf 
[root@localhost logs]# vim /etc/mongod3.conf 
[root@localhost logs]# vim /etc/mongod4.conf 

开启节点

#开启节点
[root@localhost logs]# mongod -f /etc/mongod1.conf 
about to fork child process, waiting until server is ready for connections.
forked process: 25849
child process started successfully, parent exiting
[root@localhost logs]# mongod -f /etc/mongod2.conf 
about to fork child process, waiting until server is ready for connections.
forked process: 25781
child process started successfully, parent exiting
[root@localhost logs]# mongod -f /etc/mongod3.conf 
about to fork child process, waiting until server is ready for connections.
forked process: 25431
child process started successfully, parent exiting
[root@localhost logs]# mongod -f /etc/mongod4.conf 
about to fork child process, waiting until server is ready for connections.
forked process: 25851
child process started successfully, parent exiting

配置五个节点的复制集

> cfg={"_id":"kgcrs","members":[{"_id":0,"host":"127.0.0.1:27017"},{"_id":1,"host":"127.0.0.1:27018"},{"_id":2,"host":"127.0.0.1:27019"},{"_id":3,"host":"127.0.0.1:27020"},{"_id":4,"host":"127.0.0.1:27021"}]}
{
    "_id" : "kgcrs",
    "members" : [
        {
            "_id" : 0,
            "host" : "127.0.0.1:27017"
        },
        {
            "_id" : 1,
            "host" : "127.0.0.1:27018"
        },
        {
            "_id" : 2,
            "host" : "127.0.0.1:27019"
        },
        {
            "_id" : 3,
            "host" : "127.0.0.1:27020"
        },
        {
            "_id" : 4,
            "host" : "127.0.0.1:27021"
        }
    ]
}

#初始化配置时保证从节点没有数据
> rs.initiate(cfg) 

#添加节点
kgcrs:PRIMARY> rs.add("IP地址:端口号")
#删除节点
kgcrs:PRIMARY> rs.remove("IP地址:端口号")

查看复制集状态(rs是复制集replica set的简称)

> rs.status()
kgcrs:PRIMARY> rs.status()
{
    "set" : "kgcrs",
    "date" : ISODate("2018-07-17T06:03:15.378Z"),
    "myState" : 1,
    "term" : NumberLong(1),
    "syncingTo" : "",
    "syncSourceHost" : "",
    "syncSourceId" : -1,
    "heartbeatIntervalMillis" : NumberLong(2000),
    "optimes" : {
        "lastCommittedOpTime" : {
            "ts" : Timestamp(1531807392, 1),
            "t" : NumberLong(1)
        },
        "readConcernMajorityOpTime" : {
            "ts" : Timestamp(1531807392, 1),
            "t" : NumberLong(1)
        },
        "appliedOpTime" : {
            "ts" : Timestamp(1531807392, 1),
            "t" : NumberLong(1)
        },
        "durableOpTime" : {
            "ts" : Timestamp(1531807392, 1),
            "t" : NumberLong(1)
        }
    },
    "members" : [
        {
            "_id" : 0,
            "name" : "127.0.0.1:27017",
            "health" : 1,
            "state" : 1,
            "stateStr" : "PRIMARY",
            "uptime" : 1603,
            "optime" : {
                "ts" : Timestamp(1531807392, 1),
                "t" : NumberLong(1)
            },
            "optimeDate" : ISODate("2018-07-17T06:03:12Z"),
            "syncingTo" : "",
            "syncSourceHost" : "",
            "syncSourceId" : -1,
            "infoMessage" : "",
            "electionTime" : Timestamp(1531807251, 1),
            "electionDate" : ISODate("2018-07-17T06:00:51Z"),
            "configVersion" : 1,
            "self" : true,
            "lastHeartbeatMessage" : ""
        },
        {
            "_id" : 1,
            "name" : "127.0.0.1:27018",
            "health" : 1,
            "state" : 2,
            "stateStr" : "SECONDARY",
            "uptime" : 154,
            "optime" : {
                "ts" : Timestamp(1531807392, 1),
                "t" : NumberLong(1)
            },
            "optimeDurable" : {
                "ts" : Timestamp(1531807392, 1),
                "t" : NumberLong(1)
            },
            "optimeDate" : ISODate("2018-07-17T06:03:12Z"),
            "optimeDurableDate" : ISODate("2018-07-17T06:03:12Z"),
            "lastHeartbeat" : ISODate("2018-07-17T06:03:13.741Z"),
            "lastHeartbeatRecv" : ISODate("2018-07-17T06:03:14.290Z"),
            "pingMs" : NumberLong(0),
            "lastHeartbeatMessage" : "",
            "syncingTo" : "127.0.0.1:27017",
            "syncSourceHost" : "127.0.0.1:27017",
            "syncSourceId" : 0,
            "infoMessage" : "",
            "configVersion" : 1
        },
        {
            "_id" : 2,
            "name" : "127.0.0.1:27019",
            "health" : 1,
            "state" : 2,
            "stateStr" : "SECONDARY",
            "uptime" : 154,
            "optime" : {
                "ts" : Timestamp(1531807392, 1),
                "t" : NumberLong(1)
            },
            "optimeDurable" : {
                "ts" : Timestamp(1531807392, 1),
                "t" : NumberLong(1)
            },
            "optimeDate" : ISODate("2018-07-17T06:03:12Z"),
            "optimeDurableDate" : ISODate("2018-07-17T06:03:12Z"),
            "lastHeartbeat" : ISODate("2018-07-17T06:03:13.741Z"),
            "lastHeartbeatRecv" : ISODate("2018-07-17T06:03:14.245Z"),
            "pingMs" : NumberLong(0),
            "lastHeartbeatMessage" : "",
            "syncingTo" : "127.0.0.1:27017",
            "syncSourceHost" : "127.0.0.1:27017",
            "syncSourceId" : 0,
            "infoMessage" : "",
            "configVersion" : 1
        },
        {
            "_id" : 3,
            "name" : "127.0.0.1:27020",
            "health" : 1,
            "state" : 2,
            "stateStr" : "SECONDARY",
            "uptime" : 154,
            "optime" : {
                "ts" : Timestamp(1531807392, 1),
                "t" : NumberLong(1)
            },
            "optimeDurable" : {
                "ts" : Timestamp(1531807392, 1),
                "t" : NumberLong(1)
            },
            "optimeDate" : ISODate("2018-07-17T06:03:12Z"),
            "optimeDurableDate" : ISODate("2018-07-17T06:03:12Z"),
            "lastHeartbeat" : ISODate("2018-07-17T06:03:13.741Z"),
            "lastHeartbeatRecv" : ISODate("2018-07-17T06:03:14.291Z"),
            "pingMs" : NumberLong(0),
            "lastHeartbeatMessage" : "",
            "syncingTo" : "127.0.0.1:27017",
            "syncSourceHost" : "127.0.0.1:27017",
            "syncSourceId" : 0,
            "infoMessage" : "",
            "configVersion" : 1
        },
        {
            "_id" : 4,
            "name" : "127.0.0.1:27021",
            "health" : 1,
            "state" : 2,
            "stateStr" : "SECONDARY",
            "uptime" : 154,
            "optime" : {
                "ts" : Timestamp(1531807392, 1),
                "t" : NumberLong(1)
            },
            "optimeDurable" : {
                "ts" : Timestamp(1531807392, 1),
                "t" : NumberLong(1)
            },
            "optimeDate" : ISODate("2018-07-17T06:03:12Z"),
            "optimeDurableDate" : ISODate("2018-07-17T06:03:12Z"),
            "lastHeartbeat" : ISODate("2018-07-17T06:03:13.741Z"),
            "lastHeartbeatRecv" : ISODate("2018-07-17T06:03:14.142Z"),
            "pingMs" : NumberLong(0),
            "lastHeartbeatMessage" : "",
            "syncingTo" : "127.0.0.1:27017",
            "syncSourceHost" : "127.0.0.1:27017",
            "syncSourceId" : 0,
            "infoMessage" : "",
            "configVersion" : 1
        }
    ],
    "ok" : 1,
    "operationTime" : Timestamp(1531807392, 1),
    "$clusterTime" : {
        "clusterTime" : Timestamp(1531807392, 1),
        "signature" : {
            "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
            "keyId" : NumberLong(0)
        }
    }
}

故障转移切换

kill掉一个进程把主节点down掉
# down掉mongodb
[root@localhost logs]# ps aux | grep mongo
root     27647  0.3  2.8 1634180 53980 ?       Sl   13:36   0:06 mongod -f /etc/mongod.conf
root     27683  0.3  2.7 1482744 52596 ?       Sl   13:36   0:06 mongod -f /etc/mongod1.conf
root     27715  0.2  2.8 1466104 52796 ?       Sl   13:36   0:04 mongod -f /etc/mongod2.conf
root     27747  0.2  2.7 1474360 52364 ?       Sl   13:36   0:04 mongod -f /etc/mongod3.conf
root     27779  0.2  2.8 1465280 52936 ?       Sl   13:36   0:04 mongod -f /etc/mongod4.conf
root     28523  0.0  0.0 112676   984 pts/1    R+   14:05   0:00 grep --color=auto mongo
[root@localhost logs]# kill -9 27647
[root@localhost logs]# ps aux | grep mongo
root     27683  0.3  2.8 1490940 53304 ?       Sl   13:36   0:06 mongod -f /etc/mongod1.conf
root     27715  0.2  2.8 1490692 53420 ?       Sl   13:36   0:05 mongod -f /etc/mongod2.conf
root     27747  0.2  2.8 1618796 53596 ?       Sl   13:36   0:05 mongod -f /etc/mongod3.conf
root     27779  0.2  2.8 1489868 53252 ?       Sl   13:36   0:05 mongod -f /etc/mongod4.conf
root     28566  0.0  0.0 112676   980 pts/1    R+   14:06   0:00 grep --color=auto mongo

#自动切换(原本27017为主节点,down掉之后主节点自动切换为27020)

kgcrs:SECONDARY> rs.isMaster() #查看主从
{
    "hosts" : [
        "127.0.0.1:27017",
        "127.0.0.1:27018",
        "127.0.0.1:27019",
        "127.0.0.1:27020",
        "127.0.0.1:27021"
    ],
    "setName" : "kgcrs",
    "setVersion" : 1,
    "ismaster" : false,
    "secondary" : true,
    "primary" : "127.0.0.1:27020", 
    "me" : "127.0.0.1:27017",

手动切换

kgcrs:PRIMARY> rs.freeze(30)     //暂停30s不参与选举
{
    "ok" : 0,
    "errmsg" : "cannot freeze node when primary or running for election. state: Primary",
    "code" : 95,
    "codeName" : "NotSecondary",
    "operationTime" : Timestamp(1531808896, 1),
    "$clusterTime" : {
        "clusterTime" : Timestamp(1531808896, 1),
        "signature" : {
            "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
            "keyId" : NumberLong(0)
        }
    }
}
#//交出主节点位置,维持从节点状态不少于60秒,等待30秒使主节点和从节点日志同步
kgcrs:PRIMARY> rs.stepDown(60,30)  
2018-07-17T14:28:43.302+0800 E QUERY    [thread1] Error: error doing query: failed: network error while attempting to run command 'replSetStepDown' on host '127.0.0.1:27020'  :
DB.prototype.runCommand@src/mongo/shell/db.js:168:1
DB.prototype.adminCommand@src/mongo/shell/db.js:186:16
rs.stepDown@src/mongo/shell/utils.js:1341:12
@(shell):1:1
2018-07-17T14:28:43.305+0800 I NETWORK  [thread1] trying reconnect to 127.0.0.1:27020 (127.0.0.1) failed
2018-07-17T14:28:43.306+0800 I NETWORK  [thread1] reconnect 127.0.0.1:27020 (127.0.0.1) ok

#切换为了27018
kgcrs:SECONDARY> rs.isMaster()
{
    "hosts" : [
        "127.0.0.1:27017",
        "127.0.0.1:27018",
        "127.0.0.1:27019",
        "127.0.0.1:27020",
        "127.0.0.1:27021"
    ],
    "setName" : "kgcrs",
    "setVersion" : 1,
    "ismaster" : false,
    "secondary" : true,
    "primary" : "127.0.0.1:27018",
    "me" : "127.0.0.1:27020",

尝试创建数据库写入数据(mongodb的增删改查操作)

 #增
kgcrs:PRIMARY> use kgc
switched to db kgc
kgcrs:PRIMARY> db.t1.insert({"id":1,"name":"zhangsan"}) 
WriteResult({ "nInserted" : 1 })
kgcrs:PRIMARY> db.t2.insert({"id":2,"name":"zhangsan"})
WriteResult({ "nInserted" : 1 })                         
kgcrs:PRIMARY> show collections
t1
t2
#查
kgcrs:PRIMARY> db.t1.insert({"id":2,"name":"lisi"})   
WriteResult({ "nInserted" : 1 })
kgcrs:PRIMARY> db.t1.find()
{ "_id" : ObjectId("5b4da41868504a94462710e1"), "id" : 1, "name" : "zhangsan" }
{ "_id" : ObjectId("5b4da5a468504a94462710e3"), "id" : 2, "name" : "lisi" }
#改
kgcrs:PRIMARY> db.t1.update({"id":1},{$set:{"name":"tom"}})
WriteResult({ "nMatched" : 1, "nUpserted" : 0, "nModified" : 1 })
kgcrs:PRIMARY> db.t1.find()                         
{ "_id" : ObjectId("5b4da41868504a94462710e1"), "id" : 1, "name" : "tom" }
{ "_id" : ObjectId("5b4da5a468504a94462710e3"), "id" : 2, "name" : "lisi" }
#删
kgcrs:PRIMARY> db.t1.remove({"id":2})
WriteResult({ "nRemoved" : 1 })
kgcrs:PRIMARY> db.t1.find()
{ "_id" : ObjectId("5b4da41868504a94462710e1"), "id" : 1, "name" : "tom" }

上面所做的增删改查操作都会记录在local数据库里,我们可以查看一下

kgcrs:PRIMARY> show dbs
admin    0.000GB
config   0.000GB
kgc      0.000GB
local    0.000GB
school   0.000GB
school2  0.000GB
school8  0.000GB
kgcrs:PRIMARY> use local
switched to db local
kgcrs:PRIMARY> show tables
me
oplog.rs                  #做复制集之后所有操作的记录
replset.election
replset.minvalid
replset.oplogTruncateAfterPoint
startup_log
system.replset
system.rollback.id
kgcrs:PRIMARY> db.oplog.rs.find()
{ "ts" : Timestamp(1531814965, 1), "t" : NumberLong(3), "h" : NumberLong("8639784432240761376"), "v" : 2, "op" : "n", "ns" : "", "wall" : ISODate("2018-07-17T08:09:25.013Z"), "o" : { "msg" : "periodic noop" } }
{ "ts" : Timestamp(1531814975, 1), "t" : NumberLong(3), "h" : NumberLong("6221196488842671080"), "v" : 2, "op" : "n", "ns" : "", "wall" : ISODate("2018-07-17T08:09:35.014Z"), "o" : { "msg" : "periodic noop" } }
{ "ts" : Timestamp(1531814985, 1), "t" : NumberLong(3), "h" : NumberLong("-8535865731309768612"), "v" : 2, "op" : "n", "ns" : "", "wall" : ISODate("2018-07-17T08:09:45.013Z"), "o" : { "msg" : "periodic noop" } }
{ "ts" : Timestamp(1531814995, 1), "t" : NumberLong(3), "h" : NumberLong("4999394607928512799"), "v" : 2, "op" : "n", "ns" : "", "wall" : ISODate("2018-07-17T08:09:55.024Z"), "o" : { "msg" : "periodic noop" } }
{ "ts" : Timestamp(1531815005, 1), "t" : NumberLong(3), "h" : NumberLong("-5991841109696910698"), "v" : 2, "op" : "n", "ns" : "", "wall" : ISODate("2018-07-17T08:10:05.024Z"), "o" : { "msg" : "periodic noop" } }
{ "ts" : Timestamp(1531815015, 1), "t" : NumberLong(3), "h" : NumberLong("-8100024743592064147"), "v" : 2, "op" : "n", "ns" : "", "wall" : ISODate("2018-07-17T08:10:15.014Z"), "o" : { "msg" : "periodic noop" } }
{ "ts" : Timestamp(1531815025, 1), "t" : NumberLong(3), "h" : NumberLong("4558143489540169854"), "v" : 2, "op" : "n", "ns" : "", "wall" : ISODate("2018-07-17T08:10:25.025Z"), "o" : { "msg" : "periodic noop" } }

选举复制

1:标准节点(在标准节点里面选举主节点)
2:仲裁节点(能够选择谁作为主节点,自己不参与选举,仲裁节点上没有数据)
3:被动节点(不会被选举为主节点)

passives 被动节点 arbiters 仲裁节点

-------------允许从节点读取数据-----------

[root@localhost logs]# mongo --port 27018

kgcrs:SECONDARY> show dbs

kgcrs:SECONDARY> rs.slaveOk() #默认不允许从节点读取数据,执行该命令后允许从节点读取数据

-------------查看复制状态信息------------

kgcrs:SECONDARY> rs.help()
kgcrs:SECONDARY> rs.printReplicationInfo()
configured oplog size:   990MB
log length start to end: 1544secs (0.43hrs)
oplog first event time:  Mon Jul 16 2018 05:49:12 GMT+0800 (CST)
oplog last event time:   Mon Jul 16 2018 06:14:56 GMT+0800 (CST)
now:                     Mon Jul 16 2018 06:14:59 GMT+0800 (CST)

kgcrs:SECONDARY> rs.printSlaveReplicationInfo()
source: 192.168.235.200:27018
    syncedTo: Mon Jul 16 2018 06:16:16 GMT+0800 (CST)
    0 secs (0 hrs) behind the primary 
source: 192.168.235.200:27019
    syncedTo: Mon Jul 16 2018 06:16:16 GMT+0800 (CST)
    0 secs (0 hrs) behind the primary 

#你会发现仲裁节点并不具备数据复制

--------------更改oplog大小---------------

kgcrs:SECONDARY> use local

kgcrs:SECONDARY> db.oplog.rs.stats()

        "ns" : "local.oplog.rs",
    "size" : 20292,
    "count" : 178,
    "avgObjSize" : 114,
    "storageSize" : 45056,
        ...

kgcrs:SECONDARY> rs.printReplicationInfo()

configured oplog size:   990MB
log length start to end: 2024secs (0.56hrs)
oplog first event time:  Mon Jul 16 2018 05:49:12 GMT+0800 (CST)
oplog last event time:   Mon Jul 16 2018 06:22:56 GMT+0800 (CST)
now:                     Mon Jul 16 2018 06:23:05 GMT+0800 (CST)

[root@localhost logs]# mongo --port 27018

kgcrs:SECONDARY> use admin
kgcrs:SECONDARY> db.shutdownServer()

注释掉配置文件中replication:相关的启动参数,并将port端口号修改为27028

[root@localhost logs]# mongod -f /etc/mongod2.conf   #单实例模式启动

全备当前节点的所有oplog记录

[root@localhost logs]# mongodump --port 27028 --db local --collection 'oplog.rs'

[root@localhost logs]# mongo --port 27028

> use local
> db.oplog.rs.drop()
> db.runCommand( { create: "oplog.rs", capped: true, size: (2 * 1024 * 1024 * 1024) } )
> use admin
> db.shutdownServer()

net:
  port: 27018

replication:
    replSetName: kgcrs
    oplogSizeMB: 2048

[root@localhost logs]# mongod -f /etc/mongod2.conf
[root@localhost logs]# mongo --port 27018

kgcrs:PRIMARY> rs.stepDown()   #有效产生选举

-----------------部署认证复制-------------

kgcrs:PRIMARY> use admin
kgcrs:PRIMARY> db.createUser({"user":"root","pwd":"123","roles":["root"]})

[root@localhost bin]# vim /etc/mongod.conf

security:
   keyFile: /usr/bin/kgcrskey1
   clusterAuthMode: keyFile

[root@localhost bin]# vim /etc/mongod2.conf 
[root@localhost bin]# vim /etc/mongod3.conf 
[root@localhost bin]# vim /etc/mongod4.conf 

[root@localhost ~]# cd /usr/bin/

[root@localhost bin]# echo "kgcrs key"> kgcrskey1
[root@localhost bin]# echo "kgcrs key"> kgcrskey2
[root@localhost bin]# echo "kgcrs key"> kgcrskey3
[root@localhost bin]# echo "kgcrs key"> kgcrskey4
[root@localhost bin]# chmod 600 kgcrskey{1..4}

四个实例依次进行重启

进入primary
kgcrs:PRIMARY> show dbs   #无法查看数据库
kgcrs:PRIMARY> rs.status()   #无法查看复制集

kgcrs:PRIMARY> use admin    #身份登录验证
kgcrs:PRIMARY> db.auth("root","123")

kgcrs:PRIMARY> rs.status()  #可以查看复制集
kgcrs:PRIMARY> show dbs    #可以查看数据库