角色
drbdadm role r0
drbdadm dstate r0
[D_DISKLESS] = "Diskless",
[D_ATTACHING] = "Attaching",
[D_FAILED] = "Failed",
[D_NEGOTIATING] = "Negotiating",
[D_INCONSISTENT] = "Inconsistent",
[D_OUTDATED] = "Outdated",
[D_UNKNOWN] = "DUnknown",
[D_CONSISTENT] = "Consistent",
[D_UP_TO_DATE] = "UpToDate",
连接状态
[C_STANDALONE] = "StandAlone",
[C_DISCONNECTING] = "Disconnecting",
[C_UNCONNECTED] = "Unconnected",
[C_TIMEOUT] = "Timeout",
[C_BROKEN_PIPE] = "BrokenPipe",
[C_NETWORK_FAILURE] = "NetworkFailure",
[C_PROTOCOL_ERROR] = "ProtocolError",
[C_WF_CONNECTION] = "WFConnection",
[C_WF_REPORT_PARAMS] = "WFReportParams",
[C_TEAR_DOWN] = "TearDown",
[C_CONNECTED] = "Connected",
[C_STARTING_SYNC_S] = "StartingSyncS",
[C_STARTING_SYNC_T] = "StartingSyncT",
[C_WF_BITMAP_S] = "WFBitMapS",
[C_WF_BITMAP_T] = "WFBitMapT",
[C_WF_SYNC_UUID] = "WFSyncUUID",
[C_SYNC_SOURCE] = "SyncSource",
[C_SYNC_TARGET] = "SyncTarget",
[C_PAUSED_SYNC_S] = "PausedSyncS",
[C_PAUSED_SYNC_T] = "PausedSyncT",
[C_VERIFY_S] = "VerifyS",
[C_VERIFY_T] = "VerifyT",
[C_AHEAD] = "Ahead",
[C_BEHIND] = "Behind",
/*
 * Human-readable messages for the SS_* state-change error codes.
 *
 * Each message is stored at index -SS_<NAME>, so a lookup must negate the
 * SS_* code to obtain the array index. Slots that have no initializer here
 * are NULL (static storage is zero-initialized), so a lookup should check
 * for NULL before using the result.
 *
 * NOTE(review): the negated indices imply that the SS_* error constants are
 * negative values -- confirm against their definition, which is not visible
 * in this file section.
 */
static const char *drbd_state_sw_errors[] = {
[-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config",
[-SS_NO_UP_TO_DATE_DISK] = "Need access to UpToDate data",
[-SS_NO_LOCAL_DISK] = "Can not resync without local disk",
[-SS_NO_REMOTE_DISK] = "Can not resync without remote disk",
[-SS_CONNECTED_OUTDATES] = "Refusing to be Outdated while Connected",
[-SS_PRIMARY_NOP] = "Refusing to be Primary while peer is not outdated",
[-SS_RESYNC_RUNNING] = "Can not start OV/resync since it is already active",
[-SS_ALREADY_STANDALONE] = "Can not disconnect a StandAlone device",
[-SS_CW_FAILED_BY_PEER] = "State change was refused by peer node",
[-SS_IS_DISKLESS] = "Device is diskless, the requested operation requires a disk",
[-SS_DEVICE_IN_USE] = "Device is held open by someone",
[-SS_NO_NET_CONFIG] = "Have no net/connection configuration",
[-SS_NO_VERIFY_ALG] = "Need a verify algorithm to start online verify",
[-SS_NEED_CONNECTION] = "Need a connection to start verify or resync",
[-SS_NOT_SUPPORTED] = "Peer does not support protocol",
[-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated",
[-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change",
[-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted",
[-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config",
};
===================
Drbd.conf is as follows: > # > # drbd.conf example > # > # parameters you _need_ to change are the hostname, device, disk, # meta-disk, address and port in the "on <hostname> {}" sections. # # you ought to know about the protocol, and the various timeouts. # # you probably want to set the rate in the syncer sections > > # > # NOTE common pitfall: > # rate is given in units of _byte_ not bit > # > > # > # increase timeout and maybe ping-int in net{}, if you see > # problems with "connection lost/connection established" > # (or change your setup to reduce network latency; make sure full # duplex behaves as such; check average roundtrip times while # network is saturated; and so on ...) # > > # > # At most ONE global section is allowed. > # It must precede any resource section. > # > global { > # By default we load the module with a minor-count of 32. In case you > # have more devices in your config, the module gets loaded with > # a minor-count that ensures that you have 10 minors spare. > # In case 10 spare minors are too little for you, you can set the > # minor-count explicitly here. ( Note, in contrast to DRBD-0.7 an > # unused, spare minor has only a very little overhead of allocated > # memory (a single pointer to be exact). ) > # > # minor-count 64; > > # The user dialog counts and displays the seconds it waited so > # far. You might want to disable this if you have the console > # of your server connected to a serial terminal server with > # limited logging capacity. > # The Dialog will print the count each 'dialog-refresh' seconds, > # set it to 0 to disable redrawing completely. [ default = 1 ] > # > # dialog-refresh 5; # 5 seconds > > # You might disable one of drbdadm's sanity checks. > # disable-ip-verification; > > # Participate in DRBD's online usage counter at http://usage.drbd.org > # possible options: ask, yes, no. Default is ask. In case you do not > # know, set it to ask, and follow the on screen instructions later. 
> usage-count yes; > } > > > # > # The common section can have all the sections a resource can have but # not the host section (started with the "on" keyword). # The common section must precede all resources. # All resources inherit the settings from the common section. # Whereas settings in the resources have precedence over the common # setting. # > > common { > syncer { rate 10M; } > } > > # > # this need not be r#, you may use phony resource names, > # like "resource web" or "resource mail", too > # > > resource IsodeResource { > > # transfer protocol to use. > # C: write IO is reported as completed, if we know it has > # reached _both_ local and remote DISK. > # * for critical transactional data. > # B: write IO is reported as completed, if it has reached > # local DISK and remote buffer cache. > # * for most cases. > # A: write IO is reported as completed, if it has reached > # local DISK and local tcp send buffer. (see also sndbuf-size) > # * for high latency networks > # > #********** > # uhm, benchmarks have shown that C is actually better than B. > # this note shall disappear, when we are convinced that B is > # the right choice "for most cases". > # Until then, always use C unless you have a reason not to. > # --lge > #********** > # > protocol C; > > handlers { > # what should be done in case the node is primary, degraded > # (=no connection) and has inconsistent data. > pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f"; > > # The node is currently primary, but lost the after split brain > # auto recovery procedure. As a consequence it should go away. > pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f"; > > # In case you have set the on-io-error option to "call-local-io-error", > # this script will get executed in case of a local IO error. It is > # expected that this script will cause an immediate failover in the > # cluster. 
> local-io-error "echo o > /proc/sysrq-trigger ; halt -f"; > > # Commands to run in case we need to downgrade the peer's disk > # state to "Outdated". Should be implemented by the superior > # communication possibilities of our cluster manager. > # The provided script uses ssh, and is for demonstration/development > # purposes. > # outdate-peer "/usr/lib/drbd/outdate-peer.sh on amd 192.168.22.11 192.168.23.11 on alf 192.168.22.12 192.168.23.12"; > # > # Update: Now there is a solution that relies on heartbeat's > # communication layers. You should really use this. > outdate-peer "/usr/sbin/drbd-peer-outdater"; > } > > startup { > # Wait for connection timeout. > # The init script blocks the boot process until the resources > # are connected. This is so when the cluster manager starts later, > # it does not see a resource with internal split-brain. > # In case you want to limit the wait time, do it here. > # Default is 0, which means unlimited. Unit is seconds. > # > wfc-timeout 0; > > # Wait for connection timeout if this node was a degraded cluster. > # In case a degraded cluster (= cluster with only one node left) > # is rebooted, this timeout value is used. > # > degr-wfc-timeout 60; # 1 minute. > } > > disk { > # if the lower level device reports io-error you have the choice of > # "pass_on" -> Report the io-error to the upper layers. > # Primary -> report it to the mounted file system. > # Secondary -> ignore it. > # "call-local-io-error" > # -> Call the script configured by the name "local-io-error". > # "detach" -> The node drops its backing storage device, and > # continues in diskless mode. > # > on-io-error detach; > > # Controls the fencing policy, default is "dont-care". Before you > # set any policy you need to make sure that you have a working > # outdate-peer handler. Possible values are: > # "dont-care" -> Never call the outdate-peer handler. 
[ DEFAULT ] > # "resource-only" -> Call the outdate-peer handler if we are primary and > # lose the connection to the secondary. As well > # when an unconnected secondary wants to become > # primary. > # "resource-and-stonith" > # -> Calls the outdate-peer handler and freezes local > # IO immediately after loss of connection. This is > # necessary if your heartbeat can STONITH the other > # node. > # fencing resource-only; > > # In case you only want to use a fraction of the available space > # you might use the "size" option here. > # > # size 10G; > } > > net { > # this is the size of the tcp socket send buffer > # increase it _carefully_ if you want to use protocol A over a > # high latency network with reasonable write throughput. > # defaults to 2*65535; you might try even 1M, but if your kernel or > # network driver chokes on that, you have been warned. > # sndbuf-size 512k; > > # timeout 60; # 6 seconds (unit = 0.1 seconds) > # connect-int 10; # 10 seconds (unit = 1 second) > # ping-int 10; # 10 seconds (unit = 1 second) > # ping-timeout 5; # 500 ms (unit = 0.1 seconds) > > # Maximal number of requests (4K) to be allocated by DRBD. > # The minimum is hardcoded to 32 (=128 kByte). > # For high performance installations it might help if you > # increase that number. These buffers are used to hold > # datablocks while they are written to disk. > # > # max-buffers 2048; > > # When the number of outstanding requests on a standby (secondary) > # node exceeds bdev-threshold, we start to kick the backing device > # to start its request processing. This is an advanced tuning > # parameter to get more performance out of capable storage controllers. > # Some controllers like to be kicked often, other controllers > # deliver better performance when they are kicked less frequently. > # Set it to the value of max-buffers to get the least possible > # number of run_task_queue_disk() / q->unplug_fn(q) calls. 
> # > # unplug-watermark 128; > > > # The highest number of data blocks between two write barriers. > # If you set this < 10 you might decrease your performance. > # max-epoch-size 2048; > > # if some block send times out this many times, the peer is > # considered dead, even if it still answers ping requests. > # ko-count 4; > > # If you want to use OCFS2/openGFS on top of DRBD enable > # this option, and only enable it if you are going to use > # one of these filesystems. Do not enable it for ext2, > # ext3,reiserFS,XFS,JFS etc... > # allow-two-primaries; > > # This enables peer authentication. Without this everybody > # on the network could connect to one of your DRBD nodes with > # a program that emulates DRBD's protocol and could suck off > # all your data. > # Specify one of the kernel's digest algorithms, e.g.: > # md5, sha1, sha256, sha512, wp256, wp384, wp512, michael_mic ... > # and a shared secret. > # Authentication is only done once after the TCP connection > # is established, there are no disadvantages from using authentication, > # therefore I suggest to enable it in any case. > # cram-hmac-alg "sha1"; > # shared-secret "FooFunFactory"; > > # In case the nodes of your cluster see each other again, after > # a split brain situation in which both nodes were primary > # at the same time, you have two diverged versions of your data. > # > # In case both nodes are secondary you can control DRBD's > # auto recovery strategy by the "after-sb-0pri" options. The > # default is to disconnect. > # "disconnect" ... No automatic resynchronisation, simply disconnect. > # "discard-younger-primary" > # Auto sync from the node that was primary before > # the split brain situation happened. > # "discard-older-primary" > # Auto sync from the node that became primary > # as second during the split brain situation. > # "discard-least-changes" > # Auto sync from the node that touched more > # blocks during the split brain situation. 
> # "discard-node-NODENAME" > # Auto sync _to_ the named node. > after-sb-0pri disconnect; > > # If one of the nodes is already primary, then the auto-recovery > # strategy is controlled by the "after-sb-1pri" options. > # "disconnect" ... always disconnect > # "consensus" ... discard the version of the secondary if the outcome > # of the "after-sb-0pri" algorithm would also destroy > # the current secondary's data. Otherwise disconnect. > # "violently-as0p" Always take the decision of the "after-sb-0pri" > # algorithm. Even if that causes an erratic change > # of the primary's view of the data. > # This is only useful if you use a 1node FS (i.e. > # not OCFS2 or GFS) with the allow-two-primaries > # flag, _AND_ you really know what you are doing. > # This is DANGEROUS and MAY CRASH YOUR MACHINE if you > # have a FS mounted on the primary node. > # "discard-secondary" > # discard the version of the secondary. > # "call-pri-lost-after-sb" Always honour the outcome of the "after-sb-0pri" > # algorithm. In case it decides that the current > # secondary has the right data, it panics the > # current primary. > # "suspend-primary" ??? > after-sb-1pri disconnect; > > # In case both nodes are primary you control DRBD's strategy by > # the "after-sb-2pri" option. > # "disconnect" ... Go to StandAlone mode on both sides. > # "violently-as0p" Always take the decision of the "after-sb-0pri". > # "call-pri-lost-after-sb" ... Honor the outcome of the "after-sb-0pri" > # algorithm and panic the other node. > > after-sb-2pri disconnect; > > # To solve the cases when the outcome of the resync decisions is > # incompatible with the current role assignment in the cluster. > # "disconnect" ... No automatic resynchronisation, simply disconnect. > # "violently" .... Sync to the primary node is allowed, violating the > # assumption that data on a block device is stable > # for one of the nodes. DANGEROUS, DO NOT USE. 
> # "call-pri-lost" Call the "pri-lost" helper program on one of the > # machines. This program is expected to reboot the > # machine. (I.e. make it secondary.) > rr-conflict disconnect; > > # DRBD-0.7's behaviour is equivalent to > # after-sb-0pri discard-younger-primary; > # after-sb-1pri consensus; > # after-sb-2pri disconnect; > } > > syncer { > # Limit the bandwidth used by the resynchronisation process. > # default unit is kByte/sec; optional suffixes K,M,G are allowed. > # > # Even though this is a network setting, the units are based > # on _byte_ (octet for our french friends) not bit. > # We are storage guys. > # > # Note that on 100Mbit ethernet, you cannot expect more than > # 12.5 MByte total transfer rate. > # Consider using GigaBit Ethernet. > # > rate 10M; > > # Normally all devices are resynchronized parallel. > # To achieve better resynchronisation performance you should resync > # DRBD resources which have their backing storage on one physical > # disk sequentially. To express this, use the "after" keyword. > # after "r2"; > > # Configures the size of the active set. Each extent is 4M, > # 257 Extents ~> 1GB active set size. In case your syncer > # runs @ 10MB/sec, all resync after a primary's crash will last > # 1GB / ( 10MB/sec ) ~ 102 seconds ~ One Minute and 42 Seconds. > # BTW, the hash algorithm works best if the number of al-extents > # is prime. (To test the worst case performance use a power of 2) > al-extents 257; > } > > on amhs-1.tern.is { > device /dev/drbd0; > disk /dev/hda3; > address 10.10.10.217:7788; > flexible-meta-disk internal; > > # meta-disk is either 'internal' or '/dev/ice/name [idx]' > # > # You can use a single block device to store meta-data > # of multiple DRBD's. > # E.g. use meta-disk /dev/hde6[0]; and meta-disk /dev/hde6[1]; > # for two different resources. In this case the meta-disk > # would need to be at least 256 MB in size. 
> # > # 'internal' means, that the last 128 MB of the lower device > # are used to store the meta-data. > # You must not give an index with 'internal'. > } > > on amhs-2.tern.is { > device /dev/drbd0; > disk /dev/sda3; > address 10.10.10.218:7788; > meta-disk internal; > } > } >