[Big Data] Hadoop Federation and Configuration Files

Hadoop Federation

1 How it works

Federation combines multiple HA pairs into a single cluster and exposes them through one unified interface, ViewFs. This effectively scales out the namespace.
All NameNodes share the same clusterID, and every DataNode serves all of the NameNodes.
Each NameNode owns its own block pool; the namenode ID and block pool ID determine where a file's blocks are stored (different NameNodes manage different parts of the directory tree).
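For example, with the ViewFs mount table configured below, clients address a single namespace and each path is routed to the nameservice that owns it (the paths here are illustrative):

hdfs dfs -ls /                      # lists the mount points: /bi and /dt
hdfs dfs -put data.log /bi/input    # routed to the bi nameservice (hdfs://bi/input)
hdfs dfs -put users.csv /dt/input   # routed to the dt nameservice (hdfs://dt/input)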

Architecture diagram: (figure omitted)

2 Configuration files

1 vi core-site.xml

<configuration>

<!-- Use ViewFs as the default filesystem so clients see one unified namespace -->
<property>
<name>fs.defaultFS</name>
<value>viewfs:///</value>
</property>

<!-- Mount the bi nameservice at /bi in the ViewFs namespace -->
<property>
<name>fs.viewfs.mounttable.default.link./bi</name>
<value>hdfs://bi/</value>
</property>

<!-- Mount the dt nameservice at /dt -->
<property>
<name>fs.viewfs.mounttable.default.link./dt</name>
<value>hdfs://dt/</value>
</property>

<!-- Base directory for HDFS data -->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/apps/hdpdata/</value>
</property>

<!-- ZooKeeper quorum used for automatic NameNode failover -->
<property>
<name>ha.zookeeper.quorum</name>
<value>mini5:2181,mini6:2181,mini7:2181</value>
</property>
</configuration>
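Once this file is distributed, a quick way to confirm a client resolves the ViewFs namespace (the expected output is shown in the comment):

hdfs getconf -confKey fs.defaultFS   # should print: viewfs:///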

2 vi hdfs-site.xml

<configuration>

<!-- Two federated nameservices: bi and dt -->
<property>
<name>dfs.nameservices</name>
<value>bi,dt</value>
</property>

<!-- HA pair for the bi nameservice -->
<property>
<name>dfs.ha.namenodes.bi</name>
<value>nn1,nn2</value>
</property>

<!-- HA pair for the dt nameservice -->
<property>
<name>dfs.ha.namenodes.dt</name>
<value>nn3,nn4</value>
</property>


<!-- RPC and HTTP addresses for each NameNode -->
<property>
<name>dfs.namenode.rpc-address.bi.nn1</name>
<value>mini1:9000</value>
</property>

<property>
<name>dfs.namenode.http-address.bi.nn1</name>
<value>mini1:50070</value>
</property>

<property>
<name>dfs.namenode.rpc-address.bi.nn2</name>
<value>mini2:9000</value>
</property>

<property>
<name>dfs.namenode.http-address.bi.nn2</name>
<value>mini2:50070</value>
</property>

<property>
<name>dfs.namenode.rpc-address.dt.nn3</name>
<value>mini3:9000</value>
</property>

<property>
<name>dfs.namenode.http-address.dt.nn3</name>
<value>mini3:50070</value>
</property>

<property>
<name>dfs.namenode.rpc-address.dt.nn4</name>
<value>mini4:9000</value>
</property>

<property>
<name>dfs.namenode.http-address.dt.nn4</name>
<value>mini4:50070</value>
</property>






<!-- JournalNode quorum that holds the shared edit log. The same property name
     can only take effect once per file: the bi NameNodes keep the /bi entry,
     the dt NameNodes keep the /dt entry. -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://mini5:8485;mini6:8485;mini7:8485/bi</value>
</property>

<!-- On the dt NameNodes use this value instead -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://mini5:8485;mini6:8485;mini7:8485/dt</value>
</property>



<!-- Local directory where JournalNodes store edits -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home/hadoop/apps/hdpdata/journaldata</value>
</property>

<!-- Enable automatic failover via ZKFC -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>



<!-- Client-side proxy providers that locate the active NameNode of each nameservice -->
<property>
<name>dfs.client.failover.proxy.provider.bi</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>

<property>
<name>dfs.client.failover.proxy.provider.dt</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>


<!-- Fencing: try sshfence first, then fall back to a no-op shell so failover never blocks -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>

<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>

<!-- SSH fencing timeout in milliseconds -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
</configuration>
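With both nameservices up, each HA pair can be inspected independently. A quick check (the states shown assume nn1 and nn3 came up active):

hdfs haadmin -ns bi -getServiceState nn1   # active
hdfs haadmin -ns bi -getServiceState nn2   # standby
hdfs haadmin -ns dt -getServiceState nn3   # active
hdfs haadmin -ns dt -getServiceState nn4   # standby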

3 vi mapred-site.xml

<configuration>

<!-- Run MapReduce jobs on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>

4 vi yarn-site.xml

<configuration>

<!-- Enable ResourceManager HA -->
<property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
</property>

<!-- Logical ID of this RM cluster in ZooKeeper -->
<property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yrc</value>
</property>

<property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
</property>

<property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>mini3</value>
</property>
<property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>mini4</value>
</property>

<!-- ZooKeeper quorum for RM state storage and leader election -->
<property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>mini5:2181,mini6:2181,mini7:2181</value>
</property>
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
</configuration>
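A quick way to confirm RM failover is wired up (assuming rm1 won the election):

yarn rmadmin -getServiceState rm1   # active
yarn rmadmin -getServiceState rm2   # standby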

3 Initialization steps

Start the ZooKeeper cluster first.
Then start the JournalNodes on mini5, mini6, and mini7:
hadoop-daemon.sh start journalnode


On nn1 of the bi cluster:
hdfs namenode -format -clusterId cyberspace
hdfs zkfc -formatZK
Copy the metadata directory to the standby (nn2), for example as sketched below.
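A minimal sketch of that copy, assuming the default name directory under hadoop.tmp.dir; running hdfs namenode -bootstrapStandby on nn2 is an equivalent alternative:

scp -r /home/hadoop/apps/hdpdata/dfs/name hadoop@mini2:/home/hadoop/apps/hdpdata/dfs/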

On nn3 of the dt cluster:
hdfs namenode -format -clusterId cyberspace   ### the clusterId must match bi's
hdfs zkfc -formatZK
Copy the metadata directory to the standby (nn4) in the same way.

On nn1 of the bi cluster, start HDFS:
sbin/start-dfs.sh

On the hosts configured as ResourceManagers, start YARN:
sbin/start-yarn.sh
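Finally, a smoke test through the federated namespace; the example jar path is the one shipped with standard Hadoop distributions, so adjust the version glob to your install:

jps                                      # each node should show its expected daemons
hdfs dfs -mkdir -p /bi/wordcount/input
hdfs dfs -put /etc/hosts /bi/wordcount/input
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar wordcount /bi/wordcount/input /bi/wordcount/output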
