此博文用adfly作为短链与外链,第一是防止爬虫随便爬着玩,访问共享页面;第二是有效避免其它童鞋浪费时间在这上面(收藏癖啊神马的...),第三是adfly弄着玩玩,看看GFW的功能如何。
编号 | 名称 | adfly跳转地址 | 访问密码 |
1 | 教程合集(屏幕录像专家EXE) | http://adf.ly/wLYu6 | hfve |
2 | Ubuntu Server 14.04 x64 | http://adf.ly/wLYnT | |
3 | VMWare Workstation 10.0.0 | http://adf.ly/wLYpY | |
4 | Hadoop 2.5.0 | http://adf.ly/wLYz3 | |
5 | HBase 0.98.5 | http://adf.ly/wLYzs | |
6 | JDK 7u67x64 | http://adf.ly/wLZ1D | |
7 | zookeeper 3.4.6 | http://adf.ly/wLZ3W | |
8 | | | |
9 | | | |
# Environment setup for Java, Hadoop and ZooKeeper
# (append to ~/.bashrc or /etc/profile on every node).
# NOTE: as published, these lines were collapsed onto a single line that
# starts with '#', which comments the whole snippet out — each export
# must be on its own line to take effect.

# Java
export JAVA_HOME=/root/java/jdk1.7.0_67
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH

# Hadoop
export HADOOP_HOME=/root/hadoop/hadoop-2.5.0
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export CLASSPATH=.:$HADOOP_HOME/lib:$CLASSPATH

# ZooKeeper
export ZOOKEEPER_HOME=/root/zookeeper/zookeeper-3.4.6
export PATH=$PATH:$ZOOKEEPER_HOME/bin
export CLASSPATH=.:$ZOOKEEPER_HOME/lib:$CLASSPATH
# /etc/hosts — identical on every node so that the hostnames used in the
# Hadoop/ZooKeeper configuration resolve to the cluster addresses below.
# NOTE(review): the default localhost entries are commented out; some
# tools expect "localhost" to resolve — confirm this is intentional.
#127.0.0.1 localhost
#127.0.1.1 namenode1.localdomain namenode1
# The following lines are desirable for IPv6 capable hosts
#::1 localhost ip6-localhost ip6-loopback
#ff02::1 ip6-allnodes
#ff02::2 ip6-allrouters
# NameNodes (also run ZooKeeper and JournalNodes per the configs below)
10.196.80.31 namenode1
10.196.80.32 namenode2
10.196.80.33 namenode3
# DataNodes
10.196.80.41 datanode1
10.196.80.42 datanode2
10.196.80.43 datanode3
10.196.80.44 datanode4
10.196.80.45 datanode5
# /etc/network/interfaces — static addressing for one cluster node.
# This file describes the network interfaces available on your system
# and how to activate them. For more information, see interfaces(5).

# The loopback network interface
auto lo
iface lo inet loopback

# The primary network interface
# NOTE(review): 10.196.80.45 is datanode5 in /etc/hosts — each node needs
# its own address here; confirm before copying this file verbatim.
auto eth0
iface eth0 inet static
    address 10.196.80.45
    gateway 10.196.80.130
    netmask 255.255.255.0
    network 10.196.80.0
    broadcast 10.196.80.255
# zoo.cfg — ZooKeeper ensemble configuration (identical on all three nodes).
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/root/zookeeper/zookeeper-3.4.6/data
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
# Ensemble members: server.N=host:peerPort:electionPort.
# N must match the number written to dataDir/myid on that host.
server.1=namenode1:2888:3888
server.2=namenode2:2888:3888
server.3=namenode3:2888:3888
# Log4j overrides for ZooKeeper: default logger/thresholds plus the
# locations of the main log and the trace log.
# Define some default values that can be overridden by system properties
zookeeper.root.logger=INFO, CONSOLE
zookeeper.console.threshold=INFO
zookeeper.log.dir=/root/log/zookeeper/
zookeeper.log.file=zookeeper.log
zookeeper.log.threshold=DEBUG
zookeeper.tracelog.dir=.
zookeeper.tracelog.file=zookeeper_trace.log
在每台namenode上,把与zoo.cfg中server.N对应的编号N写入各自的myid文件
# Write a unique myid file on each ZooKeeper node. The number must match
# the server.N entry for that host in zoo.cfg.
# FIXES vs the original:
#  - the remote command is quoted: an unquoted `ssh host echo 1 >> file`
#    performs the redirection on the LOCAL machine, not the remote one;
#  - `>` instead of `>>`: myid must contain exactly one number, and
#    re-running with append would corrupt it;
#  - the original comment for node 2 mistakenly said "third node".

# node 1 -> myid 1
ssh namenode1 "echo 1 > /root/zookeeper/zookeeper-3.4.6/data/myid"
# node 2 -> myid 2
ssh namenode2 "echo 2 > /root/zookeeper/zookeeper-3.4.6/data/myid"
# node 3 -> myid 3
ssh namenode3 "echo 3 > /root/zookeeper/zookeeper-3.4.6/data/myid"
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- hdfs-site.xml: HDFS HA configuration for the "gagcluster" nameservice
     (two NameNodes, QJM shared edits, ZKFC automatic failover). -->
<configuration>

  <!-- Number of replicas kept for each HDFS block. -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>

  <!-- Step 1. Logical name of the HA nameservice; must match the
       authority used in fs.defaultFS in core-site.xml. -->
  <property>
    <name>dfs.nameservices</name>
    <value>gagcluster</value>
  </property>

  <!-- Step 2. Unique IDs of the two NameNodes inside the nameservice. -->
  <property>
    <name>dfs.ha.namenodes.gagcluster</name>
    <value>namenode1,namenode2</value>
  </property>

  <!-- Step 3. Client RPC address of each NameNode. -->
  <property>
    <name>dfs.namenode.rpc-address.gagcluster.namenode1</name>
    <value>namenode1:9000</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.gagcluster.namenode2</name>
    <value>namenode2:9000</value>
  </property>

  <!-- Step 4. Dedicated service RPC address of each NameNode, keeping
       internal traffic off the client RPC port. -->
  <property>
    <name>dfs.namenode.servicerpc-address.gagcluster.namenode1</name>
    <value>namenode1:53310</value>
  </property>
  <property>
    <name>dfs.namenode.servicerpc-address.gagcluster.namenode2</name>
    <value>namenode2:53310</value>
  </property>

  <!-- Step 5. HTTP (web UI) address of each NameNode. -->
  <property>
    <name>dfs.namenode.http-address.gagcluster.namenode1</name>
    <value>namenode1:50070</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.gagcluster.namenode2</name>
    <value>namenode2:50070</value>
  </property>

  <!-- Step 6. Quorum Journal Manager URI: the three JournalNodes that
       store the shared NameNode edit log. -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://namenode1:8485;namenode2:8485;namenode3:8485/gagcluster</value>
  </property>

  <!-- Step 7. Local directory where each JournalNode keeps its edits. -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/root/data/DFS_JOURNALNODE_EDITS_DIR</value>
  </property>

  <!-- Step 8. Class clients use to determine which NameNode is active. -->
  <property>
    <name>dfs.client.failover.proxy.provider.gagcluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>

  <!-- Step 9. Enable ZKFC-driven automatic failover. -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>

  <!-- Step 10. Fence the failed NameNode over SSH before failing over. -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>

  <!-- Local storage for the NameNode namespace image. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/root/data/DFS_NAMENODE_NAME_DIR</value>
  </property>

  <!-- Local storage for DataNode blocks. -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/root/data/DFS_DATANODE_DATA_DIR</value>
  </property>

  <!-- Private key used by sshfence; requires passwordless SSH between
       the NameNodes. -->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/root/.ssh/id_rsa</value>
  </property>

</configuration>
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- core-site.xml: cluster-wide defaults — HA filesystem URI, temp dir,
     ZooKeeper quorum, and proxy-user relaxations. -->
<configuration>

  <!-- Default filesystem URI. "gagcluster" is the HA nameservice declared
       in hdfs-site.xml (dfs.nameservices), not a single host:port. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://gagcluster</value>
  </property>

  <!-- Base for Hadoop's temporary/working directories. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/root/data/HADOOP_TMP_DIR</value>
    <description>Abase for other temporary directories.</description>
  </property>

  <!-- ZooKeeper ensemble used by the ZKFCs for automatic failover. -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>namenode1:2181,namenode2:2181,namenode3:2181</value>
  </property>

  <!-- Allow the hduser proxy user to impersonate members of any group. -->
  <property>
    <name>hadoop.proxyuser.hduser.groups</name>
    <value>*</value>
  </property>

  <!-- Read/write buffer size in bytes (128 KB). -->
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>

  <!-- Allow the hduser proxy user to connect from any host. -->
  <property>
    <name>hadoop.proxyuser.hduser.hosts</name>
    <value>*</value>
  </property>

</configuration>
# Log4j overrides for Hadoop: default root logger and log file location.
hadoop.root.logger=INFO,console
hadoop.log.dir=/root/log/hadoop/
hadoop.log.file=hadoop.log
# hadoop-env.sh: the Java implementation to use.
# As published this was collapsed onto one line starting with '#',
# commenting the export out — it must be on its own line.
# Trailing slash dropped for consistency with the JAVA_HOME exported in
# the shell profile above.
export JAVA_HOME=/root/java/jdk1.7.0_67
namenode1 zkServer.sh start
namenode2 zkServer.sh start
namenode3 zkServer.sh start
namenode1 hadoop/bin/hdfs zkfc -formatZK
namenode1 hadoop-daemon.sh start journalnode
namenode2 hadoop-daemon.sh start journalnode
namenode3 hadoop-daemon.sh start journalnode
namenode1 hadoop/bin/hdfs namenode -format
namenode1 hadoop-daemon.sh start namenode
namenode2 hdfs namenode -bootstrapStandby
namenode2 hadoop-daemon.sh start namenode
datanode1-5 hadoop-daemon.sh start datanode (分别在这些节点上启动datanode,当然也有更好的方法)
namenode1 hadoop-daemon.sh start zkfc
namenode2 hadoop-daemon.sh start zkfc
namenode3 hadoop-daemon.sh start zkfc
namenode1 zkServer.sh start
namenode2 zkServer.sh start
namenode3 zkServer.sh start
namenode1 hadoop/bin/hdfs zkfc -formatZK 不需要执行(ZK格式化只在首次搭建时执行一次,重启集群时重复执行会清空ZooKeeper中已保存的HA状态)
namenode1 hadoop-daemon.sh start journalnode
namenode2 hadoop-daemon.sh start journalnode
namenode3 hadoop-daemon.sh start journalnode
namenode1 hadoop/bin/hdfs namenode -format 不需要执行(namenode只在首次搭建时格式化一次,重复执行会丢失HDFS元数据)
namenode1 hadoop-daemon.sh start namenode
namenode2 hdfs namenode -bootstrapStandby
namenode2 hadoop-daemon.sh start namenode
datanode1-5 hadoop-daemon.sh start datanode (分别在这些节点上启动datanode,当然也有更好的方法)
namenode1 hadoop-daemon.sh start zkfc
namenode2 hadoop-daemon.sh start zkfc
namenode3 hadoop-daemon.sh start zkfc
http://namenode1:50070 显示应该是active
http://namenode2:50070 显示应该是standby