1、安装JDK
su grid
sudo -i
passwd root
sudo dpkg -l
sudo dpkg -l | grep wget
cd /usr
mkdir java
tar -zxvf eclipse-SDK-4.2-linux-gtk.tar.gz
cp ~/Desktop/jdk-6u24-linux-i586.bin /usr/java
./jdk-6u24-linux-i586.bin
ln -s /usr/java/jdk-xxx /usr/jdk
vi /etc/profile
JAVA_HOME=/usr/jdk
CLASSPATH=$JAVA_HOME/lib
PATH=$PATH:$JAVA_HOME/bin
export JAVA_HOME CLASSPATH PATH
source /etc/profile
java -version
2、安装SSH
ssh
1.安装openssh-server
sudo dpkg -i openssh-client_1%3a5.5p1-4ubuntu6_i386.deb
sudo dpkg -i openssh-server_1%3a5.5p1-4ubuntu6_i386.deb
如果机器可以联网，也可以直接执行 sudo apt-get install openssh-server 进行安装
2.设置一下ip
sudo ifconfig eth0 192.168.1.222 netmask 255.255.255.0
3. 启动ssh-server。
$ /etc/init.d/ssh restart
service ssh restart
4. 确认ssh-server已经正常工作。
$ netstat -tlp
tcp6 0 0 *:ssh *:* LISTEN -
看到上面这一行输出说明ssh-server已经在运行了。
5. 在客户端通过ssh登录服务器。假设服务器的IP地址是192.168.0.103,登录的用户名是hyx。
$ ssh -l hyx 192.168.0.103
接下来会提示输入密码,然后就能成功登录到服务器上了
ssh 192.168.58.133
grid
3、配置hosts
sudo chmod 777 hosts
vi /etc/hosts
192.168.58.131 grid131
192.168.58.132 grid132
192.168.58.133 grid133
C:\Windows\System32\drivers\etc\hosts
4、配置hadoop
hadoop-env.sh
export JAVA_HOME=/usr/jdk
core-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://grid131:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/grid/hadoop/tmp</value><!-- 指定临时目录，否则会采用系统的临时目录，而系统的临时目录重启后会被清空 -->
</property>
</configuration>
mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>grid131:9001</value>
</property>
</configuration>
hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value><!-- 复制因子,不能大于slaves配置的节点数 -->
</property>
</configuration>
masters
grid131 SecondaryNameNode（角色说明；masters 文件中实际只写主机名 grid131）
slaves
grid132 DataNode（角色说明；slaves 文件中实际只写主机名，每行一个）
grid133 DataNode
5、拷贝hadoop
scp -r /home/grid/hadoop-1.2.1/ grid132:/home/grid
scp -r /home/grid/hadoop-1.2.1/ grid133:/home/grid
6、配置SSH免密
ssh-keygen -t rsa
cp id_rsa.pub authorized_keys
scp authorized_keys grid132:/home/grid/.ssh
ssh grid132
cd ~/.ssh
cat id_rsa.pub >> authorized_keys
7、关闭防火墙
centos
chkconfig iptables off 永久关闭，但需要重启后才生效
service iptables stop 立即生效，不需要重启（重启后失效）
ubuntu
ufw disable
sudo iptables -F
sudo ufw status
8、启动hadoop集群
vi /etc/profile
PATH=$PATH:$JAVA_HOME/bin:/home/grid/hadoop-1.2.1/bin
source /etc/profile
cd ~/hadoop-1.2.1
hadoop namenode -format
start-all.sh
jps
hadoop fs -ls
cd ~
mkdir input
echo 'hi hadoop' > input/text.txt
hadoop fs -put ~/input/ in33
hadoop fs -ls in33
hadoop fs -cat in33/text.txt
hadoop jar hadoop-examples-1.2.1.jar wordcount in33 out33
hadoop jar WordCount.jar com.test.WordCount input7 output7
http://grid131:50070/dfshealth.jsp
http://grid131:50030/jobtracker.jsp