cloudera CDH5 Hadoop集群全套安装部署流程

CDH install

[detail installing PDF]($ sudo ./cloudera-manager-installer.bin)

official

create virtual machine

eth0: hostonly

eth1: bridge

configure network and host

  • set ethX name and write down hardware address

    /etc/udev/rules.d/70-persistent-net.rules

  • /etc/sysconfig/network-scripts/ifcfg-eth0

    DEVICE=eth0
    HWADDR=08:00:27:0F:B6:C1
    TYPE=Ethernet
    UUID=be125edc-4dda-4478-964b-285409fdc735
    ONBOOT=yes
    NM_CONTROLLED=yes
    BOOTPROTO=static
    IPADDR=192.168.56.2

    /etc/sysconfig/network-scripts/ifcfg-eth1

    DEVICE=eth1
    HWADDR=08:00:27:9D:D9:E6
    TYPE=Ethernet
    UUID=be125edc-4dda-4478-964b-285409fdc735
    ONBOOT=yes
    NM_CONTROLLED=yes
    BOOTPROTO=dhcp
  • set host name and gateway

$ vi /etc/sysconfig/network
NETWORKING=yes
HOSTNAME=master
#GATEWAY=192.168.1.1
  • hosts
$vi /etc/hosts 
192.168.56.2 master
192.168.56.3 slave1

install perl

yum install perl

create user

sudo useradd -m cloudera
passwd cloudera

add to sudoer

visudo

无密码sudo权限

cloudera ALL=(root)NOPASSWD:ALL

turn off firewalls

  • disable the selinux at all nodes
vi /etc/selinux/config
SELINUX=disabled #careful! not SELINUXTYPE!!!
  • close iptables firewall
chkconfig iptables off
chkconfig ip6tables off
service iptables stop

Time

date #view the time
#set all node's time zone to shanghai
cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
#update time
yum install ntp
ntpdate pool.ntp.org
#update time manually
date -s 02/25/17
date -s 19:21:00
hwclock -w #write time to BIOS
chkconfig ntpd on

install JDK(alternative)

remove open jdk

java -version
rpm -qa | grep java

rpm -e --nodeps java-1.5.0-gcj-1.5.0.0-29.1.el6.x86_64
rpm -e --nodeps java-1.6.0-openjdk-1.6.0.0-1.66.1.13.0.el6.x86_64
rpm -e --nodeps java-1.6.0-openjdk-devel-1.6.0.0-1.66.1.13.0.el6.x86_64

install JDK

supported version

CDH5 -> jdk-7u55-linux-x64

jdk7

jdk8

mkdir /usr/java/jdk-version
cd /usr/java/jdk-version
tar zxvf jdk.1.7.0_nn.tar.gz
ln -s /usr/java/jdk.1.7.0_nn /usr/java/default
#or rpm -ivh jdk-7u55-linux-x64.rpm

vi /home/cloudera/.bash_profile

export JAVA_HOME=/usr/java/default
export CLASSPATH=.:$JAVA_HOME/lib:$CLASSPATH  
export PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin  
source  /home/cloudera/.bash_profile
rm /usr/java/default.tar.gz
java -version

Ubuntu:

sudo apt-get -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confold -y install oracle-j2sdk1.7

这步如果跳过的话之后在安装的时候就勾上要装JDK

clone virtual machine

clone and configure:

network configure

  1. delete /etc/udev/rules.d/70-persistent-net.rules and reboot, or delete the primary eth0 & eth1 entries and rename eth3 & eth4 to eth0 & eth1

    or you can
    remove modprobe: modprobe -r e1000
    reload modprobe: modprobe e1000

  2. change name and write down hardware address in

    /etc/udev/rules.d/70-persistent-net.rules

  3. change DEVICE, HWADDR, IPADDR in

    /etc/sysconfig/network-scripts/ifcfg-eth0

    restart network: service network restart

  4. change host name:

    in /etc/sysconfig/network

    HOSTNAME=

  5. write host file

    /etc/hosts

generate ssh key

cd /root/.ssh
ssh-keygen

then send the id_rsa.pub from all nodes(including master) to master

ssh-copy-id master
#or send manually
cat ~/.ssh/id_rsa.pub | ssh hadoop@CDHNode1 'cat >> ~/.ssh/authorized_keys'

finally send this authorized_keys file from master to all of the nodes

scp -r authorized_keys cloudera@slave1:~/.ssh/
...

理解rsa认证:

我有一对玉佩,我送给你其中一个玉佩,我自己拿着另一个玉佩,等我下次到你家的时候我给你家的保安看我这个玉佩就知道是我来了而不是小偷。

mysql

hadoop 本身需要数据库来维护,选择mysql来做这个事

yum install -y mysql-server mysql mysql-devel
#/usr/bin/mysqladmin -u root 'old_password' 'new-password'
#SET PASSWORD = Password('password');
chkconfig mysqld on
service mysqld start
cp  /usr/my.cnf  /etc/my.cnf 

vi /etc/my.cnf

#bind-address           = 127.0.0.1
max_connections  = 400 #(db_number*100+50)
#hive
create database hive DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
grant all privileges on hive.* to 'hive'@'master' identified by 'qwerqwer' with grant option;

#activity monitor
create database amon DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
grant all privileges on amon.* to 'amon'@'master' identified by 'qwerqwer' with grant option;

#Hue
create database hue DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
grant all privileges on hue.* to 'hue'@'master' identified by 'qwerqwer' with grant option;

#Oozie Server
create database oozie_server DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
grant all privileges on oozie_server.* to 'oozie_server'@'master' identified by 'qwerqwer' with grant option;

#smon
create database smon DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
grant all privileges on smon.* to 'smon'@'master' identified by 'qwerqwer' with grant option;

#hmon
create database hmon DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
grant all privileges on hmon.* to 'hmon'@'master' identified by 'qwerqwer' with grant option;

#hmon
create database hmon DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
grant all privileges on hmon.* to 'hmon'@'master' identified by 'qwerqwer' with grant option;
#root
grant all privileges on *.* to 'root'@'master' identified by 'qwerqwer' with grant option;
grant SELECT on *.* to 'root'@'%' identified by 'qwerqwer' with grant option;

#show grants
show grants;

JDBC

yum install mysql-connector-java -y

download cloudera manager

download CM

$ wget http://archive.cloudera.com/cm5/installer/latest/cloudera-manager-installer.bin
$ chmod u+x cloudera-manager-installer.bin
$ sudo ./cloudera-manager-installer.bin
#and next next ...

or

#prepare
su -c 'rpm -Uvh http://download.fedoraproject.org/pub/epel/5/i386/epel-release-5-4.noarch.rpm'
#centos
wget https://archive.cloudera.com/cm5/redhat/5/x86_64/cm/cloudera-manager.repo
sudo cp cloudera-manager.repo /etc/yum.repos.d/

#ubuntu
wget https://archive.cloudera.com/cm5/debian/squeeze/amd64/cm/cloudera.list
sudo cp cloudera.list /etc/apt/sources.list.d/
sudo apt-get update

do the next step in browser

open http://192.168.your.ip:7180 in browser
the default account and password are both admin (admin / admin)

specify hosts for your CDH cluster installation

Add your hostnames as defined in your /etc/hosts file of all servers.

master slave1 slave2

provide SSH login credentials

upload id_rsa.pub from base server

or

use same username password
on all the nodes

choose the services that you want to start on your cluster

CDH5

All Service
choose mysql as the database
Host name: master
Database type: mysql
Database name:
user name:
password:

start and stop

sudo service cloudera-scm-server start
sudo service cloudera-scm-server stop
sudo service cloudera-scm-server restart
sudo service cloudera-scm-agent start
sudo service cloudera-scm-agent stop
sudo service cloudera-scm-agent restart
sudo service cloudera-scm-agent status

add to startup

in master

chkconfig --add cloudera-scm-server
chkconfig cloudera-scm-server on
chkconfig --list cloudera-scm-server
chkconfig httpd  on

in slaves

chkconfig --add cloudera-scm-agent
chkconfig cloudera-scm-agent on
chkconfig --list cloudera-scm-agent

test installing

testing the install

Running a MapReduce Job

Parcel installation

sudo -u hdfs hadoop jar /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar pi 10 100

Package installation

 sudo -u hdfs hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar pi 10 100

View the results of running the job by selecting Activities > MAPREDUCE-1 Jobs from the top navigation bar:

你可能感兴趣的:(Linux,数据科学)