Create the Dockerfile
Create the Docker directory and pre-allocate the devicemapper storage pool:
mkdir -p /docker/docker/devicemapper/devicemapper
ln -s /docker/docker/ docker
dd if=/dev/zero of=/var/lib/docker/devicemapper/devicemapper/data bs=1G count=0 seek=800
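The symlink above is presumably created from /var/lib, so that /var/lib/docker ends up pointing at /docker/docker. A quick sanity check of the sparse pool file and of the daemon picking it up (a sketch; the restart command depends on the init system):
ls -lsh /var/lib/docker/devicemapper/devicemapper/data   # ~800G apparent size, near-zero blocks actually allocated
systemctl restart docker                                 # restart the daemon so it uses the new pool file
docker info | grep -i data                               # the devicemapper backend should report this data file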
Write the following into the Dockerfile:
FROM ubuntu
MAINTAINER yidugov
RUN apt-get update
RUN apt-get install -y openssh-server sudo vim python
RUN sed -i 's/UsePAM yes/UsePAM no/g' /etc/ssh/sshd_config
RUN groupadd hadoop
RUN useradd -g hadoop -d /home/hadoop -m hadoop -s /bin/bash
RUN echo "hadoop:hadoop" | chpasswd
RUN echo "root ALL=(ALL) ALL" >> /etc/sudoers
RUN mkdir /var/run/sshd
RUN mkdir /usr/local/bigdata
ADD bigdata.tar.gz /usr/local/bigdata
EXPOSE 22
CMD ["/usr/sbin/sshd", "-D"]
Save and exit.
Then, in the same directory, run: docker build -t yidugov/hadoop .
Once the image is built, start the containers. To pin CPU and memory, a container can be started like this:
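If the build succeeds, the image should now be visible locally:
docker images | grep yidugov/hadoop     # the new image should be listed
docker history yidugov/hadoop           # optionally, inspect the layers produced by the Dockerfile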
docker run --cpuset-cpus=0-3 -m 30G --name yidu1 --hostname yidu1 -d -P -p 50070:50070 -p 8088:8088 yidugov/hadoop
--cpuset-cpus=0-3 : pins the container to CPUs 0-3
-m 30G : limits the container's memory to 30 GB
The five containers used here are started with:
docker run --name yidu1 --hostname yidu1 -d -P -p 50070:50070 -p 8088:8088 yidugov/hadoop
docker run --name yidu2 --hostname yidu2 -d -P yidugov/hadoop
docker run --name yidu3 --hostname yidu3 -d -P yidugov/hadoop
docker run --name yidu4 --hostname yidu4 -d -P yidugov/hadoop
docker run --name yidu5 --hostname yidu5 -d -P yidugov/hadoop
The five containers are now created.
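Before going further it is worth confirming that all five containers are up and noting their internal IP addresses, since the hosts entries inside the containers will need them; a possible check from the host:
docker ps --filter name=yidu            # all five containers should be listed as running
for c in yidu1 yidu2 yidu3 yidu4 yidu5; do
  echo -n "$c: "
  docker inspect -f '{{.NetworkSettings.IPAddress}}' "$c"   # container IP on the default bridge
done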
Run: docker exec -it yidu1 /bin/bash
Once inside the container, append the following environment variables
HADOOP_HOME=/usr/local/bigdata/hadoop
ZOOKEEPER_HOME=/usr/local/bigdata/zookeeper
JAVA_HOME=/usr/local/bigdata/jdk
MAVEN_HOME=/usr/local/bigdata/maven
SCALA_HOME=/usr/local/bigdata/scala
SPARK_HOME=/usr/local/bigdata/spark
MYSQL_HOME=/usr/local/bigdata/mysql
SQOOP_HOME=/usr/local/bigdata/sqoop
ORACLE_HOME=/usr/local/bigdata/oracle
HIVE_HOME=/usr/local/bigdata/hive
PATH=$PATH:$HOME/bin:$HIVE_HOME/bin:$MYSQL_HOME/bin:$ORACLE_HOME:$SQOOP_HOME/bin:$HADOOP_HOME/bin:$ZOOKEEPER_HOME/bin:$JAVA_HOME/bin:$MAVEN_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/sbin:$SPARK_HOME/bin
export PATH
export JAVA_LIBRARY_PATH=/usr/local/bigdata/hadoop/lib/native
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
export SPARK_EXAMPLES_JAR=/usr/local/bigdata/spark/lib/spark-examples-1.6.2-hadoop2.6.0.jar
export LD_LIBRARY_PATH=$ORACLE_HOME
to the .bashrc file in the user's home directory.
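A quick way to confirm the variables took effect inside the container (run as the hadoop user, assuming the renamed directories exist under /usr/local/bigdata):
source ~/.bashrc
echo $HADOOP_HOME            # should print /usr/local/bigdata/hadoop
java -version                # JDK resolved from /usr/local/bigdata/jdk
hadoop version               # hadoop resolved from $HADOOP_HOME/bin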
Installing the NoSQL software
Setting up the NoSQL compute platform
1. Install the operating system
(omitted)
2. Update system packages
In particular, check that the gcc and glibc packages are installed:
rpm -qa | grep gcc
rpm -qa | grep glibc
Then update the packages: yum update
3. Create the user
Run:
groupadd hadoop                               # create the hadoop group
useradd -g hadoop -d /home/hadoop -m hadoop   # create the user (useradd -p expects an already-hashed password, so set it separately)
echo "hadoop:hadoop" | chpasswd               # set the password, as in the Dockerfile above
4. Set up passwordless SSH
Run:
rm -rf /root/.ssh/*       # clear out the old key directory
ssh-keygen -t rsa         # generate a key pair
ssh-copy-id -i yidu1      # copy the public key to every node, including this machine itself
ssh-copy-id -i yidu2
ssh-copy-id -i yidu3
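Passwordless login can then be verified in one pass; each command should print the remote hostname without prompting for a password:
for h in yidu1 yidu2 yidu3; do
  ssh "$h" hostname        # must not ask for a password
done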
5. Set the hostnames
Edit the hosts file: vim /etc/hosts
Add each node's IP address and hostname, for example:
192.168.18.150 yidu1
Then edit the network config: vim /etc/sysconfig/network
and set this machine's hostname:
HOSTNAME=yidu1
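For reference, the resulting /etc/hosts should list every node; a sketch with placeholder addresses for yidu2 and yidu3 (substitute the real IPs of your machines):
192.168.18.150 yidu1
192.168.18.151 yidu2    # placeholder address
192.168.18.152 yidu3    # placeholder address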
6. Create local software directories (optional, but recommended)
mkdir -p /usr/local/software
mkdir -p /usr/local/bigdata
Upload the software packages into this directory (/usr/local/software).
The packages needed are:
apache-hive-2.0.0-bin.tar.gz
hbase-1.1.4-bin.tar.gz (optional)
Python-2.7.11.tar.xz
spark-1.6.2-bin-hadoop2.6.tgz
apache-maven-3.3.9-bin.tar.gz
jdk-8u77-linux-x64.tar.gz
scala-2.12.0-RC1.tgz
zookeeper-3.4.8.tar.gz
hadoop-2.6.4.tar.gz
pip-8.1.2.tar.gz (Python packaging tool, optional)
setuptools-26.0.0.zip (Python packaging tool, optional)
Extract each package into /usr/local/bigdata:
tar -zxvf <package> -C /usr/local/bigdata     (run once per archive; the .tar.xz and .zip packages need tar -J / unzip instead)
Then rename the unpacked directories to short names so the environment variables below stay simple; see the sketch after this step.
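A sketch of the extract-and-rename step, assuming the archives sit in /usr/local/software and unpack to their usual version-named directories (names may differ slightly by release):
cd /usr/local/software
for f in *.tar.gz *.tgz; do tar -zxf "$f" -C /usr/local/bigdata; done
tar -Jxf Python-2.7.11.tar.xz -C /usr/local/bigdata     # the Python source is xz-compressed
# setuptools-26.0.0.zip can be unpacked with unzip if needed
cd /usr/local/bigdata
mv hadoop-2.6.4 hadoop
mv zookeeper-3.4.8 zookeeper
mv jdk1.8.0_77 jdk                                      # directory name as unpacked from the JDK tarball
mv apache-maven-3.3.9 maven
mv scala-2.12.0-RC1 scala
mv spark-1.6.2-bin-hadoop2.6 spark
mv apache-hive-2.0.0-bin hive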
7. Set the environment variables
Edit the profile:
vim /etc/profile
Append the following at the end of the file:
HADOOP_HOME=/usr/local/bigdata/hadoop
ZOOKEEPER_HOME=/usr/local/bigdata/zookeeper
JAVA_HOME=/usr/local/bigdata/jdk
MAVEN_HOME=/usr/local/bigdata/maven
SCALA_HOME=/usr/local/bigdata/scala
SPARK_HOME=/usr/local/bigdata/spark
PATH=$PATH:$HOME/bin:$HADOOP_HOME/bin:$ZOOKEEPER_HOME/bin:$JAVA_HOME/bin:$MAVEN_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/sbin:$SPARK_HOME/bin
export PATH
export JAVA_LIBRARY_PATH=/usr/local/bigdata/hadoop/lib/native
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
export SPARK_EXAMPLES_JAR=/usr/local/bigdata/spark/lib/spark-examples-1.6.2-hadoop2.6.0.jar
Then run:
source /etc/profile
8. Edit the software configuration
ZooKeeper:
Go into the ZooKeeper config directory:
cd $ZOOKEEPER_HOME/conf
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg
Change: dataDir=/usr/local/bigdata/zookeeper/data
Append at the end:
server.1=yidu1:2888:3888
server.2=yidu2:2888:3888
server.3=yidu3:2888:3888
mkdir $ZOOKEEPER_HOME/data
touch $ZOOKEEPER_HOME/data/myid
echo '1' > $ZOOKEEPER_HOME/data/myid     (write 1 into myid on node 1, 2 on node 2, 3 on node 3)
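Since each node needs a different id, one way to write all three myid files from yidu1 over the passwordless SSH set up earlier (a sketch; it assumes the same ZooKeeper path on every node):
for i in 1 2 3; do
  ssh yidu$i "mkdir -p /usr/local/bigdata/zookeeper/data && echo $i > /usr/local/bigdata/zookeeper/data/myid"
done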
Hadoop:
Go into the config directory: cd /usr/local/bigdata/hadoop/etc/hadoop/
The main files to edit are:
core-site.xml
hadoop-env.sh
hdfs-site.xml
mapred-site.xml
slaves
yarn-env.sh
yarn-site.xml
core-site.xml
<property><name>hadoop.tmp.dir</name><value>file:/usr/local/bigdata/hadoop/tmp</value></property>
<property><name>io.file.buffer.size</name><value>131072</value></property>
<property><name>fs.default.name</name><value>hdfs://yidu1:9000</value></property>
<property><name>hadoop.proxyuser.spark.hosts</name><value>*</value></property>
<property><name>hadoop.proxyuser.spark.groups</name><value>*</value></property>
<property><name>hadoop.native.lib</name><value>true</value></property>
hadoop-env.sh
export JAVA_HOME=/usr/local/bigdata/jdk
hdfs-site.xml
<property><name>dfs.namenode.secondary.http-address</name><value>yidu1:9001</value></property>
<property><name>dfs.namenode.name.dir</name><value>file:/usr/local/bigdata/hadoop/dfs/name</value></property>
<property><name>dfs.datanode.data.dir</name><value>file:/usr/local/bigdata/hadoop/dfs/data</value></property>
<property><name>dfs.replication</name><value>3</value></property>
<property><name>dfs.webhdfs.enabled</name><value>true</value></property>
mapred-site.xml
<property><name>mapreduce.framework.name</name><value>yarn</value></property>
<property><name>mapreduce.jobhistory.address</name><value>yidu1:10020</value></property>
<property><name>mapreduce.jobhistory.webapp.address</name><value>yidu1:19888</value></property>
slaves
yidu2    (worker node hostname)
yidu3    (worker node hostname)
yarn-env.sh
JAVA_HOME=/usr/local/bigdata/jdk
yarn-site.xml
<property><name>yarn.nodemanager.aux-services</name><value>mapreduce_shuffle</value></property>
<property><name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name><value>org.apache.hadoop.mapred.ShuffleHandler</value></property>
<property><name>yarn.resourcemanager.address</name><value>yidu1:8032</value></property>
<property><name>yarn.resourcemanager.scheduler.address</name><value>yidu1:8030</value></property>
<property><name>yarn.resourcemanager.resource-tracker.address</name><value>yidu1:8035</value></property>
<property><name>yarn.resourcemanager.admin.address</name><value>yidu1:8033</value></property>
<property><name>yarn.resourcemanager.webapp.address</name><value>yidu1:8088</value></property>
mkdir -p /usr/local/bigdata/hadoop/dfs/name
mkdir -p /usr/local/bigdata/hadoop/dfs/data
mkdir -p /usr/local/bigdata/hadoop/tmp
hdfs namenode -format
hadoop fs -mkdir -p /usr/hive/warehouse
hadoop fs -mkdir -p /usr/hive/tmp
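Note that the two hadoop fs commands above only work once HDFS is running; one possible ordering is to format first, bring HDFS up, then create the Hive directories:
hdfs namenode -format          # run once, on yidu1 only
start-dfs.sh                   # from $HADOOP_HOME/sbin (already on PATH)
hadoop fs -mkdir -p /usr/hive/warehouse
hadoop fs -mkdir -p /usr/hive/tmp
hadoop fs -ls /usr             # verify the directories exist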
Edit the Spark configuration:
cd /usr/local/bigdata/spark/conf
cp slaves.template slaves
vim slaves
Add:
yidu2
yidu3
cp spark-env.sh.template spark-env.sh
vim spark-env.sh
Append at the end: export SCALA_HOME=/usr/local/bigdata/scala
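Beyond SCALA_HOME, spark-env.sh usually needs a few more settings for a standalone cluster of this layout; a sketch, with values assumed from the paths and hostnames above:
export SCALA_HOME=/usr/local/bigdata/scala
export JAVA_HOME=/usr/local/bigdata/jdk
export HADOOP_CONF_DIR=/usr/local/bigdata/hadoop/etc/hadoop   # lets Spark read the HDFS/YARN configs
export SPARK_MASTER_IP=yidu1                                  # standalone master host (Spark 1.6 naming)
export SPARK_WORKER_MEMORY=2g                                 # per-worker memory, adjust to the machines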
Reboot the machine.
9. Start the services
After the reboot, switch to the hadoop user.
Run:
zkServer.sh start      (run on all three machines)
zkServer.sh status     (check the ZooKeeper server state)
start-all.sh
jps                    (list the running daemons)
In a browser, check that http://Master:8088 opens (replace Master with the master node's hostname, e.g. yidu1).
In a browser, check that http://Master:8080 opens.
If both pages open, the installation succeeded.
10. Run a computation with Spark
Run:
hadoop fs -mkdir /data          (create a directory on HDFS)
Upload README.md from the Spark directory to HDFS:
hadoop fs -put README.md /data
Compute:
Run: spark-shell
sc
val file = sc.textFile("hdfs://yidu1:9000/data/README.md")
val sparks=file.filter(line => line.contains("Spark"))
sparks.count
It returns:
res1: Long = 17
Install MySQL:
1. groupadd mysql    ## add a mysql group
2. useradd -r -g mysql mysql    ## add a mysql user
3. Extract the downloaded package: tar -xzvf /data/software/mysql-5.7.13-linux-glibc2.5-x86_64.tar.gz
4. Then rename the extracted directory: mv <extracted directory> mysql
5. Go into the mysql directory and give ownership of it to the mysql user: chown -R mysql:mysql ./
6. bin/mysqld --initialize --user=hadoop --basedir=/usr/local/bigdata/mysql --datadir=/usr/local/bigdata/mysql/data    ## run from the mysql directory; basedir is the MySQL install path, datadir is where MySQL keeps its own data (system tables such as user)
Important: note down the temporary root password printed by the initialization (e.g. YLi>7ecpe;YP in the output above).
7. bin/mysql_ssl_rsa_setup --datadir=/usr/local/bigdata/mysql/data    (the datadir must match the one used above)
8. Go into the mysql support-files directory.
9. cp my-default.cnf /etc/my.cnf
10. cp mysql.server /etc/init.d/mysql
11. vim /etc/init.d/mysql    ## set basedir= and datadir= to your own paths
12. bin/mysqld_safe --user=mysql &    ## start MySQL
13. bin/mysql --user=root -p
14. Enter the temporary password.
15. set password=password('A123456');
16. grant all privileges on *.* to root@'%' identified by 'A123456';
17. flush privileges;
18. use mysql;
19. select host,user from user;
20. Create the Hive user
create database hive;
grant all on hive.* to hive@'%' identified by 'hive';
grant all on hive.* to hive@'localhost' identified by 'hive';
flush privileges;
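Before running schematool, Hive needs to know how to reach this MySQL instance. A minimal hive-site.xml sketch (the property names are standard Hive settings; the URL, user, and password assume the hive database and user created above with MySQL running on yidu1, and the MySQL JDBC driver jar must be present in $HIVE_HOME/lib):
cat > /usr/local/bigdata/hive/conf/hive-site.xml <<'EOF'
<configuration>
  <property><name>javax.jdo.option.ConnectionURL</name><value>jdbc:mysql://yidu1:3306/hive?createDatabaseIfNotExist=true</value></property>
  <property><name>javax.jdo.option.ConnectionDriverName</name><value>com.mysql.jdbc.Driver</value></property>
  <property><name>javax.jdo.option.ConnectionUserName</name><value>hive</value></property>
  <property><name>javax.jdo.option.ConnectionPassword</name><value>hive</value></property>
  <property><name>hive.metastore.warehouse.dir</name><value>/usr/hive/warehouse</value></property>
  <property><name>hive.exec.scratchdir</name><value>/usr/hive/tmp</value></property>
</configuration>
EOF
# copy the MySQL JDBC driver (mysql-connector-java-*.jar) into /usr/local/bigdata/hive/lib first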
Initialize Hive:
schematool -initSchema -dbType mysql
Install Sqoop:
Edit /usr/local/bigdata/sqoop/conf/sqoop-env.sh:
export HADOOP_COMMON_HOME=/usr/local/bigdata/hadoop
export HADOOP_MAPRED_HOME=/usr/local/bigdata/hadoop
export HIVE_HOME=/usr/local/bigdata/hive
export ZOOCFGDIR=/usr/local/bigdata/zookeeper
Edit /usr/local/bigdata/sqoop/bin/configure-sqoop
and comment out the checks for components that are not installed.
Copy the JDBC driver into /usr/local/bigdata/sqoop/lib, for example for Oracle:
cp $ORACLE_HOME/ojdbc*.jar $SQOOP_HOME/lib
Test Sqoop: sqoop list-tables --connect jdbc:oracle:thin:@192.168.10.35:1521:yidugov --username MAYOR --password yiducloud
Import data: sqoop import --connect jdbc:oracle:thin:@192.168.10.35:1521:yidugov --username MAYOR --password yiducloud --table B_SM --fields-terminated-by ':' -m 1
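If the target is Hive rather than plain HDFS files, Sqoop can also load directly into a Hive table; a hedged variant of the import above (--hive-import and --hive-table are standard Sqoop options, and the table name here is only an example):
sqoop import --connect jdbc:oracle:thin:@192.168.10.35:1521:yidugov \
  --username MAYOR --password yiducloud \
  --table B_SM -m 1 \
  --hive-import --hive-table b_sm    # creates and loads the Hive table b_sm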