Add the hostname mappings to /etc/hosts on every node:
{server_ip1} node1
{server_ip2} node2
{server_ip3} node3
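After editing /etc/hosts on each machine, name resolution can be checked with a quick ping (a sketch):
ping -c 1 node1
ping -c 1 node2
ping -c 1 node3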
sudo apt-get update
sudo apt-get install openssh-client
sudo apt-get install openssh-server
sudo apt install net-tools
sudo service ssh start
sudo ps -e | grep ssh
sudo vi /etc/ssh/sshd_config
* Add to sshd_config: PermitRootLogin yes
/etc/init.d/ssh restart
* Note: the login name is not root but the username you chose during installation.
ssh-keygen -t rsa
* Press Enter three times to accept the defaults.
cd ~/.ssh
cat id_rsa.pub >> authorized_keys
ssh localhost
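For the cluster, the public key also needs to reach the other nodes so the start scripts can SSH without a password. A minimal sketch, assuming the same username exists on node1/node2/node3:
ssh-copy-id node2   # copies ~/.ssh/id_rsa.pub into node2's authorized_keys; repeat for node3
ssh node2           # should now log in without a password prompt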
- Check status: sudo ufw status
- Disable the firewall: sudo ufw disable
- Enable the firewall: sudo ufw enable
echo "set nocp" >> ~/.vimrc
source ~/.vimrc
tar -zxvf jdk-8u271-linux-x64.tar.gz
sudo vi /etc/profile
Add to /etc/profile:
#set Java environment
export JAVA_HOME=/usr/local/hello/java/jdk1.8.0_271
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
After saving and exiting, run:
source /etc/profile
java -version
sudo apt-get install gcc automake autoconf libtool make
sudo apt-get install ubuntu-make
* E: Could not get lock /var/lib/dpkg/lock-frontend - open (11: Resource temporarily unavailable)
* E: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), is another process using it?
If no other apt/dpkg process is actually running, delete the lock files and retry:
sudo rm /var/lib/dpkg/lock-frontend
sudo rm /var/lib/dpkg/lock
sudo apt-get install redis-server        # install
ps -aux | grep redis                     # check the process
netstat -nlt | grep 6379                 # check the listening port
sudo /etc/init.d/redis-server status     # show the Redis server status
tar -zxvf redis-6.0.9.tar.gz
cd /usr/local/hello/redis/redis-6.0.9
sudo make
If it fails, run sudo make MALLOC=libc and then sudo make again.
sudo make test
This step takes a while. Then install the binaries (into /usr/local/bin by default):
sudo make install
redis-server
Press Ctrl+C to stop it.
mkdir /etc/redis
cp redis/redis.conf /etc/redis/6379.conf
bind 127.0.0.1 ---> bind 0.0.0.0
daemonize no ---> daemonize yes
requirepass 123456   # empty by default; optional
Copy the init script redis/utils/redis_init_script into /etc/init.d:
cp redis_init_script /etc/init.d/redisd
update-rc.d redisd defaults
sudo service redisd start
redis-server redis.conf
redis-cli -p 6379 -a 123456   # use your own password; omit -a if no password is set
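A quick sanity check with redis-cli (a sketch; 123456 is the example password from above):
redis-cli -a 123456 ping      # should return PONG
redis-cli -a 123456 set foo bar
redis-cli -a 123456 get foo   # should return "bar"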
tar -zxvf /usr/local/hello/zookeeper/apache-zookeeper-3.5.8-bin.tar.gz
sudo vi /etc/profile
#set zookeeper environment
export ZOOKEEPER_HOME=/usr/local/hello/zookeeper/apache-zookeeper-3.5.8-bin
export PATH=$PATH:$ZOOKEEPER_HOME/bin
source /etc/profile
sudo mkdir /usr/local/hello/zookeeper/data
In /usr/local/hello/zookeeper/apache-zookeeper-3.5.8-bin/conf, copy the sample config:
sudo cp -v zoo_sample.cfg zoo.cfg
sudo vi zoo.cfg
dataDir = /usr/local/hello/zookeeper/data
Create the myid file inside /usr/local/hello/zookeeper/data (see the sketch below), then start the server and the client:
sudo bin/zkServer.sh start
sudo bin/zkCli.sh
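For a three-node ensemble, zoo.cfg also needs server entries and each node needs a matching myid file. A sketch, assuming the hostnames above and the default quorum/election ports:
# add to zoo.cfg on every node
server.1=node1:2888:3888
server.2=node2:2888:3888
server.3=node3:2888:3888
# write the matching id on each node (use 2 on node2, 3 on node3)
echo 1 | sudo tee /usr/local/hello/zookeeper/data/myid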
tar -zxvf /usr/local/hello/kafka/kafka_2.13-2.6.0.tgz
sudo vi /usr/local/hello/kafka/kafka_2.13-2.6.0/config/server.properties
broker.id=1
log.dir=/usr/local/hello/kafka/logs
bin/zookeeper-server-start.sh -daemon config/zookeeper.properties
bin/kafka-server-start.sh -daemon config/server.properties
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test
bin/kafka-topics.sh --list --zookeeper localhost:2181
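To verify the broker end to end, a console produce/consume round trip (a sketch; assumes the default listener on localhost:9092):
bin/kafka-console-producer.sh --bootstrap-server localhost:9092 --topic test
bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic test --from-beginning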
tar -xf mysql-server_5.7.31-1ubuntu18.04_amd64.deb-bundle.tar
apt-get install -y libaio1
apt-get install -y libmecab2
sudo dpkg-preconfigure mysql-community-server_*.deb
sudo dpkg -i mysql-common_*.deb
sudo dpkg -i mysql-community-client_*.deb
sudo dpkg -i mysql-client_*.deb
sudo dpkg -i mysql-community-server_*.deb
mysql -uroot -p   # the password is the one set during installation
grant all privileges on *.* to 'root'@'%' identified by '<your password>' with grant option;
exit;
sudo vi /etc/mysql/mysql.conf.d/mysqld.cnf
bind-address = 127.0.0.1 ---> bind-address = 0.0.0.0
systemctl restart mysql.service
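To confirm remote access works after the grant and bind-address change (a sketch; run from another node, replacing node1 with the host actually running MySQL):
mysql -h node1 -uroot -p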
bash Anaconda3-2020.07-Linux-x86_64.sh
Default install location (when running as root): /root/anaconda3
pip install numpy
pip install pandas
pip install -U scikit-learn
pip install PyMySQL
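A quick check that the packages import cleanly (a sketch):
python -c "import numpy, pandas, sklearn, pymysql; print('ok')"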
Reference link:
Extract: tar -zxvf hadoop-3.1.4.tar.gz
Set environment variables:
sudo vi /etc/profile
#set hadoop environment
export HADOOP_HOME=/usr/local/hello/hadoop/hadoop-3.1.4
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
source /etc/profile
Verify: hadoop version
Edit hadoop/etc/hadoop/core-site.xml:
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:/usr/local/hello/hadoop/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://node1:9000</value>
  </property>
</configuration>
Edit hadoop/etc/hadoop/hdfs-site.xml:
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>node3:50090</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/usr/local/hello/hadoop/tmp/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/usr/local/hello/hadoop/tmp/dfs/data</value>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
</configuration>
Edit hadoop/etc/hadoop/hadoop-env.sh and set:
export JAVA_HOME=/usr/local/hello/java/jdk1.8.0_271
Edit hadoop/etc/hadoop/mapred-env.sh and set:
export JAVA_HOME=/usr/local/hello/java/jdk1.8.0_271
Edit hadoop/etc/hadoop/mapred-site.xml:
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>node2:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>node2:19888</value>
  </property>
</configuration>
Edit hadoop/etc/hadoop/yarn-env.sh and add the Java environment variable (your install location):
export JAVA_HOME=/usr/local/hello/java/jdk1.8.0_271
Edit hadoop/etc/hadoop/yarn-site.xml:
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>node2</value>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>node2:8088</value>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>
</configuration>
sudo ./bin/hdfs namenode -format
sudo vi sbin/start-dfs.sh
Add at the top of the file:
HDFS_DATANODE_USER=root
HADOOP_SECURE_DN_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
sudo vi sbin/stop-dfs.sh
Add at the top of the file:
HDFS_DATANODE_USER=root
HADOOP_SECURE_DN_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
Start HDFS (on the NameNode): sudo ./sbin/start-dfs.sh
Stop HDFS (on the NameNode): sudo ./sbin/stop-dfs.sh
vi sbin/start-yarn.sh
Add at the top of the file:
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
vi sbin/stop-yarn.sh
Add at the top of the file:
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
Configure workers (called slaves before Hadoop 3.0, renamed to workers afterwards) and add the host names:
node1
node2
node3
Start HDFS on the NameNode node (node1):
Command: sbin/start-dfs.sh
Start YARN on the ResourceManager node (node2):
Command: sbin/start-yarn.sh
To view job history, start the history server daemon; in this cluster it is configured on node2. Command: sbin/mr-jobhistory-daemon.sh start historyserver
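To confirm the daemons are running, jps on each node should roughly show the following (a sketch based on the configuration above):
jps   # node1: NameNode, DataNode, NodeManager
jps   # node2: ResourceManager, JobHistoryServer, DataNode, NodeManager
jps   # node3: SecondaryNameNode, DataNode, NodeManager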
Reference: https://blog.csdn.net/u014361775/article/details/75448149
Extract: tar -zxvf apache-flume-1.9.0-bin.tar.gz
Set environment variables:
sudo vi /etc/profile
#set flume environment
export FLUME_HOME=/usr/local/hello/flume/apache-flume-1.9.0-bin
export PATH=$PATH:$FLUME_HOME/bin
source /etc/profile
Edit the configuration file flume-env.sh. Create it from the template flume-env.sh.template:
mv flume-env.sh.template flume-env.sh
vi flume-env.sh
export JAVA_HOME=/usr/local/hello/java/jdk1.8.0_271
Edit the configuration file flume-conf.properties:
mv flume-conf.properties.template flume-conf.properties
# example.conf: A single-node Flume configuration
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
Verify: flume-ng version
Start: bin/flume-ng agent --conf conf --conf-file conf/flume-conf.properties --name a1 -Dflume.root.logger=INFO,console
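To exercise the netcat source, send a line to port 44444 from another terminal (a sketch; requires netcat to be installed):
echo "hello flume" | nc localhost 44444   # the event should show up in the agent's console log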
tar -zxvf scala-2.11.8.tgz
sudo vi /etc/profile
#set scala environment
export SCALA_HOME=/usr/local/hello/scala/scala-2.11.8
export PATH=$PATH:$SCALA_HOME/bin
source /etc/profile
scala -version
sudo vi /etc/profile
#set spark environment
export SPARK_HOME=/usr/local/hello/spark/spark-3.0.1-bin-hadoop2.7
export PATH=$PATH:$SPARK_HOME/bin
source /etc/profile
cp spark-env.sh.template spark-env.sh
vi spark-env.sh
export JAVA_HOME=$JAVA_HOME
export HADOOP_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SCALA_HOME=$SCALA_HOME
export SPARK_HOME=$SPARK_HOME
export SPARK_LOCAL_IP=node3
export SPARK_MASTER_HOST=node1
export SPARK_MASTER_PORT=7077
export SPARK_MASTER_WEBUI_PORT=8099
export SPARK_WORKER_CORES=3
export SPARK_WORKER_INSTANCES=1
export SPARK_WORKER_MEMORY=5G
export SPARK_WORKER_WEBUI_PORT=8081
export SPARK_EXECUTOR_CORES=1
export SPARK_EXECUTOR_MEMORY=1G
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$HADOOP_HOME/lib/native
cp slaves.template slaves
vi slaves
Add the worker host names (node1/node2/node3 in this cluster), one per line.
Start: start-master.sh and start-slaves.sh
Stop: stop-master.sh and stop-slaves.sh
Master web UI: http://ip:8099
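To verify the standalone cluster, a SparkPi smoke test (a sketch; the examples jar name assumes the spark-3.0.1-bin-hadoop2.7 distribution):
bin/spark-submit --class org.apache.spark.examples.SparkPi \
  --master spark://node1:7077 \
  examples/jars/spark-examples_2.12-3.0.1.jar 100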
tar -zxvf apache-storm-2.2.0.tar.gz
Edit conf/storm.yaml:
# the cluster's ZooKeeper hosts (more than one)
storm.zookeeper.servers:
- "node1"
- "node2"
- "node3"
# data directory
storm.local.dir: "/usr/local/hello/storm/data"
# nimbus (master) host
nimbus.seeds: ["node1"]
# ZooKeeper port; must match the ZooKeeper configuration
storm.zookeeper.port: 2181
supervisor.slots.ports:
- 6700
- 6701
- 6702
- 6703
# UI port; the default 8080 is easily taken by other services
ui.port: 8758
sudo apt-get install lib32stdc++6
./bin/storm nimbus &
./bin/storm supervisor &
./bin/storm ui &
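To check that the cluster came up (a sketch):
./bin/storm list   # lists running topologies; an empty list still confirms nimbus is reachable
jps                # the nimbus, supervisor, and UI daemons should appear as Java processes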
Reference: https://blog.csdn.net/Z_r_s/article/details/105875353
Extract: tar -zxvf apache-hive-3.1.2-bin.tar.gz
Set environment variables:
sudo vi /etc/profile
#set hive environment
export HIVE_HOME=/usr/local/hello/hive/apache-hive-3.1.2-bin
export PATH=$PATH:$HIVE_HOME/bin
source /etc/profile
Configure MySQL and create the hive metastore database:
mysql -u root -p   # log in to the MySQL shell
create database hive;
grant all on *.* to 'root'@'%' identified by 'root';
flush privileges;
Configure hive-env.sh:
cp hive-env.sh.template hive-env.sh
vi hive-env.sh
Add the following:
# Hive Configuration Directory can be controlled by:
export JAVA_HOME=$JAVA_HOME
export HADOOP_HOME=$HADOOP_HOME
export HIVE_HOME=$HIVE_HOME
export HIVE_CONF_DIR=${HIVE_HOME}/conf
# Folder containing extra libraries required for hive compilation/execution can be controlled by:
export HIVE_AUX_JARS_PATH=${HIVE_HOME}/lib
Configure hive-site.xml:
touch hive-site.xml
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false</value>
    <description>JDBC connect string for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
    <description>username to use against metastore database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>root</value>
    <description>password to use against metastore database</description>
  </property>
  <property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
  </property>
</configuration>
Initialize the metastore schema:
./bin/schematool -dbType mysql -initSchema
If initialization fails with an error about com.google.common.base.Preconditions.checkArgument, the guava.jar bundled with Hive conflicts with the guava version shipped with Hadoop. Fix (see the sketch below):
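A common fix, sketched here (the exact jar versions below are assumptions; check the actual file names under $HIVE_HOME/lib and $HADOOP_HOME/share/hadoop/common/lib first):
ls $HIVE_HOME/lib/guava-*.jar $HADOOP_HOME/share/hadoop/common/lib/guava-*.jar   # compare versions
rm $HIVE_HOME/lib/guava-19.0.jar                                                 # assumed older guava shipped with Hive
cp $HADOOP_HOME/share/hadoop/common/lib/guava-27.0-jre.jar $HIVE_HOME/lib/       # copy Hadoop's newer guava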
Start Hive:
./bin/hive
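A quick smoke test from the shell once Hive starts correctly (a sketch):
./bin/hive -e "show databases;"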