[root@node1 ~]# cd /opt/apps
[root@node1 apps]# ls
hadoop-3.1.3.tar.gz mysql-community-common-5.7.28-1.el7.x86_64.rpm mysql-community-libs-compat-5.7.28-1.el7.x86_64.rpm
mysql-community-client-5.7.28-1.el7.x86_64.rpm mysql-community-libs-5.7.28-1.el7.x86_64.rpm mysql-community-server-5.7.28-1.el7.x86_64.rpm
[root@node1 apps]# yum install lrzsz -y # after installing, files can be dragged and dropped straight into the terminal
[root@node1 apps]# rz -E
rz waiting to receive.
[root@node1 apps]# ls
Anaconda3-Linux-x86_64.sh mysql-community-common-5.7.28-1.el7.x86_64.rpm mysql-community-server-5.7.28-1.el7.x86_64.rpm
hadoop-3.1.3.tar.gz mysql-community-libs-5.7.28-1.el7.x86_64.rpm
mysql-community-client-5.7.28-1.el7.x86_64.rpm mysql-community-libs-compat-5.7.28-1.el7.x86_64.rpm
# Distribute the installer to the other virtual machines
[root@node1 apps]# scp Anaconda3-Linux-x86_64.sh node2:`pwd`
Anaconda3-Linux-x86_64.sh 100% 544MB 60.5MB/s 00:09
[root@node1 apps]# scp Anaconda3-Linux-x86_64.sh node3:`pwd`
Anaconda3-Linux-x86_64.sh 100% 544MB 63.0MB/s 00:08
[root@node1 apps]# scp Anaconda3-Linux-x86_64.sh node4:`pwd`
Anaconda3-Linux-x86_64.sh 100% 544MB 63.3MB/s 00:08
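Since SSH trust is already set up between the nodes, the three transfers above can also be written as one loop (a small sketch; adjust the host list to match your cluster):
[root@node1 apps]# for host in node2 node3 node4; do scp Anaconda3-Linux-x86_64.sh $host:`pwd`; done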
# Run the installer (same steps on all four virtual machines)
[root@node1 apps]# sh ./Anaconda3-Linux-x86_64.sh
......
Please, press ENTER to continue
>>> # press the Enter key
===================================
End User License Agreement - Anaconda
Individual Edition
===================================
......
Anaconda reserves all rights not expressly
granted to you in this Agreement.
--More-- # press the space bar 8 times to page through the license
......
Do you accept the license terms? [yes|no]
[no] >>> yes # accept the license: type yes and press Enter
Anaconda3 will now be installed into this
location:
/root/anaconda3
- Press ENTER to confirm the location
- Press CTRL-C to abort the installation
- Or specify a different location below
# enter the Anaconda3 install path; /opt/anaconda3 is recommended
[/root/anaconda3] >>> /opt/anaconda3
PREFIX=/opt/anaconda3
Unpacking payload ...
Preparing transaction: done
Executing transaction: done
installation finished.
Do you wish the installer to initialize Anaconda3
by running conda init? [yes|no] # to initialize, type yes and press Enter
[no] >>> yes
Thank you for installing Anaconda3!
===========================================================
......
[root@node1 apps]#
Close the XShell connection and reconnect.
(base) [root@node1 ~]#
The (base) prefix in the prompt shows that the installation succeeded; base is the default virtual environment.
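A quick check that conda is now on the PATH (the reported version depends on the installer build):
(base) [root@node1 ~]# conda --version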
Configure the Tsinghua mirror so conda downloads are fast; write the following to ~/.condarc:
channels:
  - defaults
show_channel_urls: true
channel_alias: https://mirrors.tuna.tsinghua.edu.cn/anaconda
default_channels:
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/pro
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
custom_channels:
  conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  msys2: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  bioconda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  menpo: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  simpleitk: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
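After saving the file, you can verify that the mirror settings took effect and clear any stale package index (both are standard conda subcommands):
(base) [root@node1 ~]# conda config --show channels
(base) [root@node1 ~]# conda clean -i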
Repeat the same steps on node2, node3, and node4.
Create the pyspark_env virtual environment and install the packages it needs: pyspark, jieba, and pyhive.
Do this on node1 through node4.
[root@node1 ~]# conda create -n pyspark_env python=3.8
Type y to confirm.
#
# To activate this environment, use
#
# $ conda activate pyspark_env
#
# To deactivate an active environment, use
#
# $ conda deactivate
Install pyspark into the virtual environment:
(base) [root@node1 ~]# conda activate pyspark_env # enter the virtual environment
(pyspark_env) [root@node1 ~]# conda install pyspark
Install the jieba word-segmentation library, along with pyhive, into the virtual environment:
(pyspark_env) [root@node1 ~]# pip install pyhive jieba -i https://pypi.tuna.tsinghua.edu.cn/simple
Test:
(pyspark_env) [root@node1 ~]# conda info -e
# conda environments:
#
base /opt/anaconda3
pyspark_env * /opt/anaconda3/envs/pyspark_env
(pyspark_env) [root@node1 ~]# python
Python 3.8.16 (default, Mar 2 2023, 03:21:46)
[GCC 11.2.0] :: Anaconda, Inc. on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import pyspark
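From the same interpreter you can also check which version conda resolved (the exact number may differ on your machine):
>>> pyspark.__version__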
To delete the virtual environment:
conda remove -n pyspark_env --all
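If you would rather not repeat the package installs by hand on node2 through node4, one alternative is to export the environment once and recreate it from the spec file (a sketch; the filename pyspark_env.yml is arbitrary):
(pyspark_env) [root@node1 ~]# conda env export -n pyspark_env > pyspark_env.yml
# after copying the file to another node:
[root@node2 ~]# conda env create -f pyspark_env.yml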
Official download URL: https://dlcdn.apache.org/spark/spark-3.2.1/spark-3.2.1-bin-hadoop3.2.tgz
Perform the following on node1:
Upload spark-3.2.1-bin-hadoop3.2.tgz to the /opt/apps directory.
Extract spark-3.2.1-bin-hadoop3.2.tgz into the /opt directory.
- JAVA_HOME: tells Spark where the JDK is (already configured)
- HADOOP_HOME: tells Spark where Hadoop is (already configured)
- HADOOP_CONF_DIR: tells Spark where Hadoop's configuration files are
- SPARK_HOME: the Spark installation path
- PYSPARK_PYTHON: which Python interpreter Spark should use to run Python programs
All five environment variables are configured in `/etc/profile` (appended at the end):
(base) [root@node1 apps]# tar -zxvf spark-3.2.1-bin-hadoop3.2.tgz -C /opt/
Rename the directory:
(base) [root@node1 apps]# cd ../
(base) [root@node1 opt]# mv spark-3.2.1-bin-hadoop3.2/ spark-3.2.1
Append at the end of the file:
(pyspark_env) [root@node1 ~]# vim /etc/profile
# Spark environment variables
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_HOME=/opt/spark-3.2.1
export PYSPARK_PYTHON=/opt/anaconda3/envs/pyspark_env/bin/python
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
Reload the configuration so the environment variables take effect:
(pyspark_env) [root@node1 ~]# source /etc/profile
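A quick sanity check that the new variables are visible (both values follow directly from the configuration above):
(pyspark_env) [root@node1 ~]# echo $SPARK_HOME
/opt/spark-3.2.1
(pyspark_env) [root@node1 ~]# echo $PYSPARK_PYTHON
/opt/anaconda3/envs/pyspark_env/bin/python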
Optionally, create a log4j.properties from the template shipped in Spark's conf directory:
(pyspark_env) [root@node1 ~]# cd /opt/spark-3.2.1/conf
(pyspark_env) [root@node1 conf]# pwd
/opt/spark-3.2.1/conf
(pyspark_env) [root@node1 conf]# cp log4j.properties.template log4j.properties
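With the copy in place you can lower the root log level to quiet the shell; assuming the stock first line of the template, `log4j.rootCategory=INFO, console` (Spark 3.2.x still uses log4j 1.x), change it to:
log4j.rootCategory=WARN, console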
The complete /etc/profile, for reference:
pathmunge () {
    case ":${PATH}:" in
        *:"$1":*)
            ;;
        *)
            if [ "$2" = "after" ] ; then
                PATH=$PATH:$1
            else
                PATH=$1:$PATH
            fi
    esac
}

if [ -x /usr/bin/id ]; then
    if [ -z "$EUID" ]; then
        # ksh workaround
        EUID=`/usr/bin/id -u`
        UID=`/usr/bin/id -ru`
    fi
    USER="`/usr/bin/id -un`"
    LOGNAME=$USER
    MAIL="/var/spool/mail/$USER"
fi

# Path manipulation
if [ "$EUID" = "0" ]; then
    pathmunge /usr/sbin
    pathmunge /usr/local/sbin
else
    pathmunge /usr/local/sbin after
    pathmunge /usr/sbin after
fi

HOSTNAME=`/usr/bin/hostname 2>/dev/null`
HISTSIZE=1000
if [ "$HISTCONTROL" = "ignorespace" ] ; then
    export HISTCONTROL=ignoreboth
else
    export HISTCONTROL=ignoredups
fi

export PATH USER LOGNAME MAIL HOSTNAME HISTSIZE HISTCONTROL

# By default, we want umask to get set. This sets it for login shell
# Current threshold for system reserved uid/gids is 200
# You could check uidgid reservation validity in
# /usr/share/doc/setup-*/uidgid file
if [ $UID -gt 199 ] && [ "`/usr/bin/id -gn`" = "`/usr/bin/id -un`" ]; then
    umask 002
else
    umask 022
fi

for i in /etc/profile.d/*.sh /etc/profile.d/sh.local ; do
    if [ -r "$i" ]; then
        if [ "${-#*i}" != "$-" ]; then
            . "$i"
        else
            . "$i" >/dev/null
        fi
    fi
done

unset i
unset -f pathmunge

export JAVA_HOME=/usr/java/default
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/opt/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

# Spark environment variables
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_HOME=/opt/spark-3.2.1
export PYSPARK_PYTHON=/opt/anaconda3/envs/pyspark_env/bin/python
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
The pyspark program provides an interactive Python interpreter environment in which you can write ordinary Python code.
Since it is on the PATH, it can be launched from any directory:
(pyspark_env) [root@node1 conf]# pyspark
>>> sc.parallelize([1,2,3]).map(lambda x:x*2).collect()
[2, 4, 6]
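As an end-to-end test that also exercises the jieba package installed earlier, here is a minimal word-count sketch run in the same shell (the sample sentences are arbitrary, and the exact segmentation depends on jieba's dictionary):
>>> import jieba
>>> lines = sc.parallelize(["我爱自然语言处理", "我也爱自然语言"])
>>> lines.flatMap(lambda s: jieba.cut(s)).map(lambda w: (w, 1)).reduceByKey(lambda a, b: a + b).collect()
The result is a list of (word, count) pairs. This works because PYSPARK_PYTHON points at the pyspark_env interpreter, so the executor processes can import jieba as well.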