Hadoop--mac 上利用虚拟机安装hadoop步骤整理

Hadoop–mac 上利用虚拟机安装hadoop步骤整理

本文仅提供详细步骤,不提供详尽解释

VMware fusion虚拟机配置静态IP

######mac book 执行获取
[tonerMac-Pro:~ toner]$  cd /Library/Preferences/VMware\ Fusion/vmnet8
[tonerMac-Pro:~ toner]$  cat dhcpd.conf
    subnet 192.168.162.0 netmask 255.255.255.0 {
      range 192.168.162.128 192.168.162.254;
      option broadcast-address 192.168.162.255;
      option domain-name-servers 192.168.162.2;
      option domain-name localdomain;
      default-lease-time 1800;                # default is 30 minutes
      max-lease-time 7200;                    # default is 2 hours
      option netbios-name-servers 192.168.162.2;
      option routers 192.168.162.2;
    }
    host vmnet8 {
      hardware ethernet 00:50:56:C0:00:08;
      fixed-address 192.168.162.1;
      option domain-name-servers 0.0.0.0;
      option domain-name "";
      option routers 0.0.0.0;
    }
[tonerMac-Pro:~ toner]$  cat nat.conf
		# NAT gateway address
    ip = 192.168.162.2
    netmask = 255.255.255.0
#################查看DNS
168.126.63.1
114.114.114.114
8.8.8.8
#######虚拟机执行设置
[root@localhost ~]  cd /etc/sysconfig/network-scripts
[root@localhost network-scripts]	vi ifcfg-ens33
    TYPE=Ethernet
    PROXY_METHOD=none
    BROWSER_ONLY=no
    DEFROUTE=yes
    IPV4_FAILURE_FATAL=no
    IPV6INIT=yes
    IPV6_AUTOCONF=yes
    IPV6_DEFROUTE=yes
    IPV6_FAILURE_FATAL=no
    IPV6_ADDR_GEN_MODE=stable-privacy
    NAME=ens33
    UUID=cc1832be-068a-4d8b-b16f-7d25aa346a94
    DEVICE=ens33
		#修改DHCP->static
		BOOTPROTO=static
		#修改no->yes
    ONBOOT=yes
    #新增静态IP
    IPADDR=192.168.162.200
    #新增服务网关
    GATEWAY=192.168.162.2
    #新增子网掩码
    NETMASK=255.255.255.0
    #新增DNS
    DNS1=168.126.63.1
    DNS2=114.114.114.114
    DNS3=8.8.8.8
[root@localhost network-scripts]  service network restart

ssh 免密登录

######mac book 执行获取
#使用 ssh-keygen 命令,一直按回车,就可以生成当前机器的公钥 id_rsa.pub
[tonerMac-Pro:~ toner]$  ssh-keygen  
[tonerMac-Pro:~ toner]$  cat .ssh/id_rsa.pub  
ssh-rsa AAAAB3NzaC1yc2EAAAADAQ.........2axF [email protected]

########快捷登陆设置
[tonerMac-Pro:~ toner]$  cd .ssh/
[tonerMac-Pro:~ toner]$  vi config
        #自定义别名
        Host            hadoop1
        #替换为你的ssh服务器ip或domain
        HostName        192.168.162.200
        #ssh服务器端口默认为22
        Port            22
        #ssh服务器用户名
        User            root
        #第一个步骤生成的公钥文件对应的私钥文件
        IdentityFile    ~/.ssh/id_rsa
#免密快捷登陆
[tonerMac-Pro:~ toner]$  ssh hadoop1
######虚拟机 执行设置
#进行免密配置,或者新建.ssh文件
[root@localhost ~]  ssh-keygen
[root@localhost ~]  cd .ssh/
#将mac上面的id_rsa.pub内容复制进去,即可实现mac对虚拟机的免密登陆
[root@localhost ~]  vi authorized_keys

JDK1.8安装

  • 创建/usr/local/java目录
[root@localhost ~] mkdir /usr/local/java
[root@localhost ~] cd /usr/local/java
  • 上传并解压jdk
[tonerMac-Pro:~ toner] scp jdk-8u211-linux-x64.tar.gz hadoop1:/usr/local/java/
[root@localhost ~] tar -zxvf jdk-8u211-linux-x64.tar.gz
  • 配置环境变量
[root@localhost ~] vi /etc/profile
#添加以下java环境变量
export JAVA_HOME=/usr/local/java/jdk1.8.0_211
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib:$CLASSPATH
export JAVA_PATH=${JAVA_HOME}/bin:${JRE_HOME}/bin
export PATH=$PATH:${JAVA_PATH}

[root@localhost ~] source /etc/profile
[root@localhost java] java -version
java version "1.8.0_211"
Java(TM) SE Runtime Environment (build 1.8.0_211-b12)
Java HotSpot(TM) 64-Bit Server VM (build 25.211-b12, mixed mode)

修改主机名

[root@localhost ~]  hostnamectl set-hostname hadoop1

安装与配置Hadoop

修改配置文件

路径:./etc/hadoop

1)hadoop-env.sh

修改JDK安装目录

2)core-site.xml

<property>

  <name>fs.defaultFS</name>

  <value>hdfs://SY-0217:8020</value>

  <description>The name of the default file system.  A URI whose

  scheme and authority determine the FileSystem implementation.  The

  uri's scheme determines the config property (fs.SCHEME.impl) naming

  the FileSystem implementation class.  The uri's authority is used to

  determine the host, port, etc. for a filesystem.</description>

</property>


3)mapred-site.xml

<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
  <description>The runtime framework for executing MapReduce jobs.
  Can be one of local, classic or yarn.
  </description>
</property>


<property>
  <name>mapreduce.jobhistory.address</name>
  <value>SY-0355:10020</value>
  <description>MapReduce JobHistory Server IPC host:port</description>
</property>

<property>
  <name>mapreduce.jobhistory.webapp.address</name>
  <value>SY-0355:19888</value>
  <description>MapReduce JobHistory Server Web UI host:port</description>
</property>

4)hdfs-site.xml


<property>
  <name>dfs.nameservices</name>
  <value>hadoop-test</value>
  <description>
    Comma-separated list of nameservices.
  </description>
</property>


<property>
  <name>dfs.ha.namenodes.hadoop-test</name>
  <value>nn1,nn2</value>
  <description>
    The prefix for a given nameservice, contains a comma-separated
    list of namenodes for a given nameservice (eg EXAMPLENAMESERVICE).
  </description>
</property>


<property>
  <name>dfs.namenode.rpc-address.hadoop-test.nn1</name>
  <value>SY-0217:8020</value>
  <description>
    RPC address for namenode1 of hadoop-test
  </description>
</property>


<property>
  <name>dfs.namenode.rpc-address.hadoop-test.nn2</name>
  <value>SY-0355:8020</value>
  <description>
    RPC address for namenode2 of hadoop-test
  </description>
</property>

<property>
  <name>dfs.namenode.http-address.hadoop-test.nn1</name>
  <value>SY-0217:50070</value>
  <description>
    The address and the base port where the dfs namenode1 web ui will listen on.
  </description>
</property>

<property>
  <name>dfs.namenode.http-address.hadoop-test.nn2</name>
  <value>SY-0355:50070</value>
  <description>
    The address and the base port where the dfs namenode2 web ui will listen on.
  </description>
</property>


<property>
  <name>dfs.namenode.name.dir</name>
  <value>file:///home/dongxicheng/hadoop/hdfs/name</value>
  <description>Determines where on the local filesystem the DFS name node
      should store the name table(fsimage).  If this is a comma-delimited list
      of directories then the name table is replicated in all of the
      directories, for redundancy. </description>
</property>


<property>
  <name>dfs.namenode.shared.edits.dir</name>
  <value>qjournal://SY-0355:8485;SY-0225:8485;SY-0226:8485/hadoop-demo</value>
  <description>A directory on shared storage between the multiple namenodes
  in an HA cluster. This directory will be written by the active and read
  by the standby in order to keep the namespaces synchronized. This directory
  does not need to be listed in dfs.namenode.edits.dir above. It should be
  left empty in a non-HA cluster.
  </description>
</property>


<property>
  <name>dfs.datanode.data.dir</name>
  <value>file:///home/dongxicheng/hadoop/hdfs/data</value>
  <description>Determines where on the local filesystem an DFS data node
  should store its blocks.  If this is a comma-delimited
  list of directories, then data will be stored in all named
  directories, typically on different devices.
  Directories that do not exist are ignored.
  </description>
</property>

<property>
  <name>dfs.ha.automatic-failover.enabled</name>
  <value>false</value>
  <description>
    Whether automatic failover is enabled. See the HDFS High
    Availability documentation for details on automatic HA
    configuration.
  </description>
</property>

<property>
  <name>dfs.journalnode.edits.dir</name>
  <value>/home/dongxicheng/hadoop/hdfs/journal/</value>
</property>

5)yarn-site.xml


<property>
    <description>The hostname of the RM.</description>
    <name>yarn.resourcemanager.hostname</name>
    <value>SY-0217</value>
  </property>

  <property>
    <description>The address of the applications manager interface in the RM.</description>
    <name>yarn.resourcemanager.address</name>
    <value>${yarn.resourcemanager.hostname}:8032</value>
  </property>

  <property>
    <description>The address of the scheduler interface.</description>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>${yarn.resourcemanager.hostname}:8030</value>
  </property>

  <property>
    <description>The http address of the RM web application.</description>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>${yarn.resourcemanager.hostname}:8088</value>
  </property>

  <property>
    <description>The https address of the RM web application.</description>
    <name>yarn.resourcemanager.webapp.https.address</name>
    <value>${yarn.resourcemanager.hostname}:8090</value>
  </property>

  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>${yarn.resourcemanager.hostname}:8031</value>
  </property>

  <property>
    <description>The address of the RM admin interface.</description>
    <name>yarn.resourcemanager.admin.address</name>
    <value>${yarn.resourcemanager.hostname}:8033</value>
  </property>

  <property>
    <description>The class to use as the resource scheduler.</description>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
  </property>

  <property>
    <description>fair-scheduler conf location</description>
    <name>yarn.scheduler.fair.allocation.file</name>
    <value>${yarn.home.dir}/etc/hadoop/fairscheduler.xml</value>
  </property>

  <property>
    <description>List of directories to store localized files in. An
      application's localized file directory will be found in:
      ${yarn.nodemanager.local-dirs}/usercache/${user}/appcache/application_${appid}.
      Individual containers' work directories, called container_${contid}, will
      be subdirectories of this.
   </description>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/home/dongxicheng/hadoop/yarn/local</value>
  </property>

  <property>
    <description>Whether to enable log aggregation</description>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>

  <property>
    <description>Where to aggregate logs to.</description>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/tmp/logs</value>
  </property>

  <property>
    <description>Amount of physical memory, in MB, that can be allocated
    for containers.</description>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>30720</value>
  </property>

  <property>
    <description>Number of CPU cores that can be allocated
    for containers.</description>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>12</value>
  </property>

  <property>
    <description>the valid service name should only contain a-zA-Z0-9_ and can not start with numbers</description>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>

6)slaves

hadoop2
hadoop3

7)fairscheduler.xml

<queue name="infrastructure">
    <minResources>102400 mb, 50 vcores</minResources>
    <maxResources>153600 mb, 100 vcores</maxResources>
    <maxRunningApps>200</maxRunningApps>
    <minSharePreemptionTimeout>300</minSharePreemptionTimeout>
    <weight>1.0</weight>
    <aclSubmitApps>root,yarn,search,hdfs</aclSubmitApps>
  </queue>

   <queue name="tool">
      <minResources>102400 mb, 30 vcores</minResources>
      <maxResources>153600 mb, 50 vcores</maxResources>
   </queue>

   <queue name="sentiment">
      <minResources>102400 mb, 30 vcores</minResources>
      <maxResources>153600 mb, 50 vcores</maxResources>
   </queue>

8)配置环境变量

# vim ~/.bashrc  

export HADOOP_HOME=/home/hadoop
export HADOOP_INSTALL=$HADOOP_HOME 
export HADOOP_MAPRED_HOME=$HADOOP_HOME 
export HADOOP_HDFS_HOME=$HADOOP_HOME 
export HADOOP_COMMON_HOME=$HADOOP_HOME 
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop  
export YARN_HOME=$HADOOP_HOME 
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop  

export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin 

9)禁止防火墙开机启动

systemctl disable firewalld.service 

10)启动Hadoop

###格式化NameNode
hdfs namenode -format
###启动HDFS
start-dfs.sh
###访问UI
http://hadoop1:50070
###停止HDFS
stop-dfs.sh
###运行YARN
start-yarn.sh
###访问UI
http://hadoop1:8088
###停止YARN
stop-yarn.sh

整理不易,转载请通知
参考资料:https://cloud.tencent.com/developer/article/1191526

你可能感兴趣的:(大数据)