在自己电脑配置cdh 版 hadoop 提交mr job客户端

image.png

1.首先 下载 与我们 cdh hadoop集群对应的 hadoop 安装文件

hadoop-2.6.0-cdh5.14.2.tar.gz

链接: https://pan.baidu.com/s/1iHm5M-gGZRWLKbzVjbYJmA 密码: q9nv

2. hadoop 解压到自己 本地电脑上

mac /opt

window D 盘

3.将测试服务器的hadoop 的配置文件 复制到自己的hadoop 配置文件目录

4.配置 环境变量 并使之生效

  1. 如果使用scala java 那先下载 jar包
    比如 scala build.sbt
name := "sbtawsHadoop"

version := "0.1"

scalaVersion := "2.12.6"

// Hadoop client artifacts must match the cluster version exactly
// (2.6.0-cdh5.14.2), otherwise job submission can fail with RPC/version
// mismatch errors.
libraryDependencies ++= Seq(
    "org.apache.hadoop" % "hadoop-common" % "2.6.0-cdh5.14.2",
    "org.apache.hadoop" % "hadoop-hdfs" % "2.6.0-cdh5.14.2",
    "org.apache.hadoop" % "hadoop-client" % "2.6.0-cdh5.14.2",
    "org.apache.hadoop" % "hadoop-mapreduce-client-core" % "2.6.0-cdh5.14.2",
    "org.apache.hadoop" % "hadoop-mapreduce-client-common" % "2.6.0-cdh5.14.2",
    "org.apache.hadoop" % "hadoop-mapreduce-client-jobclient" % "2.6.0-cdh5.14.2",
    "org.apache.hbase" % "hbase" % "1.2.0-cdh5.14.2"
)

// Bundle the cluster config files (and local data/public dirs) into the jar
// so the client picks up core-site.xml / yarn-site.xml etc. from the classpath.
unmanagedResourceDirectories in Compile += baseDirectory.value / "conf"
unmanagedResourceDirectories in Compile += baseDirectory.value / "data"
unmanagedResourceDirectories in Compile += baseDirectory.value / "public"
// BUGFIX: the original assigned `resourceDirectory in Compile` twice
// (first "data", then "conf"); with `:=` only the last assignment wins,
// so the "data" setting was dead code. Keep the effective one ("conf") —
// "data" is already included via unmanagedResourceDirectories above.
resourceDirectory in Compile := baseDirectory.value / "conf"

resolvers += "Sonatype OSS Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots"
// Cloudera repository — required for the *-cdh5.14.2 artifacts above,
// which are not published to Maven Central.
resolvers += "cdh" at "https://repository.cloudera.com/artifactory/cloudera-repos"

// sbt-assembly: fixed output jar name, drop META-INF manifests and take the
// first copy of any other duplicate file when merging dependency jars.
assemblyOutputPath in assembly := baseDirectory.value / "count-beat-80201.jar"
assemblyMergeStrategy in assembly := {
    case PathList("META-INF", xs @ _*) => MergeStrategy.discard
    case _                             => MergeStrategy.first
}



# Switch to the hdfs superuser before touching HDFS.
# (Original read "Su hds" — an editor auto-capitalization/typo for "su hdfs",
#  the correct form used further below.)
su hdfs

# Put the hadoop launcher on PATH via symlinks.
ln -s /usr/local/hadoop-2.6.0-cdh5.14.2/bin/hadoop /usr/local/bin/hadoop
ln -s /usr/local/hadoop/bin/hadoop hadoop

# Verify the NameNode RPC port (8020) is listening.
sudo netstat -tulpn | grep :8020

# CDH (Cloudera Manager agent) runtime config paths:
#   /run/cloudera-scm-agent/process/350-yarn-RESOURCEMANAGER/
#   /run/cloudera-scm-agent/process/350-yarn-RESOURCEMANAGER/yarn-site.xml
# ResourceManager log:
#   /var/log/hadoop-yarn/hadoop-cmf-yarn-RESOURCEMANAGER-cdhnode1.log.out

# Start a NodeManager from the extracted parcel.
# (Original path had a doubled slash and said "parcel-repo"; the extracted
#  parcels live under /opt/cloudera/parcels — TODO confirm on the cluster.)
/opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3/lib/hadoop-yarn/bin/yarn nodemanager

# yarn launcher and hadoop-common jar shipped inside the parcel:
#   /opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3/bin/yarn
#   /opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3/jars/hadoop-common-2.6.0-cdh5.14.2.jar

# Submit the assembled job from the local (Homebrew) hadoop client:
/usr/local/Cellar/hadoop/2.8.2/bin/hadoop jar ./count-beat-80201.jar ApplistCount

# Sample input file on HDFS:
#   hdfs://cdhnode1:8020/originData/clientlabel/AA77p2_20180525.txt
# CDH hadoop tarball download:
#   http://archive.cloudera.com/cdh5/cdh/5/hadoop-2.6.0-cdh5.14.2.tar.gz

# Environment variables (cluster side, parcel layout):
export HADOOP_HOME=/opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3
export YARN_HOME=$HADOOP_HOME
export PATH=$PATH:$HADOOP_HOME/bin:$YARN_HOME/bin

# Smoke-test with the bundled example job.
# (wordcount takes <input> <output>; the original listed only the output
#  path — input restored from the sample file above.)
su hdfs
hadoop jar /opt/hadoop-mapreduce-examples-2.8.2.jar wordcount \
    /originData/clientlabel/AA77p2_20180525.txt /originData/clientlabel/output

# Run the custom job:
hadoop jar ./GeoCreditPro-beat-2.0.jar ApplistCount /originData/clientlabel/AA77p2_20180525.txt /originData/clientlabel/output2

# Start the ResourceManager manually if needed:
yarn resourcemanager

# IDE run-configuration program arguments for local submission:
#   ApplistCount /originData/clientlabel/AA77p2_20180525.txt /originData/clientlabel/output4

image.png

core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<!-- 原文粘贴时 XML 标签丢失，以下已按 name/value 对还原为标准格式 -->
<configuration>
    <property><name>fs.defaultFS</name><value>hdfs://cdhnode1:8020</value></property>
    <property><name>fs.trash.interval</name><value>1</value></property>
    <property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec</value></property>
    <property><name>hadoop.security.authentication</name><value>simple</value></property>
    <property><name>hadoop.security.authorization</name><value>false</value></property>
    <property><name>hadoop.rpc.protection</name><value>authentication</value></property>
    <property><name>hadoop.security.auth_to_local</name><value>DEFAULT</value></property>
    <property><name>hadoop.proxyuser.oozie.hosts</name><value>*</value></property>
    <property><name>hadoop.proxyuser.oozie.groups</name><value>*</value></property>
    <property><name>hadoop.proxyuser.mapred.hosts</name><value>*</value></property>
    <property><name>hadoop.proxyuser.mapred.groups</name><value>*</value></property>
    <property><name>hadoop.proxyuser.flume.hosts</name><value>*</value></property>
    <property><name>hadoop.proxyuser.flume.groups</name><value>*</value></property>
    <property><name>hadoop.proxyuser.HTTP.hosts</name><value>*</value></property>
    <property><name>hadoop.proxyuser.HTTP.groups</name><value>*</value></property>
    <property><name>hadoop.proxyuser.hive.hosts</name><value>*</value></property>
    <property><name>hadoop.proxyuser.hive.groups</name><value>*</value></property>
    <property><name>hadoop.proxyuser.hue.hosts</name><value>*</value></property>
    <property><name>hadoop.proxyuser.hue.groups</name><value>*</value></property>
    <property><name>hadoop.proxyuser.httpfs.hosts</name><value>*</value></property>
    <property><name>hadoop.proxyuser.httpfs.groups</name><value>*</value></property>
    <property><name>hadoop.proxyuser.hdfs.groups</name><value>*</value></property>
    <property><name>hadoop.proxyuser.hdfs.hosts</name><value>*</value></property>
    <property><name>hadoop.proxyuser.yarn.hosts</name><value>*</value></property>
    <property><name>hadoop.proxyuser.yarn.groups</name><value>*</value></property>
    <property><name>hadoop.security.group.mapping</name><value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value></property>
    <property><name>hadoop.security.instrumentation.requires.admin</name><value>false</value></property>
    <property><name>net.topology.script.file.name</name><value>/etc/hadoop/conf.cloudera.yarn/topology.py</value></property>
    <property><name>hadoop.ssl.enabled</name><value>false</value></property>
    <property><name>hadoop.proxyuser.llama.hosts</name><value>*</value></property>
    <property><name>hadoop.proxyuser.llama.groups</name><value>*</value></property>
    <!-- 下面四项原文在 value 之后还有一个 true，对应被剥掉的 <final>true</final> 标签 -->
    <property><name>hadoop.ssl.require.client.cert</name><value>false</value><final>true</final></property>
    <property><name>hadoop.ssl.keystores.factory.class</name><value>org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory</value><final>true</final></property>
    <property><name>hadoop.ssl.server.conf</name><value>ssl-server.xml</value><final>true</final></property>
    <property><name>hadoop.ssl.client.conf</name><value>ssl-client.xml</value><final>true</final></property>
    <property><name>hadoop.http.logs.enabled</name><value>true</value></property>
</configuration>
hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<!-- 原文粘贴时 XML 标签丢失，以下已按 name/value 对还原为标准格式 -->
<configuration>
    <property><name>dfs.namenode.name.dir</name><value>file:///data1/dfs/nn,file:///data2/dfs/nn</value></property>
    <property><name>dfs.namenode.servicerpc-address</name><value>cdhnode1:8022</value></property>
    <property><name>dfs.https.address</name><value>cdhnode1:50470</value></property>
    <property><name>dfs.https.port</name><value>50470</value></property>
    <property><name>dfs.namenode.http-address</name><value>cdhnode1:50070</value></property>
    <property><name>dfs.replication</name><value>2</value></property>
    <property><name>dfs.blocksize</name><value>134217728</value></property>
    <property><name>dfs.client.use.datanode.hostname</name><value>false</value></property>
    <property><name>fs.permissions.umask-mode</name><value>022</value></property>
    <property><name>dfs.namenode.acls.enabled</name><value>false</value></property>
    <property><name>dfs.client.use.legacy.blockreader</name><value>false</value></property>
    <property><name>dfs.client.read.shortcircuit</name><value>false</value></property>
    <property><name>dfs.domain.socket.path</name><value>/var/run/hdfs-sockets/dn</value></property>
    <property><name>dfs.client.read.shortcircuit.skip.checksum</name><value>false</value></property>
    <property><name>dfs.client.domain.socket.data.traffic</name><value>false</value></property>
    <property><name>dfs.datanode.hdfs-blocks-metadata.enabled</name><value>true</value></property>
</configuration>
mapred-site.xml（注：Hadoop 实际读取的文件名是 mapred-site.xml，原文误写为 mapreduce-site.xml）

<?xml version="1.0" encoding="UTF-8"?>
<!-- 原文粘贴时 XML 标签丢失，以下已按 name/value 对还原为标准格式 -->
<configuration>
    <property><name>mapreduce.framework.name</name><value>yarn</value></property>
    <!-- 客户端在 Mac/Windows、集群在 Linux 时，跨平台提交需要下面两项 -->
    <property><name>mapred.remote.os</name><value>Linux</value></property>
    <property><name>mapreduce.jobhistory.webapp.address</name><value>cdhnode1:19888</value></property>
    <property><name>mapreduce.jobhistory.webapp.https.address</name><value>cdhnode1:19890</value></property>
    <property><name>mapreduce.app-submission.cross-platform</name><value>true</value></property>
    <property>
        <name>mapreduce.application.classpath</name>
        <value>
            /usr/local/hadoop/etc/hadoop,
            /usr/local/hadoop/share/hadoop/common/*,
            /usr/local/hadoop/share/hadoop/common/lib/*,
            /usr/local/hadoop/share/hadoop/hdfs/*,
            /usr/local/hadoop/share/hadoop/hdfs/lib/*,
            /usr/local/hadoop/share/hadoop/mapreduce/*,
            /usr/local/hadoop/share/hadoop/mapreduce/lib/*,
            /usr/local/hadoop/share/hadoop/yarn/*,
            /usr/local/hadoop/share/hadoop/yarn/lib/*
        </value>
    </property>
</configuration>
yarn-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<!-- 原文粘贴时 XML 标签丢失，以下已按 name/value 对还原为标准格式 -->
<configuration>
    <property><name>yarn.acl.enable</name><value>true</value></property>
    <property><name>yarn.admin.acl</name><value>*</value></property>
    <property><name>yarn.log-aggregation-enable</name><value>true</value></property>
    <property><name>yarn.log-aggregation.retain-seconds</name><value>604800</value></property>
    <property><name>yarn.resourcemanager.ha.enabled</name><value>true</value></property>
    <property><name>yarn.resourcemanager.ha.automatic-failover.enabled</name><value>true</value></property>
    <property><name>yarn.resourcemanager.ha.automatic-failover.embedded</name><value>true</value></property>
    <property><name>yarn.resourcemanager.recovery.enabled</name><value>true</value></property>
    <property><name>yarn.resourcemanager.zk-address</name><value>cdhnode1:2181,cdhmaster:2181,cdhnode2:2181,cdhnode3:2181</value></property>
    <property><name>yarn.resourcemanager.store.class</name><value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value></property>
    <property><name>yarn.client.failover-sleep-base-ms</name><value>100</value></property>
    <property><name>yarn.client.failover-sleep-max-ms</name><value>2000</value></property>
    <property><name>yarn.resourcemanager.cluster-id</name><value>yarnRM</value></property>
    <property><name>yarn.resourcemanager.work-preserving-recovery.enabled</name><value>true</value></property>
    <property><name>yarn.resourcemanager.ha.id</name><value>rm198</value></property>
    <!-- RM HA：rm198 = cdhnode1 -->
    <property><name>yarn.resourcemanager.address.rm198</name><value>cdhnode1:8032</value></property>
    <property><name>yarn.resourcemanager.scheduler.address.rm198</name><value>cdhnode1:8030</value></property>
    <property><name>yarn.resourcemanager.resource-tracker.address.rm198</name><value>cdhnode1:8031</value></property>
    <property><name>yarn.resourcemanager.admin.address.rm198</name><value>cdhnode1:8033</value></property>
    <property><name>yarn.resourcemanager.webapp.address.rm198</name><value>cdhnode1:8088</value></property>
    <property><name>yarn.resourcemanager.webapp.https.address.rm198</name><value>cdhnode1:8090</value></property>
    <!-- RM HA：rm214 = cdhnode3 -->
    <property><name>yarn.resourcemanager.address.rm214</name><value>cdhnode3:8032</value></property>
    <property><name>yarn.resourcemanager.scheduler.address.rm214</name><value>cdhnode3:8030</value></property>
    <property><name>yarn.resourcemanager.resource-tracker.address.rm214</name><value>cdhnode3:8031</value></property>
    <property><name>yarn.resourcemanager.admin.address.rm214</name><value>cdhnode3:8033</value></property>
    <property><name>yarn.resourcemanager.webapp.address.rm214</name><value>cdhnode3:8088</value></property>
    <property><name>yarn.resourcemanager.webapp.https.address.rm214</name><value>cdhnode3:8090</value></property>
    <property><name>yarn.resourcemanager.ha.rm-ids</name><value>rm198,rm214</value></property>
    <property><name>yarn.resourcemanager.proxy-user-privileges.enabled</name><value>true</value></property>
    <property><name>yarn.resourcemanager.nodes.include-path</name><value>/run/cloudera-scm-agent/process/350-yarn-RESOURCEMANAGER/nodes_allow.txt</value></property>
    <property><name>yarn.resourcemanager.nodes.exclude-path</name><value>/run/cloudera-scm-agent/process/350-yarn-RESOURCEMANAGER/nodes_exclude.txt</value></property>
    <property><name>yarn.resourcemanager.client.thread-count</name><value>50</value></property>
    <property><name>yarn.resourcemanager.scheduler.client.thread-count</name><value>50</value></property>
    <property><name>yarn.resourcemanager.admin.client.thread-count</name><value>1</value></property>
    <property><name>yarn.scheduler.minimum-allocation-mb</name><value>1024</value></property>
    <property><name>yarn.scheduler.increment-allocation-mb</name><value>512</value></property>
    <property><name>yarn.scheduler.maximum-allocation-mb</name><value>53931</value></property>
    <property><name>yarn.scheduler.minimum-allocation-vcores</name><value>1</value></property>
    <property><name>yarn.scheduler.increment-allocation-vcores</name><value>1</value></property>
    <property><name>yarn.scheduler.maximum-allocation-vcores</name><value>24</value></property>
    <property><name>yarn.resourcemanager.amliveliness-monitor.interval-ms</name><value>1000</value></property>
    <property><name>yarn.am.liveness-monitor.expiry-interval-ms</name><value>600000</value></property>
    <property><name>yarn.resourcemanager.am.max-attempts</name><value>2</value></property>
    <property><name>yarn.resourcemanager.container.liveness-monitor.interval-ms</name><value>600000</value></property>
    <property><name>yarn.resourcemanager.nm.liveness-monitor.interval-ms</name><value>1000</value></property>
    <property><name>yarn.nm.liveness-monitor.expiry-interval-ms</name><value>600000</value></property>
    <property><name>yarn.resourcemanager.resource-tracker.client.thread-count</name><value>50</value></property>
    <property><name>yarn.application.classpath</name><value>$HADOOP_CLIENT_CONF_DIR,$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*</value></property>
    <property><name>yarn.resourcemanager.scheduler.class</name><value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value></property>
    <property><name>yarn.scheduler.fair.allow-undeclared-pools</name><value>true</value></property>
    <property><name>yarn.scheduler.fair.user-as-default-queue</name><value>true</value></property>
    <property><name>yarn.scheduler.fair.preemption</name><value>false</value></property>
    <property><name>yarn.scheduler.fair.preemption.cluster-utilization-threshold</name><value>0.8</value></property>
    <property><name>yarn.scheduler.fair.sizebasedweight</name><value>false</value></property>
    <property><name>yarn.scheduler.fair.assignmultiple</name><value>true</value></property>
    <property><name>yarn.scheduler.fair.continuous-scheduling-enabled</name><value>false</value></property>
    <property><name>yarn.scheduler.fair.locality-delay-node-ms</name><value>2000</value></property>
    <property><name>yarn.scheduler.fair.locality-delay-rack-ms</name><value>4000</value></property>
    <property><name>yarn.scheduler.fair.continuous-scheduling-sleep-ms</name><value>5</value></property>
    <property><name>yarn.resourcemanager.max-completed-applications</name><value>10000</value></property>
    <property><name>yarn.resourcemanager.zk-timeout-ms</name><value>60000</value></property>
</configuration>

本地客户端需要把 yarn.application.classpath 覆盖为本机 hadoop 路径（将下面的 property 放入本地 yarn-site.xml 的 configuration 内，替换上面 CDH 默认的同名配置）：

    <property>
        <name>yarn.application.classpath</name>
        <value>
        /usr/local/hadoop/etc/hadoop,
        /usr/local/hadoop/share/hadoop/common/*,
        /usr/local/hadoop/share/hadoop/common/lib/*,
        /usr/local/hadoop/share/hadoop/hdfs/*,
        /usr/local/hadoop/share/hadoop/hdfs/lib/*,
        /usr/local/hadoop/share/hadoop/mapreduce/*,
        /usr/local/hadoop/share/hadoop/mapreduce/lib/*,
        /usr/local/hadoop/share/hadoop/yarn/*,
        /usr/local/hadoop/share/hadoop/yarn/lib/*
        </value>
    </property>


你可能感兴趣的:(在自己电脑配置cdh 版 hadoop 提交mr job客户端)