1. 首先下载与我们的 CDH Hadoop 集群版本对应的 Hadoop 安装文件：
hadoop-2.6.0-cdh5.14.2.tar.gz
链接: https://pan.baidu.com/s/1iHm5M-gGZRWLKbzVjbYJmA 密码: q9nv
2. 将 Hadoop 解压到自己的本地电脑上：
macOS：/opt
Windows：D 盘
3. 将测试服务器上 Hadoop 的配置文件复制到本地 Hadoop 的配置文件目录（etc/hadoop）
4. 配置环境变量（HADOOP_HOME、PATH 等）并使之生效
- 如果使用 Scala / Java 开发，需要先下载依赖的 jar 包
比如 Scala 项目的 build.sbt：
// sbt build definition for a Hadoop MapReduce client job built against the
// CDH 5.14.2 artifacts and packaged as a fat jar with sbt-assembly.
name := "sbtawsHadoop"

version := "0.1"

scalaVersion := "2.12.6"

// Hadoop artifacts are pinned to the same CDH release as the cluster so the
// client and server wire protocols match.
libraryDependencies ++= Seq(
  "org.apache.hadoop" % "hadoop-common" % "2.6.0-cdh5.14.2",
  "org.apache.hadoop" % "hadoop-hdfs" % "2.6.0-cdh5.14.2",
  "org.apache.hadoop" % "hadoop-client" % "2.6.0-cdh5.14.2",
  "org.apache.hadoop" % "hadoop-mapreduce-client-core" % "2.6.0-cdh5.14.2",
  "org.apache.hadoop" % "hadoop-mapreduce-client-common" % "2.6.0-cdh5.14.2",
  "org.apache.hadoop" % "hadoop-mapreduce-client-jobclient" % "2.6.0-cdh5.14.2",
  // NOTE(review): "hbase" looks like the parent/aggregator POM coordinate;
  // "hbase-client" may be what is actually needed - confirm before changing.
  "org.apache.hbase" % "hbase" % "1.2.0-cdh5.14.2"
)

// Additional resource roots copied onto the classpath.
unmanagedResourceDirectories in Compile += baseDirectory.value / "conf"
unmanagedResourceDirectories in Compile += baseDirectory.value / "data"
unmanagedResourceDirectories in Compile += baseDirectory.value / "public"

// Primary resource directory. A setting assigned with `:=` keeps only its
// last assignment, so a preceding `:= baseDirectory.value / "data"` had no
// effect and was dropped; "data" is still a resource root via the
// unmanagedResourceDirectories entries above.
resourceDirectory in Compile := baseDirectory.value / "conf"

resolvers += "Sonatype OSS Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots"
resolvers += "cdh" at "https://repository.cloudera.com/artifactory/cloudera-repos"

// Location of the assembled fat jar.
assemblyOutputPath in assembly := baseDirectory.value / "count-beat-80201.jar"

assemblyMergeStrategy in assembly := {
  // Keep ServiceLoader registrations (e.g. org.apache.hadoop.fs.FileSystem).
  // Discarding them makes the fat jar fail with
  // "No FileSystem for scheme: hdfs" when run outside `hadoop jar`.
  case PathList("META-INF", "services", xs @ _*) => MergeStrategy.filterDistinctLines
  // Remaining META-INF content (manifests, signatures) conflicts across jars.
  case PathList("META-INF", xs @ _*) => MergeStrategy.discard
  // For any other duplicate path, keep the first copy on the classpath.
  case _ => MergeStrategy.first
}
su hdfs
ln -s /usr/local/hadoop-2.6.0-cdh5.14.2/bin/hadoop /usr/local/bin/hadoop
ln -s /usr/local/hadoop/bin/hadoop hadoop
sudo netstat -tulpn | grep :8020
CDH 配置文件路径：
/run/cloudera-scm-agent/process/350-yarn-RESOURCEMANAGER/
/run/cloudera-scm-agent/process/350-yarn-RESOURCEMANAGER/yarn-site.xml
/var/log/hadoop-yarn/hadoop-cmf-yarn-RESOURCEMANAGER-cdhnode1.log.out
/opt/cloudera/parcel-repo//CDH-5.14.2-1.cdh5.14.2.p0.3/lib/hadoop-yarn/bin/yarn nodemanager
/opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3/bin/yarn
/opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3/jars/hadoop-common-2.6.0-cdh5.14.2.jar
/usr/local/Cellar/hadoop/2.8.2/bin/hadoop jar ./count-beat-80201.jar ApplistCount
hdfs://cdhnode1:8020/originData/clientlabel/AA77p2_20180525.txt
http://archive.cloudera.com/cdh5/cdh/5/hadoop-2.6.0-cdh5.14.2.tar.gz
export HADOOP_HOME=/opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3
export YARN_HOME=$HADOOP_HOME
export PATH=$PATH:$HADOOP_HOME/bin:$YARN_HOME/bin
su hdfs
hadoop jar /opt/hadoop-mapreduce-examples-2.8.2.jar wordcount
/originData/clientlabel/output
hadoop jar ./GeoCreditPro-beat-2.0.jar ApplistCount /originData/clientlabel/AA77p2_20180525.txt /originData/clientlabel/output2
yarn resourcemanager
ApplistCount /originData/clientlabel/AA77p2_20180525.txt /originData/clientlabel/output4
core-site.xml
fs.defaultFS
hdfs://cdhnode1:8020
fs.trash.interval
1
io.compression.codecs
org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec
hadoop.security.authentication
simple
hadoop.security.authorization
false
hadoop.rpc.protection
authentication
hadoop.security.auth_to_local
DEFAULT
hadoop.proxyuser.oozie.hosts
*
hadoop.proxyuser.oozie.groups
*
hadoop.proxyuser.mapred.hosts
*
hadoop.proxyuser.mapred.groups
*
hadoop.proxyuser.flume.hosts
*
hadoop.proxyuser.flume.groups
*
hadoop.proxyuser.HTTP.hosts
*
hadoop.proxyuser.HTTP.groups
*
hadoop.proxyuser.hive.hosts
*
hadoop.proxyuser.hive.groups
*
hadoop.proxyuser.hue.hosts
*
hadoop.proxyuser.hue.groups
*
hadoop.proxyuser.httpfs.hosts
*
hadoop.proxyuser.httpfs.groups
*
hadoop.proxyuser.hdfs.groups
*
hadoop.proxyuser.hdfs.hosts
*
hadoop.proxyuser.yarn.hosts
*
hadoop.proxyuser.yarn.groups
*
hadoop.security.group.mapping
org.apache.hadoop.security.ShellBasedUnixGroupsMapping
hadoop.security.instrumentation.requires.admin
false
net.topology.script.file.name
/etc/hadoop/conf.cloudera.yarn/topology.py
hadoop.ssl.enabled
false
hadoop.proxyuser.llama.hosts
*
hadoop.proxyuser.llama.groups
*
hadoop.ssl.require.client.cert
false
final: true
hadoop.ssl.keystores.factory.class
org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory
final: true
hadoop.ssl.server.conf
ssl-server.xml
final: true
hadoop.ssl.client.conf
ssl-client.xml
final: true
hadoop.http.logs.enabled
true
hdfs-site.xml
dfs.namenode.name.dir
file:///data1/dfs/nn,file:///data2/dfs/nn
dfs.namenode.servicerpc-address
cdhnode1:8022
dfs.https.address
cdhnode1:50470
dfs.https.port
50470
dfs.namenode.http-address
cdhnode1:50070
dfs.replication
2
dfs.blocksize
134217728
dfs.client.use.datanode.hostname
false
fs.permissions.umask-mode
022
dfs.namenode.acls.enabled
false
dfs.client.use.legacy.blockreader
false
dfs.client.read.shortcircuit
false
dfs.domain.socket.path
/var/run/hdfs-sockets/dn
dfs.client.read.shortcircuit.skip.checksum
false
dfs.client.domain.socket.data.traffic
false
dfs.datanode.hdfs-blocks-metadata.enabled
true
mapred-site.xml
mapreduce.framework.name
yarn
mapred.remote.os
Linux
mapreduce.jobhistory.webapp.address
cdhnode1:19888
mapreduce.jobhistory.webapp.https.address
cdhnode1:19890
mapreduce.app-submission.cross-platform
true
mapreduce.application.classpath
/usr/local/hadoop/etc/hadoop,
/usr/local/hadoop/share/hadoop/common/*,
/usr/local/hadoop/share/hadoop/common/lib/*,
/usr/local/hadoop/share/hadoop/hdfs/*,
/usr/local/hadoop/share/hadoop/hdfs/lib/*,
/usr/local/hadoop/share/hadoop/mapreduce/*,
/usr/local/hadoop/share/hadoop/mapreduce/lib/*,
/usr/local/hadoop/share/hadoop/yarn/*,
/usr/local/hadoop/share/hadoop/yarn/lib/*
yarn-site.xml
yarn.acl.enable
true
yarn.admin.acl
*
yarn.log-aggregation-enable
true
yarn.log-aggregation.retain-seconds
604800
yarn.resourcemanager.ha.enabled
true
yarn.resourcemanager.ha.automatic-failover.enabled
true
yarn.resourcemanager.ha.automatic-failover.embedded
true
yarn.resourcemanager.recovery.enabled
true
yarn.resourcemanager.zk-address
cdhnode1:2181,cdhmaster:2181,cdhnode2:2181,cdhnode3:2181
yarn.resourcemanager.store.class
org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore
yarn.client.failover-sleep-base-ms
100
yarn.client.failover-sleep-max-ms
2000
yarn.resourcemanager.cluster-id
yarnRM
yarn.resourcemanager.work-preserving-recovery.enabled
true
yarn.resourcemanager.ha.id
rm198
yarn.resourcemanager.address.rm198
cdhnode1:8032
yarn.resourcemanager.scheduler.address.rm198
cdhnode1:8030
yarn.resourcemanager.resource-tracker.address.rm198
cdhnode1:8031
yarn.resourcemanager.admin.address.rm198
cdhnode1:8033
yarn.resourcemanager.webapp.address.rm198
cdhnode1:8088
yarn.resourcemanager.webapp.https.address.rm198
cdhnode1:8090
yarn.resourcemanager.address.rm214
cdhnode3:8032
yarn.resourcemanager.scheduler.address.rm214
cdhnode3:8030
yarn.resourcemanager.resource-tracker.address.rm214
cdhnode3:8031
yarn.resourcemanager.admin.address.rm214
cdhnode3:8033
yarn.resourcemanager.webapp.address.rm214
cdhnode3:8088
yarn.resourcemanager.webapp.https.address.rm214
cdhnode3:8090
yarn.resourcemanager.ha.rm-ids
rm198,rm214
yarn.resourcemanager.proxy-user-privileges.enabled
true
yarn.resourcemanager.nodes.include-path
/run/cloudera-scm-agent/process/350-yarn-RESOURCEMANAGER/nodes_allow.txt
yarn.resourcemanager.nodes.exclude-path
/run/cloudera-scm-agent/process/350-yarn-RESOURCEMANAGER/nodes_exclude.txt
yarn.resourcemanager.client.thread-count
50
yarn.resourcemanager.scheduler.client.thread-count
50
yarn.resourcemanager.admin.client.thread-count
1
yarn.scheduler.minimum-allocation-mb
1024
yarn.scheduler.increment-allocation-mb
512
yarn.scheduler.maximum-allocation-mb
53931
yarn.scheduler.minimum-allocation-vcores
1
yarn.scheduler.increment-allocation-vcores
1
yarn.scheduler.maximum-allocation-vcores
24
yarn.resourcemanager.amliveliness-monitor.interval-ms
1000
yarn.am.liveness-monitor.expiry-interval-ms
600000
yarn.resourcemanager.am.max-attempts
2
yarn.resourcemanager.container.liveness-monitor.interval-ms
600000
yarn.resourcemanager.nm.liveness-monitor.interval-ms
1000
yarn.nm.liveness-monitor.expiry-interval-ms
600000
yarn.resourcemanager.resource-tracker.client.thread-count
50
yarn.application.classpath
$HADOOP_CLIENT_CONF_DIR,$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*
yarn.resourcemanager.scheduler.class
org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler
yarn.scheduler.fair.allow-undeclared-pools
true
yarn.scheduler.fair.user-as-default-queue
true
yarn.scheduler.fair.preemption
false
yarn.scheduler.fair.preemption.cluster-utilization-threshold
0.8
yarn.scheduler.fair.sizebasedweight
false
yarn.scheduler.fair.assignmultiple
true
yarn.scheduler.fair.continuous-scheduling-enabled
false
yarn.scheduler.fair.locality-delay-node-ms
2000
yarn.scheduler.fair.locality-delay-rack-ms
4000
yarn.scheduler.fair.continuous-scheduling-sleep-ms
5
yarn.resourcemanager.max-completed-applications
10000
yarn.resourcemanager.zk-timeout-ms
60000
yarn.application.classpath（注意：该属性在本文件前面已定义过一次；同一配置文件中重复定义时，以后出现的值为准）
/usr/local/hadoop/etc/hadoop,
/usr/local/hadoop/share/hadoop/common/*,
/usr/local/hadoop/share/hadoop/common/lib/*,
/usr/local/hadoop/share/hadoop/hdfs/*,
/usr/local/hadoop/share/hadoop/hdfs/lib/*,
/usr/local/hadoop/share/hadoop/mapreduce/*,
/usr/local/hadoop/share/hadoop/mapreduce/lib/*,
/usr/local/hadoop/share/hadoop/yarn/*,
/usr/local/hadoop/share/hadoop/yarn/lib/*