See: [Hadoop] (1) Hadoop installation: environment preparation
See: [Hadoop] (2) Hadoop installation: installation and configuration
See: [Hadoop] (3) Hello World: WordCount
Old API
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class WordCountByOldAPI {

    private static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            // Split the line into tokens and emit (word, 1) for each one.
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }
    }

    private static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            // Sum the counts for each word.
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(WordCountByOldAPI.class);
        conf.setJobName("wordCount");
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);
        conf.setMapperClass(Map.class);
        conf.setReducerClass(Reduce.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}
Result:
[hadoop@master deploy]$ hadoop jar hello-hadoop-1.0-SNAPSHOT.jar com.demo.hellohadoop.mapreduce.WordCountByOldAPI /tmp/input /tmp/wordcount01
...
[hadoop@master deploy]$ hdfs dfs -cat /tmp/wordcount01/part-00000
bye 1
goodbye 1
hadoop 2
hello 2
world 2
New API
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class WordCountByNewAPI {

    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Split the line into tokens and emit (word, 1) for each one.
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Sum the counts for each word.
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "wordCount");
        job.setJarByClass(WordCountByNewAPI.class);
        job.setMapperClass(WordCountByNewAPI.Map.class);
        job.setReducerClass(WordCountByNewAPI.Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Result:
[hadoop@master deploy]$ hadoop jar hello-hadoop-1.0-SNAPSHOT.jar com.demo.hellohadoop.mapreduce.WordCountByNewAPI /tmp/input /tmp/wordcount02
...
[hadoop@master deploy]$ hdfs dfs -cat /tmp/wordcount02/part-r-00000
bye 1
goodbye 1
hadoop 2
hello 2
world 2
See: [Hadoop] HDFS: Java API
[hadoop@master software]$ dd if=/dev/zero of=test bs=1M count=200
[hadoop@master software]$ hdfs dfs -put ./test /tmp
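With the default dfs.blocksize of 128 MB (Hadoop 2.x, assuming it has not been changed), the 200 MB file should be split into two blocks; this can be checked with fsck:
[hadoop@master software]$ hdfs fsck /tmp/test -files -blocks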
See: [Hadoop] HDFS: Java API
Create a file containing the numbers 1 to 100 and upload it to HDFS:
[hadoop@master deploy]$ for i in {1..100};do echo $i;done >> seq.txt
[hadoop@master deploy]$ hdfs dfs -put seq.txt /tmp
Write a MapReduce job that multiplies each number by 2:
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MapReduceTest01 {

    public static class Map extends Mapper<LongWritable, Text, IntWritable, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Each input line holds one number n; emit (n, 2n).
            int number = Integer.parseInt(value.toString().trim());
            context.write(new IntWritable(number), new IntWritable(number * 2));
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // conf.set("mapreduce.cluster.local.dir", "");
        Job job = Job.getInstance(conf, "test01");
        job.setJarByClass(MapReduceTest01.class);
        job.setMapperClass(Map.class);
        // No reducer is set, so the default identity Reducer passes the pairs through.
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        Path input = new Path("hdfs://192.168.236.135:9000/tmp/seq.txt");
        Path output = new Path("hdfs://192.168.236.135:9000/tmp/test01");
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
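Assuming MapReduceTest01 is packaged into the same hello-hadoop jar and package as the WordCount examples above, the job can be submitted and its output inspected like this; because the default identity Reducer runs with one reduce task, the output file is named part-r-00000 and each line holds a number and its double, tab-separated:
[hadoop@master deploy]$ hadoop jar hello-hadoop-1.0-SNAPSHOT.jar com.demo.hellohadoop.mapreduce.MapReduceTest01
[hadoop@master deploy]$ hdfs dfs -cat /tmp/test01/part-r-00000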
[hadoop@master deploy]$ while true;do echo $RANDOM;done >> numbers.txt
(This loop runs until interrupted; press Ctrl-C once numbers.txt is as large as needed.)
See: [Hive] Installation and configuration
[hadoop@master lahman2012-csv]$ less Fielding.csv
playerID,yearID,stint,teamID,lgID,POS,G,GS,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
aardsda01,2004,1,SFN,NL,P,11,0,32,0,0,0,0,,,,,
aardsda01,2006,1,CHN,NL,P,45,0,159,1,5,0,1,,,,,
aardsda01,2007,1,CHA,AL,P,25,0,97,2,4,1,0,,,,,
aardsda01,2008,1,BOS,AL,P,47,0,146,3,6,0,0,,,,,
aardsda01,2009,1,SEA,AL,P,73,0,214,2,5,0,1,0,,0,0,
aardsda01,2010,1,SEA,AL,P,53,0,149,2,3,1,0,0,,0,0,
aardsda01,2012,1,NYA,AL,P,1,0,3,0,0,0,0,,,,,
aaronha01,1954,1,ML1,NL,LF,105,102,2773,205,4,6,0,,,,,
aaronha01,1954,1,ML1,NL,OF,116,113,3093,223,5,7,0,,,,,
aaronha01,1954,1,ML1,NL,RF,11,11,320,12,1,1,1,,,,,
create table dw.fielding(
playerID string,
yearID int,
stint int,
teamID string,
lgID string,
POS string,
G string,
GS string,
InnOuts string,
PO string,
A int,
E int,
DP int,
PB int,
WP int,
SB int,
CS int,
ZR int
)
row format delimited fields terminated by ','
stored as textfile
-- the first line of Fielding.csv is a column header, so skip it on load
tblproperties("skip.header.line.count"="1");
hive> load data local inpath '/home/hadoop/software/packages/lahman2012-csv-hive/lahman2012-csv/Fielding.csv' into table dw.fielding;
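A quick sanity check that the rows parsed into columns; for example, count rows per position (query is illustrative only):
hive> select POS, count(*) from dw.fielding group by POS;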
See: Nginx installation and configuration
[hadoop@master logs]$ less test1.com.access.log
127.0.0.1 - 11/Jul/2019:14:38:19 +0800 GET / HTTP/1.1 200 146 - Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0 -
127.0.0.1 - 11/Jul/2019:14:43:56 +0800 GET /index HTTP/1.1 201 2 - curl/7.29.0 -
127.0.0.1 - 11/Jul/2019:14:43:56 +0800 GET /index HTTP/1.1 201 2 - curl/7.29.0 -
...
create table nginx_log(
remote_addr string,
remote_user string,
time_local string,
request string,
status int,
body_bytes_sent int,
http_referer string,
http_user_agent string,
http_x_forwarded_for string
)
row format delimited
fields terminated by '\t'
stored as textfile;
hive> load data local inpath '/opt/nginx/logs/test1.com.access.log' into table nginx_log;
hive> select * from nginx_log limit 10;
OK
127.0.0.1 - 11/Jul/2019:14:38:19 +0800 GET / HTTP/1.1 200 146 - Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0 - NULL NULL NULL NULL NULL NULL NULL NULL
127.0.0.1 - 11/Jul/2019:14:43:56 +0800 GET /index HTTP/1.1 201 2 - curl/7.29.0 - NULL NULL NULL NULL NULL NULL NULL NULL
...
Every column after the first is NULL, which suggests the log fields are separated by spaces rather than '\t': the whole line is parsed into remote_addr and the remaining columns get no data.
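To get each field into its own column, one common approach is Hive's built-in RegexSerDe. A minimal sketch matched to the sample lines above (the regex is an assumption about the exact log format, and this SerDe requires all columns to be declared string):
create table nginx_log_parsed(
remote_addr string,
remote_user string,
time_local string,
request string,
status string,
body_bytes_sent string,
http_referer string,
http_user_agent string,
http_x_forwarded_for string
)
row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
with serdeproperties (
"input.regex" = "^(\\S+) (\\S+) (\\S+ \\S+) (\\S+ \\S+ \\S+) (\\d+) (\\d+) (\\S+) (.*) (\\S+)$"
)
stored as textfile;
Load the same log file into this table and the nine capture groups map to the nine columns in order.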
See: [sqoop] Installation and usage
See: [zookeeper] Installation and configuration