The dept file holds one department per line, with the fields department number, department name and city:
10,ACCOUNTING,NEW YORK
20,RESEARCH,DALLAS
30,SALES,CHICAGO
40,OPERATIONS,BOSTON

The emp file holds one employee per line, with the fields employee number, name, job, manager number, hire date, salary, commission and department number:
7369,SMITH,CLERK,7902,17-12月-80,800,,20
7499,ALLEN,SALESMAN,7698,20-2月-81,1600,300,30
7521,WARD,SALESMAN,7698,22-2月-81,1250,500,30
7566,JONES,MANAGER,7839,02-4月-81,2975,,20
7654,MARTIN,SALESMAN,7698,28-9月-81,1250,1400,30
7698,BLAKE,MANAGER,7839,01-5月-81,2850,,30
7782,CLARK,MANAGER,7839,09-6月-81,2450,,10
7839,KING,PRESIDENT,,17-11月-81,5000,,10
7844,TURNER,SALESMAN,7698,08-9月-81,1500,0,30
7900,JAMES,CLERK,7698,03-12月-81,950,,30
7902,FORD,ANALYST,7566,03-12月-81,3000,,20
7934,MILLER,CLERK,7782,23-1月-82,1300,,10

Upload both files to HDFS from the local directory that holds them:
cd /home/shiyanlou/install-pack/class6
hadoop fs -mkdir -p /class6/input
hadoop fs -copyFromLocal dept /class6/input
hadoop fs -copyFromLocal emp /class6/input
hadoop fs -ls /class6/input

Example 1: total salary per department. Q1SumDeptSalary distributes the dept file to every map task through DistributedCache, joins each emp record against it on the map side, and sums the salaries per department in the reducer.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Q1SumDeptSalary extends Configured implements Tool {

    public static class MapClass extends Mapper<LongWritable, Text, Text, Text> {

        // Cache for the dept file: key = department number, value = department name
        private Map<String, String> deptMap = new HashMap<String, String>();
        private String[] kv;

        // Runs once per map task, before any call to map()
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            BufferedReader in = null;
            try {
                // Get the files cached for this job
                Path[] paths = DistributedCache.getLocalCacheFiles(context.getConfiguration());
                String deptIdName = null;
                for (Path path : paths) {
                    if (path.toString().contains("dept")) {
                        in = new BufferedReader(new FileReader(path.toString()));
                        while (null != (deptIdName = in.readLine())) {
                            // Split each dept record and cache it in deptMap:
                            // key = department number, value = department name
                            deptMap.put(deptIdName.split(",")[0], deptIdName.split(",")[1]);
                        }
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                try {
                    if (in != null) {
                        in.close();
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the employee record
            // (empno, name, job, manager, hire date, salary, commission, deptno)
            kv = value.toString().split(",");
            // Map-side join: drop records without a known department or a salary,
            // then emit key = department name, value = salary
            if (deptMap.containsKey(kv[7])) {
                if (null != kv[5] && !"".equals(kv[5].toString())) {
                    context.write(new Text(deptMap.get(kv[7].trim())), new Text(kv[5].trim()));
                }
            }
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, LongWritable> {

        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Sum the salaries of all employees in the same department
            long sumSalary = 0;
            for (Text val : values) {
                sumSalary += Long.parseLong(val.toString());
            }
            // Emit key = department name, value = total salary of that department
            context.write(key, new LongWritable(sumSalary));
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        // Create the job and set its name, Mapper and Reducer classes
        Job job = new Job(getConf(), "Q1SumDeptSalary");
        job.setJobName("Q1SumDeptSalary");
        job.setJarByClass(Q1SumDeptSalary.class);
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);

        // Input and output formats
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Argument 1: dept file to cache, argument 2: emp input path, argument 3: output path
        String[] otherArgs = new GenericOptionsParser(job.getConfiguration(), args).getRemainingArgs();
        DistributedCache.addCacheFile(new Path(otherArgs[0]).toUri(), job.getConfiguration());
        FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

        job.waitForCompletion(true);
        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Main entry point.
     * @param args command-line arguments
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new Q1SumDeptSalary(), args);
        System.exit(res);
    }
}
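
Once the class is compiled and packed into a jar, the job can be launched from the shell. The sketch below is only illustrative: the jar file name and the output directory are assumed placeholders, while the argument order (cached dept path, emp input path, output path) follows run() above. The same three-argument form applies to Q2, Q3, Q4 and Q7.

# Assumed jar name and output directory; adjust them to your own build.
hadoop jar Q1SumDeptSalary.jar Q1SumDeptSalary /class6/input/dept /class6/input/emp /class6/out1
# Print the reducer output
hadoop fs -cat /class6/out1/part-r-00000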

Example 2: head count and average salary per department. Q2DeptNumberAveSalary reuses the same map-side join; its reducer counts the employees of each department and divides the salary total by that count.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Q2DeptNumberAveSalary extends Configured implements Tool {

    public static class MapClass extends Mapper<LongWritable, Text, Text, Text> {

        // Cache for the dept file: key = department number, value = department name
        private Map<String, String> deptMap = new HashMap<String, String>();
        private String[] kv;

        // Runs once per map task, before any call to map()
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            BufferedReader in = null;
            try {
                // Get the files cached for this job
                Path[] paths = DistributedCache.getLocalCacheFiles(context.getConfiguration());
                String deptIdName = null;
                for (Path path : paths) {
                    if (path.toString().contains("dept")) {
                        in = new BufferedReader(new FileReader(path.toString()));
                        while (null != (deptIdName = in.readLine())) {
                            // key = department number, value = department name
                            deptMap.put(deptIdName.split(",")[0], deptIdName.split(",")[1]);
                        }
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                try {
                    if (in != null) {
                        in.close();
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the employee record
            kv = value.toString().split(",");
            // Map-side join: emit key = department name, value = salary
            if (deptMap.containsKey(kv[7])) {
                if (null != kv[5] && !"".equals(kv[5].toString())) {
                    context.write(new Text(deptMap.get(kv[7].trim())), new Text(kv[5].trim()));
                }
            }
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text> {

        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            long sumSalary = 0;
            int deptNumber = 0;
            // Sum the department's salaries and count its employees
            for (Text val : values) {
                sumSalary += Long.parseLong(val.toString());
                deptNumber++;
            }
            // Emit key = department name, value = head count and average salary
            context.write(key, new Text("Dept Number:" + deptNumber + ", Ave Salary:" + sumSalary / deptNumber));
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        // Create the job and set its name, Mapper and Reducer classes
        Job job = new Job(getConf(), "Q2DeptNumberAveSalary");
        job.setJobName("Q2DeptNumberAveSalary");
        job.setJarByClass(Q2DeptNumberAveSalary.class);
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);

        // Input and output formats
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Argument 1: dept file to cache, argument 2: emp input path, argument 3: output path
        String[] otherArgs = new GenericOptionsParser(job.getConfiguration(), args).getRemainingArgs();
        DistributedCache.addCacheFile(new Path(otherArgs[0]).toUri(), job.getConfiguration());
        FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

        job.waitForCompletion(true);
        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Main entry point.
     * @param args command-line arguments
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new Q2DeptNumberAveSalary(), args);
        System.exit(res);
    }
}

Example 3: the earliest hired employee of each department. Q3DeptEarliestEmp emits each employee's name and hire date keyed by department name; the reducer parses the dates and keeps the earliest one.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Q3DeptEarliestEmp extends Configured implements Tool {

    public static class MapClass extends Mapper<LongWritable, Text, Text, Text> {

        // Cache for the dept file: key = department number, value = department name
        private Map<String, String> deptMap = new HashMap<String, String>();
        private String[] kv;

        // Runs once per map task, before any call to map()
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            BufferedReader in = null;
            try {
                // Get the files cached for this job
                Path[] paths = DistributedCache.getLocalCacheFiles(context.getConfiguration());
                String deptIdName = null;
                for (Path path : paths) {
                    if (path.toString().contains("dept")) {
                        in = new BufferedReader(new FileReader(path.toString()));
                        while (null != (deptIdName = in.readLine())) {
                            // key = department number, value = department name
                            deptMap.put(deptIdName.split(",")[0], deptIdName.split(",")[1]);
                        }
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                try {
                    if (in != null) {
                        in.close();
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the employee record
            kv = value.toString().split(",");
            // Map-side join: drop records without a known department or a hire date,
            // emit key = department name, value = employee name + "," + hire date
            if (deptMap.containsKey(kv[7])) {
                if (null != kv[4] && !"".equals(kv[4].toString())) {
                    context.write(new Text(deptMap.get(kv[7].trim())),
                            new Text(kv[1].trim() + "," + kv[4].trim()));
                }
            }
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text> {

        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Employee name and hire date
            String empName = null;
            String empEnterDate = null;
            // Date format used by the emp file, plus the earliest hire found so far
            DateFormat df = new SimpleDateFormat("dd-MM月-yy");
            Date earliestDate = new Date();
            String earliestEmp = null;
            // Walk through the department's employees and keep the earliest hire
            for (Text val : values) {
                empName = val.toString().split(",")[0];
                empEnterDate = val.toString().split(",")[1].toString().trim();
                try {
                    System.out.println(df.parse(empEnterDate));
                    if (df.parse(empEnterDate).compareTo(earliestDate) < 0) {
                        earliestDate = df.parse(empEnterDate);
                        earliestEmp = empName;
                    }
                } catch (ParseException e) {
                    e.printStackTrace();
                }
            }
            // Emit key = department name, value = earliest-hired employee and hire date
            context.write(key, new Text("The earliest emp of dept:" + earliestEmp + ", Enter date:"
                    + new SimpleDateFormat("yyyy-MM-dd").format(earliestDate)));
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        // Create the job and set its name
        Job job = new Job(getConf(), "Q3DeptEarliestEmp");
        job.setJobName("Q3DeptEarliestEmp");

        // Mapper and Reducer classes
        job.setJarByClass(Q3DeptEarliestEmp.class);
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);

        // Input and output formats
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Argument 1: dept file to cache, argument 2: emp input path, argument 3: output path
        String[] otherArgs = new GenericOptionsParser(job.getConfiguration(), args).getRemainingArgs();
        DistributedCache.addCacheFile(new Path(otherArgs[0]).toUri(), job.getConfiguration());
        FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

        job.waitForCompletion(true);
        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Main entry point.
     * @param args command-line arguments
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new Q3DeptEarliestEmp(), args);
        System.exit(res);
    }
}

Example 4: total salary per city. Q4SumCitySalary also caches the dept file, but maps each department number to its city (the third dept field) and sums salaries by city.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Q4SumCitySalary extends Configured implements Tool {

    public static class MapClass extends Mapper<LongWritable, Text, Text, Text> {

        // Cache for the dept file: key = department number, value = city name
        private Map<String, String> deptMap = new HashMap<String, String>();
        private String[] kv;

        // Runs once per map task, before any call to map()
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            BufferedReader in = null;
            try {
                // Get the files cached for this job
                Path[] paths = DistributedCache.getLocalCacheFiles(context.getConfiguration());
                String deptIdName = null;
                for (Path path : paths) {
                    if (path.toString().contains("dept")) {
                        in = new BufferedReader(new FileReader(path.toString()));
                        while (null != (deptIdName = in.readLine())) {
                            // key = department number, value = city name
                            deptMap.put(deptIdName.split(",")[0], deptIdName.split(",")[2]);
                        }
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                try {
                    if (in != null) {
                        in.close();
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the employee record
            kv = value.toString().split(",");
            // Map-side join: emit key = city name, value = salary
            if (deptMap.containsKey(kv[7])) {
                if (null != kv[5] && !"".equals(kv[5].toString())) {
                    context.write(new Text(deptMap.get(kv[7].trim())), new Text(kv[5].trim()));
                }
            }
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, LongWritable> {

        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Sum the salaries of all employees working in the same city
            long sumSalary = 0;
            for (Text val : values) {
                sumSalary += Long.parseLong(val.toString());
            }
            // Emit key = city name, value = total salary for that city
            context.write(key, new LongWritable(sumSalary));
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        // Create the job and set its name
        Job job = new Job(getConf(), "Q4SumCitySalary");
        job.setJobName("Q4SumCitySalary");

        // Mapper and Reducer classes
        job.setJarByClass(Q4SumCitySalary.class);
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);

        // Input and output formats
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Argument 1: dept file to cache, argument 2: emp input path, argument 3: output path
        String[] otherArgs = new GenericOptionsParser(job.getConfiguration(), args).getRemainingArgs();
        DistributedCache.addCacheFile(new Path(otherArgs[0]).toUri(), job.getConfiguration());
        FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

        job.waitForCompletion(true);
        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Main entry point.
     * @param args command-line arguments
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new Q4SumCitySalary(), args);
        System.exit(res);
    }
}

Example 5: employees who earn more than their manager. Q5EarnMoreThanManager only needs the emp file: the mapper emits each employee once under his own number (an M record carrying his salary, for when he acts as a manager) and once under his manager's number (an E record carrying his name and salary); the reducer then compares every subordinate's salary with the manager's.
import java.io.IOException;
import java.util.HashMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Q5EarnMoreThanManager extends Configured implements Tool {

    public static class MapClass extends Mapper<LongWritable, Text, Text, Text> {

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the employee record
            String[] kv = value.toString().split(",");
            // Manager-side record: key = the employee's own number, value = M,<salary>
            context.write(new Text(kv[0].toString()), new Text("M," + kv[5]));
            // Employee-side record: key = the manager's number, value = E,<name>,<salary>
            if (null != kv[3] && !"".equals(kv[3].toString())) {
                context.write(new Text(kv[3].toString()), new Text("E," + kv[1] + "," + kv[5]));
            }
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text> {

        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Employee name, salary and a map of this manager's subordinates
            String empName;
            long empSalary = 0;
            HashMap<String, Long> empMap = new HashMap<String, Long>();
            // The manager's own salary
            long mgrSalary = 0;
            for (Text val : values) {
                if (val.toString().startsWith("E")) {
                    // E record: remember the subordinate's name and salary
                    empName = val.toString().split(",")[1];
                    empSalary = Long.parseLong(val.toString().split(",")[2]);
                    empMap.put(empName, empSalary);
                } else {
                    // M record: this is the manager's salary
                    mgrSalary = Long.parseLong(val.toString().split(",")[1]);
                }
            }
            // Compare each subordinate with the manager and emit those who earn more
            for (java.util.Map.Entry<String, Long> entry : empMap.entrySet()) {
                if (entry.getValue() > mgrSalary) {
                    context.write(new Text(entry.getKey()), new Text("" + entry.getValue()));
                }
            }
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        // Create the job and set its name
        Job job = new Job(getConf(), "Q5EarnMoreThanManager");
        job.setJobName("Q5EarnMoreThanManager");

        // Mapper and Reducer classes
        job.setJarByClass(Q5EarnMoreThanManager.class);
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);

        // Input and output formats
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Argument 1: emp input path, argument 2: output path
        String[] otherArgs = new GenericOptionsParser(job.getConfiguration(), args).getRemainingArgs();
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        job.waitForCompletion(true);
        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Main entry point.
     * @param args command-line arguments
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new Q5EarnMoreThanManager(), args);
        System.exit(res);
    }
}
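
Q5EarnMoreThanManager, like Q6, Q8 and Q9, reads only the emp file, so run() expects just two arguments: the emp input path and an output path. A launch sketch under the same assumptions as before (jar name and output directory are placeholders):

hadoop jar Q5EarnMoreThanManager.jar Q5EarnMoreThanManager /class6/input/emp /class6/out5
hadoop fs -cat /class6/out5/part-r-00000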

Example 6: employees paid above the average salary. Q6HigherThanAveSalary sends each salary to a single reducer twice, under key 0 to accumulate the company-wide total and head count, and under key 1 together with the employee's name to compare against the computed average.
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Q6HigherThanAveSalary extends Configured implements Tool {

    public static class MapClass extends Mapper<LongWritable, Text, IntWritable, Text> {

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the employee record
            String[] kv = value.toString().split(",");
            // Emit every salary under key 0, used to compute the total and the head count
            context.write(new IntWritable(0), new Text(kv[5]));
            // Emit every employee again under key 1, value = (name, salary)
            context.write(new IntWritable(1), new Text(kv[1] + "," + kv[5]));
        }
    }

    public static class Reduce extends Reducer<IntWritable, Text, Text, Text> {

        // Running salary total, employee count and the derived average
        private long allSalary = 0;
        private int allEmpCount = 0;
        private long aveSalary = 0;
        // Salary of the employee currently being compared
        private long empSalary = 0;

        public void reduce(IntWritable key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text val : values) {
                if (0 == key.get()) {
                    // Key 0: accumulate the total salary and the employee count
                    allSalary += Long.parseLong(val.toString());
                    allEmpCount++;
                    System.out.println("allEmpCount = " + allEmpCount);
                } else if (1 == key.get()) {
                    if (aveSalary == 0) {
                        aveSalary = allSalary / allEmpCount;
                        context.write(new Text("Average Salary = "), new Text("" + aveSalary));
                        context.write(new Text("Following employees have salarys higher than Average:"), new Text(""));
                    }
                    // Key 1: value is (name, salary); compare the salary with the average
                    System.out.println("Employee salary = " + val.toString());
                    aveSalary = allSalary / allEmpCount;
                    empSalary = Long.parseLong(val.toString().split(",")[1]);
                    if (empSalary > aveSalary) {
                        context.write(new Text(val.toString().split(",")[0]), new Text("" + empSalary));
                    }
                }
            }
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        // Create the job and set its name
        Job job = new Job(getConf(), "Q6HigherThanAveSalary");
        job.setJobName("Q6HigherThanAveSalary");

        // Mapper and Reducer classes
        job.setJarByClass(Q6HigherThanAveSalary.class);
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);

        // The number of reduce tasks must be 1 (the same as -D mapred.reduce.tasks=1).
        // This is the key setting of the job: a single reducer sees every record,
        // so the average is computed over all employees before the comparisons run.
        job.setNumReduceTasks(1);

        // Map output types
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Output format and output key/value types
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Argument 1: emp input path, argument 2: output path
        String[] otherArgs = new GenericOptionsParser(job.getConfiguration(), args).getRemainingArgs();
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        job.waitForCompletion(true);
        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Main entry point.
     * @param args command-line arguments
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new Q6HigherThanAveSalary(), args);
        System.exit(res);
    }
}

Example 7: name and department of employees whose name starts with the letter J. Q7NameDeptOfStartJ defines only a mapper, which again looks the department name up in the cached dept file.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Q7NameDeptOfStartJ extends Configured implements Tool {

    public static class MapClass extends Mapper<LongWritable, Text, Text, Text> {

        // Cache for the dept file: key = department number, value = department name
        private Map<String, String> deptMap = new HashMap<String, String>();
        private String[] kv;

        // Runs once per map task, before any call to map()
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            BufferedReader in = null;
            try {
                // Get the files cached for this job
                Path[] paths = DistributedCache.getLocalCacheFiles(context.getConfiguration());
                String deptIdName = null;
                for (Path path : paths) {
                    if (path.toString().contains("dept")) {
                        in = new BufferedReader(new FileReader(path.toString()));
                        while (null != (deptIdName = in.readLine())) {
                            // key = department number, value = department name
                            deptMap.put(deptIdName.split(",")[0], deptIdName.split(",")[1]);
                        }
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                try {
                    if (in != null) {
                        in.close();
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the employee record
            kv = value.toString().split(",");
            // Emit only employees whose name starts with J: key = name, value = department name
            if (kv[1].toString().trim().startsWith("J")) {
                context.write(new Text(kv[1].trim()), new Text(deptMap.get(kv[7].trim())));
            }
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        // Create the job and set its name
        Job job = new Job(getConf(), "Q7NameDeptOfStartJ");
        job.setJobName("Q7NameDeptOfStartJ");

        // Mapper class only; no reducer is needed
        job.setJarByClass(Q7NameDeptOfStartJ.class);
        job.setMapperClass(MapClass.class);

        // Input and output formats
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Argument 1: dept file to cache, argument 2: emp input path, argument 3: output path
        String[] otherArgs = new GenericOptionsParser(job.getConfiguration(), args).getRemainingArgs();
        DistributedCache.addCacheFile(new Path(otherArgs[0]).toUri(), job.getConfiguration());
        FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

        job.waitForCompletion(true);
        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Main entry point.
     * @param args command-line arguments
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new Q7NameDeptOfStartJ(), args);
        System.exit(res);
    }
}

Example 8: the three highest salaries. Q8SalaryTop3Salary routes every employee to one reducer under a constant key and keeps a running top three while scanning the values.
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Q8SalaryTop3Salary extends Configured implements Tool {

    public static class MapClass extends Mapper<LongWritable, Text, IntWritable, Text> {

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the employee record
            String[] kv = value.toString().split(",");
            // Emit key 0 and value = name,salary so that every record reaches the same reducer
            context.write(new IntWritable(0), new Text(kv[1].trim() + "," + kv[5].trim()));
        }
    }

    public static class Reduce extends Reducer<IntWritable, Text, Text, Text> {

        public void reduce(IntWritable key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Names of the three highest-paid employees
            String empName;
            String firstEmpName = "";
            String secondEmpName = "";
            String thirdEmpName = "";
            // Their salaries
            long empSalary = 0;
            long firstEmpSalary = 0;
            long secondEmpSalary = 0;
            long thirdEmpSalary = 0;
            // Scan all employees and keep a running top three
            for (Text val : values) {
                empName = val.toString().split(",")[0];
                empSalary = Long.parseLong(val.toString().split(",")[1]);
                if (empSalary > firstEmpSalary) {
                    thirdEmpName = secondEmpName;
                    thirdEmpSalary = secondEmpSalary;
                    secondEmpName = firstEmpName;
                    secondEmpSalary = firstEmpSalary;
                    firstEmpName = empName;
                    firstEmpSalary = empSalary;
                } else if (empSalary > secondEmpSalary) {
                    thirdEmpName = secondEmpName;
                    thirdEmpSalary = secondEmpSalary;
                    secondEmpName = empName;
                    secondEmpSalary = empSalary;
                } else if (empSalary > thirdEmpSalary) {
                    thirdEmpName = empName;
                    thirdEmpSalary = empSalary;
                }
            }
            // Emit the top three
            context.write(new Text("First employee name:" + firstEmpName), new Text("Salary:" + firstEmpSalary));
            context.write(new Text("Second employee name:" + secondEmpName), new Text("Salary:" + secondEmpSalary));
            context.write(new Text("Third employee name:" + thirdEmpName), new Text("Salary:" + thirdEmpSalary));
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        // Create the job and set its name
        Job job = new Job(getConf(), "Q8SalaryTop3Salary");
        job.setJobName("Q8SalaryTop3Salary");

        // Mapper and Reducer classes, plus the map output types
        job.setJarByClass(Q8SalaryTop3Salary.class);
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Input and output formats and the output key/value types
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Argument 1: emp input path, argument 2: output path
        String[] otherArgs = new GenericOptionsParser(job.getConfiguration(), args).getRemainingArgs();
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        job.waitForCompletion(true);
        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Main entry point.
     * @param args command-line arguments
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new Q8SalaryTop3Salary(), args);
        System.exit(res);
    }
}

Example 9: employees sorted by total income (salary plus commission) in descending order. Q9EmpSalarySort defines only a mapper; the map output key is the total income, and a comparator that reverses IntWritable's ordering makes the shuffle sort it in descending order.
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Q9EmpSalarySort extends Configured implements Tool {

    public static class MapClass extends Mapper<LongWritable, Text, IntWritable, Text> {

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the employee record
            String[] kv = value.toString().split(",");
            // Emit key = total income (salary plus commission when present), value = name
            int empAllSalary = "".equals(kv[6]) ? Integer.parseInt(kv[5])
                    : Integer.parseInt(kv[5]) + Integer.parseInt(kv[6]);
            context.write(new IntWritable(empAllSalary), new Text(kv[1]));
        }
    }

    /**
     * Comparator that negates IntWritable's natural order, so keys are sorted in
     * descending order during the shuffle.
     */
    public static class DecreaseComparator extends IntWritable.Comparator {

        public int compare(WritableComparable a, WritableComparable b) {
            return -super.compare(a, b);
        }

        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
            return -super.compare(b1, s1, l1, b2, s2, l2);
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        // Create the job and set its name
        Job job = new Job(getConf(), "Q9EmpSalarySort");
        job.setJobName("Q9EmpSalarySort");

        // Mapper class, map output types and the descending sort comparator
        job.setJarByClass(Q9EmpSalarySort.class);
        job.setMapperClass(MapClass.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setSortComparatorClass(DecreaseComparator.class);

        // Argument 1: emp input path, argument 2: output path
        String[] otherArgs = new GenericOptionsParser(job.getConfiguration(), args).getRemainingArgs();
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        job.waitForCompletion(true);
        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Main entry point.
     * @param args command-line arguments
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new Q9EmpSalarySort(), args);
        System.exit(res);
    }
}