这道题是在网上找的,如果做得不对,请大家指正。
1 使用Hive或者自定义MR实现如下逻辑
product_no lac_id moment start_time user_id county_id staytime city_id
13429100031 22554 8 2013-03-11 08:55:19.151754088 571 571 282 571
13429100082 22540 8 2013-03-11 08:58:20.152622488 571 571 270 571
13429100082 22691 8 2013-03-11 08:56:37.149593624 571 571 103 571
13429100087 22705 8 2013-03-11 08:56:51.139539816 571 571 220 571
13429100087 22540 8 2013-03-11 08:55:45.150276800 571 571 66 571
13429100082 22540 8 2013-03-11 08:55:38.140225200 571 571 133 571
13429100140 26642 9 2013-03-11 09:02:19.151754088 571 571 18 571
13429100082 22691 8 2013-03-11 08:57:32.151754088 571 571 287 571
13429100189 22558 8 2013-03-11 08:56:24.139539816 571 571 48 571
13429100349 22503 8 2013-03-11 08:54:30.152622440 571 571 211 571
字段解释:
product_no:用户手机号;
lac_id:用户所在基站;
start_time:用户在此基站的开始时间(在数据中由日期和时间两列组成,中间以空格分隔);
staytime:用户在此基站的逗留时间。
需求描述:
根据lac_id和start_time知道用户当时的位置,根据staytime知道用户在各个基站的逗留时长。按时间轨迹,将连续处于同一基站的多条记录的staytime合并(相加),例如下面示例中基站22691的两条记录103和287合并为390。
最终得到每一个用户按时间排序的、在每一个基站的驻留时长。
期望输出举例:
13429100082 22540 8 2013-03-11 08:58:20.152622488 571 571 270 571
13429100082 22691 8 2013-03-11 08:56:37.149593624 571 571 390 571
13429100082 22540 8 2013-03-11 08:55:38.140225200 571 571 133 571
13429100087 22705 8 2013-03-11 08:56:51.139539816 571 571 220 571
13429100087 22540 8 2013-03-11 08:55:45.150276800 571 571 66 571
说说我的思路:先使用TextInputFormat读取输入,在map函数中对每一行进行处理,将手机号作为map的outputkey,行内容作为outputvalue;然后在reduce中按照时间排序。
- package hadoop;
-
- import java.io.IOException;
- import java.net.URI;
- import java.net.URISyntaxException;
- import java.util.ArrayList;
- import java.util.Collections;
- import java.util.Comparator;
- import java.util.List;
-
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.fs.FSDataOutputStream;
- import org.apache.hadoop.fs.FileSystem;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.io.LongWritable;
- import org.apache.hadoop.io.NullWritable;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.mapreduce.Job;
- import org.apache.hadoop.mapreduce.Mapper;
- import org.apache.hadoop.mapreduce.Reducer;
- import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
- import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-
- public class HadoopTest1
- {
- public static String split = " +|\t";
-
- public static class MyComarator implements Comparator
- {
- @Override
- public int compare(Object o1, Object o2)
- {
-
- String str1 = (String)o1;
- String str2 = (String)o2;
-
- String []arr1 = str1.split(split);
- String []arr2 = str2.split(split);
-
- return (arr1[3] + arr1[4]).compareTo((arr2[3] + arr2[4]));
- }
- }
-
- public static class MyMapper extends Mapper<LongWritable, Text, Text, Text>
- {
- public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
- {
- if (key.equals(new LongWritable(0)))
- {
- return;
- }
- String line = value.toString();
- String[] elements = line.split(split);
- context.write(new Text(elements[0]), value);
- }
- }
- public static class MyReducer extends Reducer<Text, Text, NullWritable, Text>
- {
- public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
- {
- List<String>list = new ArrayList<String>();
-
- for (Text v : values)
- {
- list.add(v.toString());
- }
-
- list.sort(new MyComarator());
- Collections.reverse(list);
-
- for (int i =0; i < list.size(); ++i)
- {
- context.write(NullWritable.get(), new Text(list.get(i)));
- }
- }
- }
-
- public static void main(String[] args)
- {
- String HDFS_PATH = "hdfs://master:9000";
- String INPUT_PATH = "/home/hadoop/hadoop-data/20150721/input";
- String OUTT_PATH = "/home/hadoop/hadoop-data/20150721/output";
-
- try
- {
- FileSystem fs = FileSystem.get(new URI(HDFS_PATH), new Configuration());
- FSDataOutputStream out = fs.create(new Path(HDFS_PATH + INPUT_PATH + "/text"));
- String text = "product_no lac_id moment start_time user_id county_id staytime city_id\n"
- + "13429100031 22554 8 2013-03-11 08:55:19.151754088 571 571 282 571\n"
- + "13429100082 22540 8 2013-03-11 08:58:20.152622488 571 571 270 571\n"
- + "13429100082 22691 8 2013-03-11 08:56:37.149593624 571 571 103 571\n"
- + "13429100087 22705 8 2013-03-11 08:56:51.139539816 571 571 220 571\n"
- + "13429100087 22540 8 2013-03-11 08:55:45.150276800 571 571 66 571\n"
- + "13429100082 22540 8 2013-03-11 08:55:38.140225200 571 571 133 571\n"
- + "13429100140 26642 9 2013-03-11 09:02:19.151754088 571 571 18 571\n"
- + "13429100082 22691 8 2013-03-11 08:57:32.151754088 571 571 287 571\n"
- + "13429100189 22558 8 2013-03-11 08:56:24.139539816 571 571 48 571\n"
- + "13429100349 22503 8 2013-03-11 08:54:30.152622440 571 571 211 571";
- out.write(text.getBytes());
- out.close();
-
- Job job = new Job(new Configuration(), "HadoopTest1");
- job.setJarByClass(HadoopTest1.class);
-
- job.setMapperClass(MyMapper.class);
- job.setReducerClass(MyReducer.class);
-
- job.setMapOutputKeyClass(Text.class);
- job.setMapOutputValueClass(Text.class);
-
- job.setOutputKeyClass(NullWritable.class);
- job.setOutputValueClass(Text.class);
-
- if (fs.exists(new Path(HDFS_PATH + OUTT_PATH)))
- {
- fs.delete(new Path(HDFS_PATH + OUTT_PATH), true);
- }
-
- TextInputFormat.addInputPath(job, new Path(HDFS_PATH + INPUT_PATH));
- FileOutputFormat.setOutputPath(job, new Path(HDFS_PATH + OUTT_PATH));
-
- job.waitForCompletion(true);
-
- }
- catch (URISyntaxException e)
- {
- e.printStackTrace();
- }
- catch (IOException e)
- {
- e.printStackTrace();
- }
- catch (ClassNotFoundException e)
- {
- e.printStackTrace();
- }
- catch (InterruptedException e)
- {
- e.printStackTrace();
- }
- }
- }
最后的输出结果:
- 13429100031 22554 8 2013-03-11 08:55:19.151754088 571 571 282 571
- 13429100082 22540 8 2013-03-11 08:58:20.152622488 571 571 270 571
- 13429100082 22691 8 2013-03-11 08:57:32.151754088 571 571 287 571
- 13429100082 22691 8 2013-03-11 08:56:37.149593624 571 571 103 571
- 13429100082 22540 8 2013-03-11 08:55:38.140225200 571 571 133 571
- 13429100087 22705 8 2013-03-11 08:56:51.139539816 571 571 220 571
- 13429100087 22540 8 2013-03-11 08:55:45.150276800 571 571 66 571
- 13429100140 26642 9 2013-03-11 09:02:19.151754088 571 571 18 571
- 13429100189 22558 8 2013-03-11 08:56:24.139539816 571 571 48 571
- 13429100349 22503 8 2013-03-11 08:54:30.152622440 571 571 211 571
如有不对的地方,还请大家指教。