-- Table `sports`: one row per uploaded data record, keyed by an
-- auto-increment id. All text columns use the utf8_bin collation.
-- The three *Time columns are declared as VARCHAR(20), not as a date/time type.
-- The COMMENT literals are stored in the table metadata; the trailing `--`
-- notes are their English translations.
CREATE TABLE `sports` (
  `id`          INT(10)       NOT NULL AUTO_INCREMENT          COMMENT 'id',
  `phone`       VARCHAR(20)   COLLATE utf8_bin DEFAULT NULL    COMMENT '手机号',       -- phone number
  `deviceID`    VARCHAR(50)   COLLATE utf8_bin DEFAULT NULL    COMMENT '设备编号',     -- device number
  `dataType`    VARCHAR(50)   COLLATE utf8_bin DEFAULT NULL    COMMENT '数据类型',     -- data type
  `appType`     VARCHAR(100)  COLLATE utf8_bin DEFAULT NULL    COMMENT '应用类型',     -- application type
  `pname`       VARCHAR(50)   COLLATE utf8_bin DEFAULT NULL    COMMENT '用户姓名',     -- user name
  `sendFlag`    CHAR(1)       COLLATE utf8_bin DEFAULT '0'     COMMENT '发送标识',     -- send flag
  `receiveTime` VARCHAR(20)   COLLATE utf8_bin DEFAULT NULL    COMMENT '接收时间',     -- receive time
  `realTime`    VARCHAR(20)   COLLATE utf8_bin DEFAULT NULL    COMMENT '数据真实时间', -- actual time of the data
  `sendTime`    VARCHAR(20)   COLLATE utf8_bin DEFAULT NULL    COMMENT '转发时间',     -- forwarding time
  `deviceType`  VARCHAR(50)   COLLATE utf8_bin DEFAULT NULL    COMMENT '设备类型:手机计步为 PHONE', -- device type: PHONE for phone step counting
  `dataValue`   VARCHAR(1500) COLLATE utf8_bin DEFAULT NULL    COMMENT '数据内容',     -- data content
  `AppA_flag`   CHAR(1)       COLLATE utf8_bin DEFAULT '0'     COMMENT 'AppA发送标识', -- AppA send flag
  `AppB_flag`   CHAR(1)       COLLATE utf8_bin DEFAULT '0'     COMMENT 'AppB发送标识', -- AppB send flag
  `AppC_flag`   CHAR(1)       COLLATE utf8_bin DEFAULT '0'     COMMENT 'AppC发送标识', -- AppC send flag
  `AppD_flag`   CHAR(1)       COLLATE utf8_bin DEFAULT '0'     COMMENT 'AppD发送标识', -- AppD send flag
  PRIMARY KEY (`id`)
) ENGINE=InnoDB
  AUTO_INCREMENT=191
  DEFAULT CHARSET=utf8
  COLLATE=utf8_bin;
sqoop import --connect jdbc:mysql://192.168.106.1:3306/aggregate --username root --password root --table sports --target-dir '/aggregate/sports/' --fields-terminated-by '\t'
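* The imported data format is as follows (one sample record; fields are tab-separated and the record is wrapped across several lines here):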
355 p898 0526898 stepCount AppA;AppB;AppC;AppD No8-1 0 2018-07-01 09:33:00
2018-07-01 09:33:00 2018-07-01 09:53:06 null
[{"stepSum":"526899000"},{"calSum":"526899000"},
{"distanceSum":"3161394"},{"yxbsSum":"0"},{"weight":"70"},
{"stride":"70"},{"degreeOne":"52220"},{"degreeTwo":"52220"},
{"degreeThree":"52220"},{"degreeFour":"52220"},{"uploadType":"1"},
{"measureTime":"2018-07-01 09:33:00"}]
1 0 0 0
package com.bsr.emr.hadoop.batchImport;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import com.bsr.emr.hadoop.util.HadoopDateUtil;
import com.bsr.emr.hadoop.util.JsonUtil;
import com.google.gson.JsonArray;
import com.google.gson.JsonParser;
import net.sf.json.JSONArray;
/*
* sqoop import --connect jdbc:mysql://192.168.106.1:3306/aggregate --username root --password root --table sports --target-dir '/aggregate/sports/' --fields-terminated-by '\t'
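* The imported data format is as follows (one sample record; fields are tab-separated and the record is wrapped across several lines here):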
355 p898 0526898 stepCount AppA;AppB;AppC;AppD No8-1 0 2018-07-01 09:33:00
2018-07-01 09:33:00 2018-07-01 09:53:06 null
[{"stepSum":"526899000"},{"calSum":"526899000"},
{"distanceSum":"3161394"},{"yxbsSum":"0"},{"weight":"70"},
{"stride":"70"},{"degreeOne":"52220"},{"degreeTwo":"52220"},
{"degreeThree":"52220"},{"degreeFour":"52220"},{"uploadType":"1"},
{"measureTime":"2018-07-01 09:33:00"}]
1 0 0 0
*
*/
public class SportAnalysis {
static class SportAnalysisMapper extends Mapper {
Text rowData = new Text();
protected void map(LongWritable key, Text value, Context context)
throws java.io.IOException, InterruptedException {
final String[] split = value.toString().split("\t");
try {
String stepSum = "";
String phone = split[2].trim();
String deviceId = split[3].trim();
String dataType = split[4].trim();
String time = split[7].trim();
String receiveTime = HadoopDateUtil.dateToString(time);
String rowKey = phone + "_" + deviceId + "_" + dataType + "_" + receiveTime;
String stepData = split[11].trim();
if ("stepCount".equals(split[3].trim())) {
JSONArray ja = JSONArray.fromObject(stepData);
stepSum = JsonUtil.getJsonParamterString(ja.getJSONObject(0), "stepSum");
rowData.set(rowKey + "\t" + value.toString() + "\t" + stepSum);
System.out.println(rowData);
context.write(key, rowData);
}
} catch (Exception e) {
final Counter counter = context.getCounter("SimpleDataImport", "ErrorFormat");
counter.increment(1L);
System.out.println("出错了" + split[0] + " " + e.getMessage());
}
};
}
/**
* obsDatetime,value,conceptId,patientId ,conceptName
*
* @author ulove
*
*/
static class SportAnalysisReducer extends TableReducer {
protected void reduce(LongWritable key, java.lang.Iterable values, Context context)
throws java.io.IOException, InterruptedException {
for (Text text : values) {
final String[] splited = text.toString().split("\t");
final Put put = new Put(Bytes.toBytes(splited[0]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("Id"), Bytes.toBytes(splited[1]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("phone"), Bytes.toBytes(splited[2]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("deviceId"), Bytes.toBytes(splited[3]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("dataType"), Bytes.toBytes(splited[4]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("appType"), Bytes.toBytes(splited[5]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("pname"), Bytes.toBytes(splited[6]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("sendFlag"), Bytes.toBytes(splited[7]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("receiveTime"), Bytes.toBytes(splited[8]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("realTime"), Bytes.toBytes(splited[9]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("sendTime"), Bytes.toBytes(splited[10]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("deviceType"), Bytes.toBytes(splited[11]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("dataValue"), Bytes.toBytes(splited[12]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("AppA_flag"), Bytes.toBytes(splited[13]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("AppB_flag"), Bytes.toBytes(splited[14]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("AppC_flag"), Bytes.toBytes(splited[15]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("AppD_flag"), Bytes.toBytes(splited[16]));
put.add(Bytes.toBytes("sort"), Bytes.toBytes("stepSum"), Bytes.toBytes(splited[17]));
context.write(NullWritable.get(), put);
}
};
}
public static void main(String[] args) throws Exception {
final Configuration configuration = new Configuration();
// 设置zookeeper
configuration.set("hbase.zookeeper.quorum", "hadoop");
// 2 设置hbase表名称
configuration.set(TableOutputFormat.OUTPUT_TABLE, "sportsAnalysis");
// 3. 将该值改大,防止client 连接ZK 超时退出s
configuration.set("dfs.socket.timeout", "180000");
final Job job = new Job(configuration, "SportAnalysis");
job.setMapperClass(SportAnalysisMapper.class);
job.setReducerClass(SportAnalysisReducer.class);
// 设置map的输出,不设置reduce的输出类型
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setInputFormatClass(TextInputFormat.class);
// 不再设置输出路径,而是设置输出格式类型s
job.setOutputFormatClass(TableOutputFormat.class);
FileInputFormat.setInputPaths(job, "hdfs://192.168.106.111:9000/aggregate/sports");
job.waitForCompletion(true);
}
}