MAC下hadoop开发环境搭建系列(五)


MAC下hadoop开发环境搭建系列(五)_第1张图片
 

分享一个 MapReduce 程序,作用是将 HDFS 上的文件数据批量加载进 Redis 内存数据库:

1.源代码:

 

/**
* Program:
* The program is used to batch load data to redis by Jedis.
* History:
* Created by Qingshou Chen on 15/11/13.
*/

package com.asiainfo.bdcenter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import redis.clients.jedis.HostAndPort;
import redis.clients.jedis.JedisCluster;

import java.io.IOException;
import java.util.*;

public class BatchToRedis extends Configured implements Tool{

/**
* 继承Configured,implements Tool 可以方便读取命令行-conf -D 配置信息和加载其他配置文件(通过addResource方法)
*/

/**
* 日志处理
*/
private static Logger logger = LoggerFactory.getLogger(BatchToRedis.class);
public int run(String[] args) throws Exception{

/**
* 检查调用参数是否正确
*/
if (args.length != 0){
System.err.println("Usage:hadoop jar BatchToRedis.jar");
System.err.println("Attention:Please set parameters at config.xml in the same path of BatchToRedis.jar");
System.exit(-1);
}

/**
* 初始化配置 config.xml在src目录下,执行jar包时需要将该文件跟jar包放在同一个目录下
*/
Configuration conf = getConf();
conf.addResource(new Path("./config.xml"));

/*Date date = new Date();
DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
String sdf = df.format(date);*/
String jobname = "BatchToRedis:"+conf.get("redis.table.name");
logger.info("***************JOB:"+jobname+" START***************");
logger.info("****This Program is used by batch load data to redis cluster!****");
logger.info("****The result is set hashkeys like:hmset table_name:key fieldname1 fieldvalue1 [fieldnameN fieldvalueN]****");
logger.info("***************"+"INPUT PATH:"+conf.get("input.path")+"***************");
logger.info("***************"+"DATA FIELD NAME:"+conf.get("data.field.name")+"***************");
logger.info("***************"+"DATA FIELD SPLIT:"+conf.get("data.field.split")+"***************");
logger.info("***************"+"REDIS CLUSTER:"+conf.get("redis.cluster.node.list")+"***************");

/**
* SET JOB
*/
Job job = Job.getInstance(conf,jobname);
job.setJarByClass(BatchToRedis.class);
FileInputFormat.addInputPath(job, new Path(conf.get("input.path")));
FileInputFormat.setMinInputSplitSize(job,1);
FileInputFormat.setMaxInputSplitSize(job,Long.parseLong(conf.get("map.split.size"))*1024*1024);
// FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setMapperClass(BatchToRedisMapper.class);
job.setReducerClass(BatchToRedisReducer.class);
job.setNumReduceTasks(0);
// job.setOutputKeyClass(Text.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputFormatClass(NullOutputFormat.class);

/**
* 显示job执行时间和结果
*/
long currentTime = System.currentTimeMillis();
boolean success = job.waitForCompletion(true);
logger.info("***************Job Escape: " + StringUtils.formatTimeDiff(System.currentTimeMillis(), currentTime)+"***************");
if (!success)

{
logger.info("***************JOB FAILED***************");
return 1;
}

logger.info("***************JOB END SUCCESSFULL***************");
return 0;
}
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new BatchToRedis(), args);
System.exit(exitCode);

}

/**
* map
*/
public static class BatchToRedisMapper extends Mapper<LongWritable, Text, Text, Text> {

private JedisCluster jc;

protected void setup(Context context) throws IOException{

/**
* 连接redis集群
*/
Set<HostAndPort> jedisClusterNodes = new HashSet<HostAndPort>();
Configuration conf = context.getConfiguration();
String[] redisnodes = conf.get("redis.cluster.node.list").split(",");
for(String redisnode : redisnodes){
jedisClusterNodes.add(new HostAndPort(redisnode.split(":")[0],Integer.parseInt(redisnode.split(":")[1])));
}
this.jc = new JedisCluster(jedisClusterNodes);
}
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

String line = value.toString();
Configuration conf = context.getConfiguration();
String[] fields = line.split(conf.get("data.field.split"));
String[] fieldnames = conf.get("data.field.name").split(",");
Map<String, String> map = new HashMap<String, String>();

/**
* 设置主键 table_name+主键属性(需放在数据文件第一列)
*/
String hashkey = conf.get("redis.table.name") + ":" + fields[0];

/**
* 设置其他属性
*/
for(int i = 1;i<fields.length;i++) {
map.put(fieldnames[i], fields[i]);
}

jc.del(hashkey);
jc.hmset(hashkey, map);
// System.out.println(jc.hget(hashkey,"name"));

}

protected void cleanup(Context context){
/**
* 关闭redis集群
*/
jc.close();
}
}

/**
* reduce
*/
public static class BatchToRedisReducer extends Reducer<Text, Text, Text, IntWritable> {
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {

System.out.println("reduce execute!!!!");
}
}

}

 

2.配置文件,config.xml:

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Settings for the BatchToRedis job. Keep this file in the same directory as BatchToRedis.jar. -->
<configuration xmlns:xi="http://www.w3.org/2001/XInclude">
<!-- Maximum map input split size, in MB -->
<property>
<name>map.split.size</name>
<value>32</value>
</property>
<!-- Path (on HDFS) of the text data files to load -->
<property>
<name>input.path</name>
<value>/bdcenter/tmp/test/in</value>
</property>
<!-- Field names of the data file, comma-separated. NOTE: the primary key must be the first column! -->
<property>
<name>data.field.name</name>
<value>bill_id,name,age,sex</value>
</property>
<!-- Field delimiter used in the data file -->
<property>
<name>data.field.split</name>
<value>,</value>
</property>
<!-- Table name used as the prefix of each Redis hash key (table_name:primary_key) -->
<property>
<name>redis.table.name</name>
<value>dw_user</value>
</property>

<!-- Redis cluster node list: comma-separated host:port entries -->
<property>
<name>redis.cluster.node.list</name>
<value>10.192.168.74:6379,10.192.168.75:6379,10.192.168.76:6379,10.192.168.77:6379,10.192.168.78:6379,10.192.168.79:6379,10.192.168.80:6379,10.192.168.81:6379,10.192.168.82:6379,10.192.168.83:6379</value>
</property>
</configuration>

 

 

 

记录实际操作过程

内容在个人公众号mangrendd同步更新

你可能感兴趣的:(redis,mapreduce,hadoop,jedis,config)