MapReduce API

MapJoin

package a.b.c;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;



/*
 * Sample data
 *
 * table1 (employees): name, sex, age, depNo
 *   zhang,male,20,1
 *   li,female,25,2
 *   wang,female,30,3
 *   zhou,male,35,2
 *
 * table2 (departments): depNo, depName
 *   1,sales
 *   2,Dev
 *   3,Mgt
 */


// Join the two tables. The department table (table2) is small and the employee table (table1) is large,
// so we put table2 into the distributed cache; every map task can then read it straight from local memory
// on its node and do the join on the map side.

class MapJoinMapper extends Mapper<LongWritable, Text, IntWritable, Text> {
    // In-memory copy of table2: depNo -> depName
    private Map<String, String> tables = new HashMap<String, String>();
    @Override
    protected void setup(Context context)
            throws IOException, InterruptedException {
        super.setup(context);
        if (context.getCacheFiles() != null && context.getCacheFiles().length > 0) {
            @SuppressWarnings("deprecation")
            Path[] paths = DistributedCache.getLocalCacheFiles(context.getConfiguration());
            // Only one file is cached here, so the first local path is the one we want.
            BufferedReader reader = new BufferedReader(new FileReader(paths[0].toString()));
            String s;
            while ((s = reader.readLine()) != null) {
                String[] words = s.split(",");
                String deptNo = words[0];
                String deptName = words[1];
                tables.put(deptNo, deptName);
            }
            reader.close();
        }
    }

    
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Path of the split this record came from, used to tell the two tables apart.
        String inputPath = ((FileSplit) context.getInputSplit()).getPath().toString();
        String line = value.toString();
        if (line == null || line.isEmpty()) {
            return;
        }
        // Only employee records (table1) are processed here; table2 is already in memory.
        if (inputPath.contains("table1")) {
            // Split the record on ","
            String[] words = line.split(",");
            // Employee name
            String empName = words[0];
            // Department number
            String deptNo = words[3];
            // Look up the department name cached in setup()
            String deptName = tables.get(deptNo);
            context.write(new IntWritable(Integer.parseInt(deptNo)),
                    new Text(deptNo + " " + deptName + " " + empName));
        }
    }
}
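
The setup() above still goes through the deprecated DistributedCache class. On Hadoop 2.x and later the cached files can also be reached through the mapper context alone. The following is only a sketch of that variant, not part of the original listing: it assumes the default MRv2 behaviour where each cached file is symlinked into the task's working directory under its base name, and it reuses the tables field and imports of MapJoinMapper.

    // Drop-in alternative for setup(), avoiding the deprecated DistributedCache class.
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // URIs registered in the driver via job.addCacheFile(...)
        java.net.URI[] cacheFiles = context.getCacheFiles();
        if (cacheFiles != null && cacheFiles.length > 0) {
            // Assumes MRv2 symlinks the cached file into the working directory
            // under its base name, so it can be opened like a local file.
            String fileName = new Path(cacheFiles[0].getPath()).getName();
            BufferedReader reader = new BufferedReader(new FileReader(fileName));
            String s;
            while ((s = reader.readLine()) != null) {
                String[] words = s.split(",");
                tables.put(words[0], words[1]);   // depNo -> depName
            }
            reader.close();
        }
    }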

class MapJoinReduce extends Reducer<IntWritable, Text, IntWritable, Text> {
    @Override
    protected void reduce(IntWritable key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // The join was already done on the map side, so just write the pairs through.
        for (Text value : values) {
            context.write(key, value);
        }
    }
}



public class MapJoin {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "MapJoin");
        job.setJarByClass(MapJoin.class);

        job.setMapperClass(MapJoinMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(MapJoinReduce.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        // Cache the small table so every map task can read it locally.
        job.addCacheFile(new Path("hdfs://master:9000/table/table2").toUri());
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Remove the output directory if it already exists, so the job can be rerun.
        Path path = new Path(args[1]);
        FileSystem fileSystem = path.getFileSystem(conf);
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true);
        }

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
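
With the sample tables above, a run of the finished job (for example: hadoop jar mapjoin.jar a.b.c.MapJoin <input dir containing table1> <output dir>; the jar name and paths here are placeholders, and table2 is assumed to already sit at hdfs://master:9000/table/table2 as in the driver) should produce one joined line per employee, keyed by department number, with tab-separated key and value roughly like:

1	1 sales zhang
2	2 Dev li
2	2 Dev zhou
3	3 Mgt wang

The order of the two department-2 records depends on how the splits are read, but with the default single reducer the keys come out sorted by the shuffle.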