Hadoop 2.5.2 MapReduce: writing output to multiple directories with custom file names

  MultipleOutputs (mos) can be used together with the regular context.write() output.
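For example, the map() method can call both mos.write() and the regular context.write() in the same pass. A minimal sketch, assuming the same mapper types and the generateFileName() helper from the full listing below (which itself only uses mos.write()):

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        Text wvalue = new Text(value.toString());
        // custom base path handled by MultipleOutputs
        mos.write(NullWritable.get(), wvalue, generateFileName(wvalue));
        // the regular record still goes to the job's FileOutputFormat directory
        context.write(NullWritable.get(), wvalue);
    }

The complete program: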

package jyw.test;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.FileSystem;

 
public class WordCountMultiDir {
    
    public static class MapClass
            extends Mapper<LongWritable, Text, NullWritable, Text> {
        private MultipleOutputs<NullWritable, Text> mos;

        @Override
        protected void setup(Context context)
                throws IOException, InterruptedException {
            super.setup(context);
            // One MultipleOutputs instance per mapper, created once in setup().
            mos = new MultipleOutputs<NullWritable, Text>(context);
        }

        @Override
        protected void map(LongWritable key,
                           Text value,
                           Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                Text wvalue = new Text(itr.nextToken());
                // approach 1: pass a base output path directly to mos.write()
                mos.write(NullWritable.get(), wvalue,
                          generateFileName(wvalue));
            }
        }
       
        // Route words starting with a-g to directory "ag", everything else to "hz".
        private String generateFileName(Text value) {
            char c = value.toString().toLowerCase().charAt(0);
            String dirname;
            if (c >= 'a' && c <= 'g') {
                dirname = "ag";
            } else {
                dirname = "hz";
            }
            return "hdfs://192.168.0.42:9000/user/jiayongwei/mul/" + dirname + "/log";
        }

        @Override
        protected void cleanup(Context context)
                throws IOException, InterruptedException {
            super.cleanup(context);
            // Closing MultipleOutputs flushes its writers; skipping this can lose records.
            mos.close();
        }
    }
    // Recursively delete the given path from the default FileSystem if it exists.
    public static void deleteFile(String file) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(conf);

        Path path = new Path(file);
        if (!fileSystem.exists(path)) {
            System.out.println("File " + file + " does not exist");
            return;
        }

        fileSystem.delete(path, true);
        fileSystem.close();
    }
    public static void main(String[] args)
            throws IOException, ClassNotFoundException,
            InterruptedException {
        Configuration conf = new Configuration();
        // Parse generic options (-D, -files, ...) before the Job copies the configuration.
        String[] remainingArgs =
                new GenericOptionsParser(conf, args)
                        .getRemainingArgs();

        if (remainingArgs.length != 1) {
            System.err.println("Usage: WordCountMultiDir <output path>");
            System.exit(1);
        }
        Job job = Job.getInstance(conf, "MulOutput");

        Path in = new Path("hdfs://192.168.0.42:9000/user/jiayongwei/input/");
        Path out = new Path(remainingArgs[0]);
        // Remove previous results so the job does not fail on an existing output directory.
        deleteFile("/user/jiayongwei/mul/");
        deleteFile(remainingArgs[0]);

        FileInputFormat.setInputPaths(job, in);
        FileOutputFormat.setOutputPath(job, out);

        job.setJarByClass(WordCountMultiDir.class);
        job.setMapperClass(MapClass.class);
        job.setInputFormatClass(TextInputFormat.class);

        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        // Map-only job: the mapper emits the final output through MultipleOutputs.
        job.setNumReduceTasks(0);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
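
The "approach 1" comment in map() refers to calling mos.write() with just a key, a value, and a base output path. MultipleOutputs also supports a second form based on named outputs registered in the driver. A minimal sketch, where the output name "wordOut" is made up for illustration:

    // Driver, before submitting the job:
    MultipleOutputs.addNamedOutput(job, "wordOut",
            org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class,
            NullWritable.class, Text.class);

    // Mapper or reducer:
    mos.write("wordOut", NullWritable.get(), wvalue);                            // wordOut-m-xxxxx in the job output dir
    mos.write("wordOut", NullWritable.get(), wvalue, generateFileName(wvalue));  // custom base path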

 
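With the base-path form used above, MultipleOutputs appends the task suffix to the path, so the job typically produces files such as /user/jiayongwei/mul/ag/log-m-00000 and /user/jiayongwei/mul/hz/log-m-00000 on the configured HDFS. Since nothing is written through context.write(), the output directory given on the command line ends up holding only empty part-m-xxxxx files and _SUCCESS; if those empty files are unwanted, one option (not part of the original code) is to register the output format lazily in the driver:

    import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

    // Only create the default output files when a record is actually written to them.
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);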
