基于mapreduce的两个简单例子

MapReduce开发:

对路由数据进行过滤,只保留想要的几项(只有map,没有reduce):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.*;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

 

public  class Map extends Configured implements Tool {

public static class Map1 extends Mapper<LongWritable,Text,NullWritable,Text>{

 enum Couter{

 LINESKIP,//计数器

 }

public void map(LongWritable key,Text value,Context context)throws IOException,InterruptedException{

String line=value.toString();//读取源数据

try{

String [] linespilt=line.split(" ");

String m=linespilt[0];

String t=linespilt[1];

String mac=linespilt[4];

String str=m+" "+t+" "+" "+mac;

Text tout=new Text(str);

context.write(NullWritable.get(), tout); //key \t value

}catch(java.lang.ArrayIndexOutOfBoundsException e)

{

          context.getCounter(Couter.LINESKIP).increment(1);//出错行加1

                return;

}

}

}

public int run(String[] args)throws Exception{

Configuration conf=getConf();

Job job=new Job(conf,"lala");//作业名

FileInputFormat.addInputPath(job,new Path("hdfs://192.168.146.130:9000/user/hadoop/input/mptrys"));

FileOutputFormat.setOutputPath(job,new Path("hdfs://192.168.146.130:9000/mptryd"));

job.setMapperClass(Map1.class);//mapclass

job.setOutputFormatClass(TextOutputFormat.class);

job.setOutputKeyClass(NullWritable.class);//指定输出key

job.setOutputValueClass(Text.class);//指定输出的Value

job.waitForCompletion(true);

return job.isSuccessful()?1:0;

}

public static void main(String args[])throws Exception{

int res=ToolRunner.run(new Configuration(),new Map(), args);

System.exit(res);

}

}

 

 

 

 

倒排:

package trymr2;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.*;

import org.apache.hadoop.mapreduce.Mapper.Context;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

 

public class test2 extends Configured implements Tool {

public static class Map1 extends Mapper{

 enum Couter{

 LINESKIP,

 }

 public void map(LongWritable key,Text value,Context context)throws IOException,InterruptedException{

 String line=value.toString();

  try{

String [] linespilt=line.split(" ");

String calf=linespilt[0];

String calt=linespilt[1];

Text tcalf=new Text(calf);

Text tcalt=new Text(calt);

context.write(tcalt, tcalf);

}catch(java.lang.ArrayIndexOutOfBoundsException e)

{

                context.getCounter(Couter.LINESKIP).increment(1);

              return;

}

 }

}

 

public static class Reduce extends Reducer{

 enum Couter{

  LINESKIP,

 }

 public void reduce(Text key,Iterable value,Context

context)throws IOException,InterruptedException{

 String val;

             String out="";

 for(Text vale:value){

 val=vale.toString();

 out+=val+"|";

 }

 //Text a=new Text(out);

  context.write(key, new Text(out));

 }

}

public int run(String[] args) throws Exception{

Configuration conf=getConf();

Job job=new Job(conf,"lala1");

FileInputFormat.addInputPath(job,new Path

("hdfs://192.168.146.130:9000/user/hadoop/input/lilanmr2"));

FileOutputFormat.setOutputPath

(job,new Path("hdfs://192.168.146.130:9000/mptryd12"));

job.setMapperClass(Map1.class);

job.setReducerClass(Reduce.class);

job.setOutputFormatClass(TextOutputFormat.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(Text.class);

job.waitForCompletion(true);

return job.isSuccessful()?1:0;

}

public static void main(String args[])throws Exception{

int res=ToolRunner.run(new Configuration(),new test2(), args);

System.exit(res);

}

}

 

 

你可能感兴趣的:(Hadoop)