hadoop图像处理接口hipi

了解详细的资料可以登录弗吉尼亚大学网站(http://hipi.cs.virginia.edu/)

下载库:

git clone git@github.com:uvagfx/hipi.git

下载完源代码后,需要编辑源代码根目录下的build.xml文件,修改其中hadoop的安装路径以及hadoop的版本号。比如下面是针对hadoop2.6版本所做的修改。

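  ...
  <property name="hadoop.home" value="/path/to/hadoop-2.6.0/share/hadoop" />
  <property name="hadoop.version" value="2.6.0" />
  ...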
使用ant命令进行编译:
ant
...
hipi:
    [javac] Compiling 30 source files to /users/horton/hipi/lib
      [jar] Building jar: /users/horton/hipi/lib/hipi-2.0.jar
     [echo] Hipi library built.
compile:
    [javac] Compiling 1 source file to /users/horton/hipi/bin
      [jar] Building jar: /users/horton/hipi/examples/covariance.jar
     [echo] Covariance built.
all:
BUILD SUCCESSFUL
Total time: 3 seconds

生成待处理图像数据:

hadoop jar tool/hibimport.jar ~/SampleImages sampleimages.hib
** added: 1.jpg
** added: 2.jpg
** added: 3.jpg
Created: sampleimages.hib and sampleimages.hib.dat
统计像素平均值程序如下,采用MapReduce编程模式:

import hipi.image.FloatImage;
import hipi.image.ImageHeader;
import hipi.imagebundle.mapreduce.ImageBundleInputFormat;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class FirstProgram extends Configured implements Tool {
  
  public static class FirstProgramMapper extends Mapper {
    public void map(ImageHeader key, FloatImage value, Context context) 
      throws IOException, InterruptedException {
     if (value != null && value.getWidth() > 1 && value.getHeight() > 1 && value.getBands() == 3) {

        int w = value.getWidth();
        int h = value.getHeight();

        float[] valData = value.getData();

        float[] avgData = {0,0,0};

        for (int j = 0; j < h; j++) {
          for (int i = 0; i < w; i++) {
            avgData[0] += valData[(j*w+i)*3+0]; // R
            avgData[1] += valData[(j*w+i)*3+1]; // G
            avgData[2] += valData[(j*w+i)*3+2]; // B
          }
        }

        FloatImage avg = new FloatImage(1, 1, 3, avgData);
        avg.scale(1.0f/(float)(w*h));
        context.write(new IntWritable(1), avg);
      }  
    }
  }
  
  public static class FirstProgramReducer extends Reducer {
    public void reduce(IntWritable key, Iterable values, Context context) 
      throws IOException, InterruptedException {
       FloatImage avg = new FloatImage(1, 1, 3);
      int total = 0;
      for (FloatImage val : values) {
        avg.add(val);
        total++;
      }
      if (total > 0) {
        avg.scale(1.0f / total);
        float[] avgData = avg.getData();
        String result = String.format("Average pixel value: %f %f %f", avgData[0], avgData[1], avgData[2]);
        context.write(key, new Text(result));
      }
    }
  }
  
  public int run(String[] args) throws Exception {
    // Check input arguments
    if (args.length != 2) {
      System.out.println("Usage: firstprog  ");
      System.exit(0);
    }
    
    // Initialize and configure MapReduce job
    Job job = Job.getInstance();
    // Set input format class which parses the input HIB and spawns map tasks
    job.setInputFormatClass(ImageBundleInputFormat.class);
    // Set the driver, mapper, and reducer classes which express the computation
    job.setJarByClass(FirstProgram.class);
    job.setMapperClass(FirstProgramMapper.class);
    job.setReducerClass(FirstProgramReducer.class);
    // Set the types for the key/value pairs passed to/from map and reduce layers
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatImage.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    
    // Set the input and output paths on the HDFS
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Execute the MapReduce job and block until it complets
    boolean success = job.waitForCompletion(true);
    
    // Return success or failure
    return success ? 0 : 1;
  }
  
  public static void main(String[] args) throws Exception {
    ToolRunner.run(new FirstProgram(), args);
    System.exit(0);
  }
  
}
运行结束:

	File System Counters
		FILE: Number of bytes read=126
		FILE: Number of bytes written=212787
		FILE: Number of read operations=0
		FILE: Number of large read operations=0
		FILE: Number of write operations=0
		HDFS: Number of bytes read=1479479
		HDFS: Number of bytes written=50
		HDFS: Number of read operations=6
		HDFS: Number of large read operations=0
		HDFS: Number of write operations=2
	Job Counters 
		Launched map tasks=1
		Launched reduce tasks=1
		Data-local map tasks=1
		Total time spent by all maps in occupied slots (ms)=3146
		Total time spent by all reduces in occupied slots (ms)=2988
		Total time spent by all map tasks (ms)=3146
		Total time spent by all reduce tasks (ms)=2988
		Total vcore-seconds taken by all map tasks=3146
		Total vcore-seconds taken by all reduce tasks=2988
		Total megabyte-seconds taken by all map tasks=3221504
		Total megabyte-seconds taken by all reduce tasks=3059712
	Map-Reduce Framework
		Map input records=4
		Map output records=4
		Map output bytes=112
		Map output materialized bytes=126
		Input split bytes=134
		Combine input records=0
		Combine output records=0
		Reduce input groups=1
		Reduce shuffle bytes=126
		Reduce input records=4
		Reduce output records=1
		Spilled Records=8
		Shuffled Maps =1
		Failed Shuffles=0
		Merged Map outputs=1
		GC time elapsed (ms)=119
		CPU time spent (ms)=3730
		Physical memory (bytes) snapshot=1027117056
		Virtual memory (bytes) snapshot=5848113152
		Total committed heap usage (bytes)=2282881024
	Shuffle Errors
		BAD_ID=0
		CONNECTION=0
		IO_ERROR=0
		WRONG_LENGTH=0
		WRONG_MAP=0
		WRONG_REDUCE=0
	File Input Format Counters 
		Bytes Read=1479345
	File Output Format Counters 
		Bytes Written=50
为了直观地查看结果,需要将运行结果下载到本地。关于hadoop上传、下载文件的操作,请查看前面的博客。
Average pixel value: 0.420624 0.404933 0.380449




你可能感兴趣的:(hadoop学习)