Hadoop 上传小文件 合成sequencefile 记录

    Hadoop 支持对二进制文件的处理,而 SequenceFile 正是其中的重点方式。

    以下是实践中的实现记录(本次只记录上传/写入文件的部分):

package test;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

/**
 * Demonstrates packing small pieces of data into Hadoop SequenceFiles on HDFS.
 *
 * <p>{@link #writeText()} writes 100 {@code IntWritable -> Text} records, and
 * {@link #writeImage()} writes two local image files as
 * {@code BytesWritable -> BytesWritable} records. The HDFS URIs and local file
 * paths are hard-coded for the demo environment.
 */
public class SequenceFileWriteDemo {
	/** Sample lines that {@link #writeText()} cycles through as record values. */
	private static final String[] DATA={
		"one,two show one",
		"three,four show two",
		"five,six show three",
		"seven,eight show four",
		"nine,ten show five"
	};

	/**
	 * Writes 100 text records to the text demo SequenceFile.
	 *
	 * <p>Keys count down from 100 to 1; values cycle through {@link #DATA}.
	 * Before each append, the writer's current length, the key and the value
	 * are printed to standard output.
	 *
	 * @throws IOException if the file system cannot be reached or a record
	 *         cannot be written
	 */
	public void writeText() throws IOException{
		String uri="hdfs://192.168.50.28:8020/user/root/jyl/testImageSequenceFile";
		Configuration con=new Configuration();
		FileSystem fs=FileSystem.get(URI.create(uri), con);
		// Use the uri variable; the string literal "uri" would resolve to a
		// relative path named "uri" instead of the intended target file.
		Path path=new Path(uri);
		IntWritable key=new IntWritable();
		Text value=new Text();
		SequenceFile.Writer writer=null;
		try{
			writer=SequenceFile.createWriter(fs, con, path, key.getClass(), value.getClass());
			for(int i=0;i<100;i++){
				key.set(100-i);
				value.set(DATA[i%DATA.length]);
				System.out.printf("[%s]\t%s\t%s\n",writer.getLength(),key,value);
				writer.append(key, value);
			}
		}finally{
			// Release the writer even when an append fails part-way through.
			IOUtils.closeStream(writer);
		}
	}

	/**
	 * Writes two local image files into the image demo SequenceFile.
	 *
	 * <p>Each record's key is a one-byte {@code BytesWritable} (1 for the first
	 * image, 2 for the second) and its value is the complete file content.
	 * Both files are read before the SequenceFile is created, so a missing or
	 * unreadable image fails before anything is written to HDFS.
	 *
	 * @throws IOException if an image cannot be read completely, the file
	 *         system cannot be reached, or a record cannot be written
	 */
	public void writeImage() throws IOException{
		String uri="hdfs://192.168.50.28:8020/user/root/jyl/testByteImageSequenceFile";
		Configuration con=new Configuration();
		FileSystem fs=FileSystem.get(URI.create(uri), con);
		Path path=new Path(uri);

		byte[] firstImage=readFileFully(new File("/mnt/disk1/yl/images/zhouzhou.jpg"));
		byte[] secondImage=readFileFully(new File("/mnt/disk1/yl/images/gouhuo.jpg"));

		BytesWritable firstKey=new BytesWritable(new byte[]{1});
		BytesWritable secondKey=new BytesWritable(new byte[]{2});
		BytesWritable value=new BytesWritable();

		SequenceFile.Writer writer=null;
		try{
			writer=SequenceFile.createWriter(fs, con, path, BytesWritable.class, BytesWritable.class);

			value.set(firstImage, 0, firstImage.length);
			writer.append(firstKey, value);
			value.set(secondImage, 0, secondImage.length);
			writer.append(secondKey, value);
		}finally{
			// Release the writer even when an append fails.
			IOUtils.closeStream(writer);
		}
	}

	/**
	 * Reads the entire content of a local file into memory.
	 *
	 * @param file the local file to read
	 * @return the file content, exactly {@code file.length()} bytes
	 * @throws IOException if the file is too large for a single byte array,
	 *         cannot be opened, or ends before all bytes were read
	 */
	private static byte[] readFileFully(File file) throws IOException{
		long size=file.length();
		if(size>Integer.MAX_VALUE){
			// The int cast below would silently overflow for such files.
			throw new IOException("File too large to buffer in memory: "+file);
		}
		byte[] content=new byte[(int) size];
		InputStream in=new FileInputStream(file);
		try{
			// A single InputStream.read(byte[]) call may return fewer bytes
			// than requested; readFully keeps reading until the buffer is full.
			IOUtils.readFully(in, content, 0, content.length);
		}finally{
			IOUtils.closeStream(in);
		}
		return content;
	}

	/**
	 * Runs the image demo.
	 *
	 * @param args ignored
	 * @throws IOException if writing the SequenceFile fails
	 */
	public static void main(String[] args) throws IOException {
		SequenceFileWriteDemo demo=new SequenceFileWriteDemo();
		demo.writeImage();
		// Uncomment to run the text demo instead:
//		demo.writeText();
	}
}



你可能感兴趣的:(Hadoop 上传小文件 合成sequencefile 记录)