Hadoop小文件操作之SequenceFile

存储文件(将本地文件写入HDFS上的SequenceFile):

import java.io.BufferedInputStream;

import java.io.FileInputStream;

import java.io.IOException;

import java.io.InputStream;

import java.net.URI;

 

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IOUtils;

import org.apache.hadoop.io.SequenceFile;

import org.apache.hadoop.io.Text;

 

public class SequenceFileWrite {

public static void main(String[] args) throws IOException {

String src = "E:\\test\\spring3_MVC.docx";

InputStream in = new BufferedInputStream(new FileInputStream(src));

String uri = "hdfs://localhost:9000/home/hdfs/spring.seq";

   Configuration conf = new Configuration();

   FileSystem fs = FileSystem.get(URI.create(uri), conf);

   Path path = new Path(uri);

   Text key = new Text();   

   Text value = new Text();

   SequenceFile.Writer writer = null;    

   try {

     //返回一个SequenceFile.Writer实例 需要数据流和path对象 将数据写入了path对象

     writer = SequenceFile.createWriter(fs, conf, path,key.getClass(), value.getClass());  

     int len = 0;

     byte[] buff = new byte[1024];

     key.set("spring.docx");

     while ((len = in.read(buff))!= -1) {

    value.set(buff,0,len);

writer.append(key, value);//将每条记录追加到SequenceFile.Writer实例的末尾   

    value.clear();

 }

   } finally {

     IOUtils.closeStream(writer);

     IOUtils.closeStream(in);

   }

 }

}

 

读取文件(从SequenceFile还原为本地文件):

   import java.io.FileOutputStream;

import java.io.IOException;

import java.io.OutputStream;

import java.net.URI;

 

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IOUtils;

import org.apache.hadoop.io.SequenceFile;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.io.Writable;

import org.apache.hadoop.util.ReflectionUtils;

 

 

public class SequenceFileReader {

public static void main(String[] args) throws IOException {  

   String uri = "hdfs://localhost:9000/home/hdfs/spring.seq";  

   Configuration conf = new Configuration();  

   FileSystem fs = FileSystem.get(URI.create(uri), conf);  

   Path path = new Path(uri);    

   SequenceFile.Reader reader = null;  

   String dst = "e:\\test\\spring.docx";    

   OutputStream out = null;

   try {  

     reader = new SequenceFile.Reader(fs, path, conf);

     //返回 SequenceFile.Reader 对象       getKeyClass()获得Sequence中使用的类型  

     Writable key = (Writable)  ReflectionUtils.newInstance(reader.getKeyClass(), conf);

     out =new  FileOutputStream(dst);

     Text  value = new Text();

     while (reader.next(key, value)) { //next()方法迭代读取记录 直到读完返回false  

    System.out.println(key);

    out.write(value.getBytes(),0,value.getLength());//这个长度一定要添加,否则不兼容office2007

    value.clear();  //记着清除一下,不然可能会出现多余的输出     

     }  

     out.flush();

   } finally {  

     IOUtils.closeStream(reader);  

     IOUtils.closeStream(out);

   }  

 } 

}

你可能感兴趣的:(hadoop,sequenceFile,hadoop 小文件)