HDFS is designed for storing large files, and given the cost of seeks and other per-access overhead it is best suited to streaming access. That does not mean random access is off the table, though: HDFS fully supports random reads (writes remain sequential, via create and append).
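Before the two full examples, here is a minimal sketch of what a random read looks like. The class name RandomReadSketch, the namenode address, and the offsets are placeholders, not taken from the examples below; the point is only that FSDataInputStream offers both a seek() followed by a normal read, and a positional read that does not move the stream position.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RandomReadSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder URI; substitute a real namenode address and file.
        String uri = "hdfs://namenode:9000/data/file.txt";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        byte[] buffer = new byte[20];
        try (FSDataInputStream in = fs.open(new Path(uri))) {
            // Approach 1: move the stream position, then read normally.
            in.seek(100);
            in.readFully(buffer, 0, 20);
            // Approach 2: positional read; does not move the stream position.
            in.readFully(100, buffer, 0, 20);
        }
    }
}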
Reads go mainly through the FSDataInputStream class and writes through the FSDataOutputStream class. Two examples follow.
Example 1: read from a local file and write to HDFS
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsWr {
    /**
     * @param uri  local path
     * @param conf Hadoop configuration
     * @param uri2 HDFS path
     * @throws IOException
     */
    public void fun(String uri, Configuration conf, String uri2) throws IOException {
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        FileSystem fs2 = FileSystem.get(URI.create(uri2), conf);
        // input stream for the local source file
        FSDataInputStream in = null;
        // output stream for the HDFS target file
        FSDataOutputStream out = null;
        byte[] buffer = new byte[20];
        try {
            // create the target file in HDFS
            out = fs2.create(new Path(uri2));
            in = fs.open(new Path(uri));
            // random read: up to 20 bytes starting at offset 100
            int n = in.read(100, buffer, 0, 20);
            // write only the bytes actually read into the HDFS file
            if (n > 0) {
                out.write(buffer, 0, n);
            }
            out.flush();
        } finally {
            // guard against NPE if create/open failed before both streams were assigned
            if (in != null) {
                in.close();
            }
            if (out != null) {
                out.close();
            }
        }
    }
}
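A quick sketch of how this method might be invoked; the driver class name and both URIs are placeholders rather than part of the original example, and they assume the local file is at least 120 bytes long:

import org.apache.hadoop.conf.Configuration;

public class HdfsWrDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Placeholder paths: a local source file and an HDFS destination.
        String localUri = "file:///tmp/source.txt";
        String hdfsUri = "hdfs://namenode:9000/user/demo/dest.txt";
        new HdfsWr().fun(localUri, conf, hdfsUri);
    }
}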
Example 2: read from HDFS and write to a local file

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsRw {
    /**
     * @param uri  HDFS path
     * @param conf Hadoop configuration
     * @param uri2 local path
     * @throws IOException
     */
    public void fun(String uri, Configuration conf, String uri2) throws IOException {
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        FSDataInputStream in = null;
        // writing to the local disk, so an ordinary output stream is enough
        FileOutputStream out = new FileOutputStream(new File(uri2));
        byte[] buffer = new byte[20];
        try {
            // open the HDFS file for reading
            in = fs.open(new Path(uri));
            // random read: up to 20 bytes starting at offset 100
            int n = in.read(100, buffer, 0, 20);
            // write only the bytes actually read to the local file
            if (n > 0) {
                out.write(buffer, 0, n);
            }
            out.flush();
            // Alternative: position the stream and let IOUtils copy the bytes
            // in.seek(100);
            // IOUtils.copyBytes(in, out, 20, 4096, false);
        } finally {
            // guard against NPE if open failed before in was assigned
            if (in != null) {
                in.close();
            }
            out.close();
        }
    }
}
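The commented-out IOUtils alternative can be spelled out as a standalone sketch. The class name and structure here are assumptions rather than part of the original post; it uses org.apache.hadoop.io.IOUtils.copyBytes with an explicit byte count, and seek() instead of skip() since skip may advance fewer bytes than requested:

import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class HdfsRwIOUtils {
    public void fun(String uri, Configuration conf, String uri2) throws IOException {
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        FSDataInputStream in = null;
        FileOutputStream out = new FileOutputStream(uri2);
        try {
            in = fs.open(new Path(uri));
            // position the stream at offset 100, then copy 20 bytes with a 4096-byte buffer
            in.seek(100);
            IOUtils.copyBytes(in, out, 20L, 4096, false);
            out.flush();
        } finally {
            // closeStream ignores null and swallows close errors
            IOUtils.closeStream(in);
            IOUtils.closeStream(out);
        }
    }
}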