Merging multiple HDFS files: uploading multiple local compressed files to HDFS, and merging multiple HDFS files into a compressed local file

The class below provides two helpers: putMergeFunc decompresses every compressed file in a local directory and merges the contents into a single HDFS file, while getMergeFunc merges every file under an HDFS path into a single gzip-compressed local file. The full code is as follows:

package net.maichuang.log;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class PutMergeCompression {

    // Upload multiple compressed local files to HDFS: each file is
    // decompressed and its contents are merged into one HDFS file.
    public static void putMergeFunc(String localDir, String fsFile) throws IOException {
        Configuration conf = new Configuration();
        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        FileSystem fs = FileSystem.get(conf);           // fs is the HDFS file system
        FileSystem local = FileSystem.getLocal(conf);   // the local file system

        Path localPath = new Path(localDir);
        Path hdfsFile = new Path(fsFile);

        FileStatus[] status = local.listStatus(localPath);  // list the input directory
        FSDataOutputStream out = fs.create(hdfsFile);       // create the output file on HDFS
        for (FileStatus st : status) {
            Path temp = st.getPath();
            CompressionCodec codec = factory.getCodec(temp);    // codec inferred from the file extension
            if (codec == null) {    // not a recognized compressed file; skip it to avoid a NullPointerException
                continue;
            }
            InputStream in = codec.createInputStream(local.open(temp));
            IOUtils.copyBytes(in, out, 4096, false);    // copy the decompressed contents of in into out
            in.close();     // close the current file's input stream when done
        }
        out.close();
    }

    // Merge multiple files on HDFS into a single gzip-compressed local file.
    public static void getMergeFunc(String fsFile, String localFile) throws IOException {
        Configuration conf = new Configuration();
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
        FileSystem fs = FileSystem.get(conf);   // fs is the HDFS file system

        Path hdfsPath = new Path(fsFile);
        FileStatus[] status = fs.listStatus(hdfsPath);  // list the input directory
        FileOutputStream outFile = new FileOutputStream(localFile);
        CompressionOutputStream outGzip = codec.createOutputStream(outFile);    // gzip-wrapped local output
        for (FileStatus st : status) {
            Path temp = st.getPath();
            FSDataInputStream in = fs.open(temp);
            IOUtils.copyBytes(in, outGzip, 4096, false);    // copy the contents of in into the gzip stream
            in.close();     // close the current file's input stream when done
        }
        outGzip.close();
        outFile.close();
    }

    // Copy files from HDFS to the local disk, gzip-compressing each one.
    public static void gzipFile() throws IOException {
        String inputFile = "hdfs://master:9000/user/xie/input/2013-11-01/";
        String outputFile = "/home/xie/data/2013-11-01/";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);   // fs is the HDFS file system

        Path hdfsPath = new Path(inputFile);
        FileStatus[] status = fs.listStatus(hdfsPath);  // list the input directory
        File dir = new File(outputFile);
        if (!dir.exists()) {
            dir.mkdirs();
        }
        for (FileStatus st : status) {
            String[] path = st.getPath().toString().split("/");
            String filename = path[path.length - 1];
            getMergeFunc(inputFile + filename, outputFile + filename + ".gz");
        }
    }

    // Decompress local files and copy the merged result to HDFS.
    public static void unzipFile() throws IOException {
        String local = "/home/xie/data/2013-11-01";
        String fs = "hdfs://master:9000/user/xie/test/2013-11-01";
        putMergeFunc(local, fs);
    }

    public static void main(String[] args) throws IOException {
        unzipFile();
//        gzipFile();
//        String l = "/home/xie/data/xx";
//        String f = "hdfs://master:9000/user/xie/test1/PutMergeTest";
//        putMergeFunc(l, f);
//        String x = "/home/xie/data/xx/wo.gz";
//        String y = "hdfs://master:9000/user/xie/test";
//        getMergeFunc(y, x);
    }
}
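
One caveat: both helpers obtain the HDFS handle with FileSystem.get(conf), which resolves whatever fs.defaultFS names in the classpath configuration, so the fully qualified hdfs://master:9000 URIs above only work when that address is also the configured default. Below is a minimal sketch of a more portable upload variant, assuming the standard Hadoop FileSystem API; the class name PutMergeRobust and method putMerge are illustrative, not part of the original code.

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class PutMergeRobust {

    // Same merge-and-upload idea as putMergeFunc, but each Path resolves its
    // own FileSystem, so fully qualified URIs work regardless of fs.defaultFS.
    public static void putMerge(String localDirUri, String hdfsFileUri) throws IOException {
        Configuration conf = new Configuration();
        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        Path src = new Path(localDirUri);
        Path dst = new Path(hdfsFileUri);
        FileSystem srcFs = src.getFileSystem(conf);     // file system owning the source path
        FileSystem dstFs = dst.getFileSystem(conf);     // file system owning the destination path
        try (FSDataOutputStream out = dstFs.create(dst)) {
            for (FileStatus st : srcFs.listStatus(src)) {
                CompressionCodec codec = factory.getCodec(st.getPath());
                try (InputStream in = codec == null
                        ? srcFs.open(st.getPath())                      // plain file: copy as-is
                        : codec.createInputStream(srcFs.open(st.getPath()))) {
                    IOUtils.copyBytes(in, out, 4096, false);
                }
            }
        }
    }
}

With this variant, mixing schemes in one call works, e.g. putMerge("file:///home/xie/data/2013-11-01", "hdfs://master:9000/user/xie/test/2013-11-01").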
