// The code is as follows:
package net.maichuang.log;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;
public class PutMergeCompression {
//本地多压缩文件上传hdfs
public static void putMergeFunc(String LocalDir, String fsFile) throws IOException
{
Configuration conf = new Configuration();
CompressionCodecFactory factory = new CompressionCodecFactory(conf);
FileSystem fs = FileSystem.get(conf); //fs是HDFS文件系统
FileSystem local = FileSystem.getLocal(conf); //本地文件系统
Path localDir = new Path(LocalDir);
Path HDFSFile = new Path(fsFile);
FileStatus[] status = local.listStatus(localDir); //得到输入目录
FSDataOutputStream out = fs.create(HDFSFile); //在HDFS上创建输出文件
for(FileStatus st: status)
{
Path temp = st.getPath();
CompressionCodec codec = factory.getCodec(temp);
InputStream in = codec.createInputStream(local.open(temp));
IOUtils.copyBytes(in, out, 4096, false); //读取in流中的内容放入out
in.close(); //完成后,关闭当前文件输入流
}
out.close();
}
//hdfs上多文件合并压缩到本地
public static void getMergeFunc(String fsFile,String LocalDir) throws IOException
{
Configuration conf = new Configuration();
CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
FileSystem fs = FileSystem.get(conf); //fs是HDFS文件系统
Path HDFSFile = new Path(fsFile);
FileStatus[] status = fs.listStatus(HDFSFile); //得到输入目录
FileOutputStream outFile = new FileOutputStream(LocalDir);
CompressionOutputStream outGzip = codec.createOutputStream(outFile);
for(FileStatus st: status)
{
Path temp = st.getPath();
FSDataInputStream in = fs.open(temp);
IOUtils.copyBytes(in, outGzip, 4096, false); //读取in流中的内容放入out
in.close(); //完成后,关闭当前文件输入流
}
outGzip.close();
outFile.close();
}
//从hdfs 上拷贝并压缩到本地
public static void GzipFile() throws IOException {
String inputFile = “hdfs://master:9000/user/xie/input/2013-11-01/”;
String outputFile = “/home/xie/data/2013-11-01/”;
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf); //fs是HDFS文件系统
Path HDFSFile = new Path(inputFile);
FileStatus[] status = fs.listStatus(HDFSFile); //得到输入目录
File dir = new File(outputFile);
if(!dir.exists())
dir.mkdirs();
for(FileStatus st: status)
{
String[] path = st.getPath().toString().split(“/”);
String filename = path[path.length -1];
getMergeFunc(inputFile+filename,outputFile+filename+”.gz”);
}
}
//从本地解压并拷贝到hdfs上
public static void UnzipFile() throws IOException {
String local = “/home/xie/data/2013-11-01”;
String fs = “hdfs://master:9000/user/xie/test/2013-11-01”;
putMergeFunc(local,fs);
}
public static void main(String [] args) throws IOException
{
UnzipFile();
// GzipFile();
// String l = “/home/xie/data/xx”;
// String f = “hdfs://master:9000/user/xie/test1/PutMergeTest”;
// putMergeFunc(l,f);
// String x = “/home/xie/data/xx/wo.gz”;
// String y = “hdfs://master:9000/user/xie/test”;
// getMergeFunc(y,x);
}
}