Before uploading, you need to open up permissions on the target directory; otherwise the client upload will fail for lack of permission.
$ hadoop fs -mkdir /test #create the directory
$ hadoop fs -chmod 777 /test #change the permissions
$ hadoop fs -rm -r /test/jdk-8u121-docs-all #delete a directory
Below is the basic utility class for uploads and downloads. The configuration is built separately for each call rather than reusing the cluster's own defaults: the idea is to bypass the FileSystem cache and size the data block according to each file, which saves space.
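For illustration, here is a minimal sketch of what such a per-call configuration could look like. The helper name buildConfForFile and the rounding rule are my own; the NameNode address and the two cache/timeout-style properties are taken from the test class further down. The article's actual utility class follows.

// A minimal sketch (not the article's exact code): build a fresh Configuration per file,
// bypass the FileSystem cache, and size the block just above the file length.
import java.io.File;
import org.apache.hadoop.conf.Configuration;

public class PerFileConfSketch {
    public static Configuration buildConfForFile(File file) {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.254.130:9000");
        // do not reuse a cached FileSystem instance between calls
        conf.setBoolean("fs.hdfs.impl.disable.cache", true);
        // round the block size up to the next whole megabyte above the file length
        long sizeMB = file.length() / 1024 / 1024;
        conf.setLong("dfs.blocksize", (sizeMB + 1) * 1024 * 1024);
        return conf;
    }
}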
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.aqdog.hadoop;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
/**
*
* @author bruce
*/
public class HadoopUtils {
public static void uploadFileToHDFS(Configuration conf, File file, String dst) throws Exception {
//obtain a FileSystem instance for the given configuration
FileSystem fs = FileSystem.get(conf);
Path srcPath = new Path(file.getAbsolutePath());
Path dstPath = new Path(dst);
long start = System.currentTimeMillis();
fs.copyFromLocalFile(false, srcPath, dstPath);
System.out.println("size:" + file.length() + "\t Time:" + (System.currentTimeMillis() - start) + "\t" + file.getAbsolutePath());
fs.close();
}
/**
* Download a file from HDFS to a local file.
*
* @param conf the HDFS client configuration
* @param src the HDFS path of the file to download
* @param tarFile the local file to write to
* @throws Exception
*/
public static void downLoadFileFromHDFS(Configuration conf, String src, File tarFile) throws Exception {
FileSystem fs = FileSystem.get(conf);
Path srcPath = new Path(src);
//remove any stale local copy before opening the output stream
if (tarFile.exists() && tarFile.isFile()) {
tarFile.delete();
}
InputStream in = fs.open(srcPath);
OutputStream out = new FileOutputStream(tarFile);
try {
//copyBytes with close=true already closes both streams once the copy finishes
IOUtils.copyBytes(in, out, 4096, true);
} finally {
IOUtils.closeStream(in);
IOUtils.closeStream(out);
fs.close();
}
}
/**
* List all files under the given directory (direPath).
*
* @param conf the HDFS client configuration
* @param direPath the HDFS directory to list
* @throws Exception
*/
public static void getDirectoryFromHdfs(Configuration conf, String direPath) throws Exception {
FileSystem fs = FileSystem.get(URI.create(direPath), conf);
FileStatus[] filelist = fs.listStatus(new Path(direPath));
for (FileStatus fileStatus : filelist) {
System.out.println("Name:" + fileStatus.getPath().getName() + "\tsize:" + fileStatus.getLen());
}
fs.close();
}
}
Below is the test class, which can upload the files either in a single thread or with multiple threads. I used the JDK doc directory for the upload, roughly 16,000 files; once the uploads run concurrently, the time per file grows noticeably.
I tried four setups: sequential, 50 concurrent threads, 500 concurrent threads, and no limit on the number of threads.
Without concurrency each file uploads in about 20 ms; with 50 concurrent threads, about 100 ms; with 500, around 2 seconds.
With no limit, one thread is started per file: the program hung for a long time, printed a few lines once it finally responded, and then died outright.
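The hand-rolled thread throttling below could also be replaced with a fixed-size thread pool; here is a minimal sketch under that assumption. The pool size of 50 and the writeFile call mirror the test class, while the class name PooledUploadSketch and the rest of the scaffolding are my own. The article's own test class follows.

import java.io.File;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class PooledUploadSketch {
    // upload every (local path -> HDFS path) entry with at most 50 concurrent threads
    public static void uploadAll(Map<String, String> fileMap) throws InterruptedException {
        ExecutorService pool = Executors.newFixedThreadPool(50);
        for (final Map.Entry<String, String> en : fileMap.entrySet()) {
            pool.submit(new Runnable() {
                @Override
                public void run() {
                    TestHadoop.writeFile(new File(en.getKey()), en.getValue());
                }
            });
        }
        pool.shutdown();                          // no more tasks will be submitted
        pool.awaitTermination(1, TimeUnit.HOURS); // wait for all uploads to finish
    }
}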
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.aqdog;
import com.aqdog.hadoop.HadoopUtils;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.logging.Level;
import org.apache.hadoop.conf.Configuration;
import org.apache.log4j.Logger;
/**
*
* @author bruce
*/
public class TestHadoop {
static Logger log = Logger.getLogger(TestHadoop.class);
public static void main(String args[]) {
// TestHadoop th = new TestHadoop();
//th.testByThread();
//th.testOneThread();
TestHadoop.downloadFile("/test/aaaa/ideaIU-2016.3.5.exe", new File("D:/aaaa/aaaa.exe"));
}
public void testOneThread() {
TestHadoop.readFiles(new File("D:\\aaaa"));
long starTime = System.currentTimeMillis(); // start time
for (Entry<String, String> en : fileMap.entrySet()) {
TestHadoop.writeFile(new File(en.getKey()), en.getValue());
}
long time = System.currentTimeMillis() - starTime;
System.out.println("Files: " + fileMap.size() + " Total time(ms): " + time);
}
public void testByThread() {
TestHadoop.readFiles(new File("D:\\jdk-8u121-docs-all"));
long starTime = System.currentTimeMillis(); // start time
List<T> list = new ArrayList<T>();
int threadCountLimit = 500; // cap on the number of concurrent upload threads
for (Entry<String, String> en : fileMap.entrySet()) {
T t = new T(en);
t.start();
list.add(t);
threadCountLimit--;
if (threadCountLimit <= 0) {
// all slots are in use: wait for this batch to finish, then free the slots
while (!list.isEmpty()) {
T ta = list.remove(0);
try {
ta.join();
} catch (InterruptedException ex) {
java.util.logging.Logger.getLogger(TestHadoop.class.getName()).log(Level.SEVERE, null, ex);
}
threadCountLimit++;
}
}
}
// wait for the last, partially filled batch before measuring the total time
for (T ta : list) {
try {
ta.join();
} catch (InterruptedException ex) {
java.util.logging.Logger.getLogger(TestHadoop.class.getName()).log(Level.SEVERE, null, ex);
}
}
long time = System.currentTimeMillis() - starTime;
System.out.println("Files: " + fileMap.size() + " Total time(ms): " + time);
}
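// recursively collect files under the given directory, mapping each local path
// (e.g. D:\foo\bar.html) to its HDFS target path (/foo/bar.html)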
public static void readFiles(File file) {
if (file.exists()) {
if (file.isDirectory()) {
for (File f : file.listFiles()) {
readFiles(f);
}
} else {
fileMap.put(file.getAbsolutePath(), file.getAbsolutePath().replaceAll("D:\\\\", "/").replaceAll("\\\\", "/"));
}
}
}
public static void downloadFile(String src, File target) {
Configuration conf = new Configuration();
conf.set("dfs.socket.timeout", "30000");
conf.set("dfs.datanode.socket.write.timeout", "30000");
conf.setBoolean("fs.hdfs.impl.disable.cache", true);
conf.set("fs.defaultFS", "hdfs://192.168.254.130:9000");
try {
HadoopUtils.downLoadFileFromHDFS(conf, src, target);
} catch (Exception ex) {
java.util.logging.Logger.getLogger(TestHadoop.class.getName()).log(Level.SEVERE, null, ex);
}
}
public static void writeFile(File file, String target) {
Configuration conf = new Configuration();
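// file size in MB; used by the (commented-out) per-file dfs.blocksize setting below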
long i = file.length() / 1024 / 1024;
conf.set("dfs.socket.timeout", "30000");
conf.set("dfs.datanode.socket.write.timeout", "30000");
//conf.set("dfs.blocksize", ((i + 1) * 1024 * 1024) + "");
conf.setBoolean("fs.hdfs.impl.disable.cache", true);
conf.set("fs.defaultFS", "hdfs://192.168.254.130:9000");
try {
HadoopUtils.uploadFileToHDFS(conf, file, "/test" + target);
} catch (Exception ex) {
java.util.logging.Logger.getLogger(TestHadoop.class.getName()).log(Level.SEVERE, null, ex);
}
}
class T extends Thread {
private Entry<String, String> en;
public T(Entry<String, String> entry) {
this.en = entry;
}
@Override
public void run() {
TestHadoop.writeFile(new File(en.getKey()), en.getValue());
}
}
private static HashMap<String, String> fileMap = new HashMap<String, String>();
}