Uploading Files to HDFS with Java

There are roughly three ways to upload a file to HDFS:

1) Calling the HDFS Java API directly

2) Using WebHDFS

3) Using HttpFS

The first two are briefly described below.

1) Calling the HDFS Java API directly (straight to the code)

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.SimpleDateFormat;
import java.util.Date;

public void hdfsUpload(String srcPath) throws IOException, URISyntaxException {
    Configuration conf = new Configuration();
    conf.set("hadoop.security.authentication", "kerberos");
    // Kerberos principal; better read from a config file, e.g. Connection2hbase.getHbaseConfig().get("kerberos.username")
    String userName = "hochoy";
    // Kerberos keytab stored at a concrete path; better read from a config file, e.g. Connection2hbase.getHbaseConfig().get("kerberos.keytab")
    String keytab = "/usr/lib/hochoy.keytab";
    URI urlHdfs = new URI("hdfs://nameservice1:8020");
    String url17monipdb = "/user/hochoy";

    UserGroupInformation.setConfiguration(conf);
    UserGroupInformation.loginUserFromKeytab(userName, keytab);  // Kerberos authentication
    FileSystem fs = FileSystem.get(urlHdfs, conf);
    if (fs.exists(new Path(url17monipdb + "/17monipdb.dat"))) {
        // rename behaves like "mv" in Linux: back up the existing file with a timestamp suffix
        fs.rename(new Path(url17monipdb + "/17monipdb.dat"),
                new Path(url17monipdb + "/17monipdb.dat" + ".bak"
                        + new SimpleDateFormat("yyyyMMdd-HHmmss").format(new Date())));
    }
    // Upload the local file via the API (delSrc = false, overwrite = true)
    fs.copyFromLocalFile(false, true, new Path(srcPath), new Path(url17monipdb + "/17monipdb.dat"));
    fs.close();
}
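
copyFromLocalFile copies an existing local file. If the bytes come from memory or another stream instead, the same authenticated FileSystem can write them directly. A minimal sketch, assuming an fs obtained as above before it is closed (the target path and payload are illustrative):

import org.apache.hadoop.fs.FSDataOutputStream;
import java.nio.charset.StandardCharsets;

// Write a stream straight to HDFS; the second argument to create() enables overwrite
try (FSDataOutputStream out = fs.create(new Path("/user/hochoy/17monipdb.dat"), true)) {
    out.write("example payload".getBytes(StandardCharsets.UTF_8));
}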

2) Using WebHDFS

Shell commands (curl). The WebHDFS REST URL pattern looks like this (port 14000 is the HttpFS gateway, while port 50070 is the NameNode's own WebHDFS endpoint; both expose the same REST API):

http://ip:14000/webhdfs/v1/user/hochoy/17monipdb.dat?user.name=hochoy&op=GETFILESTATUS



Get file status:

curl -i "http://ip:14000/webhdfs/v1/user/hochoy/readme.txt?user.name=hochoy&op=GETFILESTATUS"
curl -i "http://192.168.1.32:50070/webhdfs/v1/user/hochoy/readme.txt?user.name=hochoy&op=GETFILESTATUS"


Create a directory:
curl -i -X PUT "http://ip:14000/webhdfs/v1/user/hochoy/hochoy?user.name=hochoy&op=MKDIRS"

Create a file. WebHDFS uploads are a two-step protocol: first ask the NameNode (port 50070), which answers 307 Temporary Redirect with a Location header naming a DataNode; then PUT the data to that DataNode (port 50075).

Step 1, create the file handle on the NameNode:
curl -i -X PUT "http://ip:50070/webhdfs/v1/user/hochoy/hochoy/work.sh?user.name=hochoy&op=CREATE"
curl -i -X PUT "http://192.168.1.32:50070/webhdfs/v1/user/hochoy/hochoy/work.sh?user.name=hochoy&op=CREATE"

Step 2, send the data to the DataNode named in the redirect:
curl -i -X PUT -T work.sh "http://tw-master:50075/webhdfs/v1/user/hochoy/hochoy/work.sh?user.name=hochoy&op=CREATE&namenoderpcaddress=nameservice1&overwrite=true"
curl -i -X PUT -T work.sh "http://tw-slave01:50075/webhdfs/v1/user/hochoy/hochoy/work.sh?user.name=hochoy&op=CREATE&namenoderpcaddress=nameservice1&overwrite=true"

The same two steps for readme.txt (note the URL must be quoted, otherwise the shell treats each & as a background operator):
curl -i -X PUT "http://ip:50070/webhdfs/v1/user/hochoy/readme.txt?user.name=hochoy&op=CREATE"
curl -i -X PUT -T work.sh "http://tw-master:50075/webhdfs/v1/user/hochoy/readme.txt?op=CREATE&user.name=hochoy&namenoderpcaddress=nameservice1&overwrite=true"
 
View file contents (-L follows the redirect to the DataNode that actually serves the data):
curl -i -L "http://ip:14000/webhdfs/v1/user/hochoy/readme.txt?user.name=hochoy&op=OPEN"
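
Step 1 of the upload can also be driven from plain Java. A minimal sketch with HttpURLConnection (host and path are the same illustrative values as above; automatic redirect following is disabled so the Location header can be read):

import java.net.HttpURLConnection;
import java.net.URL;

URL nn = new URL("http://ip:50070/webhdfs/v1/user/hochoy/hochoy/work.sh?user.name=hochoy&op=CREATE");
HttpURLConnection con = (HttpURLConnection) nn.openConnection();
con.setRequestMethod("PUT");
con.setInstanceFollowRedirects(false);   // keep the 307 so the Location header stays visible
con.connect();
String dataNodeUrl = con.getHeaderField("Location");  // step-2 target on a DataNode, port 50075
con.disconnect();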

The full Java helper class:



/**
 * Created by hochoy on 2018/5/7.
 */

import net.sf.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.*;

public class HDFSOperating {
    private final Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * @param webhdfs      WebHDFS base URL, e.g. http://ip:port/webhdfs/v1
     * @param stream       the InputStream of the file to upload
     * @param hdfsFilePath HDFS path + file name, e.g. /user/razor/readme.txt
     * @param op           the WebHDFS operation, e.g. CREATE
     * @param parameters   other query parameters if needed
     * @param method       HTTP method, e.g. PUT
     * @throws IOException
     */
    public void uploadFile(String webhdfs, InputStream stream, String hdfsFilePath, String op, Map<String, String> parameters, String method) throws IOException {
        HttpURLConnection con;
        try {
            con = getConnection(webhdfs, hdfsFilePath, op, parameters, method);

            // Stream the file body to the server
            OutputStream out = con.getOutputStream();
            byte[] bytes = new byte[1024];
            int rc;
            while ((rc = stream.read(bytes, 0, bytes.length)) > 0)
                out.write(bytes, 0, rc);
            out.flush();
            // Reading the response forces the request to complete
            con.getInputStream();
            con.disconnect();
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
        }
        stream.close();
    }


    /**
     * @param webhdfs      candidate WebHDFS base URLs, e.g. http://ip:port/webhdfs/v1; the first reachable one is used
     * @param hdfsFilePath HDFS path + file name, e.g. /user/razor/readme.txt
     * @param op           the WebHDFS operation, e.g. GETFILESTATUS
     * @param parameters   other query parameters if needed
     * @param method       HTTP method, e.g. GET
     * @return the fields of the FileStatus JSON object, keyed by field name
     */
    public Map<String, Object> getFileStatus(String[] webhdfs, String hdfsFilePath, String op, Map<String, String> parameters, String method) {
        Map<String, Object> fileStatus = new HashMap<String, Object>();
        HttpURLConnection connection = null;

        // Try each candidate URL until one responds
        for (String url : webhdfs) {
            try {
                HttpURLConnection conn = getConnection(url, hdfsFilePath, op, parameters, method);
                if (conn.getInputStream() != null) {
                    connection = conn;
                    break;
                }
            } catch (IOException e) {
                logger.error("failed to reach webhdfs url: " + url, e);
            }
        }


        StringBuffer sb = new StringBuffer();
        try {
            InputStream is = connection.getInputStream();
            BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
            String line = null;
            while ((line = reader.readLine()) != null) {
                sb.append(line);
            }
            reader.close();
            System.out.println(sb.toString());
            JSONObject root = JSONObject.fromObject(sb.toString());
            JSONObject status = root.getJSONObject("FileStatus");
            Iterator keys = status.keys();

            while (keys.hasNext()) {
                String key = keys.next().toString();
                String value = status.get(key).toString();
                fileStatus.put(key, value);
            }
//            is.close();
        } catch (IOException e) {
            logger.error("exception was caught", e);
        } catch (NullPointerException e) {
            logger.error("exception was caught", e);
        }

        return fileStatus;
    }

    /**
     * @param strurl     WebHDFS base URL, e.g. http://ip:port/webhdfs/v1 (port is usually 50070 or 14000)
     * @param path       HDFS path + file name, e.g. /user/razor/readme.txt
     * @param op         the operation on the HDFS file, e.g. GETFILESTATUS, OPEN, MKDIRS, CREATE
     * @param parameters other query parameters if needed
     * @param method     HTTP method, e.g. GET, POST, PUT
     * @return an open HttpURLConnection, or null if building the connection failed
     */
    public HttpURLConnection getConnection(String strurl, String path, String op, Map<String, String> parameters, String method) {
        URL url = null;
        HttpURLConnection con = null;
        StringBuffer sb = new StringBuffer();
        try {
            sb.append(strurl);
            sb.append(path);
            sb.append("?op=");
            sb.append(op);
            if (parameters != null) {
                for (String key : parameters.keySet())
                    sb.append("&").append(key + "=" + parameters.get(key));
            }
            url = new URL(sb.toString());
            con = (HttpURLConnection) url.openConnection();
            con.setRequestMethod(method);
            con.setRequestProperty("accept", "*/*");
            con.setRequestProperty("connection", "Keep-Alive");
            String s = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)";
            String s1 = "ozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)";
            con.setRequestProperty("User-Agent", s1);
//            con.setRequestProperty("Accept-Encoding", "gzip");
//            con.setDoInput(true);
            con.setDoOutput(true);
            con.setUseCaches(false);
        } catch (IOException e) {
            logger.error(Constants.EXCEPTION_WAS_CAUGHT, e);
        }
        return con;
    }
}
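
A hedged usage sketch of this class (hostnames, ports, and file names are the same illustrative values used in the curl examples; in a full CREATE flow you would first hit the NameNode and read the Location header from the 307 redirect, as shown earlier, rather than hard-coding the DataNode address):

import java.io.FileInputStream;
import java.util.HashMap;
import java.util.Map;

public class HDFSOperatingDemo {
    public static void main(String[] args) throws Exception {
        HDFSOperating hdfs = new HDFSOperating();

        Map<String, String> params = new HashMap<String, String>();
        params.put("user.name", "hochoy");

        // Check a file's status through the HttpFS gateway
        Map<String, Object> status = hdfs.getFileStatus(
                new String[]{"http://ip:14000/webhdfs/v1"},
                "/user/hochoy/readme.txt", "GETFILESTATUS", params, "GET");
        System.out.println(status);

        // Step 2 of the CREATE flow: stream the file to a DataNode;
        // namenoderpcaddress and overwrite mirror the curl examples above
        params.put("namenoderpcaddress", "nameservice1");
        params.put("overwrite", "true");
        hdfs.uploadFile("http://tw-master:50075/webhdfs/v1",
                new FileInputStream("work.sh"),
                "/user/hochoy/hochoy/work.sh", "CREATE", params, "PUT");
    }
}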

3) HttpFS: to be covered in a later update.
