Java调用Hadoop API

Hadoop提供的Java API主要用来对文件进行操作,最常见的就是读、写、遍历。

总的来说,调用API的第一步是获取FileSystem,可以用FileSystem fs = FileSystem.get()来获取。接下来如果是写(上传)文件,则用fs.create();如果是读(下载)文件,则用fs.open()。两者分别返回FSDataOutputStream和FSDataInputStream,再通过这两个流完成实际的读写。具体代码如下:

package hadoop;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

/**
 * Small demonstration of the Hadoop {@code FileSystem} API: recursively
 * listing a directory, uploading a local file to HDFS, and printing an HDFS
 * file to standard output.
 *
 * <p>Every operation connects to {@link #HDFS_PATH}. Failures are reported by
 * printing the stack trace; none of the methods propagate exceptions.
 *
 * <p>The {@code FileSystem} handles obtained here are intentionally not
 * closed: {@code FileSystem.get} may hand out a shared, cached instance, and
 * closing it would invalidate it for other users in the same JVM.
 */
public class HadoopFileSystem 
{
    /** URI of the HDFS file system that every operation connects to. */
    public static final String HDFS_PATH = "hdfs://master:9000";

    /** Buffer size, in bytes, used for all stream copies. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Entry point. Runs the directory listing demo; the read and write demos
     * are left commented out so they can be enabled one at a time.
     *
     * @param args command line arguments (unused)
     */
    public static void main(String[] args)
    {
        //readFile();
        //writeFile();
        listFile("/home/hadoop");
    }
     
    /**
     * Recursively prints every entry below {@code listPath}. Directories are
     * printed with a {@code "d "} prefix and files with a {@code "- "} prefix.
     *
     * @param listPath HDFS path of the directory to list
     */
    static void listFile(String listPath)
    {
        try
        {
            // Resolve the file system once and reuse it for the whole walk
            // instead of rebuilding the URI and Configuration per directory.
            FileSystem fs = FileSystem.get(new URI(HDFS_PATH), new Configuration());
            listTree(fs, new Path(listPath));
        }
        catch (URISyntaxException e)
        {
            e.printStackTrace();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Prints the entries of {@code dir} and descends into sub-directories.
     * An I/O failure in one directory is reported and does not stop the
     * listing of its siblings.
     *
     * @param fs  file system to query
     * @param dir directory whose entries are printed
     */
    private static void listTree(FileSystem fs, Path dir)
    {
        try
        {
            FileStatus[] entries = fs.listStatus(dir);
            if (entries == null)
            {
                // Some Hadoop releases appear to return null instead of
                // throwing when the path does not exist; nothing to print.
                return;
            }
            for (FileStatus entry : entries)
            {
                // isDir() is kept (rather than isDirectory()) so the class
                // still compiles against older Hadoop releases.
                if (entry.isDir())
                {
                    System.out.println("d " + entry.getPath());
                    listTree(fs, entry.getPath());
                }
                else
                {
                    System.out.println("- " + entry.getPath());
                }
            }
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }
    
    /**
     * Uploads the local file {@code C:/data.txt} to
     * {@code /home/hadoop/data/writefile/write.txt} on HDFS, replacing any
     * existing file, and then prints the uploaded content to standard output.
     */
    static void writeFile()
    {
        try
        {
            String path = "/home/hadoop/data/writefile/write.txt";
            FileSystem fs = FileSystem.get(new URI(HDFS_PATH), new Configuration());
            Path target = new Path(path);
            
            if ( fs.exists(target) )
            {
                System.out.println("file already exist, delete it first...");
                // Delete immediately. deleteOnExit() would only schedule the
                // removal for when the file system is closed, which would
                // wipe out the file written below.
                fs.delete(target, false);
            }
           
            InputStream in = null;
            FSDataOutputStream out = null;
            try
            {
                // Open the local source first so that a missing local file
                // does not leave an empty file behind on HDFS.
                in = new FileInputStream("C:/data.txt");
                out = fs.create(target);
                // close=true closes both streams on success and reports a
                // failure to close the HDFS output stream.
                IOUtils.copyBytes(in, out, BUFFER_SIZE, true);
            }
            finally
            {
                // Covers the paths where the copy never ran or failed early;
                // closing an already closed stream is harmless.
                IOUtils.closeStream(out);
                IOUtils.closeStream(in);
            }
            
            System.out.println("**********write finished*******");
            
            printToStdout(fs, target);
        }
        catch (URISyntaxException e)
        {
            e.printStackTrace();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }
    
    /**
     * Prints the HDFS file {@code /home/hadoop/data/input/core-site.xml} to
     * standard output.
     */
    static void readFile() 
    {   
        try
        {
            String path = HDFS_PATH + "/home/hadoop/data/input/core-site.xml";
            FileSystem fs = FileSystem.get(URI.create(HDFS_PATH), new Configuration());
            printToStdout(fs, new Path(path));
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Copies the content of {@code file} to {@code System.out}.
     *
     * @param fs   file system that holds the file
     * @param file path of the file to print
     * @throws IOException if the file cannot be opened or read
     */
    private static void printToStdout(FileSystem fs, Path file) throws IOException
    {
        FSDataInputStream in = null;
        try
        {
            in = fs.open(file);
            // close=false: copyBytes would otherwise close System.out as
            // well, silencing all later console output.
            IOUtils.copyBytes(in, System.out, BUFFER_SIZE, false);
        }
        finally
        {
            IOUtils.closeStream(in);
        }
    }
}

其中writeFile、readFile、listFile分别代表了写、读、列举。一般操作格式就是这个样子吧,先记下来再说。。。


版权声明:本文为博主原创文章,未经博主允许不得转载。

你可能感兴趣的:(java,hadoop,api)