import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;

public class URLCat {
    static {
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    public static void main(String[] args) throws Exception {
        InputStream in = null;
        try {
            in = new URL(args[0]).openStream();
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
Code explanation: the approach here is to call the setURLStreamHandlerFactory method of java.net.URL with an FsUrlStreamHandlerFactory instance. This method can be called only once per JVM, so it is normally invoked in a static block.
Running steps:
1. Run the HelloWorld test first to confirm the environment works.
2. Add
export CLASSPATH=.:/var/local/hadoop/hadoop-2.6.0/share/hadoop/common/lib/*:/var/local/hadoop/hadoop-2.6.0/share/hadoop/common/hadoop-common-2.6.0.jar
to /etc/profile, then run source /etc/profile to make it take effect.
3. Run the command:
hadoop jar <jar file name> <class name> <file path>
# compile the Java file to produce the class file
javac URLCat.java
jar cvf URLCat.jar URLCat.class
hadoop jar URLCat.jar URLCat hdfs://192.168.253.131:9000/<file path>
Displaying a file from the Hadoop filesystem on standard output by using the FileSystem API directly
import java.io.InputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class FileSystemCat {
    public static void main(String[] args) throws Exception {
        // take the first command-line argument as the URI
        String uri = args[0];
        // a Configuration object encapsulates the client's or server's configuration
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        // InputStream is the superclass of all byte input streams
        InputStream in = null;
        try {
            // obtain an input stream via FileSystem's open() method
            in = fs.open(new Path(uri));
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
The program is compiled and run in the same way as the previous example.
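For reference, the corresponding commands would look roughly like the following, assuming the CLASSPATH from step 2 above, the same NameNode address, and that a file such as /hosts already exists in HDFS (the path here is only illustrative):

javac FileSystemCat.java
jar cvf FileSystemCat.jar FileSystemCat.class
hadoop jar FileSystemCat.jar FileSystemCat hdfs://192.168.253.131:9000/hosts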
FSDataInputStream is a specialization of java.io.DataInputStream that supports random access, so data can be read from any position in the stream.
Using the seek() method to write a Hadoop file to standard output twice
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class FileSystemDoubleCat {
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        Configuration conf = new Configuration();
        // obtain the FileSystem instance for this URI
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(new Path(uri));
            IOUtils.copyBytes(in, System.out, 4096, false);
            // FSDataInputStream implements the Seekable interface, so we can move
            // to any position in the file; here we go back to the start
            in.seek(0);
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
The program is compiled and run in the same way as the previous examples.
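Besides Seekable, FSDataInputStream also implements the PositionedReadable interface, whose read(position, buffer, offset, length) method reads from an absolute offset without moving the stream's current position. Below is a minimal sketch of that usage; the class name PositionedReadCat and the buffer size are illustrative assumptions, not part of the original example.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class PositionedReadCat {
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(new Path(uri));
            byte[] buffer = new byte[64]; // buffer size chosen only for illustration
            // PositionedReadable read(): read up to buffer.length bytes starting at
            // absolute offset 0, without changing the stream's current position
            int bytesRead = in.read(0L, buffer, 0, buffer.length);
            if (bytesRead > 0) {
                System.out.write(buffer, 0, bytesRead);
                System.out.flush();
            }
        } finally {
            IOUtils.closeStream(in);
        }
    }
}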
Copying a local file to the Hadoop filesystem
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;

public class FileCopyWithProgress {
    public static void main(String[] args) throws Exception {
        String localSrc = args[0]; // path of the local source file
        String dst = args[1];      // destination URI in HDFS
        InputStream in = new BufferedInputStream(new FileInputStream(localSrc));
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(dst), conf);
        // create() returns an output stream for the new file; the Progressable
        // callback prints a dot each time Hadoop reports progress on the write
        OutputStream out = fs.create(new Path(dst), new Progressable() {
            public void progress() {
                System.out.print(".");
            }
        });
        // the final argument 'true' closes both streams when the copy finishes
        IOUtils.copyBytes(in, out, 4096, true);
    }
}
Running steps:
javac FileCopyWithProgress.java
# package both generated class files (the anonymous inner class compiles to FileCopyWithProgress$1.class)
jar cvf FileCopyWithProgress.jar FileCopyWithProgress.class FileCopyWithProgress\$1.class
hadoop jar FileCopyWithProgress.jar FileCopyWithProgress /etc/hosts hdfs://192.168.253.131:9000/hosts
hdfs dfs -cat /hosts
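For comparison, FileSystem also provides the copyFromLocalFile() helper, which performs the same local-to-HDFS copy in a single call without managing the streams yourself (though without the progress callback). Below is a minimal sketch; the class name LocalToHdfsCopy and the command-line arguments are illustrative assumptions, not part of the original example.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LocalToHdfsCopy {
    public static void main(String[] args) throws Exception {
        String localSrc = args[0]; // e.g. /etc/hosts
        String dst = args[1];      // e.g. hdfs://192.168.253.131:9000/hosts
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(dst), conf);
        // copyFromLocalFile(src, dst) copies the local file into HDFS in one call
        fs.copyFromLocalFile(new Path(localSrc), new Path(dst));
        fs.close();
    }
}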