一、使用Hadoop URL读取数据
package hadoop;

import java.io.InputStream;
import java.net.URL;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;

/**
 * Prints the content found at a URL to standard output, going through
 * {@link java.net.URL} with Hadoop's stream handler factory installed.
 */
public class URLCat {

    /** Size, in bytes, of the buffer used while copying. */
    private static final int BUFFER_SIZE = 4096;

    static {
        // Install Hadoop's handler factory so that URL can open the schemes it provides.
        // java.net.URL allows this factory to be set at most once per JVM.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    /**
     * Opens the given URL and copies everything it yields to {@code System.out}.
     *
     * @param url location to read, e.g. an {@code hdfs://} URL
     * @throws Exception if the URL is malformed or the stream cannot be opened or read
     */
    public static void readHdfs(String url) throws Exception {
        InputStream source = null;
        try {
            URL location = new URL(url);
            source = location.openStream();
            // close=false: System.out must stay open; the input is released in finally.
            IOUtils.copyBytes(source, System.out, BUFFER_SIZE, false);
        } finally {
            IOUtils.closeStream(source);
        }
    }

    /**
     * Runs {@link #readHdfs(String)} against the hard-coded sample URL.
     *
     * @param args ignored
     * @throws Exception if reading fails
     */
    public static void main(String[] args) throws Exception {
        readHdfs("hdfs://");
    }
}
12/09/11 14:18:59 INFO security.UserGroupInformation: JAAS Configuration already set up for Hadoop, not re-installing.
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/hadoop/thirdparty/guava/common/collect/LinkedListMultimap
at org.apache.hadoop.hdfs.SocketCache.
at org.apache.hadoop.hdfs.DFSClient.
运行main方法,输出:hello world bye world 和hdfs中存储的文件信息是保持一致的:
二、使用FileSystem API 读取数据
package hadoop;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

/**
 * Prints a file to standard output by opening it through the Hadoop
 * {@link FileSystem} API.
 */
public class FileSystemCat {

    /** Size, in bytes, of the buffer used while copying. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Resolves the file system for {@code url}, opens the file at that location and
     * copies its content to {@code System.out}.
     *
     * @param url URI of the file to read, e.g. an {@code hdfs://} location
     * @throws IOException if the file system cannot be obtained or the file cannot be read
     */
    public static void readHdfs(String url) throws IOException {
        Configuration configuration = new Configuration();
        URI location = URI.create(url);
        FileSystem fileSystem = FileSystem.get(location, configuration);

        InputStream source = null;
        try {
            Path file = new Path(url);
            source = fileSystem.open(file);
            // close=false: System.out must stay open; the input is released in finally.
            IOUtils.copyBytes(source, System.out, BUFFER_SIZE, false);
        } finally {
            IOUtils.closeStream(source);
        }
    }

    /**
     * Runs {@link #readHdfs(String)} against the hard-coded sample location.
     *
     * @param args ignored
     * @throws IOException if reading fails
     */
    public static void main(String[] args) throws IOException {
        readHdfs("hdfs://");
    }
}
bye 2
hadoop 2
hello 2
world 2
3.1 写数据 public boolean mkdirs(Path f) throws IOException 会按照客户端请求创建未存在的父目录
package hadoop; import java.io.BufferedInputStream; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.URI; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.Progressable; public class FileCopyWithProgress { public static void fileCopy(String localFile, String hdfsFile) throws IOException{ InputStream in = new BufferedInputStream(new FileInputStream(localFile)); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(hdfsFile),conf); OutputStream out = fs.create(new Path(hdfsFile),new Progressable(){ public void progress(){ System.out.println("*"); } }); IOUtils.copyBytes(in, out, 4096,true); } public static void main(String[] args) throws IOException { fileCopy("D://heat2.txt", "hdfs://"); } }
Exception in thread "main" org.apache.hadoop.security.AccessControlException: org.apache.hadoop.security.AccessControlException: Permission denied: user=libininfo, access=WRITE, inode="/user/hadoopuser":hadoopuser:supergroup:drwxr-xr-x
解决方法:在hdfs-site.xml 中取消权限校验,即加入以下配置:
到服务器上修改hadoop的配置文件:conf/hdfs-site.xml, 找到 dfs.permissions 的配置项 , 将value值改为 false
Exception in thread "main" org.apache.hadoop.ipc.RemoteException: org.apache.hadoop.hdfs.server.namenode.SafeModeException: Cannot create file/user/hadoopuser/output20120911. Name node is in safe mode.
The reported blocks 6 has reached the threshold 0.9990 of total blocks 6. Safe mode will be turned off automatically in 5 seconds.
现在就清楚了,那现在要解决这个问题,我想让Hadoop不处在safe mode 模式下,能不能不用等,直接解决呢?
bin/hadoop dfsadmin -safemode leave
3.2 文件系统查询 列出目录文件信息
package hadoop;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

/**
 * Lists the entries found at a Hadoop file system location and prints their paths.
 */
public class ListStatus {

    /**
     * Queries the status of {@code url} and prints the path of every returned entry to
     * {@code System.out}, one per line.
     *
     * @param url URI of the directory (or file) to list, e.g. an {@code hdfs://} location
     * @throws IOException if the file system cannot be obtained or the listing fails
     */
    public static void readStatus(String url) throws IOException {
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(URI.create(url), configuration);

        // Single-element array: the Path[] overload of listStatus is used.
        Path[] targets = { new Path(url) };
        FileStatus[] entries = fileSystem.listStatus(targets);

        for (Path entry : FileUtil.stat2Paths(entries)) {
            System.out.println(entry);
        }
    }

    /**
     * Runs {@link #readStatus(String)} against the hard-coded sample location.
     *
     * @param args ignored
     * @throws IOException if the listing fails
     */
    public static void main(String[] args) throws IOException {
        readStatus("hdfs://");
    }
}