The software and files involved are as follows:
1. IntelliJ IDEA with Maven.
2. hadoop.dll, winutils.exe and winutils.pdb built for 64-bit Windows (important). These three files target Hadoop 2.7 and also work with 2.6; see the short sketch right after this list for wiring them up. Download link: click to open the link.
3. Disable HDFS permission checking.
Add the following to hdfs-site.xml under etc/hadoop (remember to stop the cluster before editing, then restart it):
<property>
    <name>dfs.permissions</name>
    <value>false</value>
</property>
This turns permission checking off. Never configure a production cluster this way!
4. Start debugging.
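Before running anything below, the native binaries from step 2 have to be discoverable, otherwise HDFS/MapReduce calls on Windows typically fail with a "could not locate winutils.exe" style error. A minimal sketch, assuming the three files were dropped into the bin folder of D:\KDR\hadoop-2.6.0-cdh5.7.0 (the same path the driver below uses); place these lines at the very start of main() or of the test setUp():

// Point hadoop.home.dir at the directory whose bin\ folder holds winutils.exe and hadoop.dll.
// The path is only an example; adjust it to wherever you unpacked the files.
System.setProperty("hadoop.home.dir", "D:\\KDR\\hadoop-2.6.0-cdh5.7.0");
// Optional sanity check, so a wrong path fails fast instead of with an obscure native error.
java.io.File winutils = new java.io.File(System.getProperty("hadoop.home.dir"), "bin\\winutils.exe");
if (!winutils.exists()) {
    throw new IllegalStateException("winutils.exe not found at " + winutils.getAbsolutePath());
}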
The Maven pom.xml is as follows:
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.baidu.hadoop</groupId>
    <artifactId>hadoop-train</artifactId>
    <version>1.0</version>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <hadoop.version>2.6.0-cdh5.7.0</hadoop.version>
    </properties>
    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.10</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>
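Note that the CDH-flavoured version 2.6.0-cdh5.7.0 is published in the Cloudera repository declared above rather than in Maven Central, which is why the extra repository entry is needed.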
Debugging template (the WordCount driver):
package hadoop.mapreduce;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * WordCount developed with MapReduce.
 * Created by hello on 2018-05-15.
 */
public class WordCountApp {

    /**
     * Map: reads the input file line by line.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        LongWritable one = new LongWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Receive one line of data
            String line = value.toString();
            // Split it on the chosen separator
            String[] words = line.split("\t");
            for (String s : words) {
                // Emit the map output through the context
                context.write(new Text(s), one);
            }
        }
    }

    /**
     * Reducer: merges the map output.
     */
    public static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable value : values) {
                sum += value.get();
            }
            // Emit the final count
            context.write(key, new LongWritable(sum));
        }
    }

    /**
     * Driver: wires together all the information of the MR job.
     */
    public static void main(String[] args) throws Exception {
        String HDFS_PATH = "hdfs://192.168.91.127:8020";
        // Create the configuration
        Configuration configuration = new Configuration();
        // Debug Hadoop remotely from Windows: hadoop.home.dir must contain bin\winutils.exe
        System.setProperty("hadoop.home.dir", "D:\\KDR\\hadoop-2.6.0-cdh5.7.0");
        configuration.set("fs.defaultFS", HDFS_PATH);
        // The ResourceManager setting takes a hostname, not an hdfs:// URL
        configuration.set("yarn.resourcemanager.hostname", "192.168.91.127");
        // Create the job
        Job job = Job.getInstance(configuration, "wordcount");
        // Set the job's main class
        job.setJarByClass(WordCountApp.class);
        // Set the job's input path
        FileInputFormat.setInputPaths(job, new Path("/test/a.txt"));
        // Set the map-side parameters
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        // Set the reduce-side parameters
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // Set the job's output path
        FileOutputFormat.setOutputPath(job, new Path("/out/"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
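One practical note for re-running the driver: the job fails if the output path already exists. A minimal sketch of clearing it first, to be placed in main() just before FileOutputFormat.setOutputPath (it reuses the configuration from above and needs an extra import of org.apache.hadoop.fs.FileSystem):

// Remove the previous output directory, if any, so the job can be re-run.
// delete(..., true) wipes the whole /out/ directory recursively; adjust the path to your own.
FileSystem outFs = FileSystem.get(configuration);
Path outputPath = new Path("/out/");
if (outFs.exists(outputPath)) {
    outFs.delete(outputPath, true);
}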
JUnit unit tests for the HDFS API:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.net.URI;
/**
 * HDFS Java API examples.
 * Created by hello on 2018-05-11.
 */
public class HDFSApp {

    public static final String HDFS_PATH = "hdfs://192.168.91.127:8020";

    FileSystem fileSystem = null;
    Configuration configuration = null;

    /**
     * Create a directory
     */
    @Test
    public void mkdir() throws Exception {
        fileSystem.mkdirs(new Path("/hdfsapi/test"));
    }

    /**
     * Create a file
     * @throws Exception
     */
    @Test
    public void create() throws Exception {
        FSDataOutputStream fs = fileSystem.create(new Path("/hdfsapi/test/a.txt"));
        fs.write("hello hadoop".getBytes());
        fs.flush();
        fs.close();
    }

    /**
     * View the contents of a file
     */
    @Test
    public void cat() throws Exception {
        FSDataInputStream in = fileSystem.open(new Path("/hdfsapi/test/b.txt"));
        IOUtils.copyBytes(in, System.out, 1024);
        in.close();
    }

    /**
     * Rename a file
     * @throws Exception
     */
    @Test
    public void rename() throws Exception {
        fileSystem.rename(new Path("/hdfsapi/test/a.txt"), new Path("/hdfsapi/test/b.txt"));
    }

    /**
     * List file status under a path
     * @throws Exception
     */
    @Test
    public void category() throws Exception {
        FileStatus[] fs = fileSystem.listStatus(new Path("/hdfsapi/test/dalei.json"));
        for (FileStatus fileStatus : fs) {
            String isDir = fileStatus.isDirectory() ? "directory" : "file";
            short replication = fileStatus.getReplication();
            long len = fileStatus.getLen();
            Path path = fileStatus.getPath();
            System.out.println(isDir + "\t" + replication + "\t" + len + "\t" + path);
        }
    }

    /**
     * Upload a file to HDFS
     * @throws Exception
     */
    @Test
    public void upload() throws Exception {
        Path localPath = new Path("E:\\train.json");
        Path hdfsPath = new Path("/hdfsapi/test/");
        fileSystem.copyFromLocalFile(localPath, hdfsPath);
    }

    /**
     * Upload with a progress indicator
     * @throws Exception
     */
    @Test
    public void uploadWithProgress() throws Exception {
        Progressable progress = new Progressable() {
            public void progress() {
                System.out.print(">");
            }
        };
        InputStream in = new BufferedInputStream(new FileInputStream(new File("E:\\train.json")));
        FSDataOutputStream output = fileSystem.create(new Path("/hdfsapi/test/dalei.json"), progress);
        // the final argument closes both streams once the copy finishes
        IOUtils.copyBytes(in, output, 4096, true);
    }

    /**
     * Download a file from HDFS
     * @throws Exception
     */
    @Test
    public void download() throws Exception {
        Path localPath = new Path("D:\\hi.txt");
        Path hdfsFile = new Path("/hdfsapi/test/dalei.json");
        fileSystem.copyToLocalFile(hdfsFile, localPath);
    }

    @Before
    public void setUp() throws Exception {
        System.out.println("hadoop setUp");
        configuration = new Configuration();
        fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "root");
    }

    @After
    public void tearDown() throws Exception {
        configuration = null;
        fileSystem = null;
        System.out.println("hadoop tearDown");
    }
}
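A matching cleanup test can be handy when re-running the suite. A minimal sketch of a method that could be added to HDFSApp, using the same fileSystem field (recursive deletion removes the whole /hdfsapi/test directory, so use it deliberately):

/**
 * Delete the test directory recursively (sketch; the path is the one used above)
 */
@Test
public void delete() throws Exception {
    // the boolean flag enables recursive deletion
    fileSystem.delete(new Path("/hdfsapi/test"), true);
}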