Hadoop与MySQL进行交互实现

准备工作:

1、安装和配置好 MySQL

2、创建数据表,并写入测试数据;以及创建结果表,以便MR写入数据

注意:

    1、与 mysql 交互时,Mapper的输入类型为 (LongWritable,DBWritable);Reducer的输出类型为 (DBWritable,NullWritable)

    2、如果提交到集群上运行,需要注意 2 点:

          a> mysql 的 url 地址要修改为 mysql所在主机的 IP

          b> 因为打 jar 包时,不会将mysql的驱动一并打入jar包;

               所以,要将 mysql 驱动 jar包 拷贝到各个节点的 $HADOOP_HOME/share/hadoop/common/lib 目录下

做好以上准备工作,就可以开始写代码:

WCApp.java

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;

/**
 * Driver for a word-count job that reads its input rows from the MySQL table
 * {@code words} and writes the per-word totals into the MySQL table
 * {@code wc_result}.
 *
 * <p>The process exit status reflects the job outcome: 0 on success, 1 on failure.
 */
public class WCApp {
    /**
     * Configures and submits the job, then blocks until it finishes.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job cannot be configured, submitted or monitored
     */
    public static void main(String[] args) throws Exception {
        // Only supply the local development default when the caller has not
        // already provided hadoop.home.dir (e.g. via -Dhadoop.home.dir=...).
        if (System.getProperty("hadoop.home.dir") == null) {
            System.setProperty("hadoop.home.dir", "H:\\hadoop-2.4.1");
        }
        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf);
        job.setJarByClass(WCApp.class);
        job.setJobName("WCDBApp");
        job.setNumReduceTasks(3);

        job.setMapperClass(WCDBMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(WCReducer.class);
        job.setOutputKeyClass(WordRSDBWritable.class);
        job.setOutputValueClass(NullWritable.class);

        // Both the input and the output of this job are database tables.
        job.setInputFormatClass(DBInputFormat.class);
        job.setOutputFormatClass(DBOutputFormat.class);

        // Database connection settings.
        String driverClass = "com.mysql.jdbc.Driver";
        // NOTE: when submitting to a cluster, replace localhost with the IP
        // address of the host that runs MySQL.
        String dbUrl = "jdbc:mysql://localhost:3306/bigdata";
        String userName = "root";
        String passwd = "root";
        DBConfiguration.configureDB(job.getConfiguration(), driverClass, dbUrl, userName, passwd);

        // Input: the row query plus the query that returns the total row count.
        // NOTE(review): the query has no ORDER BY; if the input is ever read in
        // more than one split, consider adding one so the splits are stable - confirm.
        DBInputFormat.setInput(job, WordsDBWritable.class, "select * from words", "select count(*) from words");

        // Output: target table followed by the columns to populate.
        DBOutputFormat.setOutput(job, "wc_result", "id", "word", "count");

        // Propagate the job result to the caller instead of always exiting with 0.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
WCDBMapper.java
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Splits the {@code txt} field of every input record into words and emits
 * {@code (word, 1)} for each of them.
 *
 * <p>Input key/value: {@code LongWritable} / {@link WordsDBWritable} (one
 * database row). Output key/value: {@code Text} / {@code IntWritable}.
 */
public class WCDBMapper extends Mapper<LongWritable, WordsDBWritable, Text, IntWritable> {

    /** Constant count emitted for every word occurrence. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Reused output key; avoids allocating one {@code Text} per token. */
    private final Text word = new Text();

    /**
     * Emits one {@code (word, 1)} pair per non-empty token of the record's text.
     *
     * @param key     record key (per the original author's note, it starts at 0)
     * @param value   one row of the input table
     * @param context sink for the emitted pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, WordsDBWritable value, Context context) throws IOException, InterruptedException {
        String txt = value.getTxt();
        // A NULL text column contributes no words.
        if (txt == null) {
            return;
        }
        for (String token : txt.split(" ")) {
            // Consecutive or leading spaces produce empty tokens; they are not words.
            if (token.isEmpty()) {
                continue;
            }
            word.set(token);
            context.write(word, ONE);
        }
    }
}

WCReducer.java

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Sums the counts of each word and emits the total as a database record.
 *
 * <p>Note: when the result is written to SQL, the reducer's output key and
 * value types are a {@code DBWritable} and {@code NullWritable} respectively.
 */
public class WCReducer extends Reducer<Text, IntWritable, WordRSDBWritable, NullWritable> {

    /**
     * Adds up all counts received for {@code key} and writes one
     * {@link WordRSDBWritable} carrying the word and its total.
     *
     * @param key     the word
     * @param values  the partial counts collected for that word
     * @param context sink for the output record
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable value : values) {
            total += value.get();
        }
        WordRSDBWritable result = new WordRSDBWritable();
        result.setName(key.toString());
        result.setCount(total);
        context.write(result, NullWritable.get());
    }
}
WordsDBWritable.java
/**
 * Record type used to exchange rows with the database.
 * Corresponding SQL table: {@code words}.
 *
 * <p>The JDBC methods address columns by position: 1 = id, 2 = name, 3 = txt.
 */
public class WordsDBWritable implements Writable,DBWritable {

    private int id;
    private String name;
    private String txt;

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getTxt() {
        return txt;
    }

    public void setTxt(String txt) {
        this.txt = txt;
    }

    /**
     * Serializes the record as {@code int id, UTF name, UTF txt}.
     *
     * <p>{@code writeUTF} rejects {@code null}, so a field that is {@code null}
     * (for example after reading a NULL column) is written as the empty string.
     */
    public void write(DataOutput out) throws IOException {
        out.writeInt(id);
        out.writeUTF(name == null ? "" : name);
        out.writeUTF(txt == null ? "" : txt);
    }

    /** Restores the record from the layout produced by {@link #write(DataOutput)}. */
    public void readFields(DataInput in) throws IOException {
        id = in.readInt();
        name = in.readUTF();
        txt = in.readUTF();
    }

    /** Writes to the database: binds id, name and txt to parameters 1..3. */
    public void write(PreparedStatement statement) throws SQLException {
        statement.setInt(1,id);
        statement.setString(2,name);
        statement.setString(3,txt);
    }

    /** Reads from the database: loads id, name and txt from columns 1..3. */
    public void readFields(ResultSet resultSet) throws SQLException {
        id = resultSet.getInt(1);
        name = resultSet.getString(2);
        txt = resultSet.getString(3);
    }
}
WordRSDBWritable.java
/**
 * Record type used to exchange rows with the database.
 * Corresponding SQL table: {@code wc_result}.
 *
 * <p>The JDBC methods address columns by position: 1 = id, 2 = word, 3 = count.
 */
public class WordRSDBWritable implements Writable,DBWritable {

    private int id;
    private String word;
    private int count;

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    /** Returns the word (stored in the {@code word} field). */
    public String getName() {
        return word;
    }

    /** Sets the word (stored in the {@code word} field). */
    public void setName(String name) {
        this.word = name;
    }

    public int getCount() {
        return count;
    }

    public void setCount(int count) {
        this.count = count;
    }

    /**
     * Serializes the record as {@code int id, UTF word, int count}.
     *
     * <p>{@code writeUTF} rejects {@code null}, so a {@code null} word (for
     * example after reading a NULL column) is written as the empty string.
     */
    public void write(DataOutput out) throws IOException {
        out.writeInt(id);
        out.writeUTF(word == null ? "" : word);
        out.writeInt(count);
    }

    /** Restores the record from the layout produced by {@link #write(DataOutput)}. */
    public void readFields(DataInput in) throws IOException {
        id = in.readInt();
        word = in.readUTF();
        count = in.readInt();
    }

    /** Writes to the database: binds id, word and count to parameters 1..3. */
    public void write(PreparedStatement statement) throws SQLException {
        statement.setInt(1,id);
        statement.setString(2, word);
        statement.setInt(3, count);
    }

    /** Reads from the database: loads id, word and count from columns 1..3. */
    public void readFields(ResultSet resultSet) throws SQLException {
        id = resultSet.getInt(1);
        word = resultSet.getString(2);
        count = resultSet.getInt(3);
    }
}

注意:因为要用mysql的驱动,所以要在pom.xml文件中加入 mysql的引用。


    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.17</version>
    </dependency>

	

 

你可能感兴趣的:(Hadoop)