E-commerce Recommendation System


This post walks through the main implementation steps of an item recommendation system for an e-commerce site, built as a chain of Hadoop MapReduce jobs.

1. Deriving user preference values for items

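Step 1 reads the raw behavior log, data/userLog.log. The mapper assumes one comma-separated record per line, in the form userId,itemId,action, and maps each action to a preference weight: paySuccess and addReview count 0.3, createOrder 0.2, addCar 0.15, and anything else (treated as browsing) 0.05. The reducer then sums the weights per user:item pair. As a hypothetical illustration (the real log contents are not shown in this post), the records

11,375,paySuccess
11,375,addCar
5,375,scanDetail

would produce step-1 output lines such as 11:375	0.45 and 5:375	0.05.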

Code implementation

package zb.grms;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;
import java.math.BigDecimal;

public class GoodsStep1 extends Configured implements Tool {
    public static void main(String[] args) {
        try {
            ToolRunner.run(new GoodsStep1(), args);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static class GS1Mapper extends Mapper<LongWritable, Text, Text, DoubleWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] split = value.toString().split(",");
            // convert the user action into a preference weight
            double like = 0.0;
            if (split.length >= 3) {
                String str = split[2].toLowerCase();
                if (str.equals("paysuccess")) { // payment succeeded
                    like = 0.3;
                } else if (str.equals("addreview")) { // review added
                    like = 0.3;
                } else if (str.equals("createorder")) { // order created
                    like = 0.2;
                } else if (str.equals("addcar")) { // added to cart
                    like = 0.15;
                } else { // anything else counts as browsing
                    like = 0.05;
                }
            }
            // key = user:item, value = one preference weight per action
            Text outkey = new Text(split[0] + ":" + split[1]);
            DoubleWritable outvalue = new DoubleWritable(like);
            context.write(outkey, outvalue);
        }
    }

    public static class GS1Reducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {
        @Override
        protected void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
            // sum with BigDecimal to limit floating-point precision loss
            BigDecimal sum = BigDecimal.ZERO;
            for (DoubleWritable value : values) {
                // BigDecimal.valueOf uses the double's canonical String form,
                // which behaves more predictably than new BigDecimal(double)
                sum = sum.add(BigDecimal.valueOf(value.get()));
            }
            DoubleWritable outvalue = new DoubleWritable(sum.doubleValue());
            context.write(key, outvalue);
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf, "step1");
        job.setJarByClass(this.getClass());
        // configure the mapper: class plus map output key/value types
        job.setMapperClass(GoodsStep1.GS1Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DoubleWritable.class);
        // configure the reducer and the job's final output key/value types
        job.setReducerClass(GoodsStep1.GS1Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        // input format and path (the raw behavior log)
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path("data/userLog.log"));
        // output format and path
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("src/main/resources/step1"));
        // run the job and wait for completion
        job.waitForCompletion(true);
        return 0;
    }
}

2. Assembling the preference data into a preference matrix


Code implementation

package zb.grms;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

public class GoodsStep2 extends Configured implements Tool {

    public static class GS2Mapper extends Mapper<Text, Text, Text, Text> {
        @Override
        protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
            // step1 keys look like "user:item"; split them apart
            String uid_gid = key.toString();
            String[] split = uid_gid.split(":");
            // the item id becomes the output key
            Text outkey = new Text(split[1]);
            // the value pairs user id with preference: user:pref
            Text outvalue = new Text(split[0] + ":" + value.toString());
            context.write(outkey, outvalue);
        }
    }

    public static class GS2Reducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // concatenate all user:pref pairs for this item, comma separated
            StringBuilder buffer = new StringBuilder();
            for (Text value : values) {
                buffer.append(value.toString()).append(",");
            }
            buffer.setLength(buffer.length() - 1); // drop the trailing comma
            context.write(key, new Text(buffer.toString()));
        }
    }

    public static void main(String[] args) {
        try {
            ToolRunner.run(new GoodsStep2(), args);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        // step1 output lines are "key<TAB>value"; KeyValueTextInputFormat splits on the first tab
        conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");
        Job job = Job.getInstance(conf, "step2");
        job.setJarByClass(this.getClass());
        // configure the mapper: class plus map output key/value types
        job.setMapperClass(GoodsStep2.GS2Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // configure the reducer and the job's final output key/value types
        job.setReducerClass(GoodsStep2.GS2Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // input format and path (step1 output)
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        KeyValueTextInputFormat.addInputPath(job, new Path("src/main/resources/step1/part-r-00000"));
        // output format and path
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("src/main/resources/step2"));
        // run the job and wait for completion
        job.waitForCompletion(true);
        return 0;
    }
}
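Step 2 groups the step-1 lines by item, yielding one preference-matrix row per item in the form item<TAB>user:pref,user:pref,... For example (this sample row reappears in the GoodsStep6 javadoc below):

375	11:0.25,5:0.25,4:0.55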

3. Counting item co-occurrences


Code implementation

Cartesian product
package zb.grms;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;
import java.util.ArrayList;


public class GoodsStep3 extends Configured implements Tool {
    public static class GS3Mapper extends Mapper<Text, Text, Text, Text> {
        @Override
        protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
            // step1 keys look like "user:item"; re-key the data by user
            String uid_gid = key.toString();
            String[] split = uid_gid.split(":");
            Text outkey = new Text(split[0]);   // user id
            Text outvalue = new Text(split[1]); // item id
            context.write(outkey, outvalue);
        }
    }

    public static class GS3Reducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // collect every item this user interacted with
            ArrayList<String> list = new ArrayList<>();
            for (Text value : values) {
                list.add(value.toString());
            }

            // emit every ordered pair of distinct items (Cartesian product)
            for (String g1 : list) {
                for (String g2 : list) {
                    if (!g1.equals(g2)) {
                        context.write(new Text(g1), new Text(g2));
                    }
                }
            }
        }
    }

    public static void main(String[] args) {
        try {
            ToolRunner.run(new GoodsStep3(), args);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator","\t");
        Job job = Job.getInstance(conf, "step3");
        job.setJarByClass(this.getClass());
        // configure the mapper: class plus map output key/value types
        job.setMapperClass(GS3Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // configure the reducer and the job's final output key/value types
        job.setReducerClass(GS3Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // input format and path (step1 output)
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        KeyValueTextInputFormat.addInputPath(job, new Path("src/main/resources/step1/part-r-00000"));
        // output format and path
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("src/main/resources/step3"));
        // run the job and wait for completion
        job.waitForCompletion(true);
        return 0;
    }
}
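To make the Cartesian product concrete: if one user interacted with items 375, 203 and 961, the reducer emits the six ordered pairs 375→203, 375→961, 203→375, 203→961, 961→375 and 961→203. The next job counts how often each ordered pair occurs across all users.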
Co-occurrence counts
package zb.grms;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

public class GoodsStep4 extends Configured implements Tool {

    public static class GS4Mapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // step3 lines look like "itemA<TAB>itemB"; emit each pair with count 1
            String[] split = value.toString().split("\t");
            String outkey = split[0] + ":" + split[1];
            context.write(new Text(outkey), new IntWritable(1));
        }
    }

    public static class GS4Reducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // total co-occurrence count for this item pair
            int sum = 0;
            for (IntWritable i : values) {
                sum += i.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) {
        try {
            ToolRunner.run(new GoodsStep4(), args);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf, "step4");
        job.setJarByClass(this.getClass());
        // configure the mapper: class plus map output key/value types
        job.setMapperClass(GS4Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // configure the reducer and the job's final output key/value types
        job.setReducerClass(GS4Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // input format and path (step3 output)
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path("src/main/resources/step3/part-r-00000"));
        // output format and path
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("src/main/resources/step4"));
        // run the job and wait for completion
        job.waitForCompletion(true);
        return 0;
    }
}
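Since GS4Reducer only sums counts, the operation is associative and commutative, so the same class could also be registered as a combiner to pre-aggregate pairs on the map side and shrink shuffle traffic. A one-line optional tweak (my suggestion, not part of the original job):

        // optional map-side pre-aggregation before the shuffle
        job.setCombinerClass(GS4Reducer.class);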

4. Building the item co-occurrence matrix


Code implementation

package zb.grms;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

public class GoodsStep5 extends Configured implements Tool {
    public static class GS5Mapper extends Mapper<Text, Text, Text, Text> {
        @Override
        protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
            // step4 keys look like "itemA:itemB"; re-key by the first item
            String goods = key.toString();
            String[] split = goods.split(":");
            Text outkey = new Text(split[0]);
            // value = co-occurring item:count
            Text outvalue = new Text(split[1] + ":" + value.toString());
            context.write(outkey, outvalue);
        }
    }

    public static class GS5Reducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // concatenate all co-occurring item:count pairs, comma separated
            StringBuilder buffer = new StringBuilder();
            for (Text value : values) {
                buffer.append(value.toString()).append(",");
            }
            buffer.setLength(buffer.length() - 1); // drop the trailing comma
            context.write(key, new Text(buffer.toString()));
        }
    }

    public static void main(String[] args) {
        try {
            ToolRunner.run(new GoodsStep5(), args);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf, "step5");
        job.setJarByClass(this.getClass());
        // configure the mapper: class plus map output key/value types
        job.setMapperClass(GS5Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // configure the reducer and the job's final output key/value types
        job.setReducerClass(GS5Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // input format and path (step4 output)
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        KeyValueTextInputFormat.addInputPath(job, new Path("src/main/resources/step4/part-r-00000"));
        // output format and path
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("src/main/resources/step5"));
        // run the job and wait for completion
        job.waitForCompletion(true);
        return 0;
    }
}
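Step 5 output carries one co-occurrence row per item, in the form item<TAB>item:count,item:count,... For example (this sample row reappears in the GoodsStep6 javadoc below):

375	203:1,961:1,91:1,90:2,89:1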

5. Computing recommendation values


Code implementation

package zb.grms;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;
import java.math.BigDecimal;
import java.util.HashMap;
import java.util.Map;

public class GoodsStep6 extends Configured implements Tool {
    public static void main(String[] args) {
        try {
            ToolRunner.run(new GoodsStep6(), args);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Step 2 output:
     * 375	11:0.25,5:0.25,4:0.55
     * item	user:preference,...
     * Step 5 output:
     * 375	203:1,961:1,91:1,90:2,89:1
     * item	item:co-occurrence count,...
     * Output of this job:
     * user:item	partial recommendation value
     */
    public static class GS6Mapper extends Mapper<Text, Text, Text, Text> {
        @Override
        protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
            // both inputs are comma-joined lists; re-emit one element per record
            String[] split = value.toString().split(",");
            for (String str : split) {
                // key = item, value = either "user:pref" or "item:count"
                context.write(key, new Text(str));
            }
        }
    }

    public static class GS6Reducer extends Reducer<Text, Text, Text, DoubleWritable> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // preference map: user -> preference value
            HashMap<String, String> like = new HashMap<>();
            // co-occurrence map: item -> co-occurrence count
            HashMap<String, String> same = new HashMap<>();
            for (Text value : values) {
                String data = value.toString();
                String[] split = data.split(":");
                // preference values are serialized doubles ("0.25") while counts
                // are plain integers ("2"), so a decimal point tells them apart
                if (split[1].contains(".")) {
                    like.put(split[0], split[1]);
                } else {
                    same.put(split[0], split[1]);
                }
            }
            // cross the two maps: partial recommendation = preference × co-occurrence
            for (Map.Entry<String, String> l : like.entrySet()) {
                for (Map.Entry<String, String> s : same.entrySet()) {
                    BigDecimal lvalue = new BigDecimal(l.getValue()); // user preference
                    BigDecimal svalue = new BigDecimal(s.getValue()); // co-occurrence count
                    // key = user:co-occurring item
                    Text outkey = new Text(l.getKey() + ":" + s.getKey());
                    double outvalue = lvalue.multiply(svalue).doubleValue();
                    context.write(outkey, new DoubleWritable(outvalue));
                }
            }
            }
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf, "step6");
        job.setJarByClass(this.getClass());
        // configure the mapper: class plus map output key/value types
        job.setMapperClass(GoodsStep6.GS6Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // configure the reducer and the job's final output key/value types
        job.setReducerClass(GoodsStep6.GS6Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        // input format and paths (step2 and step5 outputs, read together)
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        KeyValueTextInputFormat.setInputPaths(job, new Path("src/main/resources/step2"),
                new Path("src/main/resources/step5"));
        // output format and path
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("src/main/resources/step6"));
        // run the job and wait for completion
        job.waitForCompletion(true);
        return 0;
    }
}
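Using the two sample rows from the javadoc: user 11 has preference 0.25 for item 375, and item 90 co-occurred with item 375 twice, so the reducer emits the partial recommendation 11:90	0.5 (0.25 × 2). Every item a user touched contributes such partials, and step 7 sums them per user:item pair.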

6. Accumulating recommendation values and cleaning the data


Code implementation

Accumulating recommendation values
package zb.grms;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;
import java.math.BigDecimal;

public class GoodsStep7 extends Configured implements Tool {
    public static void main(String[] args) {
        try {
            ToolRunner.run(new GoodsStep7(), args);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static class GS7Mapper extends Mapper<Text, Text, Text, Text> {
        @Override
        protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
            // identity mapper: pass "user:item<TAB>partial value" straight through
            context.write(key, value);
        }
    }

    public static class GS7Reducer extends Reducer<Text, Text, Text, DoubleWritable> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // values are decimal strings, so new BigDecimal(String) is exact
            BigDecimal sum = BigDecimal.ZERO;
            for (Text value : values) {
                BigDecimal v = new BigDecimal(value.toString());
                sum = sum.add(v);
            }
            DoubleWritable outvalue = new DoubleWritable(sum.doubleValue());
            context.write(key, outvalue);
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf, "step7");
        job.setJarByClass(this.getClass());
        // configure the mapper: class plus map output key/value types
        job.setMapperClass(GoodsStep7.GS7Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // configure the reducer and the job's final output key/value types
        job.setReducerClass(GoodsStep7.GS7Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        // input format and path (step6 output)
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        KeyValueTextInputFormat.addInputPath(job, new Path("src/main/resources/step6"));
        // output format and path
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("src/main/resources/step7"));
        // run the job and wait for completion
        job.waitForCompletion(true);
        return 0;
    }
}
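For instance, if step 6 emitted 11:90	0.5 (via item 375) and, hypothetically, 11:90	0.3 via some other item, step 7 merges them into the single line 11:90	0.8.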
Data cleaning
First, collect the user:item pairs that have been paid for successfully (the reducer below keeps pairs with exactly one successful payment)
package zb.grms;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

public class GoodsStep8 extends Configured implements Tool {
    public static void main(String[] args) {
        try {
            ToolRunner.run(new GoodsStep8(), args);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static class GS8Mapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // raw log format: userId,itemId,action
            String[] split = value.toString().split(",");
            boolean paySuccess = split[2].equalsIgnoreCase("paysuccess");
            if (paySuccess) {
                context.write(new Text(split[0] + ":" + split[1]), new IntWritable(1));
            }
        }
    }

    public static class GS8Reducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // count successful payments for this user:item pair
            int num = 0;
            for (IntWritable i : values) {
                num += i.get();
            }
            // keep only pairs with exactly one successful payment
            if (num == 1) context.write(key, new IntWritable(num));
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf, "step8.1");
        job.setJarByClass(this.getClass());
        // configure the mapper: class plus map output key/value types
        job.setMapperClass(GoodsStep8.GS8Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // configure the reducer and the job's final output key/value types
        job.setReducerClass(GoodsStep8.GS8Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // input format and path (the raw behavior log)
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path("data/userLog.log"));
        // output format and path
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("src/main/resources/step8"));
        // run the job and wait for completion
        job.waitForCompletion(true);
        return 0;
    }
}
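This first job outputs one line per pair with exactly one successful payment, in the form user:item	1 (for a hypothetical example, 11:512	1).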
Then remove those purchased pairs from the accumulated recommendations: the step-7 and step-8 outputs are read into a single job, and any user:item key appearing in both inputs is dropped by the reducer
package zb.grms;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;
import java.util.Iterator;

public class GoodsStep8_2 extends Configured implements Tool {
    public static void main(String[] args) {
        try {
            ToolRunner.run(new GoodsStep8_2(), args);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static class GS8_1Mapper extends Mapper<Text, Text, Text, Text> {
        @Override
        protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
            // identity mapper: step7 values are recommendation numbers,
            // step8 values are the purchase marker "1"
            context.write(key, value);
        }
    }

    public static class GS8_1Reducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Keys present in both inputs (recommended in step7 AND purchased in
            // step8) receive two values and are dropped; a key with a single
            // value passes through, so already-bought items are filtered out.
            Iterator<Text> iter = values.iterator();
            Text outvalue = iter.next();
            if (!iter.hasNext()) {
                context.write(key, outvalue);
            }
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf, "step8.2");
        job.setJarByClass(this.getClass());
        // configure the mapper: class plus map output key/value types
        job.setMapperClass(GoodsStep8_2.GS8_1Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // configure the reducer and the job's final output key/value types
        job.setReducerClass(GoodsStep8_2.GS8_1Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // input format and paths (step7 and step8 outputs, read together)
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        KeyValueTextInputFormat.setInputPaths(job, new Path("src/main/resources/step7"),
                new Path("src/main/resources/step8"));
        // output format and path
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("src/main/resources/step8_2"));
        // run the job and wait for completion
        job.waitForCompletion(true);
        return 0;
    }
}
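The cleaned result keeps only recommendations for items the user has not already bought, one line per pair in the final form user:item	recommendation value, e.g. 11:512	0.25 (the sample line shown in the GoodsStep9 comments below).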

7. Writing the results to the database

package zb.grms;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class GoodsStep9 {
    public static void main(String[] args) {
        try {
            toDB();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static void toDB() throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Class.forName("com.mysql.cj.jdbc.Driver");
        Connection conn = DriverManager.getConnection("jdbc:mysql://127.0.0.1:3306/tmall?serverTimezone=Asia/Shanghai", "briup", "briup");
        Statement statement = conn.createStatement();
        FSDataInputStream open = fs.open(new Path("src/main/resources/step8_2/part-r-00000"));
        BufferedReader br = new BufferedReader(new InputStreamReader(open));
        String line;
        while ((line = br.readLine()) != null) {
            // each line: 11:512	0.25  (user:item<TAB>recommendation value)
            String[] str = line.split("\t");
            String[] uid_gid = str[0].split(":");
            // drop any previous recommendation for this user/item, then insert the new one
            String sql = "delete from recommend where customerId = '" + uid_gid[0] + "' and bookId = '" + uid_gid[1] + "'";
            String sql2 = "insert into recommend(customerId, bookId, recommendNum) values ('"
                    + uid_gid[0] + "','" + uid_gid[1] + "'," + str[1] + ")";
            statement.addBatch(sql);
            statement.addBatch(sql2);
            statement.executeBatch();
        }
        br.close();
        statement.close();
        conn.close();
    }
}
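Concatenating values into SQL strings is open to injection and re-parses the statements on every row. Below is a minimal alternative sketch of the same loop using java.sql.PreparedStatement (assuming the same recommend table schema; this is not part of the original code):

import java.sql.PreparedStatement;

// prepare once, bind parameters per row
PreparedStatement del = conn.prepareStatement(
        "delete from recommend where customerId = ? and bookId = ?");
PreparedStatement ins = conn.prepareStatement(
        "insert into recommend(customerId, bookId, recommendNum) values (?, ?, ?)");
String line;
while ((line = br.readLine()) != null) {
    String[] str = line.split("\t");
    String[] uid_gid = str[0].split(":");
    del.setString(1, uid_gid[0]);   // customerId
    del.setString(2, uid_gid[1]);   // bookId
    del.executeUpdate();
    ins.setString(1, uid_gid[0]);
    ins.setString(2, uid_gid[1]);
    ins.setDouble(3, Double.parseDouble(str[1])); // recommendNum
    ins.executeUpdate();
}
del.close();
ins.close();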

8. Workflow: chaining all the jobs

package zb.grms;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class GoodsMain extends Configured implements Tool {
    public static void main(String[] args) {
        try {
            ToolRunner.run(new GoodsMain(), args);
            GoodsStep9.toDB();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        // input: the raw user behavior log
        Path path = new Path("data/userLog.log");
        Path outpath =  new Path("src/main/resources/step1");
        Path outpath2 = new Path("src/main/resources/step2");
        Path outpath3 = new Path("src/main/resources/step3");
        Path outpath4 = new Path("src/main/resources/step4");
        Path outpath5 = new Path("src/main/resources/step5");
        Path outpath6 = new Path("src/main/resources/step6");
        Path outpath7 = new Path("src/main/resources/step7");
        Path outpath8 = new Path("src/main/resources/step8");
        Path outpath9 = new Path("src/main/resources/step8_2");
        // configure a Job for every MapReduce step
        //step1
        Job job = Job.getInstance(conf);
        job.setJarByClass(this.getClass());
        job.setMapperClass(GoodsStep1.GS1Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DoubleWritable.class);
        job.setReducerClass(GoodsStep1.GS1Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job,path);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job,outpath);
        //step8
        Job job8 = Job.getInstance(conf);
        job8.setJarByClass(this.getClass());
        job8.setMapperClass(GoodsStep8.GS8Mapper.class);
        job8.setMapOutputKeyClass(Text.class);
        job8.setMapOutputValueClass(IntWritable.class);
        job8.setReducerClass(GoodsStep8.GS8Reducer.class);
        job8.setOutputKeyClass(Text.class);
        job8.setOutputValueClass(IntWritable.class);
        job8.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job8,path);
        job8.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job8,outpath8);
        //step2
        Job job2 = Job.getInstance(conf);
        job2.setJarByClass(this.getClass());
        job2.setMapperClass(GoodsStep2.GS2Mapper.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setReducerClass(GoodsStep2.GS2Reducer.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        job2.setInputFormatClass(KeyValueTextInputFormat.class);
        KeyValueTextInputFormat.addInputPath(job2,outpath);
        job2.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job2,outpath2);
        //step3
        Job job3 = Job.getInstance(conf);
        job3.setJarByClass(this.getClass());
        job3.setMapperClass(GoodsStep3.GS3Mapper.class);
        job3.setMapOutputKeyClass(Text.class);
        job3.setMapOutputValueClass(Text.class);
        job3.setReducerClass(GoodsStep3.GS3Reducer.class);
        job3.setOutputKeyClass(Text.class);
        job3.setOutputValueClass(Text.class);
        job3.setInputFormatClass(KeyValueTextInputFormat.class);
        KeyValueTextInputFormat.addInputPath(job3,outpath);
        job3.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job3,outpath3);
        //step4
        Job job4 = Job.getInstance(conf);
        job4.setJarByClass(this.getClass());
        job4.setMapperClass(GoodsStep4.GS4Mapper.class);
        job4.setMapOutputKeyClass(Text.class);
        job4.setMapOutputValueClass(IntWritable.class);
        job4.setReducerClass(GoodsStep4.GS4Reducer.class);
        job4.setOutputKeyClass(Text.class);
        job4.setOutputValueClass(IntWritable.class);
        job4.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job4,outpath3);
        job4.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job4,outpath4);
        //step5
        Job job5 = Job.getInstance(conf);
        job5.setJarByClass(this.getClass());
        job5.setMapperClass(GoodsStep5.GS5Mapper.class);
        job5.setMapOutputKeyClass(Text.class);
        job5.setMapOutputValueClass(Text.class);
        job5.setReducerClass(GoodsStep5.GS5Reducer.class);
        job5.setOutputKeyClass(Text.class);
        job5.setOutputValueClass(Text.class);
        job5.setInputFormatClass(KeyValueTextInputFormat.class);
        KeyValueTextInputFormat.addInputPath(job5,outpath4);
        job5.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job5,outpath5);
        //step6
        Job job6 = Job.getInstance(conf);
        job6.setJarByClass(this.getClass());
        job6.setMapperClass(GoodsStep6.GS6Mapper.class);
        job6.setMapOutputKeyClass(Text.class);
        job6.setMapOutputValueClass(Text.class);
        job6.setReducerClass(GoodsStep6.GS6Reducer.class);
        job6.setOutputKeyClass(Text.class);
        job6.setOutputValueClass(DoubleWritable.class);
        job6.setInputFormatClass(KeyValueTextInputFormat.class);
        KeyValueTextInputFormat.setInputPaths(job6,outpath2,outpath5);
        job6.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job6,outpath6);
        //step7
        Job job7 = Job.getInstance(conf);
        job7.setJarByClass(this.getClass());
        job7.setMapperClass(GoodsStep7.GS7Mapper.class);
        job7.setMapOutputKeyClass(Text.class);
        job7.setMapOutputValueClass(Text.class);
        job7.setReducerClass(GoodsStep7.GS7Reducer.class);
        job7.setOutputKeyClass(Text.class);
        job7.setOutputValueClass(DoubleWritable.class);
        job7.setInputFormatClass(KeyValueTextInputFormat.class);
        KeyValueTextInputFormat.setInputPaths(job7,outpath6);
        job7.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job7,outpath7);
        //step9
        Job job9 = Job.getInstance(conf);
        job9.setJarByClass(this.getClass());
        job9.setMapperClass(GoodsStep8_2.GS8_1Mapper.class);
        job9.setMapOutputKeyClass(Text.class);
        job9.setMapOutputValueClass(Text.class);
        job9.setReducerClass(GoodsStep8_2.GS8_1Reducer.class);
        job9.setOutputKeyClass(Text.class);
        job9.setOutputValueClass(Text.class);
        job9.setInputFormatClass(KeyValueTextInputFormat.class);
        KeyValueTextInputFormat.setInputPaths(job9,outpath7,outpath8);
        job9.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job9,outpath9);

        // wrap each configured Job in a ControlledJob
        ControlledJob cj = new ControlledJob(conf);
        cj.setJob(job);
        ControlledJob cj2 = new ControlledJob(conf);
        cj2.setJob(job2);
        ControlledJob cj3 = new ControlledJob(conf);
        cj3.setJob(job3);
        ControlledJob cj4 = new ControlledJob(conf);
        cj4.setJob(job4);
        ControlledJob cj5 = new ControlledJob(conf);
        cj5.setJob(job5);
        ControlledJob cj6 = new ControlledJob(conf);
        cj6.setJob(job6);
        ControlledJob cj7 = new ControlledJob(conf);
        cj7.setJob(job7);
        ControlledJob cj8 = new ControlledJob(conf);
        cj8.setJob(job8);
        ControlledJob cj9 = new ControlledJob(conf);
        cj9.setJob(job9);
        // declare the dependencies between jobs
        cj2.addDependingJob(cj);
        cj3.addDependingJob(cj);
        cj4.addDependingJob(cj3);
        cj5.addDependingJob(cj4);
        cj6.addDependingJob(cj2);
        cj6.addDependingJob(cj5);
        cj7.addDependingJob(cj6);
        cj9.addDependingJob(cj7);
        cj9.addDependingJob(cj8);
        // create the workflow controller and register every job
        JobControl jobs = new JobControl("work_flow");
        jobs.addJob(cj);
        jobs.addJob(cj2);
        jobs.addJob(cj3);
        jobs.addJob(cj4);
        jobs.addJob(cj5);
        jobs.addJob(cj6);
        jobs.addJob(cj7);
        jobs.addJob(cj8);
        jobs.addJob(cj9);
        // start the controller thread; it runs all MR jobs respecting the dependencies
        Thread t = new Thread(jobs);
        t.start();
        while (true) {
            if (jobs.allFinished()) {
                System.out.println("All jobs finished");
                System.out.println(jobs.getSuccessfulJobList());
                jobs.stop();
                return 0;
            } else if (jobs.getFailedJobList().size() > 0) {
                System.out.println("Some jobs failed");
                System.out.println(jobs.getFailedJobList());
                jobs.stop();
                return -1;
            }
            Thread.sleep(500); // poll instead of busy-spinning
        }
    }
}

Summary

The pipeline above turns a raw behavior log into per-user item recommendations: derive preference values from user actions, reshape them into a preference matrix, count and assemble item co-occurrences, multiply preference by co-occurrence into partial recommendation values, accumulate the partials, filter out items the user has already paid for, and load the cleaned user:item recommendation values into MySQL, with a JobControl workflow chaining all nine MapReduce jobs together.
