标签生成

目的：用 Java（基于 Spark）实现团购网站的评论标签生成。

最终结果（每行格式为“商家ID=============>标签:出现次数,…”，标签按出现次数降序排列，每个商家最多输出前10个标签）：

83644298=============>体验好:1
82317795=============>味道差:1
77705462=============>服务热情:3,羊肉:2,味道赞:1
85766086=============>价格实惠:2,上菜慢:1
74145782=============>服务热情:18,味道赞:14,上菜快:13,菜品不错:12,回头客:11,性价比高:6,停车方便:5,体验好:4,不推荐:3,服务差:2
71039150=============>团建:1
70611801=============>干净卫生:4,回头客:3,味道赞:2,肉类好:1
73963176=============>味道赞:15,价格实惠:12,分量足:11,菜品不错:10,肉类好:7,环境优雅:6,回头客:4,性价比高:3,味道一般:1
84270191=============>价格实惠:2,干净卫生:1
89223651=============>环境优雅:8,技师专业:7,干净卫生:5,服务一般:4,无办卡:3,环境一般:2
82016443=============>分量足:3,味道赞:2,服务热情:1
77287793=============>干净卫生:29,环境优雅:26,交通便利:25,性价比高:19,服务热情:18,高大上:16,停车方便:13,音响效果差:1
79197522=============>服务热情:2,价格实惠:1
83084036=============>干净卫生:1
73879078=============>饮品赞:3,回头客:2,分量足:1
88284865=============>价格实惠:1
83073343=============>干净卫生:17,味道赞:16,环境优雅:15,菜品不错:11,肉类好:9,性价比高:8,体验好:7,回头客:6,价格实惠:4,上菜慢:1
76114040=============>性价比高:1
86913510=============>午餐:1
88496862=============>回头客:5,味道赞:4,分量足:3,性价比高:2,高大上:1
78477325=============>味道赞:8,回头客:7,干净卫生:5,味道一般:4,菜品不错:3,环境优雅:2,肉类好:1
83981222=============>性价比高:4,干净卫生:3,服务热情:2
82705919=============>回头客:3,饮品赞:2,性价比高:1
87994574=============>无推销:12,价格实惠:8,服务热情:7,效果赞:5,环境优雅:4,技师专业:3,没有异味:2,效果差:1
77373671=============>菜品差:1
75144086=============>服务热情:38,效果赞:30,无办卡:22,性价比高:21,无推销:19,价格实惠:18,干净卫生:13,体验好:12,韩系风格:10,美发师手艺好:3
85648235=============>味道赞:17,服务热情:15,干净卫生:13,上菜快:12,回头客:11,性价比高:10,体验好:9,价格实惠:8,分量足:7,情侣约会:1
73607905=============>菜品不错:16,回头客:15,服务热情:14,分量足:13,肉类好:11,环境优雅:7,体验好:5,体验差:2,价格实惠:1
76893145=============>服务热情:10,环境优雅:7,高大上:5,回头客:4,温馨浪漫:3,味道一般:2,饮品赞:1
78824187=============>价格实惠:13,回头客:11,分量足:10,环境优雅:8,干净卫生:7,上菜快:6,主食赞:5,味道赞:4,服务差:1

代码:


import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

/**
 * java版实现团购网站标签生成程序
 */
public class ReviewTagsJava {
    public static void main(String [] args){
        SparkConf conf = new SparkConf();
        conf.setMaster("local[4]");
        conf.setAppName("ReviewTagsJava");

        JavaSparkContext jsc = new JavaSparkContext(conf);
        JavaRDD rdd1 = jsc.textFile("file:///d:/scala/taggen/temptags.txt");

        //以\t切割成String数组
        JavaRDD rdd2 = rdd1.map(new Function() {
            public String[] call(String s) throws Exception {
                return s.split("\t");
            }
        });

        //  过滤
        JavaRDD rdd3 = rdd2.filter(new Function() {
            public Boolean call(String[] v) throws Exception {
                return v.length == 2;
            }
        });

        //变换成数组,ID-->味道好,价格实惠,量足
        JavaPairRDD rdd4 = rdd3.mapToPair(new PairFunction() {
            public Tuple2 call(String[] t) throws Exception {
                return new Tuple2(t[0],ReviewTags.extractTags(t[1]));
            }
        });

        //过滤空评论
        JavaPairRDD rdd5 = rdd4.filter(new Function, Boolean>() {
            public Boolean call(Tuple2 t) throws Exception {
                return t._2.length() > 0;
            }
        });

        //对V进行切割,V形成数组
        JavaPairRDD rdd6 = rdd5.mapToPair(new PairFunction, String,String[]>() {
            public Tuple2 call(Tuple2 v) throws Exception {
                return new Tuple2(v._1(),v._2().split(","));
            }
        });

        //V数组压扁,形成集合。ID->味道好  ID->价格实惠  .......
        JavaPairRDD rdd7 = rdd6.flatMapValues(new Function>() {
            public Iterable call(String[] v) throws Exception {
                List list = new ArrayList();
                for(String  v1 : v){
                    list.add(v1);
                }
                return list;
            }
        });

        //K:ID 味道好     V:1......
        JavaPairRDD, Integer> rdd8 = rdd7.mapToPair(new PairFunction, Tuple2, Integer>() {
            public Tuple2, Integer> call(Tuple2 v) throws Exception {
                return new Tuple2, Integer>(v,1);
            }
        });

        JavaPairRDD,Integer> rdd9 = rdd8.reduceByKey(new Function2() {
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        });

        //K:ID      V:味道好,1 ......
        JavaPairRDD> rdd10 = rdd9.mapToPair(new PairFunction,Integer>, String, Tuple2>() {
            public Tuple2> call(Tuple2, Integer> v) throws Exception {
                return new Tuple2>(v._1()._1(),new Tuple2(v._1()._2(),v._2()));
            }
        });

        // V变成集合,以备聚合
        JavaPairRDD>> rdd11 = rdd10.mapToPair(new PairFunction>, String, List>>() {
            public Tuple2>> call(Tuple2> v) throws Exception {
                List> list = new ArrayList>();
                list.add(v._2());
                 return new Tuple2>>(v._1(),list);
            }
        });
        //
        JavaPairRDD>> rdd12 = rdd11.reduceByKey(new Function2>, List>, List>>() {
            public List> call(List> v1, List> v2) throws Exception {
                v1.addAll(v2);
                return  v1;
            }
        });

        //排序降序,取前10评论
        JavaPairRDD rdd13 = rdd12.mapToPair(new PairFunction>>, String, String>() {
            public Tuple2 call(Tuple2>> v) throws Exception {
                //降序,
                TreeSet> ts = new TreeSet>(new Tuple2Comparator());
                ts.addAll(v._2());
                //前10迭代
                Iterator> it = ts.iterator();
                int index = 0;
                String str = "";
                while(it.hasNext()){
                    if(index > 9){
                        break;
                    }
                    //迭代出前10的元组给t0
                    Tuple2 t0 = it.next();
                    //V:味道好 :12,量大:13
                    str = str + t0._1() + ":" + t0._2() + ",";
                    index++;
                }
                //去掉最后一个","
                str = str.substring(0,str.length()-1);
                //K:ID,       V:味道好+12评论数量
                return  new Tuple2(v._1(),str);
            }
        });
        //collect执行---->List
        List> list = rdd13.collect();
        //循环打印结果
        for(Tuple2 l : list){
            System.out.println(l._1() + "=============>" + l._2());
        }
    }
}

按出现次数降序取出前10个标签时，需要用到下面的比较器（Comparator）:

import scala.Tuple2;

import java.util.Comparator;

/**
 * 对比器,比较大小,降序
 */
public class Tuple2Comparator implements Comparator> {
    public int compare(Tuple2 o1, Tuple2 o2) {
        return o2._2() - o1._2();
    }
}

 

你可能感兴趣的:(标签生成)