package test;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.fpm.AssociationRules;
import org.apache.spark.mllib.fpm.FPGrowth;
import org.apache.spark.mllib.fpm.FPGrowthModel;
import java.util.Arrays;
import java.util.List;
/**
* @description: 测试spark-FpGrowth
*/
public class FpgDemo {
public static void main(String[] args) {
String pathOfData = "C:\\Users\\yuzisheng\\Desktop\\retail.txt";
//最小支持度
double minSup = 0.01;
//最小置信度
double minConf = 0.8;
//生成的规则数
int numPart = 10;
//记录程序开始时间
long startTime = System.currentTimeMillis();
//本地模式
SparkConf conf = new SparkConf().setAppName("FPDemo").setMaster("local");
JavaSparkContext sc = new JavaSparkContext(conf);
//读入数据集
JavaRDD> transactions = sc.textFile(pathOfData).map((Function>) s -> {
String[] parts = s.split(" ");
return Arrays.asList(parts);
});
//建立算法实例
FPGrowth fpGrowth = new FPGrowth().setMinSupport(minSup).setNumPartitions(numPart);
FPGrowthModel model = fpGrowth.run(transactions);
//查看所有频繁项集,并列出它出现的次数
System.out.println("--all frequent item sets");
for (FPGrowth.FreqItemset itemSet : model.freqItemsets().toJavaRDD().collect()) {
System.out.println(itemSet.javaItems() + ", " + itemSet.freq());
}
//通过置信度筛选出强规则(满足一定置信度的规则),antecedent表示前项,consequent表示后项
System.out.println("--all strong rules");
for (AssociationRules.Rule rule : model.generateAssociationRules(minConf).toJavaRDD().collect()) {
System.out.println(rule.javaAntecedent() + "=>" + rule.javaConsequent() + ", " + rule.confidence());
}
//记录程序结束时间
long endTime = System.currentTimeMillis();
System.out.println("Done by " + (endTime - startTime) + "ms");
}
}
数据样例(retail.txt前十行):
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
30 31 32
33 34 35
36 37 38 39 40 41 42 43 44 45 46
38 39 47 48
38 39 48 49 50 51 52 53 54 55 56 57 58
32 41 59 60 61 62
3 39 48
63 64 65 66 67 68
32 69
运行结果(节选,仅列出强关联规则及总耗时):
[37]=>[38], 0.9739292364990689
[36]=>[38], 0.9502724795640327
[41, 38, 48]=>[39], 0.8386689132266217
[36, 48, 39]=>[38], 0.967741935483871
[225, 48]=>[39], 0.8064516129032258
[110]=>[38], 0.9753042233357194
[170, 39]=>[38], 0.9805730937348227
[170, 48, 39]=>[38], 0.9892205638474295
[110, 48]=>[38], 0.986231884057971
[286]=>[38], 0.9433643279797126
[110, 39]=>[38], 0.9891984081864695
[170]=>[38], 0.9780574378831881
[41, 48]=>[39], 0.8168108227988468
[110, 48, 39]=>[38], 0.9942140790742526
[36, 48]=>[38], 0.96045197740113
[36, 39]=>[38], 0.9548355424644085
[170, 48]=>[38], 0.9877970456005138
Done by 8097ms