/**
* 获取top10热门品类
* @param filteredSessionid2AggrInfoRDD
* @param sessionid2actionRDD
*/
private static void getTop10Category(
long taskid,
JavaPairRDD filteredSessionid2AggrInfoRDD,
JavaPairRDD sessionid2actionRDD) {
/**
* 第一步:获取符合条件的session访问过的所有品类
*/
// 获取符合条件的session的访问明细
JavaPairRDD sessionid2detailRDD = filteredSessionid2AggrInfoRDD
.join(sessionid2actionRDD)
.mapToPair(new PairFunction>, String, Row>() {
private static final long serialVersionUID = 1L;
@Override
public Tuple2 call(
Tuple2> tuple) throws Exception {
return new Tuple2(tuple._1, tuple._2._2);
}
});
// 获取session访问过的所有品类id
// 访问过:指的是,点击过、下单过、支付过的品类
JavaPairRDD categoryidRDD = sessionid2detailRDD.flatMapToPair(
new PairFlatMapFunction, Long, Long>() {
private static final long serialVersionUID = 1L;
@Override
public Iterator> call(
Tuple2 tuple) throws Exception {
Row row = tuple._2;
List> list = new ArrayList>();
Object clickCategoryId = row.get(6);
if(clickCategoryId != null) {
list.add(new Tuple2(row.getLong ( 6 ), row.getLong ( 6 )));
}
String orderCategoryIds = row.getString(8);
if(orderCategoryIds != null) {
String[] orderCategoryIdsSplited = orderCategoryIds.split(",");
for(String orderCategoryId : orderCategoryIdsSplited) {
list.add(new Tuple2(Long.valueOf(orderCategoryId),
Long.valueOf(orderCategoryId)));
}
}
String payCategoryIds = row.getString(10);
if(payCategoryIds != null) {
String[] payCategoryIdsSplited = payCategoryIds.split(",");
for(String payCategoryId : payCategoryIdsSplited) {
list.add(new Tuple2(Long.valueOf(payCategoryId),
Long.valueOf(payCategoryId)));
}
}
return list.iterator ();
}
});
/**
* 第二步:计算各品类的点击、下单和支付的次数
*/
// 访问明细中,其中三种访问行为是:点击、下单和支付
// 分别来计算各品类点击、下单和支付的次数,可以先对访问明细数据进行过滤
// 分别过滤出点击、下单和支付行为,然后通过map、reduceByKey等算子来进行计算
// 计算各个品类的点击次数
JavaPairRDD clickCategoryId2CountRDD =
getClickCategoryId2CountRDD(sessionid2detailRDD);
// 计算各个品类的下单次数
JavaPairRDD orderCategoryId2CountRDD =
getOrderCategoryId2CountRDD(sessionid2detailRDD);
// 计算各个品类的支付次数
JavaPairRDD payCategoryId2CountRDD =
getPayCategoryId2CountRDD(sessionid2detailRDD);
/**
* 第三步:join各品类与它的点击、下单和支付的次数
*
* categoryidRDD中,是包含了所有的符合条件的session,访问过的品类id
*
* 上面分别计算出来的三份,各品类的点击、下单和支付的次数,可能不是包含所有品类的
* 比如,有的品类,就只是被点击过,但是没有人下单和支付
*
* 所以,这里,就不能使用join操作,要使用leftOuterJoin操作,就是说,如果categoryidRDD不能
* join到自己的某个数据,比如点击、或下单、或支付次数,那么该categoryidRDD还是要保留下来的
* 只不过,没有join到的那个数据,就是0了
*
*/
JavaPairRDD categoryid2countRDD = joinCategoryAndData(
categoryidRDD, clickCategoryId2CountRDD, orderCategoryId2CountRDD,
payCategoryId2CountRDD);
/**
* 第四步:自定义二次排序key
*/
/**
* 第五步:将数据映射成格式的RDD,然后进行二次排序(降序)
*/
JavaPairRDD sortKey2countRDD = categoryid2countRDD.mapToPair(
new PairFunction, CategorySortKey, String>() {
private static final long serialVersionUID = 1L;
@Override
public Tuple2 call(
Tuple2 tuple) throws Exception {
String countInfo = tuple._2;
long clickCount = Long.valueOf(StringUtils.getFieldFromConcatString(
countInfo, "\\|", Constants.SESSION_PROJECT.FIELD_CLICK_COUNT));
long orderCount = Long.valueOf(StringUtils.getFieldFromConcatString(
countInfo, "\\|", Constants.SESSION_PROJECT.FIELD_ORDER_COUNT));
long payCount = Long.valueOf(StringUtils.getFieldFromConcatString(
countInfo, "\\|", Constants.SESSION_PROJECT.FIELD_PAY_COUNT));
CategorySortKey sortKey = new CategorySortKey(clickCount,orderCount, payCount);
return new Tuple2(sortKey, countInfo);
}
});
JavaPairRDD sortedCategoryCountRDD =
sortKey2countRDD.sortByKey(false);
/**
* 第六步:用take(10)取出top10热门品类,并写入MySQL
*/
ITop10CategoryDAO top10CategoryDAO = DAOFactory.getTop10CategoryDAO();
List> top10CategoryList =
sortedCategoryCountRDD.take(10);
for(Tuple2 tuple: top10CategoryList) {
String countInfo = tuple._2;
long categoryid = Long.valueOf(StringUtils.getFieldFromConcatString(
countInfo, "\\|", Constants.SESSION_PROJECT.FIELD_CATEGORY_ID));
long clickCount = Long.valueOf(StringUtils.getFieldFromConcatString(
countInfo, "\\|", Constants.SESSION_PROJECT.FIELD_CLICK_COUNT));
long orderCount = Long.valueOf(StringUtils.getFieldFromConcatString(
countInfo, "\\|", Constants.SESSION_PROJECT.FIELD_ORDER_COUNT));
long payCount = Long.valueOf(StringUtils.getFieldFromConcatString(
countInfo, "\\|", Constants.SESSION_PROJECT.FIELD_PAY_COUNT));
Top10Category category = new Top10Category();
category.setTaskid(taskid);
category.setCategoryid(categoryid);
category.setClickCount(clickCount);
category.setOrderCount(orderCount);
category.setPayCount(payCount);
category.setUUID ( UUID.randomUUID ().toString () );
top10CategoryDAO.insert(category);
}
}
/**
* 获取各品类点击次数RDD
* @param sessionid2detailRDD
* @return
*/
private static JavaPairRDD getClickCategoryId2CountRDD(
JavaPairRDD sessionid2detailRDD) {
JavaPairRDD clickActionRDD = sessionid2detailRDD.filter(
new Function, Boolean>() {
private static final long serialVersionUID = 1L;
@Override
public Boolean call(Tuple2 tuple) throws Exception {
Row row = tuple._2;
if(row.get ( 6 )==null){
return false;
}
return Long.valueOf(row.getLong(6)) != null ? true : false;
}
});
JavaPairRDD clickCategoryIdRDD = clickActionRDD.mapToPair(
new PairFunction, Long, Long>() {
private static final long serialVersionUID = 1L;
@Override
public Tuple2 call(Tuple2 tuple)
throws Exception {
long clickCategoryId = tuple._2.getLong(6);
return new Tuple2(clickCategoryId, 1L);
}
});
JavaPairRDD clickCategoryId2CountRDD = clickCategoryIdRDD.reduceByKey(
new Function2 () {
private static final long serialVersionUID = 1L;
@Override
public Long call(Long v1, Long v2) throws Exception {
return v1 + v2;
}
});
return clickCategoryId2CountRDD;
}
/**
* 获取各品类的下单次数RDD
* @param sessionid2detailRDD
* @return
*/
private static JavaPairRDD getOrderCategoryId2CountRDD(
JavaPairRDD sessionid2detailRDD) {
JavaPairRDD orderActionRDD = sessionid2detailRDD.filter(
new Function, Boolean>() {
private static final long serialVersionUID = 1L;
@Override
public Boolean call(Tuple2 tuple) throws Exception {
Row row = tuple._2;
return row.getString(8) != null ? true : false;
}
});
JavaPairRDD orderCategoryIdRDD = orderActionRDD.flatMapToPair(
new PairFlatMapFunction, Long, Long>() {
private static final long serialVersionUID = 1L;
@Override
public Iterator> call(
Tuple2 tuple) throws Exception {
Row row = tuple._2;
String orderCategoryIds = row.getString(8);
String[] orderCategoryIdsSplited = orderCategoryIds.split(",");
List> list = new ArrayList>();
for(String orderCategoryId : orderCategoryIdsSplited) {
list.add(new Tuple2(Long.valueOf(orderCategoryId), 1L));
}
return list.iterator ();
}
});
JavaPairRDD orderCategoryId2CountRDD = orderCategoryIdRDD.reduceByKey(
new Function2() {
private static final long serialVersionUID = 1L;
@Override
public Long call(Long v1, Long v2) throws Exception {
return v1 + v2;
}
});
return orderCategoryId2CountRDD;
}
/**
* 获取各个品类的支付次数RDD
* @param sessionid2detailRDD
* @return
*/
private static JavaPairRDD getPayCategoryId2CountRDD(
JavaPairRDD sessionid2detailRDD) {
JavaPairRDD payActionRDD = sessionid2detailRDD.filter(
new Function, Boolean>() {
private static final long serialVersionUID = 1L;
@Override
public Boolean call(Tuple2 tuple) throws Exception {
Row row = tuple._2;
return row.getString(10) != null ? true : false;
}
});
JavaPairRDD payCategoryIdRDD = payActionRDD.flatMapToPair(
new PairFlatMapFunction, Long, Long>() {
private static final long serialVersionUID = 1L;
@Override
public Iterator> call(
Tuple2 tuple) throws Exception {
Row row = tuple._2;
String payCategoryIds = row.getString(10);
String[] payCategoryIdsSplited = payCategoryIds.split(",");
List> list = new ArrayList>();
for(String payCategoryId : payCategoryIdsSplited) {
list.add(new Tuple2(Long.valueOf(payCategoryId), 1L));
}
return list.iterator ();
}
});
JavaPairRDD payCategoryId2CountRDD = payCategoryIdRDD.reduceByKey(
new Function2() {
private static final long serialVersionUID = 1L;
@Override
public Long call(Long v1, Long v2) throws Exception {
return v1 + v2;
}
});
return payCategoryId2CountRDD;
}
/**
* 连接品类RDD与数据RDD
* @param categoryidRDD
* @param clickCategoryId2CountRDD
* @param orderCategoryId2CountRDD
* @param payCategoryId2CountRDD
* @return
*/
private static JavaPairRDD joinCategoryAndData(
JavaPairRDD categoryidRDD,
JavaPairRDD clickCategoryId2CountRDD,
JavaPairRDD orderCategoryId2CountRDD,
JavaPairRDD payCategoryId2CountRDD) {
// 解释一下,如果用leftOuterJoin,就可能出现,右边那个RDD中,join过来时,没有值
// 所以Tuple中的第二个值用Optional类型,就代表,可能有值,可能没有值
JavaPairRDD>> tmpJoinRDD =
categoryidRDD.leftOuterJoin(clickCategoryId2CountRDD);
JavaPairRDD tmpMapRDD = tmpJoinRDD.mapToPair(
new PairFunction>>, Long, String>() {
private static final long serialVersionUID = 1L;
@Override
public Tuple2 call(
Tuple2>> tuple)
throws Exception {
long categoryid = tuple._1;
Optional optional = tuple._2._2;
long clickCount = 0L;
if(optional.isPresent()) {
clickCount = optional.get();
}
String value = Constants.SESSION_PROJECT.FIELD_CATEGORY_ID + "=" + categoryid + "|" +
Constants.SESSION_PROJECT.FIELD_CLICK_COUNT + "=" + clickCount;
return new Tuple2(categoryid, value);
}
});
tmpMapRDD = tmpMapRDD.leftOuterJoin(orderCategoryId2CountRDD).mapToPair(
new PairFunction>>, Long, String>() {
private static final long serialVersionUID = 1L;
@Override
public Tuple2 call(
Tuple2>> tuple)
throws Exception {
long categoryid = tuple._1;
String value = tuple._2._1;
Optional optional = tuple._2._2;
long orderCount = 0L;
if(optional.isPresent()) {
orderCount = optional.get();
}
value = value + "|" + Constants.SESSION_PROJECT.FIELD_ORDER_COUNT + "=" + orderCount;
return new Tuple2(categoryid, value);
}
});
tmpMapRDD = tmpMapRDD.leftOuterJoin(payCategoryId2CountRDD).mapToPair(
new PairFunction>>, Long, String>() {
private static final long serialVersionUID = 1L;
@Override
public Tuple2 call(
Tuple2>> tuple)
throws Exception {
long categoryid = tuple._1;
String value = tuple._2._1;
Optional optional = tuple._2._2;
long payCount = 0L;
if(optional.isPresent()) {
payCount = optional.get();
}
value = value + "|" + Constants.SESSION_PROJECT.FIELD_PAY_COUNT + "=" + payCount;
return new Tuple2(categoryid, value);
}
});
return tmpMapRDD;
}
// 欢迎关注,更多惊喜等着你 (web-page footer, "Follow us - more surprises await"; not part of the program)