目录
重命名数据表top10_category_session
代码
Top10Session.java
ITop10SessionDAO.java
Top10SessionDAOImpl.java
DAOFactory.java
UserVisitSessionAnalyzeSpark.java
本文记录“用户访问session分析”模块中top10活跃session的实现:使用分组取TopN算法,获取每个top10热门品类下点击次数最多的10个session。
alter table top10_category_session rename to top10_session;
domain
package graduation.java.domain; /** * FileName: Top10Session * Author: hadoop * Email: [email protected] * Date: 19-3-24 下午4:59 * Description: * * top10品类session点击top10实体类 */ public class Top10Session { private long taskid; private long categoryid; private String sessionid; private long clickCount; public long getTaskid() { return taskid; } public void setTaskid(long taskid) { this.taskid = taskid; } public long getCategoryid() { return categoryid; } public void setCategoryid(long categoryid) { this.categoryid = categoryid; } public String getSessionid() { return sessionid; } public void setSessionid(String sessionid) { this.sessionid = sessionid; } public long getClickCount() { return clickCount; } public void setClickCount(long clickCount) { this.clickCount = clickCount; } @Override public String toString() { return "Top10Session{" + "taskid=" + taskid + ", categoryid=" + categoryid + ", sessionid='" + sessionid + '\'' + ", clickCount=" + clickCount + '}'; } }
dao
package graduation.java.dao;

import graduation.java.domain.Top10Session;

/**
 * DAO contract for the top-10 session click records of the top-10 categories.
 *
 * FileName: ITop10SessionDAO
 * Author: hadoop
 * Date: 19-3-24 17:02
 */
public interface ITop10SessionDAO {

    /**
     * Inserts the given record.
     *
     * @param top10Session the record to insert
     */
    void insert(Top10Session top10Session);
}
impl
package graduation.java.impl;

import graduation.java.dao.ITop10SessionDAO;
import graduation.java.domain.Top10Session;
import graduation.java.jdbc.JDBCHelper;

/**
 * {@link ITop10SessionDAO} implementation that writes records through {@link JDBCHelper}.
 *
 * FileName: Top10SessionDAOImpl
 * Author: hadoop
 * Date: 19-3-24 17:04
 */
public class Top10SessionDAOImpl implements ITop10SessionDAO {

    /**
     * Inserts one row into {@code top10_session}.
     *
     * The statement names no columns, so the values are bound positionally in the
     * order task id, category id, session id, click count.
     *
     * @param top10Session the record whose four fields are bound to the statement
     */
    @Override
    public void insert(Top10Session top10Session) {
        final Object[] bindValues = {
            top10Session.getTaskid(),
            top10Session.getCategoryid(),
            top10Session.getSessionid(),
            top10Session.getClickCount()
        };

        JDBCHelper.getInstance()
                .executeUpdate("insert into top10_session values(?,?,?,?)", bindValues);
    }
}
factory
/**
 * Creates the DAO used to manage top-10 session records.
 *
 * @return a newly constructed {@link Top10SessionDAOImpl}
 */
public static Top10SessionDAOImpl getTop10SessionDAO() {
    Top10SessionDAOImpl top10SessionDao = new Top10SessionDAOImpl();
    return top10SessionDao;
}
Spark
** * 获取top10活跃session * @param sc * @param taskId * @param top10CategoryList * @param sessionid2detailRDD */ private static void getTop10Session(JavaSparkContext sc, long taskId, List
> top10CategoryList, JavaPairRDD sessionid2detailRDD) { List > top10CategoryIdList = new ArrayList >(); for (Tuple2 category: top10CategoryList){ long categoryId = Long.valueOf(StringUtils.getFieldFromConcatString(category._2,"\\|",Constants.FIELD_CATEGORY_ID)); top10CategoryIdList.add(new Tuple2 (categoryId,categoryId)); } JavaPairRDD top10CategoryIdRDD = sc.parallelizePairs(top10CategoryIdList); /** * 第二步:计算top10热门品类被各session点击的次数 */ JavaPairRDD > sessionid2detailsRDD = sessionid2detailRDD.groupByKey(); JavaPairRDD categoryid2sessionCountRDD = sessionid2detailsRDD.flatMapToPair( new PairFlatMapFunction >, Long, String>() { private static final long serialVersionUID = 1L; @Override public Iterator > call(Tuple2 > tuple) throws Exception { String sessionid = tuple._1; Iterator iterator = tuple._2.iterator(); Map
categoryCountMap = new HashMap (); while (iterator.hasNext()){ Row row = iterator.next(); if (row.getLong(6) != Long.MAX_VALUE){ long categoryid = row.getLong(6); Long count = categoryCountMap.get(categoryid); if (count == null){ count = 0L; } count++; categoryCountMap.put(categoryid,count); } } //返回结果, 格式 List > list = new ArrayList >(); for (Map.Entry categoryCountEntry: categoryCountMap.entrySet()){ long categoryid = categoryCountEntry.getKey(); long count = categoryCountEntry.getValue(); String value = sessionid +"," + count; list.add(new Tuple2 (categoryid,value)); } return list.iterator(); } } ); //获取到top10热门品类,被各个session点击的次数 JavaPairRDD top10CategorySessionCountRDD = top10CategoryIdRDD .join(categoryid2sessionCountRDD) .mapToPair(new PairFunction >, Long, String>() { private static final long serialVersionUID = 1L; @Override public Tuple2 call(Tuple2 > tuple) throws Exception { return new Tuple2 (tuple._1,tuple._2._2); } }); /** * 第三步:分组获取TopN算法实现,获取每个品类的top10活跃用户 */ JavaPairRDD > top10CategorySessionCountsRDD = top10CategorySessionCountRDD.groupByKey(); JavaPairRDD top10SessionRDD = top10CategorySessionCountsRDD.flatMapToPair( new PairFlatMapFunction >, String, String>() { private static final long serialVersionUID = 1L; @Override public Iterator > call(Tuple2 > tuple) throws Exception { long categoryid = tuple._1; Iterator iterator = tuple._2.iterator(); //定义获取top10session的排序数组 String[] top10Sessions = new String[10]; while (iterator.hasNext()){ String sessionCount = iterator.next(); long count = Long.valueOf(sessionCount.split(",")[1]); //遍历数组 for (int i = 0; i < top10Sessions.length; i++){ //如果当前i为是null,那么直接将i位数据赋值为当前的sessionCount if (top10Sessions[i] == null){ top10Sessions[i] = sessionCount; break; } else{ long _count = Long.valueOf(top10Sessions[i].split(",")[1]); //插入排序,将数据进行排序 if(count > _count){ for (int j = 9 ; j > i; j--){ top10Sessions[j] = top10Sessions[j-1]; } top10Sessions[i] = sessionCount; break; } //比较小,继续外层for循环 } } } //将数据写入Mysql List > 
list = new ArrayList >(); for (String sessionCount : top10Sessions){ String sessionid = sessionCount.split(",")[0]; long count = Long.valueOf(sessionCount.split(",")[1]); //将top10session写入MySQL Top10Session top10Session = new Top10Session(); top10Session.setTaskid(taskId); top10Session.setCategoryid(categoryid); top10Session.setSessionid(sessionid); top10Session.setClickCount(count); ITop10SessionDAO top10SessionDAO = DAOFactory.getTop10SessionDAO(); top10SessionDAO.insert(top10Session); list.add(new Tuple2 (sessionid,sessionid)); } return list.iterator(); } } ); /** * 第四步:获取top10活跃session的明细数据,并写入MySQL */ JavaPairRDD > sessionDetailRDD = top10SessionRDD.join(sessionid2detailRDD); sessionDetailRDD.foreach(new VoidFunction >>() { private static final long serialVersionUID = 1L; @Override public void call(Tuple2 > tuple) throws Exception { Row row = tuple._2._2; SessionDetail sessionDetail = new SessionDetail(); sessionDetail.setTaskid(taskId); sessionDetail.setUserid(row.getLong(1)); sessionDetail.setSessionid(row.getString(2)); sessionDetail.setPageid(row.getLong(3)); sessionDetail.setActionTime(row.getString(4)); sessionDetail.setSeachKeyWord(row.getString(5)); sessionDetail.setClickCategoryId(row.getLong(6)); sessionDetail.setClickProductId(row.getLong(7)); sessionDetail.setOrderCategoryIds(row.getString(8)); sessionDetail.setOrderProductIds(row.getString(9)); sessionDetail.setPayCategoryIds(row.getString(10)); sessionDetail.setPayProductIds(row.getString(11)); ISessionDetailDAO sessionDetailDAO = DAOFactory.getSessionDetailDAO(); sessionDetailDAO.insert(sessionDetail); } }); }