// Load the three event logs; every input line is split on tabs into an Array[String].
val t1 = sc.textFile("/tmp/db_case1/order_created/*").map(_.split("\t"))
val t2 = sc.textFile("/tmp/db_case1/order_picked/*").map(_.split("\t"))
val t3 = sc.textFile("/tmp/db_case1/order_shipped/*").map(_.split("\t"))

// Re-shape each record as (column 0, (event name, first 19 characters of column 1)).
// 19 is the length of the "yyyy-MM-dd HH:mm:ss" pattern that filterSLA parses later on.
val t1kv = t1.map { fields => (fields(0), ("created", fields(1).substring(0, 19))) }
val t2kv = t2.map { fields => (fields(0), ("picked" , fields(1).substring(0, 19))) }
val t3kv = t3.map { fields => (fields(0), ("shipped", fields(1).substring(0, 19))) }
/**
 * Flattens one row of a double `leftOuterJoin` into a 4-tuple of plain strings.
 *
 * Only the second component of each (name, value) pair is kept; the names are dropped.
 *
 * @param events `(key, ((first, maybeSecond), maybeThird))` as produced by
 *               `t1kv.leftOuterJoin(t2kv).leftOuterJoin(t3kv)`
 * @return `(key, firstValue, secondValue, thirdValue)`, where an absent optional
 *         side contributes the empty string
 */
def flatValues(events: (String, (((String, String), Option[(String, String)]), Option[(String, String)]))): (String, String, String, String) = {
  val (key, ((created, picked), shipped)) = events
  val blank = ("", "")
  (key, created._2, picked.getOrElse(blank)._2, shipped.getOrElse(blank)._2)
}
// Preview: left-join created with picked and then shipped, flatten every row
// with flatValues, and print 10 of the resulting rows.
t1kv.
  leftOuterJoin(t2kv).
  leftOuterJoin(t3kv).
  map(flatValues).
  take(10).
  foreach(println)
/**
 * Flattens one row of a double `leftOuterJoin` while keeping every event as a
 * complete (name, value) pair.
 *
 * @param events `(key, ((first, maybeSecond), maybeThird))` as produced by
 *               `t1kv.leftOuterJoin(t2kv).leftOuterJoin(t3kv)`
 * @return `(key, first, second, third)`, where an absent optional side is
 *         represented by the placeholder pair `("", "")`
 */
def flatValues(events: (String, (((String, String), Option[(String, String)]), Option[(String, String)]))): (String, (String, String), (String, String), (String, String)) = {
  val missing = ("", "")
  events match {
    case (key, ((created, picked), shipped)) =>
      (key, created, picked.getOrElse(missing), shipped.getOrElse(missing))
  }
}
// Preview: same double left join as before, now flattened by the flatValues
// variant that keeps each event as a (name, value) pair; prints 10 rows.
t1kv.
  leftOuterJoin(t2kv).
  leftOuterJoin(t3kv).
  map(flatValues).
  take(10).
  foreach(println)
/**
 * Turns one row of a single `leftOuterJoin` into a key plus a two-element list of events.
 *
 * @param events `(key, (left, maybeRight))` as produced by `t1kv.leftOuterJoin(t2kv)`
 * @return `(key, Seq(left, right))`; when the right side is absent the second
 *         element is the placeholder pair `("", "")`, so the list always has two entries
 */
def flatValues(events: (String, ((String, String), Option[(String, String)]))): (String, Seq[(String,String)]) = {
  val (key, (left, maybeRight)) = events
  val right = maybeRight.getOrElse(("", ""))
  (key, Seq(left, right))
}
// Preview: left-join created with picked only and collect both events of each
// row into a Seq via flatValues; prints 10 rows.
t1kv.
  leftOuterJoin(t2kv).
  map(flatValues).
  take(10).
  foreach(println)
/**
 * Appends the optional right-hand side of a `leftOuterJoin` row to the events
 * already collected for the key.
 *
 * @param events `(key, (eventsSoFar, maybeNewEvent))`
 * @return `(key, eventsSoFar)` when there is no new event, otherwise
 *         `(key, eventsSoFar :+ newEvent)`; no placeholder is added for a missing event
 */
def flatList(events: (String, (Seq[(String, String)], Option[(String, String)]))): (String, Seq[(String,String)]) = {
  val (key, (collected, maybeEvent)) = events
  // fold replaces the isEmpty / get pair: None keeps the list untouched, Some appends.
  (key, maybeEvent.fold(collected)(event => collected :+ event))
}
// Preview: build Seq(created, picked) with flatValues (a missing picked event
// becomes the ("", "") placeholder), then left-join shipped and let flatList
// append it only when it is present; prints 10 rows.
t1kv.
  leftOuterJoin(t2kv).
  map(flatValues).
  leftOuterJoin(t3kv).
  map(flatList).
  take(10).
  foreach(println)
/**
 * Predicate that selects the orders which did NOT demonstrably meet the pick SLA.
 *
 * The event list is turned into a name -> timestamp map (if a name occurs more
 * than once, the last occurrence wins).
 *
 * @param events `(eventName, timestamp)` pairs of one order; timestamps that are
 *               looked at must follow the pattern `yyyy-MM-dd HH:mm:ss`
 * @return `false` only when both "created" and "picked" are present and picked
 *         happened less than two hours after created; `true` when the gap is two
 *         hours or more, or when either of the two events is missing
 * @throws java.text.ParseException if a "created" / "picked" timestamp that is
 *                                  parsed does not match the pattern
 */
def filterSLA(events: Seq[(String,String)]): Boolean = {
  // Two hours in milliseconds (7,200,000), the unit returned by Date.getTime.
  val slaMillis = 2L * 60 * 60 * 1000
  val eventMap = events.toMap
  (eventMap.get("created"), eventMap.get("picked")) match {
    case (Some(created), Some(picked)) =>
      val format = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
      val createdAt = format.parse(created).getTime
      val pickedAt = format.parse(picked).getTime
      pickedAt - createdAt >= slaMillis
    case _ =>
      // At least one of the two events is missing: keep the order.
      true
  }
}
// Full run: per-order event lists built with flatValues + flatList, reduced to
// the rows for which filterSLA returns true, then collected and printed.
t1kv.
  leftOuterJoin(t2kv).
  map(flatValues).
  leftOuterJoin(t3kv).
  map(flatList).
  filter { case (_, events) => filterSLA(events) }.
  collect.foreach(println)

// Variant: seed each list with the created event alone and use flatList for
// both joins, so a missing picked event no longer leaves a ("", "") placeholder.
t1kv.map { case (id, created) => (id, Seq(created)) }.
  leftOuterJoin(t2kv).
  map(flatList).
  leftOuterJoin(t3kv).
  map(flatList).
  filter { case (_, events) => filterSLA(events) }.
  collect.foreach(println)
// De-duplicate picked and shipped before joining: each order keeps exactly one
// event per log, the one with the smallest timestamp string (strings in
// yyyy-MM-dd HH:mm:ss form compare in chronological order).
//
// minBy replaces the former sortBy(...).toMap.{keys,values}.head: converting the
// sorted sequence to a Map threw the ordering away, and because all events of
// one log share the same name the Map kept only the LAST (latest) timestamp.
// NOTE(review): assumes "earliest event wins" is the intent, as the ascending
// sort + head suggested - confirm.
t1kv.map { case (id, created) => (id, Seq(created)) }.
  leftOuterJoin(
    t2kv.groupByKey.mapValues(group => group.minBy(_._2))
  ).map(flatList).
  leftOuterJoin(
    t3kv.groupByKey.mapValues(group => group.minBy(_._2))
  ).map(flatList).
  collect.foreach(println)
/**
 * Reduces all events grouped under one key to a single event: the one with the
 * smallest timestamp string (the first such event if several share it).
 *
 * The previous implementation sorted the events and then called `toMap`, which
 * discards the ordering and collapses events with the same name onto the last
 * (largest) timestamp, so `head` did not return the earliest event.
 *
 * @param groups `(key, events)` as produced by `groupByKey`; `events` must be
 *               non-empty (an empty group makes `minBy` throw
 *               `UnsupportedOperationException`)
 * @return `(key, earliestEvent)` where events are compared by their second component
 */
def flatGroup(groups: (String, (Iterable[(String, String)]))): (String, (String,String)) = {
  val (key, events) = groups
  (key, events.minBy(_._2))
}
// Same pipeline as the inline version, with the per-order reduction of the
// picked and shipped groups delegated to flatGroup (one event per order and log).
t1kv.map { case (id, created) => (id, Seq(created)) }.
  leftOuterJoin(
    t2kv.groupByKey.map(flatGroup)
  ).
  map(flatList).
  leftOuterJoin(
    t3kv.groupByKey.map(flatGroup)
  ).
  map(flatList).
  collect.foreach(println)
// Final run: de-duplicated picked / shipped events (flatGroup) joined onto the
// created event, then reduced to the rows for which filterSLA returns true.
t1kv.map { case (id, created) => (id, Seq(created)) }.
  leftOuterJoin(
    t2kv.groupByKey.map(flatGroup)
  ).
  map(flatList).
  leftOuterJoin(
    t3kv.groupByKey.map(flatGroup)
  ).
  map(flatList).
  filter { case (_, events) => filterSLA(events) }.
  collect.foreach(println)