package HBaseDao;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;

import java.io.IOException;
/*
 * Utility class for HBase operations.
 */
public class HBaseUtils {

    HBaseAdmin admin = null;
    Configuration configuration = null;

    /*
     * Private constructor: load the HBase client defaults and point the client at the cluster.
     */
    private HBaseUtils() {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "qyl01,qyl02,qyl03");
        configuration.set("hbase.rootdir", "hdfs:///hbase");
        try {
            admin = new HBaseAdmin(configuration);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    private static HBaseUtils instance = null;

    /*
     * Lazily create the singleton; synchronized so concurrent callers
     * (e.g. tasks in the same executor JVM) cannot build two instances.
     */
    public static synchronized HBaseUtils getInstance() {
        if (null == instance) {
            instance = new HBaseUtils();
        }
        return instance;
    }
    /*
     * Get an HTable instance by table name.
     * @param tableName
     * @return
     */
    public HTable getTable(String tableName) {
        HTable table = null;
        try {
            table = new HTable(configuration, tableName);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return table;
    }
    /**
     * Add a single record to an HBase table.
     * @param tableName table name
     * @param rowkey    row key
     * @param cf        column family
     * @param column    column qualifier
     * @param value     value to write
     */
    public void put(String tableName, String rowkey, String cf, String column, String value) {
        HTable table = getTable(tableName);
        Put put = new Put(rowkey.getBytes());
        put.addColumn(cf.getBytes(), column.getBytes(), value.getBytes());
        try {
            table.put(put);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    /*
     * Quick test: insert a single record into HBase.
     */
    /*
    public static void main(String[] args) {
        String tableName = "course_clickcount";
        String rowkey = "20191111_188";
        String cf = "info";
        String column = "click_count";
        String value = "2";
        HBaseUtils.getInstance().put(tableName, rowkey, cf, column, value);
    }
    */
}
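
Both HBaseUtils and the DAO below assume that the target table already exists. A minimal one-off sketch that creates it through the same client API (table and column-family names are taken from the code in this post, the quorum from the constructor above; creating the table from the HBase shell works just as well):

import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}

object CreateClickCountTable {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "qyl01,qyl02,qyl03")
    val admin = new HBaseAdmin(conf)
    val tableName = "course_clickcount"
    if (!admin.tableExists(tableName)) {
      // one column family, "info", matching ClickCourseCountDao.cf
      val desc = new HTableDescriptor(TableName.valueOf(tableName))
      desc.addFamily(new HColumnDescriptor("info"))
      admin.createTable(desc)
    }
    admin.close()
  }
}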
package HBaseDao
import java.io.IOException
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.util.Bytes
import scala.collection.mutable.ListBuffer
/*
 * DAO for reading and writing the per-course click counts in HBase.
 */
object ClickCourseCountDao {

  /*
   * HBase table parameters -- adjust to your own setup.
   */
  val tableName = "course_clickcount"
  val cf = "info"
  val column = "clickcount"

  /*
   * Write a batch of results by incrementing the counter column.
   */
  def save(list: ListBuffer[ClickCoursCount]): Unit = {
    val htable = HBaseUtils.getInstance().getTable(tableName)
    for (clk <- list) {
      // incrementColumnValue adds clickCount to the existing 8-byte counter cell
      htable.incrementColumnValue(
        clk.dayCourse.getBytes(),
        cf.getBytes(),
        column.getBytes(),
        clk.clickCount
      )
    }
  }
  /*
   * Read back the counter value stored for the given row key (0 if absent).
   */
  def count(dayCourse: String): Long = {
    val htable = HBaseUtils.getInstance().getTable(tableName)
    val get = new Get(dayCourse.getBytes())
    val value = htable.get(get).getValue(cf.getBytes(), column.getBytes())
    if (null == value) {
      0L
    } else {
      Bytes.toLong(value)
    }
  }
  def main(args: Array[String]): Unit = {
    val listbuffer = new ListBuffer[ClickCoursCount]
    /*
     * Insert some test data: the three increments add 5 to the counter for "20191111_88".
     */
    listbuffer.append(ClickCoursCount("20191111_88", 1L))
    listbuffer.append(ClickCoursCount("20191111_88", 2L))
    listbuffer.append(ClickCoursCount("20191111_88", 2L))
    save(listbuffer)
    println(count("20191111_88") + "------" + count("20191111_88"))
  }
}
/*
 * Entity class for a click-count record: row key (day_courseId) and count.
 */
case class ClickCoursCount(dayCourse: String, clickCount: Long)
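
The DAO only reads back one row at a time. To list all courses for a given day, a prefix scan over the row keys is enough. A sketch along those lines (the "20191111" prefix is just the example key used in main above; it assumes the counters were written with incrementColumnValue, so each cell holds an 8-byte long):

import org.apache.hadoop.hbase.client.Scan
import scala.collection.JavaConverters._

object ClickCourseCountScan {
  def main(args: Array[String]): Unit = {
    val htable = HBaseUtils.getInstance().getTable(ClickCourseCountDao.tableName)
    val scan = new Scan()
    // all rows whose key starts with the given day, e.g. "20191111_88", "20191111_145", ...
    scan.setRowPrefixFilter(Bytes.toBytes("20191111"))
    val scanner = htable.getScanner(scan)
    try {
      for (result <- scanner.asScala) {
        val rowKey = Bytes.toString(result.getRow)
        val value = result.getValue(ClickCourseCountDao.cf.getBytes(), ClickCourseCountDao.column.getBytes())
        val count = if (value == null) 0L else Bytes.toLong(value)
        println(s"$rowKey -> $count")
      }
    } finally {
      scanner.close()
    }
  }
}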
package com.bonc.qyl.Spark
import HBaseDao.{ClickCoursCount, ClickCourseCountDao}
import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import scala.collection.mutable.ListBuffer
/*
 * Flume + Kafka + Spark Streaming + HBase pipeline.
 */
object ProjectStreaming {
  def main(args: Array[String]): Unit = {
    /*
     * In a real project the Kafka settings would be passed in as arguments;
     * they are hard-coded below for this demo.
     * if (args.length != 2) {
     *   System.err.println("Usage ProjectStreaming: ")
     *   System.exit(1)
     * }
     */

    /*
     * Set up the streaming context.
     */
    System.setProperty("HADOOP_USER_NAME", "qyl")
    val conf = new SparkConf().setMaster("local[2]").setAppName("ProjectStreaming")
    val ssc = new StreamingContext(conf, Seconds(5))
    ssc.checkpoint("hdfs:///flume-kafka-direct")

    /*
     * Read the click-log stream from Kafka (direct stream, starting from the earliest offset).
     */
    val kafkaParams = Map[String, String](
      "metadata.broker.list" -> "qyl01:9092,qyl02:9092,qyl03:9092",
      "auto.offset.reset" -> "smallest")
    val topics = Set("flume-kafka-sparkStreaming-HBase1")
    val kafkaDStream: DStream[String] = KafkaUtils
      .createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topics)
      .map(_._2)

    /*
     * Clean and filter the data. Format of one input record (fields are tab separated):
     * 132.168.89.224 2018-07-13 05:53:02 "GET /class/145.html HTTP/1.1" 200 https://search.yahoo.com/search?p=Flink实战
     */
    val cleanData: DStream[ClickLog] = kafkaDStream.map { x =>
      val strArr = x.split("\t")
      val ip = strArr(0)                                 // client IP
      val time = strArr(1).substring(0, 10).trim()       // keep only the date, e.g. 2018-07-13
      val refer = strArr(2).split(" ")(1)                // request path, e.g. /class/145.html
      val status = strArr(3).toInt                       // HTTP status code
      val searchArr = strArr(4).replaceAll("//", "/").split("/")
      var searchUrl = ""
      if (searchArr.length > 2) {
        searchUrl = searchArr(1)                         // referer host, e.g. search.yahoo.com
      } else {
        searchUrl = searchArr(0)
      }
      (ip, time, refer, status, searchUrl)
    }.filter(_._3.startsWith("/class")).map { x =>
      // "/class/145.html" -> "145"
      val referStr = x._3.split("/")(2)
      val refer = referStr.substring(0, referStr.lastIndexOf("."))
      ClickLog(x._1, x._2, refer, x._4, x._5)
    }
    /*
     * Requirement: cumulative number of clicks per course per day.
     * Key is "date_courseId"; each 5-second batch is summed locally and then added
     * to the HBase counter, so the stored value keeps growing over the day.
     */
    cleanData.map(x => (x.time + "_" + x.refer, 1)).reduceByKey(_ + _).foreachRDD { rdd =>
      rdd.foreachPartition { rddPartition =>
        val list = new ListBuffer[ClickCoursCount]
        rddPartition.foreach { pair =>
          list.append(ClickCoursCount(pair._1, pair._2))
        }
        /*
         * Write this partition's counts to HBase.
         */
        ClickCourseCountDao.save(list)
      }
    }

    ssc.start()
    ssc.awaitTermination()
  }
}
case class ClickLog(ip:String,time:String,refer:String,status:Int,searchUrl:String)
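
To sanity-check the field extraction without running Kafka or Spark, the same parsing rules can be applied to the sample line from the format comment. A small standalone sketch (the sample input and the expected output shown in the comment are my own illustration, not part of the job):

object ParseDemo {
  def main(args: Array[String]): Unit = {
    val line = "132.168.89.224\t2018-07-13 05:53:02\t\"GET /class/145.html HTTP/1.1\"\t200\thttps://search.yahoo.com/search?p=Flink实战"
    val strArr = line.split("\t")
    val ip = strArr(0)
    val time = strArr(1).substring(0, 10).trim()
    val refer = strArr(2).split(" ")(1)                        // "/class/145.html"
    val status = strArr(3).toInt
    val searchArr = strArr(4).replaceAll("//", "/").split("/")
    val searchUrl = if (searchArr.length > 2) searchArr(1) else searchArr(0)
    // same post-filter step: keep /class pages and strip the path down to the course id
    if (refer.startsWith("/class")) {
      val referStr = refer.split("/")(2)                       // "145.html"
      val courseId = referStr.substring(0, referStr.lastIndexOf("."))
      println(ClickLog(ip, time, courseId, status, searchUrl))
      // prints: ClickLog(132.168.89.224,2018-07-13,145,200,search.yahoo.com)
    }
  }
}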
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.bonc.qyl.Spark</groupId>
    <artifactId>Kafka_SparkStreaming_Hbase</artifactId>
    <version>1.0-SNAPSHOT</version>
    <inceptionYear>2008</inceptionYear>

    <properties>
        <!-- Property names in this section are reconstructed; only the values survive in the original listing. -->
        <project.build.sourceEncoding>UTF8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <encoding>UTF-8</encoding>
        <scala.version>2.11.8</scala.version>
        <spark.version>2.3.2</spark.version>
        <hadoop.version>2.7.7</hadoop.version>
        <mysql.version>5.1.46</mysql.version>
        <kafka.version>1.1.0</kafka.version>
        <junit.version>4.12</junit.version>
        <streaming.kafka.version>2.3.2</streaming.kafka.version>
        <scala.compat.version>2.11</scala.compat.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-graphx_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-mllib_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
            <version>${streaming.kafka.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-flume_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.2.6</version>
        </dependency>
        <dependency>
            <groupId>com.101tec</groupId>
            <artifactId>zkclient</artifactId>
            <version>0.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.zookeeper</groupId>
            <artifactId>zookeeper</artifactId>
            <version>3.4.12</version>
            <type>pom</type>
        </dependency>
    </dependencies>

    <build>
        <pluginManagement>
            <plugins>
                <plugin>
                    <groupId>net.alchim31.maven</groupId>
                    <artifactId>scala-maven-plugin</artifactId>
                    <version>3.2.2</version>
                </plugin>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.5.1</version>
                </plugin>
            </plugins>
        </pluginManagement>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <phase>process-test-resources</phase>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>compile</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
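
With the shade plugin bound to the package phase, mvn clean package should produce a self-contained jar under target/ (Kafka_SparkStreaming_Hbase-1.0-SNAPSHOT.jar). It can then be launched with something like spark-submit --class com.bonc.qyl.Spark.ProjectStreaming target/Kafka_SparkStreaming_Hbase-1.0-SNAPSHOT.jar; since the code already sets the master to local[2], no --master flag is needed for a local test.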