package com.sdg.consumer.myhbase
import java.text.SimpleDateFormat
import java.util
import com.sdg.consumer.myutils.{ConnectionInstance, HBaseUtil, PropertiesUtil}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{HTable, Put}
import org.apache.hadoop.hbase.util.Bytes
object HbaseDao {
private val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
private val sdf2 = new SimpleDateFormat("yyyyMMddHHmmss")
private val cacheList = new util.ArrayList[Put]
var conf: Configuration = HBaseConfiguration.create
private var regions = Integer.valueOf(PropertiesUtil.getProperty("hbase.calllog.regions"))
private var namespace: String = PropertiesUtil.getProperty("hbase.calllog.namespace")
private var tableName: String = PropertiesUtil.getProperty("hbase.calllog.tablename")
var table: HTable = null
// Create the namespace first, then the table
if (!HBaseUtil.isExistTable(conf, tableName)) {
// Only create them when the table does not exist yet
HBaseUtil.initNamespace(conf, namespace)
HBaseUtil.createTable(conf, tableName, regions, "f1", "f2")
}
/**
 * Write one call record into HBase.
 * ori sample (simplified): 18576581848,17269452013,2017-08-14 13:38:31,1761
 * rowkey sample: 01_18576581848_20170814133831_17269452013_1_1761
 * HBase columns: call1 call2 build_time build_time_ts flag duration
 *
 * @param ori one comma-separated call record
 */
def put(ori: String): Unit = {
// Possible insert strategies:
// put (used here)
// hive + hbase
// phoenix (SQL) ==> squirrel client
// bulkload ==> writes HFiles directly, highest throughput
// At the start of a batch, (re)acquire the table and switch to client-side buffering
if (cacheList.size == 0) {
val connection = ConnectionInstance.getConnection(conf)
table = connection.getTable(TableName.valueOf(tableName)).asInstanceOf[HTable]
table.setAutoFlushTo(false)
// 2 MB client-side write buffer
table.setWriteBufferSize(2 * 1024 * 1024)
}
// Split the incoming record on commas.
// Field layout (per the sample): caller, caller name, callee, callee name, buildTime, duration, flag
// e.g. 18468618874,魏明艳,13980337439,卫艺,2017-11-25 13:05:27,1088,1
val splitOri: Array[String] = ori.split(",")
val caller: String = splitOri(0)
val callee: String = splitOri(2)
val buildTime: String = splitOri(4)
val duration: String = splitOri(5)
// Compute the region code (rowkey salt)
val regionCode: String = HBaseUtil.genRegionCode(caller, buildTime, regions)
// Call setup time, reformatted and as a millisecond timestamp
val buildTimeReplace: String = sdf2.format(sdf1.parse(buildTime))
val buildTimeTs: String = String.valueOf(sdf1.parse(buildTime).getTime)
// Build the rowkey
val rowkey: String = HBaseUtil.genRowKey(regionCode, caller, buildTimeReplace, callee, "1", duration)
// Build the Put for this record
val put: Put = new Put(Bytes.toBytes(rowkey))
// Caller number
put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("call1"), Bytes.toBytes(caller))
// Callee number
put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("call2"), Bytes.toBytes(callee))
// Call date
put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("build_time"), Bytes.toBytes(buildTime))
// Call setup timestamp (milliseconds)
put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("build_time_ts"), Bytes.toBytes(buildTimeTs))
// Caller/callee flag
put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("flag"), Bytes.toBytes("1"))
// Call duration
put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("duration"), Bytes.toBytes(duration))
cacheList.add(put)
// Flush once a full batch has been buffered (30 is an assumed batch size)
if (cacheList.size >= 30) {
// table.put requires a java.util.List[Put]; a Scala List would not be accepted
table.put(cacheList)
// Push the buffered Puts to the HBase table
table.flushCommits()
println("Batch inserted into HBase")
// Clear the local buffer
cacheList.clear()
}
// 1. Records arrive continuously from the consumer
// 2. A final flush outside the loop is still needed for any partial batch
}
}
package com.sdg.consumer.mykafka
// Keep this package name distinct to avoid clashes
import java.util
import com.sdg.consumer.myhbase.HbaseDao
import com.sdg.consumer.myutils.PropertiesUtil
import org.apache.kafka.clients.consumer.{ConsumerRecords, KafkaConsumer}
import scala.collection.JavaConversions._
/**
 * Kafka consumer: reads call records from the configured topic and writes each one to
 * HBase via HbaseDao.put. (The object name HBaseConsumer is assumed.)
 */
object HBaseConsumer {
def main(args: Array[String]): Unit = {
// HbaseDao.put("18468618874,魏明艳,13980337439,卫艺,2017-11-25 13:05:27,1088,1")
//testHbase()
// Create the Kafka consumer object, configured from hbase_consumer.properties
val kafkaConsumer = new KafkaConsumer[String, String](PropertiesUtil.properties)
// Subscribe to the configured topic
kafkaConsumer.subscribe(util.Arrays.asList(PropertiesUtil.getProperty("kafka.topics")))
println("Waiting for data --------------")
while (true) {
// Poll the topic every 100 ms
val records: ConsumerRecords[String, String] = kafkaConsumer.poll(100)
// JavaConversions makes the Java ConsumerRecords iterable in a Scala for loop
for (cr <- records) {
// The record value is the raw call-record string
val str: String = cr.value()
println(str)
// Write it to HBase
HbaseDao.put(str)
}
}
}
def testHbase(): Unit = {
val str = "18468618874,魏明艳,13980337439,卫艺,2017-11-25 13:05:27,1088,1"
HbaseDao.put(str)
}
}
package com.sdg.consumer.myutils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory}
// Provides a single, lazily created HBase Connection shared by all callers
object ConnectionInstance {
private var conn: Connection = null
def getConnection(conf: Configuration): Connection = {
// (Re)create the connection when it is missing or has been closed
if (conn == null || conn.isClosed) {
conn = ConnectionFactory.createConnection(conf)
}
conn
}
}
package com.sdg.consumer.myutils
import java.text.DecimalFormat
import java.util
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.client.{Admin, Connection, ConnectionFactory}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HColumnDescriptor, HTableDescriptor, NamespaceDescriptor, TableName}
object HBaseUtil {
/**
* rowkey format: regionCode_call1_buildTime_call2_flag_duration
*
* @param regionCode region code (salt)
* @param caller     caller number
* @param buildTime  call setup time, formatted as yyyyMMddHHmmss
* @param callee     callee number
* @param flag       caller/callee flag
* @param duration   call duration
* @return the assembled rowkey
*/
def genRowKey(regionCode: String, caller: String, buildTime: String, callee: String, flag: String, duration: String): String = {
val sb = new StringBuilder
sb.append(regionCode + "_")
.append(caller + "_")
.append(buildTime + "_")
.append(callee + "_")
.append(flag + "_")
.append(duration)
sb.toString()
}
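// Worked example: with the sample values from HbaseDao's doc comment,
// genRowKey("01", "18576581848", "20170814133831", "17269452013", "1", "1761")
// returns "01_18576581848_20170814133831_17269452013_1_1761".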
/**
* Compute the region code (rowkey salt) for one record
*
* @param call1     caller number
* @param buildTime call setup time, e.g. 2017-08-14 13:38:31
* @param regions   number of pre-split regions
* @return two-digit region code string
*/
def genRegionCode(call1: String, buildTime: String, regions: Integer): String = {
// Length of the phone number
val len: Int = call1.length
// Last four digits of the number
val lastPhone: String = call1.substring(len - 4)
// Year and month of the call setup time, e.g. 2018-02-02 -> 201802
val ym: String = buildTime.replaceAll("-", "")
.replaceAll(":", "")
.replaceAll(" ", "")
.substring(0, 6)
// Scatter step 1: XOR the two values (bitwise; matching bits give 0, differing bits give 1)
val x: Integer = Integer.valueOf(lastPhone) ^ Integer.valueOf(ym)
// Scatter step 2
val y: Int = x.hashCode
// Partition number
val regionCode: Int = y % regions
// Format the partition number as two digits
val df = new DecimalFormat("00")
df.format(regionCode)
}
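// Worked example (assuming regions = 6, the value used in main below): for caller
// 18576581848 and build time 2017-08-14 13:38:31, lastPhone = "1848", ym = "201708",
// 1848 ^ 201708 = 201940, and 201940 % 6 = 4, so the region code is "04".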
/**
* Generate the pre-split keys used when creating the table
*
* @param regions number of regions
* @return split keys in ascending byte order
*/
def genSplitKeys(regions: Integer): Array[Array[Byte]] = {
// Array holding the split-key strings
val keys: Array[String] = new Array[String](regions)
// The region count is assumed to stay below 100, so each split key is a two-digit string
val df: DecimalFormat = new DecimalFormat("00")
// Generate one key per region
for (i <- 0 until regions) {
// Append '|' so the key sorts after every rowkey that starts with the same two-digit prefix
keys(i) = df.format(i) + "|"
}
// Two-dimensional byte array holding the final split keys
val splitKeys = new Array[Array[Byte]](regions)
// BYTES_COMPARATOR keeps the keys in ascending byte order
val treeSet: util.TreeSet[Array[Byte]] = new util.TreeSet[Array[Byte]](Bytes.BYTES_COMPARATOR)
for (i <- 0 until regions) {
// Add the generated keys to the sorted set
treeSet.add(Bytes.toBytes(keys(i)))
}
val splitKeysIterator: util.Iterator[Array[Byte]] = treeSet.iterator
var index = 0
while (splitKeysIterator.hasNext) {
val b: Array[Byte] = splitKeysIterator.next
println(Bytes.toString(b))
splitKeys(index) = b
index = index + 1
}
splitKeys
}
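// Note: admin.createTable(htd, splitKeys) with N split keys produces N + 1 regions.
// For regions = 6 the keys are "00|" .. "05|"; a rowkey such as "01_..." sorts between
// "00|" and "01|" (because '_' < '|' in byte order) and therefore lands in the second region.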
/**
* Create a pre-split table
*
* @param conf
* @param tableName    table name
* @param regions      number of pre-split regions
* @param columnFamily column families (varargs)
*/
def createTable(conf: Configuration, tableName: String, regions: Integer, columnFamily: String*) = {
val connection: Connection = ConnectionFactory.createConnection(conf)
val admin: Admin = connection.getAdmin
//if (isExistTable(conf, tableName)) return
val htd = new HTableDescriptor(TableName.valueOf(tableName))
for (cf <- columnFamily) {
htd.addFamily(new HColumnDescriptor(cf))
}
// A coprocessor could be registered on the table at creation time
//htd.addCoprocessor("hbase.CalleeWriteObserver")
// Create the table with the pre-split keys
admin.createTable(htd, genSplitKeys(regions))
// The overload below would create the table with a single region
// admin.createTable(htd)
admin.close()
connection.close()
}
def main(args: Array[String]): Unit = {
/* val conf: Configuration = HbaseDao.conf
val connection: Connection = ConnectionFactory.createConnection(conf)
val admin: Admin = connection.getAdmin
//if (isExistTable(conf, tableName)) return
val htd = new HTableDescriptor(TableName.valueOf(" "))
//add a coprocessor
//htd.addCoprocessor("hbase.CalleeWriteObserver")
//create the table with the pre-split keys
admin.createTable(htd, genSplitKeys(6))
admin.close()
connection.close()*/
/*val array = genSplitKeys(6)
println(Bytes.toString(array(0)))
println(Bytes.toString(array(1)))
println(Bytes.toString(array(2)))
println(Bytes.toString(array(3)))
println(Bytes.toString(array(4)))
println(Bytes.toString(array(5)))
println(Bytes.toString(array(6)))*/
val str: String = genRegionCode("13526949099", "2018-02-02", 6)
println(str)
}
/**
* Initialize (create) the namespace
*
* @param conf
* @param namespace
*/
def initNamespace(conf: Configuration, namespace: String) = {
// Get an HBase connection
val connection: Connection = ConnectionFactory.createConnection(conf)
// Get the Admin object
val admin: Admin = connection.getAdmin
// Build the namespace descriptor and create the namespace
val nd: NamespaceDescriptor = NamespaceDescriptor.create(namespace)
.addConfiguration("CREATE_TIME", String.valueOf(System.currentTimeMillis)).addConfiguration("AUTHOR", "liuhe").build
admin.createNamespace(nd)
admin.close()
connection.close()
}
/**
* Check whether the table exists
*
* @param conf
* @param tableName
* @return true if the table exists
*/
def isExistTable(conf: Configuration, tableName: String): Boolean = {
val connection: Connection = ConnectionFactory.createConnection(conf)
val admin: Admin = connection.getAdmin
// Check whether the table exists
val result: Boolean = admin.tableExists(TableName.valueOf(tableName))
admin.close()
connection.close()
result
}
}
package com.sdg.consumer.myutils
import java.io.InputStream
import java.util.Properties
// Reads key/value pairs from hbase_consumer.properties on the classpath
object PropertiesUtil {
val is: InputStream = ClassLoader.getSystemResourceAsStream("hbase_consumer.properties")
var properties = new Properties
properties.load(is)
// Return the value for the given key
def getProperty(key: String): String = {
val str: String = properties.getProperty(key)
str
}
}
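// A minimal hbase_consumer.properties sketch covering the keys read in this code
// (all values below are illustrative assumptions, not taken from the original project):
//   hbase.calllog.regions=6
//   hbase.calllog.namespace=ns_ct
//   hbase.calllog.tablename=ns_ct:calllog
//   kafka.topics=calllog
// Because the same Properties object is passed to new KafkaConsumer(PropertiesUtil.properties),
// the file also needs the standard Kafka client settings, for example:
//   bootstrap.servers=hadoop102:9092
//   group.id=hbase_consumer_group
//   enable.auto.commit=true
//   key.deserializer=org.apache.kafka.common.serialization.StringDeserializer
//   value.deserializer=org.apache.kafka.common.serialization.StringDeserializer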