import java.util
import java.util.{ArrayList, HashMap, List, Map}
import org.apache.hadoop.hbase.{Cell, CompareOperator, HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.filter._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkConf
import scala.collection.JavaConversions._
object HbaseScala {
/**
 * Demo entry point: opens an HBase connection and runs a selection of the
 * sample operations defined in this object against the table `blog_scala`.
 *
 * Most sample calls are commented out; uncomment the ones you want to try.
 * As written, only the deletion of row `rowKey5` and a full table scan run.
 * The connection is always closed before the method returns.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  // Hard-coded Hadoop home of a local Windows setup; adjust per machine.
  System.setProperty("hadoop.home.dir", "D:\\hadoop-common-bin-2.7.x")
  // Not used by any of the samples below; kept from the original scaffold.
  val sparkConf = new SparkConf().setAppName("HBaseTest")
  val conf = HBaseConfiguration.create
  val tableName: TableName = TableName.valueOf("blog_scala")
  conf.set("hbase.zookeeper.quorum", "hadoop01,hadoop02,hadoop03")
  conf.set("hbase.zookeeper.property.clientPort", "2181")
  // A Connection is the entry point for every HBase operation.
  val connection = ConnectionFactory.createConnection(conf)
  try {
    // Create a table with a single column family
    // createHTable(connection, tableName,"area")
    // Create a table with several column families
    val columnFamilys: Array[String] = Array("article", "author")
    // createHTable(connection, tableName,columnFamilys)

    // Sample cells for insertMany: (row key, column family, column name, value)
    val listMap: List[Map[String, AnyRef]] = new ArrayList[Map[String, AnyRef]]
    Seq(
      ("ce_shi1", "article", "title", "Head First HBase"),
      ("ce_shi1", "article", "content", "HBase is the Hadoop database"),
      ("ce_shi1", "article", "tag", "Hadoop,HBase,NoSQL"),
      ("ce_shi1", "author", "name", "nicholas"),
      ("ce_shi1", "author", "nickname", "lee"),
      ("ce_shi2", "author", "name", "spark"),
      ("ce_shi2", "author", "nickname", "hadoop")
    ).foreach { case (row, family, qualifier, value) =>
      val cell: Map[String, AnyRef] = new HashMap[String, AnyRef]
      cell.put("rowKey", row)
      cell.put("columnFamily", family)
      cell.put("columnName", qualifier)
      cell.put("columnValue", value)
      listMap.add(cell)
    }
    // insertMany(connection, tableName,listMap);
    // insertMany(connection,tableName)
    // Insert a single row
    // insertSingle(connection,tableName)

    // Update a value addressed by row key, column family and column name
    val rowKey = "ce_shi2"
    val columnFamily = "author"
    val columnName = "name"
    val columnValue = "hbase"
    // updateData(connection,tableName,rowKey,columnFamily,columnName,columnValue);

    val rowKey1 = "rowKey5"
    val columnFamily1 = "author"
    val columnName1 = "tag"
    val columnNames = new util.ArrayList[String]
    columnNames.add("name")
    columnNames.add("nickname")
    // Delete one column of one column family in a row
    // deleteData(connection,tableName,rowKey1,columnFamily1,columnName1);
    // Delete one column family of a row
    // deleteData(connection,tableName,rowKey1,columnFamily1);
    // Delete several columns of one column family in a row
    // deleteData(connection,tableName,rowKey1,columnFamily1,columnNames);
    // Delete a whole row
    deleteData(connection, tableName, rowKey1)

    // Fetch a row by row key
    // getResult(connection,tableName,"rowKey5")
    // Full table scan
    scanTable(connection, tableName)
    // Row-key filter
    // rowkeyFilter(connection,tableName)
    // Column-value filter
    // singColumnFilter(connection,tableName)
    // Column-name prefix filter
    // columnPrefixFilter(connection,tableName)
    // Combined filters
    // filterSet(connection,tableName)
  } finally {
    // Release the connection even when one of the samples throws.
    connection.close()
  }
}
/**
 * Creates a table with a single column family, unless the table already exists.
 *
 * Prints "create done." after creating the table, or "表已存在"
 * ("table already exists") when the table is already present.
 *
 * @param connection   open HBase connection; it is not closed by this method
 * @param tableName    name of the table to create
 * @param columnFamily name of the table's only column family
 */
def createHTable(connection: Connection, tableName: TableName, columnFamily: String): Unit = {
  val admin = connection.getAdmin
  try {
    if (admin.tableExists(tableName)) {
      print("表已存在")
    } else {
      // Column-family descriptor -> table descriptor -> create
      val family: ColumnFamilyDescriptor =
        ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(columnFamily)).build()
      val descriptor = TableDescriptorBuilder.newBuilder(tableName).setColumnFamily(family).build()
      admin.createTable(descriptor)
      println("create done.")
    }
  } finally {
    // The Admin handle was previously leaked on every call.
    admin.close()
  }
}
/**
 * Creates a table with several column families, unless the table already exists.
 *
 * Prints "create done." after creating the table; does nothing (and prints
 * nothing) when the table is already present.
 *
 * @param connection    open HBase connection; it is not closed by this method
 * @param tableName     name of the table to create
 * @param columnFamilys names of the column families to add to the table
 */
def createHTable(connection: Connection, tableName: TableName, columnFamilys: Array[String]): Unit = {
  val admin = connection.getAdmin
  try {
    if (!admin.tableExists(tableName)) {
      val tableBuilder = TableDescriptorBuilder.newBuilder(tableName)
      // Register one descriptor per requested column family
      columnFamilys.foreach { familyName =>
        val family: ColumnFamilyDescriptor =
          ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(familyName)).build()
        tableBuilder.setColumnFamily(family)
      }
      admin.createTable(tableBuilder.build())
      println("create done.")
    }
  } finally {
    // The Admin handle was previously leaked on every call.
    admin.close()
  }
}
/**
 * Inserts one fixed sample row (`rowKey5`) with cells in both the `author`
 * and `article` column families, using a single Put.
 *
 * @param connection open HBase connection; it is not closed by this method
 * @param tableName  table to write to
 */
def insertSingle(connection: Connection, tableName: TableName): Unit = {
  val table = connection.getTable(tableName)
  try {
    val put = new Put(Bytes.toBytes("rowKey5"))
    put.addColumn(Bytes.toBytes("author"), Bytes.toBytes("name"), Bytes.toBytes("hbase"))
    put.addColumn(Bytes.toBytes("author"), Bytes.toBytes("nickname"), Bytes.toBytes("hadoop"))
    put.addColumn(Bytes.toBytes("article"), Bytes.toBytes("tag"), Bytes.toBytes("sqoop"))
    put.addColumn(Bytes.toBytes("article"), Bytes.toBytes("content"), Bytes.toBytes("flume"))
    put.addColumn(Bytes.toBytes("article"), Bytes.toBytes("title"), Bytes.toBytes("hive"))
    table.put(put)
  } finally {
    // Close the table even when the write fails.
    table.close()
  }
}
/**
 * Inserts many cells described by a list of maps (suited to data with a
 * fixed structure). Rows and column families may differ between entries.
 *
 * Each map must contain the keys `rowKey`, `columnFamily`, `columnName` and
 * `columnValue`; each value is converted with `toString` before being written.
 * One Put is built per map and all Puts are sent in a single batch. A null or
 * empty list writes nothing. "add data Success!" is printed in every
 * non-failing case.
 *
 * @param connection open HBase connection; it is not closed by this method
 * @param tableName  table to write to
 * @param list       cell descriptions as explained above; may be null or empty
 * @throws IllegalArgumentException if an entry lacks one of the required keys
 */
def insertMany(connection: Connection, tableName: TableName, list: List[Map[String, AnyRef]]): Unit = {
  import scala.collection.JavaConverters._

  // Reads a mandatory field, failing with a descriptive error instead of a bare NPE.
  def requiredBytes(entry: Map[String, AnyRef], key: String): Array[Byte] = {
    val value = entry.get(key)
    if (value == null) {
      throw new IllegalArgumentException(s"insertMany: entry $entry has no value for '$key'")
    }
    Bytes.toBytes(value.toString)
  }

  // Build and validate every Put before touching the table.
  val puts: ArrayList[Put] = new ArrayList[Put]
  if (list != null) {
    list.asScala.foreach { entry =>
      val put = new Put(requiredBytes(entry, "rowKey"))
      put.addColumn(
        requiredBytes(entry, "columnFamily"),
        requiredBytes(entry, "columnName"),
        requiredBytes(entry, "columnValue"))
      puts.add(put)
    }
  }

  if (!puts.isEmpty) {
    val table: Table = connection.getTable(tableName)
    try {
      table.put(puts)
    } finally {
      // Close the table even when the batch write fails.
      table.close()
    }
  }
  System.out.println("add data Success!")
}
/**
 * Inserts four fixed sample rows (`rowKey1` .. `rowKey4`), each with a single
 * cell, spread over the `author` and `article` column families. All Puts are
 * sent in one batch.
 *
 * @param connection open HBase connection; it is not closed by this method
 * @param tableName  table to write to
 */
def insertMany(connection: Connection, tableName: TableName): Unit = {
  // Builds a Put holding exactly one cell.
  def singleCellPut(row: String, family: String, qualifier: String, value: String): Put = {
    val put = new Put(Bytes.toBytes(row))
    put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(value))
    put
  }

  val puts = new ArrayList[Put]
  puts.add(singleCellPut("rowKey1", "author", "nickname", "bigData"))
  puts.add(singleCellPut("rowKey2", "author", "name", "spark"))
  puts.add(singleCellPut("rowKey3", "article", "title", "HBase,Hive"))
  puts.add(singleCellPut("rowKey4", "article", "content", "HBase"))

  val table = connection.getTable(tableName)
  try {
    table.put(puts)
  } finally {
    // Close the table even when the batch write fails.
    table.close()
  }
}
/**
 * Sets the value of one cell, addressed by row key, column family and column
 * name. Implemented as a Put of the new value.
 *
 * @param connection   open HBase connection; it is not closed by this method
 * @param tableName    table to write to
 * @param rowKey       row key of the cell
 * @param columnFamily column family of the cell
 * @param columnName   column name (qualifier) of the cell
 * @param columnValue  new value to store
 */
def updateData(connection: Connection, tableName: TableName, rowKey: String, columnFamily: String, columnName: String, columnValue: String): Unit = {
  val table = connection.getTable(tableName)
  try {
    val put = new Put(Bytes.toBytes(rowKey))
    put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnName), Bytes.toBytes(columnValue))
    table.put(put)
  } finally {
    // Close the table even when the write fails.
    table.close()
  }
}
/**
 * Deletes an entire row, identified by its row key.
 *
 * @param connection open HBase connection; it is not closed by this method
 * @param tableName  table to delete from
 * @param rowKey     row key of the row to delete
 */
def deleteData(connection: Connection, tableName: TableName, rowKey: String): Unit = {
  val table = connection.getTable(tableName)
  try {
    // A Delete without any family/column restriction targets the whole row.
    table.delete(new Delete(Bytes.toBytes(rowKey)))
  } finally {
    // Close the table even when the delete fails.
    table.close()
  }
}
/**
 * Deletes the contents of one column family of a row.
 *
 * @param connection   open HBase connection; it is not closed by this method
 * @param tableName    table to delete from
 * @param rowKey       row key of the affected row
 * @param columnFamily column family whose cells are deleted
 */
def deleteData(connection: Connection, tableName: TableName, rowKey: String, columnFamily: String): Unit = {
  val table = connection.getTable(tableName)
  try {
    val delete = new Delete(Bytes.toBytes(rowKey))
    delete.addFamily(Bytes.toBytes(columnFamily))
    table.delete(delete)
  } finally {
    // Close the table even when the delete fails.
    table.close()
  }
}
/**
 * Deletes the value of one column of one column family in a row.
 *
 * NOTE(review): this uses `Delete.addColumn`, whereas the list-based overload
 * uses `Delete.addColumns`. Per the HBase API, `addColumn` targets only the
 * most recent version of the cell - confirm that this difference is intended.
 *
 * @param connection   open HBase connection; it is not closed by this method
 * @param tableName    table to delete from
 * @param rowKey       row key of the affected row
 * @param columnFamily column family of the column
 * @param columnName   column name (qualifier) to delete
 */
def deleteData(connection: Connection, tableName: TableName, rowKey: String, columnFamily: String, columnName: String): Unit = {
  val table = connection.getTable(tableName)
  try {
    val delete = new Delete(Bytes.toBytes(rowKey))
    delete.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnName))
    table.delete(delete)
  } finally {
    // Close the table even when the delete fails.
    table.close()
  }
}
/**
 * Deletes the values of several columns of one column family in a row.
 *
 * A null or empty `columnNames` is treated as "nothing to delete". Without
 * this guard the Delete would carry no column restriction and would remove
 * the entire row.
 *
 * @param connection   open HBase connection; it is not closed by this method
 * @param tableName    table to delete from
 * @param rowKey       row key of the affected row
 * @param columnFamily column family the columns belong to
 * @param columnNames  column names (qualifiers) to delete; may be null or empty
 */
def deleteData(connection: Connection, tableName: TableName, rowKey: String, columnFamily: String, columnNames: util.List[String]): Unit = {
  import scala.collection.JavaConverters._
  if (columnNames != null && !columnNames.isEmpty) {
    val familyBytes = Bytes.toBytes(columnFamily)
    val delete = new Delete(Bytes.toBytes(rowKey))
    columnNames.asScala.foreach { columnName =>
      delete.addColumns(familyBytes, Bytes.toBytes(columnName))
    }
    val table = connection.getTable(tableName)
    try {
      table.delete(delete)
    } finally {
      // Close the table even when the delete fails.
      table.close()
    }
  }
}
/**
 * Fetches one row by row key and prints each of its cells as
 * `columnName::value`. Prints nothing when the row has no cells.
 *
 * @param connection open HBase connection; it is not closed by this method
 * @param tableName  table to read from
 * @param rowKey     row key of the row to fetch
 */
def getResult(connection: Connection, tableName: TableName, rowKey: String): Unit = {
  val table = connection.getTable(tableName)
  try {
    val result = table.get(new Get(Bytes.toBytes(rowKey)))
    // rawCells may be null for an empty result (e.g. a missing row); iterating
    // over it unguarded used to fail with a NullPointerException.
    Option(result.rawCells).foreach { cells =>
      cells.foreach { cell =>
        val qualifier = Bytes.toString(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength)
        val value = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
        println(s"$qualifier::$value")
      }
    }
  } finally {
    // Close the table even when the read fails.
    table.close()
  }
}
/**
 * Scans the whole table and prints every row: first the row key, then each
 * cell as `family::columnName::value`, followed by a separator line.
 *
 * @param connection open HBase connection; it is not closed by this method
 * @param tableName  table to scan
 */
def scanTable(connection: Connection, tableName: TableName): Unit = {
  import scala.collection.JavaConverters._
  val table = connection.getTable(tableName)
  try {
    val scanner = table.getScanner(new Scan)
    try {
      scanner.asScala.foreach { row =>
        println("row key :" + Bytes.toString(row.getRow))
        row.rawCells.foreach { cell =>
          val family = Bytes.toString(cell.getFamilyArray, cell.getFamilyOffset, cell.getFamilyLength)
          val qualifier = Bytes.toString(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength)
          val value = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
          println(s"$family::$qualifier::$value")
        }
        println("-----------------------------------------")
      }
    } finally {
      // The scanner was previously never closed.
      scanner.close()
    }
  } finally {
    // The table was previously never closed.
    table.close()
  }
}
// Filter comparison operators: LESS (<), LESS_OR_EQUAL (<=), EQUAL (=), NOT_EQUAL (<>), GREATER_OR_EQUAL (>=), GREATER (>), NO_OP (excludes everything)
/**
 * Row-key filter sample: scans the table for rows whose key matches the
 * regular expression `Key1$` and prints each match (row key, then every cell
 * as `family::columnName::value`, then a separator line).
 *
 * @param connection open HBase connection; it is not closed by this method
 * @param tableName  table to scan
 */
def rowkeyFilter(connection: Connection, tableName: TableName): Unit = {
  import scala.collection.JavaConverters._
  val table = connection.getTable(tableName)
  try {
    val scan = new Scan
    // "str$" matches at the end of the key (like SQL "%str");
    // "^str" matches at the start of the key (like SQL "str%").
    scan.setFilter(new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("Key1$")))
    val scanner = table.getScanner(scan)
    try {
      scanner.asScala.foreach { row =>
        println("row key :" + Bytes.toString(row.getRow))
        row.rawCells.foreach { cell =>
          val family = Bytes.toString(cell.getFamilyArray, cell.getFamilyOffset, cell.getFamilyLength)
          val qualifier = Bytes.toString(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength)
          val value = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
          println(s"$family::$qualifier::$value")
        }
        println("-----------------------------------------")
      }
    } finally {
      // The scanner was previously never closed.
      scanner.close()
    }
  } finally {
    // The table was previously never closed.
    table.close()
  }
}
/**
 * Column-value filter sample: scans the table with a SingleColumnValueFilter
 * on `author:name` using the LESS operator against the value "hbase", and
 * prints each returned row (row key, then every cell as
 * `family::columnName::value`, then a separator line).
 *
 * NOTE(review): the filter is left at its default "filter if missing" setting,
 * so rows that have no `author:name` cell are presumably returned as well -
 * confirm whether `setFilterIfMissing(true)` is wanted here.
 *
 * @param connection open HBase connection; it is not closed by this method
 * @param tableName  table to scan
 */
def singColumnFilter(connection: Connection, tableName: TableName): Unit = {
  import scala.collection.JavaConverters._
  val table = connection.getTable(tableName)
  try {
    val scan = new Scan
    // Arguments: column family, column name, comparison operator, value
    scan.setFilter(new SingleColumnValueFilter(Bytes.toBytes("author"), Bytes.toBytes("name"), CompareOperator.LESS, Bytes.toBytes("hbase")))
    val scanner = table.getScanner(scan)
    try {
      scanner.asScala.foreach { row =>
        println("row key :" + Bytes.toString(row.getRow))
        row.rawCells.foreach { cell =>
          val family = Bytes.toString(cell.getFamilyArray, cell.getFamilyOffset, cell.getFamilyLength)
          val qualifier = Bytes.toString(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength)
          val value = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
          println(s"$family::$qualifier::$value")
        }
        println("-----------------------------------------")
      }
    } finally {
      // The scanner was previously never closed.
      scanner.close()
    }
  } finally {
    // The table was previously never closed.
    table.close()
  }
}
/**
 * Column-name prefix filter sample: scans the table with a ColumnPrefixFilter
 * for the prefix "name" and prints each returned row (row key, then every
 * cell as `family::columnName::value`, then a separator line).
 *
 * @param connection open HBase connection; it is not closed by this method
 * @param tableName  table to scan
 */
def columnPrefixFilter(connection: Connection, tableName: TableName): Unit = {
  import scala.collection.JavaConverters._
  val table = connection.getTable(tableName)
  try {
    val scan = new Scan
    scan.setFilter(new ColumnPrefixFilter(Bytes.toBytes("name")))
    val scanner = table.getScanner(scan)
    try {
      scanner.asScala.foreach { row =>
        println("row key :" + Bytes.toString(row.getRow))
        row.rawCells.foreach { cell =>
          val family = Bytes.toString(cell.getFamilyArray, cell.getFamilyOffset, cell.getFamilyLength)
          val qualifier = Bytes.toString(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength)
          val value = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
          println(s"$family::$qualifier::$value")
        }
        println("-----------------------------------------")
      }
    } finally {
      // The scanner was previously never closed.
      scanner.close()
    }
  } finally {
    // The table was previously never closed.
    table.close()
  }
}
/**
 * Combined-filter sample: scans the table with a FilterList in MUST_PASS_ALL
 * mode holding a SingleColumnValueFilter (`author:name` EQUAL "spark") and a
 * ColumnPrefixFilter (prefix "name"). Prints each returned row (row key, then
 * every cell as `family::columnName::value`, then a separator line).
 *
 * @param connection open HBase connection; it is not closed by this method
 * @param tableName  table to scan
 */
def filterSet(connection: Connection, tableName: TableName): Unit = {
  import scala.collection.JavaConverters._
  val table = connection.getTable(tableName)
  try {
    val filters = new FilterList(FilterList.Operator.MUST_PASS_ALL)
    filters.addFilter(new SingleColumnValueFilter(Bytes.toBytes("author"), Bytes.toBytes("name"), CompareOperator.EQUAL, Bytes.toBytes("spark")))
    filters.addFilter(new ColumnPrefixFilter(Bytes.toBytes("name")))
    val scan = new Scan
    scan.setFilter(filters)
    val scanner = table.getScanner(scan)
    try {
      scanner.asScala.foreach { row =>
        println("row key :" + Bytes.toString(row.getRow))
        row.rawCells.foreach { cell =>
          val family = Bytes.toString(cell.getFamilyArray, cell.getFamilyOffset, cell.getFamilyLength)
          val qualifier = Bytes.toString(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength)
          val value = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
          println(s"$family::$qualifier::$value")
        }
        println("-----------------------------------------")
      }
    } finally {
      // The scanner was previously never closed.
      scanner.close()
    }
  } finally {
    // The table was previously never closed.
    table.close()
  }
}
}