# 使用连接资源池方式 (use a connection resource pool)
[Hbase]
hbase_host:172.8.10.xx
hbase_port:9090
hbase_username:0
hbase_password:0
hbase_db:xx
hbase_columnfamilies:info
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import json
from configparser import ConfigParser

import pandas as pd

import happybase
## READ CONFIGURATION FILE
# "configuration.properties" is a small whitespace-separated key/value table;
# its 'configPath' entry points at the real INI file parsed below.
config_file = pd.read_table(filepath_or_buffer="configuration.properties",
                            header=None, delim_whitespace=True, index_col=0).transpose()
# BUG FIX: ConfigParser was used here without ever being imported in this
# file (NameError at runtime); it is now imported at the top of the module.
config = ConfigParser()
config.read(str(config_file['configPath'].iloc[0]))
# =============================================================================
# initialization
# =============================================================================
# ConfigParser option names are case-insensitive (optionxform lower-cases
# them), so these lookups match the lower-case keys (hbase_host, hbase_port,
# hbase_db) in the [Hbase] section of the config file.
Hbase_host = config.get('Hbase', 'Hbase_host')
Hbase_port = config.getint('Hbase', 'Hbase_port')
Hbase_db = config.get('Hbase', 'Hbase_db')
# Module-level happybase Thrift connection pool shared by all HbaseClient
# instances.
happyBasePool = happybase.ConnectionPool(
    host=Hbase_host,
    port=Hbase_port,
    size=100)
class HbaseClient(object):
    """HBase data-access helper backed by the module-level happybase pool.

    Every table is addressed inside the namespace configured as Hbase_db,
    i.e. as "<Hbase_db>:<tableName>", and all data lives in the single
    'info' column family.
    """
    __slots__ = ['conn']
    global happyBasePool

    def __init__(self):
        # NOTE(review): the pooled connection is handed back to the pool the
        # moment this `with` block exits, yet self.conn keeps using it
        # afterwards. Under concurrent use another client could be given the
        # same Thrift connection — confirm single-threaded usage is intended.
        with happyBasePool.connection() as conn:
            self.conn = conn

    @staticmethod
    def _strip_cf_prefix(column_name):
        """Drop a leading 'info:' column-family prefix from a column label.

        Replaces the previous str.lstrip('info:'), which strips *characters*
        (any of i, n, f, o, :) rather than the prefix string and therefore
        mangled labels such as 'info:name' into 'ame'.
        """
        prefix = 'info:'
        return column_name[len(prefix):] if column_name.startswith(prefix) else column_name

    def create_table(self, tableName):
        """Create <Hbase_db>:<tableName> with the single column family
        'info' unless the table already exists."""
        existing = self.conn.tables()
        full_name = Hbase_db + ":" + tableName
        if full_name.encode('utf-8') not in existing:
            self.conn.create_table(full_name, {"info": {}})

    def scan_row_data(self, tableName, start_rowKey, end_rowKey):
        """Range scan [start_rowKey, end_rowKey); return every column of
        every row as a DataFrame of UTF-8-decoded strings."""
        client = self.conn.table(Hbase_db + ":" + tableName)
        data_list = []
        for key, value in client.scan(row_start=start_rowKey, row_stop=end_rowKey):
            data_list.append({k.decode('utf-8'): v.decode('utf-8')
                              for k, v in value.items()})
        return pd.DataFrame(data_list)

    def RowFilter(self, tableName, RowPrefix):
        """Row-prefix scan; JSON-decode each row's 'info:key' cell and
        return the parsed records as a DataFrame. Rows without an
        'info:key' cell are skipped."""
        client = self.conn.table(Hbase_db + ":" + tableName)
        data_list = []
        for key, value in client.scan(row_prefix=RowPrefix.encode('utf-8')):
            curr_data_piece = value.get('info:key'.encode('utf-8'))
            if curr_data_piece is None:
                continue
            data_list.append(json.loads(curr_data_piece.decode('utf-8')))
        return pd.DataFrame(data_list)

    def advancedRowFilter(self, tableName, rowPrefix, cols=None):
        """Row-prefix scan restricted to `cols` (a happybase column spec, or
        None for all columns); return a DataFrame with the 'info:' prefix
        stripped from the column names."""
        client = self.conn.table(Hbase_db + ":" + tableName)
        data_list = []
        for key, value in client.scan(row_prefix=rowPrefix.encode('utf-8'), columns=cols):
            data_list.append({k.decode('utf-8'): v.decode('utf-8')
                              for k, v in value.items()})
        data_df = pd.DataFrame(data_list)
        # BUG FIX: was lstrip('info:'), which also ate leading i/n/f/o
        # characters of the bare column name (e.g. 'info:name' -> 'ame').
        data_df.rename(self._strip_cf_prefix, axis='columns', inplace=True)
        return data_df

    def SingleColumnValueFilter(self, tableName, RowPrefix, filter_str):
        """Row-prefix scan with a server-side filter string (e.g. a
        SingleColumnValueFilter expression); return matching rows as a
        DataFrame with the 'info:' prefix stripped from column names."""
        client = self.conn.table(Hbase_db + ":" + tableName)
        data_list = []
        for key, value in client.scan(row_prefix=RowPrefix.encode('utf-8'),
                                      filter=filter_str.encode('utf-8')):
            tmp = {}
            for curr_key in value.keys():
                curr_value = value.get(curr_key)
                if curr_value is None:
                    continue
                tmp[curr_key.decode('utf-8')] = curr_value.decode('utf-8')
            data_list.append(tmp)
        data_df = pd.DataFrame(data_list)
        # BUG FIX: see advancedRowFilter — lstrip stripped characters, not
        # the prefix string.
        data_df.rename(self._strip_cf_prefix, axis='columns', inplace=True)
        return data_df

    def filter_row_data(self, tableName, start_rowKey, end_rowKey):
        """Range scan [start_rowKey, end_rowKey); JSON-decode each row's
        'info:key' cell and return the parsed records as a DataFrame.
        Rows without an 'info:key' cell are skipped."""
        client = self.conn.table(Hbase_db + ":" + tableName)
        data_list = []
        for key, value in client.scan(row_start=start_rowKey, row_stop=end_rowKey):
            curr_data_piece = value.get('info:key'.encode('utf-8'))
            if curr_data_piece is None:
                continue
            data_list.append(json.loads(curr_data_piece.decode('utf-8')))
        return pd.DataFrame(data_list)

    def listByScan(self, tableName, start_rowKey, end_rowKey):
        """Same as filter_row_data but returns a plain list of parsed
        records instead of a DataFrame."""
        client = self.conn.table(Hbase_db + ":" + tableName)
        data_list = []
        for key, value in client.scan(row_start=start_rowKey, row_stop=end_rowKey):
            curr_data_piece = value.get('info:key'.encode('utf-8'))
            # ROBUSTNESS FIX: previously raised AttributeError on rows with
            # no 'info:key' cell; skip them like the sibling scan methods do.
            if curr_data_piece is None:
                continue
            data_list.append(json.loads(curr_data_piece.decode('utf-8')))
        return data_list

    def select_data(self, tableName, rowkey):
        """Fetch a single row by key; return it as a {column: value} dict of
        UTF-8-decoded strings (empty dict when the row does not exist)."""
        client = self.conn.table(Hbase_db + ":" + tableName)
        rst = client.row(rowkey)
        return {k.decode('utf-8'): v.decode('utf-8') for k, v in rst.items()}

    def put_data(self, tableName, rowkey, curr_step_row):
        """Insert/overwrite one row; curr_step_row maps fully-qualified
        column names ('info:...') to values."""
        client = self.conn.table(Hbase_db + ":" + tableName)
        client.put(row=rowkey, data=curr_step_row)

    def send_batch_data(self, tableName, dt, current_ruleVal):
        """Batch-insert every row of DataFrame `dt` (columns already named
        'info:...'); each row key is machineID + spindleID + programNum from
        `current_ruleVal` plus the row's zero-padded 'info:timeID'."""
        client = self.conn.table(Hbase_db + ":" + tableName)
        bat = client.batch()
        for i in range(dt.shape[0]):
            curr_df = dt.iloc[i, :].astype('str')
            curr_key = str(current_ruleVal.machineID) + \
                       str(current_ruleVal.spindleID) + \
                       str(current_ruleVal.programNum) + \
                       str(curr_df['info:timeID']).zfill(5)
            bat.put(curr_key, curr_df.to_dict())
        bat.send()

    def advancedSendBatch(self, tableName, dt, rowKeyName, prefix=''):
        """Batch-insert DataFrame `dt`, auto-prefixing columns with 'info:'.

        rowKeyName: list of column names whose values — joined with '_'
        together with `prefix` and the positional index — form each row key.
        """
        bat = self.conn.table(Hbase_db + ":" + tableName).batch()
        rowKeyName = list(map(lambda col: 'info:' + col, rowKeyName))
        dt = dt.rename(lambda col: 'info:' + col, axis='columns')
        for i in range(dt.shape[0]):
            curr_df = dt.iloc[i, :]
            curr_key = '_'.join([prefix] + curr_df[rowKeyName].astype('str').tolist() + [str(i)])
            bat.put(curr_key, curr_df.to_dict())
        bat.send()

    def delete_data(self, tableName, start_rowKey, end_rowKey):
        """Delete every row whose key falls in [start_rowKey, end_rowKey)."""
        client = self.conn.table(Hbase_db + ":" + tableName)
        for key, _ in client.scan(row_start=start_rowKey, row_stop=end_rowKey):
            client.delete(key)

    # def __del__(self):
    #     self.conn.close()