python happybase API

一、环境准备

  • 安装 happybase 包:pip3 install happybase
  • 安装 thrift(happybase 通过 HBase 的 Thrift 服务访问数据),安装步骤见:https://mp.csdn.net/postedit/82250725

二、接口调用

使用连接池(ConnectionPool)方式

  • configuration.properties
[Hbase]
hbase_host:172.8.10.xx
hbase_port:9090
hbase_username:0
hbase_password:0
hbase_db:xx
hbase_columnfamilies:info
  •  HbaseClient.py
#!/usr/bin/python3
# -*- coding: UTF-8 -*-

import contextlib
import json
from configparser import ConfigParser

import happybase
import pandas as pd

## READ CONFIGURATION FILE
# configuration.properties is parsed as a whitespace-separated "key value"
# table; its `configPath` row holds the path of the INI file that contains the
# [Hbase] section.  `sep=r"\s+"` is the documented equivalent of the
# deprecated `delim_whitespace=True`.
# NOTE(review): the sample configuration.properties shown with this module
# contains the [Hbase] section directly and no `configPath` row -- confirm
# which layout is intended.
config_file = pd.read_table(filepath_or_buffer="configuration.properties",
                            header=None, sep=r"\s+", index_col=0).transpose()

config_path = str(config_file['configPath'].iloc[0])

config = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it did parse; fail here with a clear message instead of a later,
# less obvious NoSectionError.
if not config.read(config_path):
    raise FileNotFoundError("HBase configuration file not readable: " + config_path)

# =============================================================================
# initialization
# =============================================================================

# Option names are matched case-insensitively by ConfigParser, so these find
# the lower-case `hbase_*` keys of the [Hbase] section.
Hbase_host = config.get('Hbase', 'Hbase_host')
Hbase_port = config.getint('Hbase', 'Hbase_port')
Hbase_db = config.get('Hbase', 'Hbase_db')


# Module-wide pool shared by every HbaseClient instance.
happyBasePool = happybase.ConnectionPool(
    host=Hbase_host,
    port=Hbase_port,
    size=100)


class HbaseClient(object):
    """Convenience wrapper around the module-level ``happyBasePool``.

    Every table name passed to a method is qualified as
    ``<Hbase_db>:<tableName>``.  Each method leases a connection from the pool
    for the duration of the call only, so the connection is never used after
    it has been handed back to the pool.
    """

    __slots__ = ['conn']

    # Column-family prefix removed from DataFrame column labels.
    _FAMILY_PREFIX = 'info:'

    def __init__(self):
        """Populate the legacy ``conn`` attribute.

        ``conn`` is kept only for callers that still read it.  The connection
        goes back to the pool as soon as the ``with`` block exits, so it must
        not be used for I/O; the methods of this class lease their own
        connection per call instead.
        """
        with happyBasePool.connection() as conn:
            self.conn = conn

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    @contextlib.contextmanager
    def _table(self, tableName):
        """Lease a pooled connection and yield the namespaced table handle."""
        with happyBasePool.connection() as conn:
            yield conn.table(Hbase_db + ":" + tableName)

    @staticmethod
    def _decode_row(row):
        """Decode one ``{bytes: bytes}`` row into ``{str: str}`` (UTF-8).

        Cells whose value is ``None`` are skipped.
        """
        return {
            column.decode('utf-8'): cell.decode('utf-8')
            for column, cell in row.items()
            if cell is not None
        }

    @staticmethod
    def _json_payloads(rows):
        """Parse the JSON stored in the ``info:key`` cell of each scanned row.

        Rows that have no ``info:key`` cell are skipped.
        """
        payloads = []
        for _, row in rows:
            raw = row.get(b'info:key')
            if raw is None:
                continue
            payloads.append(json.loads(raw.decode('utf-8')))
        return payloads

    @classmethod
    def _strip_family(cls, frame):
        """Return ``frame`` with the leading ``info:`` removed from its labels.

        ``str.lstrip('info:')`` would strip any leading run of the characters
        i/n/f/o/: (turning ``info:name`` into ``ame``), so the prefix is
        removed explicitly.
        """
        prefix = cls._FAMILY_PREFIX
        return frame.rename(
            columns=lambda label: label[len(prefix):] if label.startswith(prefix) else label
        )

    # ------------------------------------------------------------------
    # DDL
    # ------------------------------------------------------------------

    def create_table(self, tableName: str) -> None:
        """Create ``<Hbase_db>:<tableName>`` with a single ``info`` column
        family, unless a table of that name is already listed."""
        full_name = Hbase_db + ":" + tableName
        with happyBasePool.connection() as conn:
            if full_name.encode('utf-8') not in conn.tables():
                conn.create_table(full_name, {"info": {}})

    # ------------------------------------------------------------------
    # queries
    # ------------------------------------------------------------------

    def scan_row_data(self, tableName: str, start_rowKey, end_rowKey) -> pd.DataFrame:
        """Range scan returning every column.

        Scans from ``start_rowKey`` to ``end_rowKey`` and returns one
        DataFrame row per HBase row; column labels keep their
        ``family:qualifier`` form and all values are decoded to ``str``.
        """
        with self._table(tableName) as table:
            records = [
                self._decode_row(row)
                for _, row in table.scan(row_start=start_rowKey, row_stop=end_rowKey)
            ]
        return pd.DataFrame(records)

    def RowFilter(self, tableName: str, RowPrefix: str) -> pd.DataFrame:
        """Prefix scan over the ``info:key`` column.

        For every row whose key starts with ``RowPrefix``, the JSON document
        stored in ``info:key`` is parsed; the parsed documents form the rows of
        the returned DataFrame.  Rows without ``info:key`` are skipped.
        """
        with self._table(tableName) as table:
            payloads = self._json_payloads(
                table.scan(row_prefix=RowPrefix.encode('utf-8'))
            )
        return pd.DataFrame(payloads)

    def advancedRowFilter(self, tableName: str, rowPrefix: str, cols=None) -> pd.DataFrame:
        """Prefix scan restricted to ``cols``.

        ``cols`` is passed through as the scan's ``columns`` argument
        (``None`` selects all columns).  The ``info:`` prefix is removed from
        the column labels of the returned DataFrame.
        """
        with self._table(tableName) as table:
            records = [
                self._decode_row(row)
                for _, row in table.scan(row_prefix=rowPrefix.encode('utf-8'),
                                         columns=cols)
            ]
        return self._strip_family(pd.DataFrame(records))

    def SingleColumnValueFilter(self, tableName: str, RowPrefix: str,
                                filter_str: str) -> pd.DataFrame:
        """Prefix scan with a server-side filter.

        ``filter_str`` is UTF-8 encoded and passed as the scan's ``filter``
        argument.  The ``info:`` prefix is removed from the column labels of
        the returned DataFrame.
        """
        with self._table(tableName) as table:
            records = [
                self._decode_row(row)
                for _, row in table.scan(row_prefix=RowPrefix.encode('utf-8'),
                                         filter=filter_str.encode('utf-8'))
            ]
        return self._strip_family(pd.DataFrame(records))

    def filter_row_data(self, tableName: str, start_rowKey, end_rowKey) -> pd.DataFrame:
        """Range scan over the ``info:key`` column, returned as a DataFrame.

        Each row's ``info:key`` cell is parsed as JSON; rows without that cell
        are skipped.
        """
        with self._table(tableName) as table:
            payloads = self._json_payloads(
                table.scan(row_start=start_rowKey, row_stop=end_rowKey)
            )
        return pd.DataFrame(payloads)

    def listByScan(self, tableName: str, start_rowKey, end_rowKey) -> list:
        """Range scan over the ``info:key`` column, returned as a list.

        Same as ``filter_row_data`` but returns the parsed JSON documents as a
        plain list.  Rows without an ``info:key`` cell are skipped, matching
        the other ``info:key`` readers, instead of failing on ``None``.
        """
        with self._table(tableName) as table:
            return self._json_payloads(
                table.scan(row_start=start_rowKey, row_stop=end_rowKey)
            )

    def select_data(self, tableName: str, rowkey) -> dict:
        """Fetch a single row and return it as ``{column: value}`` strings."""
        with self._table(tableName) as table:
            return self._decode_row(table.row(rowkey))

    # ------------------------------------------------------------------
    # writes
    # ------------------------------------------------------------------

    def put_data(self, tableName: str, rowkey, curr_step_row) -> None:
        """Write one row; ``curr_step_row`` maps column names to values."""
        with self._table(tableName) as table:
            table.put(row=rowkey, data=curr_step_row)

    def send_batch_data(self, tableName: str, dt: pd.DataFrame, current_ruleVal) -> None:
        """Write every row of ``dt`` in one batch.

        The row key is ``machineID + spindleID + programNum`` (read from
        ``current_ruleVal``) followed by the row's ``info:timeID`` value
        zero-padded to 5 characters.  All cell values are written as strings.
        The batch is sent only after every row has been queued.
        """
        key_head = (str(current_ruleVal.machineID)
                    + str(current_ruleVal.spindleID)
                    + str(current_ruleVal.programNum))

        with self._table(tableName) as table:
            bat = table.batch()
            for i in range(dt.shape[0]):
                record = dt.iloc[i, :].astype('str')
                row_key = key_head + str(record['info:timeID']).zfill(5)
                bat.put(row_key, record.to_dict())
            bat.send()

    def advancedSendBatch(self, tableName: str, dt: pd.DataFrame, rowKeyName, prefix=''):
        """Write every row of ``dt`` in one batch with a composite row key.

        ``rowKeyName`` is a list of column names of ``dt``.  Every column is
        stored under the ``info`` family.  The row key is
        ``prefix``, the row's values for the ``rowKeyName`` columns and the
        row's position in ``dt``, joined with ``_`` (so an empty ``prefix``
        yields a key that starts with ``_``).

        Cell values are converted to ``str`` before being queued, as
        ``send_batch_data`` does, so numeric columns can be written.
        """
        key_columns = ['info:' + col for col in rowKeyName]
        dt = dt.rename(lambda col: 'info:' + col, axis='columns')

        with self._table(tableName) as table:
            bat = table.batch()
            for i in range(dt.shape[0]):
                record = dt.iloc[i, :].astype('str')
                row_key = '_'.join([prefix] + record[key_columns].tolist() + [str(i)])
                bat.put(row_key, record.to_dict())
            bat.send()

    def delete_data(self, tableName: str, start_rowKey, end_rowKey) -> None:
        """Delete every row found by a scan from ``start_rowKey`` to
        ``end_rowKey``, one delete call per row."""
        with self._table(tableName) as table:
            for row_key, _ in table.scan(row_start=start_rowKey, row_stop=end_rowKey):
                table.delete(row_key)

 

你可能感兴趣的:(Hbase,Python)