Postgresql - pipelinedb - 利用kafka + python + pipelinedb 监控数据库日志

1. 数据库中创建stream以及视图

CREATE FOREIGN TABLE t_error_log(
err_date date, 
hostname varchar(128), 
err_time timestamp without time zone, 
db_user varchar(128), 
db_name varchar(128), 
client_addr varchar(128), 
log_level varchar(128), 
err_log varchar
) server pipelinedb;

CREATE VIEW vw_error with (action=materialize) AS 
SELECT 
  err_date, hostname, db_name, db_user, client_addr, err_log,
  count(*) 
FROM t_error_log
GROUP BY err_date, hostname, db_name, db_user, client_addr, err_log;


CREATE VIEW vw_error_date with (action=materialize) AS 
SELECT 
  err_date,
  count(*) 
FROM t_error_log
GROUP BY err_date;


CREATE VIEW vw_error_date_host with (action=materialize) AS 
SELECT 
  err_date, hostname,
  count(*) 
FROM t_error_log
GROUP BY err_date, hostname;


CREATE VIEW vw_error_date_host_db with (action=materialize) AS 
SELECT 
  err_date, hostname, db_name,
  count(*) 
FROM t_error_log
GROUP BY err_date, hostname, db_name;

CREATE VIEW vw_error_date_host_db_cli with (action=materialize) AS 
SELECT 
  err_date, hostname, db_name, client_addr,
  count(*) 
FROM t_error_log
GROUP BY err_date, hostname, db_name, client_addr;

 

2. 从kafka中取数,插入到数据库中的脚本 errorlog.py

#!//usr/bin/python
# coding:utf-8

from kafka import KafkaConsumer
from kafka import TopicPartition
import sys
import time
import json
import re
import psycopg2
import multiprocessing
import logging

KAFKA_LIST = ["mq01s.test.com:9092,mq02s.test.com:9092,mq03s.test.com:9092"]
CLIENT_ID = "test_pipelinedb" ##" % time.time()
DATABASE_NAME = ''
HOST = ''
PORT = ''
USER_NAME = ''
PASSWORD = ''
CHAR_SET = ''

# send mail to dbops

def george_init():
    global DATABASE_NAME
    DATABASE_NAME = 'pipelinedb'
    global HOST
    HOST = 'dbwtest03bc.test.com'
    global PORT
    PORT = '5432'
    global USER_NAME
    USER_NAME = 'mytest'
    global password
    pssword = ''
    global CHAR_SET
    CHAR_SET = 'utf8'

# conn log_parse
def get_dbops_conn():
    george_init()
    return psycopg2.connect(host = HOST, database = DATABASE_NAME, user = USER_NAME, password = PASSWORD, port = PORT)

def get_cursor(conn):
    return conn.cursor()

# close connect
def conn_close(conn):
    if conn != None:
        conn.close()

# close cursor
def cursor_close(cursor):
    if cursor != None:
        cursor.close()

# close all
def close(cursor, conn):
    cursor_close(cursor)
    conn_close(conn)

# usage
def Usage():
    if len(sys.argv) < 2:
        print('Usage: python %s ' % sys.argv[0])
        print('       e.g: python %s test_data' % sys.argv[0])
        sys.exit(1)

# get the message of topic from kafka, return all_data
# client_id = zc_time
def get_data_kafka():
    logger = logging.getLogger('[Parse-pglog]')
    KAFKA_LIST = ["mq01s.test.com:9092","mq02s.test.com:9092","mq03s.test.com:9092"]
    topic_name = "pglog_dd" # sys.argv[1]
    client_id = "test_pipeline" ##%s" % time.time()
#    consumer = KafkaConsumer(topic_name, bootstrap_servers=KAFKA_LIST, client_id=client_id, group_id=client_id, auto_offset_reset="earliest")
    while True:
        consumer = KafkaConsumer(topic_name, bootstrap_servers=KAFKA_LIST, client_id=client_id, group_id=client_id, auto_offset_reset="earliest")
        for data in consumer:
            yield data.value
        logger.info('get kafka data for end!')
    logger.info('get kafka data while end!')

# parse msg.value from get_data_kafka, and find the message.
# return error meesage
# data_v are from kafka data.
def find_err_message(data_v):
    main_errors = ['WARNING:', 'ERROR:', 'FATAL:', 'PANIC:']
    exclude_errors = ['canceling statement due to statement timeout', 'recovery is in progress', 'nonstandard use of ', 'column "waiting" does not exist at character','cannot execute CREATE TABLE in a read-only transaction','cannot execute DROP TABLE in a read-only transaction','cannot execute SELECT INTO in a read-only transaction','duplicate key value violates unique constraint "t_notif_push_user_pkey"','database is read-only! (user social_rws)']
    j_data = json.loads(data_v)
    message = j_data.get("message")
    if any(err in message for err in main_errors):
        if not any(ex_err in message for ex_err in exclude_errors):
            return data_v

# get hostname from data
def get_host(err_data_v):
    j_data = json.loads(err_data_v)
    hostname = j_data['beat']['hostname']
    return hostname

# match message to tuple
# return match message
def match_msg(err_data_v):
    for data in err_data_v:
        j_data = json.loads(err_data_v)
        err_msg = j_data.get("message")
        c_msg = r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\S*\s*\d* (\w{3}) (\[\d+\]): (\[\d+-\d+\]) user=(\S*\w*).\s*\S*db=(\S*\w*) ([a-zA-Z0-9\-\.]+|\[local\]|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|[0-9a-fA-F:]+)?[:\d]* (LOG|WARNING|ERROR|FATAL|PANIC)?[:\d]*  (.*)'
        regex = re.compile(c_msg)
        msg = regex.match(err_msg)
        if msg:
            return msg.groups()

# open file to append data return 
def open_file_for_a(file):
    return open(file, 'a')

# close_file 
# fo = append_file(file)
# close_file(fo)
def close_file(file):
    return file.close()

# insert into file
# fo = append_file(file)
# insert_err_to_file(fo, msg)
def insert_err_to_file(fo,hostname,err_msg):
    fo.write('\n %s : %s '  % (hostname ,err_msg))
    fo.write('\n*******************************')



# insert pg error log to db

def insert_pgerr_to_db(conn_insert,hostname,err_msg):
    #cur_date = time.strftime('%Y-%m-%d',time.localtime(time.time()))
    logger = logging.getLogger('[Parse-pglog]')
    hostname = hostname
    try:
        err_time = err_msg[0]
        db_user = err_msg[4]
        db_name = err_msg[5]
        client_addr = err_msg[6]
        err_log_level = err_msg[7]
        err_log = err_msg[8]
        err_date = err_time[:10]
        if '\'' in err_log:
            err_log = err_log.replace('\'','\"')
        sql_insert_err = '''insert into t_error_log(err_date, hostname, err_time, db_user, db_name, client_addr, log_level, err_log) 
        values (\'%s\', \'%s\', \'%s\', \'%s\', \'%s\', \'%s\', \'%s\', \'%s\')
        ''' % (err_date, hostname, err_time, db_user, db_name, client_addr, err_log_level, err_log)
        cursor_in_err = get_cursor(conn_insert)
        execute_insert = cursor_in_err.execute(sql_insert_err)
        logger.info('Execute %s ' % sql_insert_err)
        conn_insert.commit()
        logger.info('Commited!')
        cursor_close(cursor_in_err)
    except Exception,e:
        logger.error(e)


def main():
    # Usage()
    logfile = '/root/test/201907/log_parse_pglog.log'
    logger = logging.getLogger('[Parse-pglog]')
    logger.setLevel(logging.INFO)  
    handler = logging.FileHandler(logfile, mode='a')
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    logger.info('Start to parse pglog')

    p = multiprocessing.Pool(8)
    logger.info('Open multiprocessing pool 8.')
    
    while True:
        datas_v = get_data_kafka()
        logger.info('Connect kafka.')
        conn_insert = get_dbops_conn()
        logger.info('Connect DB.')
        # fo = open_file_for_a('kafka_data.txt')
        for data_v in datas_v:
            err_j_data = find_err_message(data_v) # contains error message
            if err_j_data is not None:
                hostname = get_host(err_j_data)
                match_err_msg = match_msg(err_j_data) # message to list
                insert_pgerr_to_db(conn_insert,hostname,match_err_msg)
                try:
                    logger.info('err_time:%s , hostname:%s , err_msg: %s' % (match_err_msg[0][:10], hostname, match_err_msg[8]))
                except Exception,e:
                    logger.info(e)
                # insert_err_to_file(fo,hostname,match_err_msg)
        logger.info('for end!')
    logger.info('while end!')
    conn_close(conn_insert)
        #fo.close()

if __name__ == "__main__":
    main()

3. 运行之后取查视图中的数据

 

你可能感兴趣的:(Postgresql)