基于从日志中截取数据推送阿里云数据总线(DataHub)

前言

公司做的是IOT智能家居产品,最近需要做大数据分析的平台开发。部分定义的元数据需要从后台日志中截取(使用Logstash),然后推送到DataHub(阿里云的数据总线),DataHub再流转到OTS(阿里云的数据表格)。本文主要介绍日志的格式化定义以及如何进行Logstash的配置。
原生logstash 的安装使用可以参考前文:利用logstash截取日志中的数据并推送至Kafka
不同版本的logstash官方使用说明:logstash官网

后台日志格式化约定

  1. 【强制】使用SLF4J定义变量

    不可直接使用日志系统(Log4j、Logback)中的API,有利于统一维护各个类的日志处理方式。

  2. 【强制】日志变量统一定义成static final且变量名用大写

  3. 【推荐】日志变量定义为private
    由该类独占。通过class初始化日志变量时,会以该class作为输出的类名;若混用日志变量,会导致日志输出的类名相同,真正输出日志的类被隐藏起来,不利于排查问题。
    正例:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
private static final Logger LOG = LoggerFactory.getLogger(Abc.class);
  4. 输出格式规范
logger.info("module={} - function={} - data={}", "模块", "功能点", "Json串");

例如:

logger.info("module={} - function={} - data={}", "cs-api", "snDecode", JSON.toJSONString(data));

这里cs-api是模块,snDecode是功能点,JSON.toJSONString(data) 是数据Json串。
输出的案例:

2020-08-19 15:04:12.172  INFO 21382 --- [pool-7-thread-5] c.v.v.c.b.s.helper.VeeBrainLogHelper     : module=user - function=login - data={
     \"lastLoginTime\":1597820652172,\"phoneModel\":\"iPhone\",\"lastLoginLng\":null,\"ip\":\"14.24.144.69\",\"lastLoginLat\":null,\"userId\":\"5ea3e33bb8a14c36e40108f2\",\"lastLoginType\":null}

Logstash的配置文件及解析

## input: tail the application log file (path supports glob patterns)
input {
    file {
        path => "/home/admin/vcoo-app-api/logs/spring.log"
        type => "device"
        # Read the file from the beginning on first run, not only new lines.
        start_position => "beginning"
        # NOTE(review): the sample log payload (see article) spans two lines;
        # if the JSON is really emitted multi-line, a multiline codec is
        # needed here for grok to see the whole event — confirm with the app.
    }
}
## filter: the ETL stages (grok parse -> drop failures -> json expand -> rename)
## grok extracts named fields via regex (see the Logstash grok filter docs)
filter {
    grok {
        match => {
            "message" => [
                # FIX: the published config had "(?" captures with the <name>
                # part stripped (mangled angle brackets) — invalid grok. Names
                # restored to match the remove_field list and the conditionals
                # below ([module], [function], [info], [data]).
                # Spring Boot layout:
                # "2020-08-19 15:04:12.172  INFO 21382 --- [thread] c.v.Class : module=x - function=y - data={...}"
                "(?<date>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}\.\d{3})\s*%{LOGLEVEL:level}\s*%{DATA:pid}\s*\-\-\-\s*\[(?<current_thread>([a-zA-Z0-9._-]+))\]\s*(?<class_info>([a-zA-Z0-9.]+))\s*\:\s*(?<module_name>([a-zA-Z0-9._-]+))=(?<module>([a-zA-Z0-9._-]+))\s*\-\s*(?<function_name>([a-zA-Z0-9._-]+))\=(?<function>([a-zA-Z0-9._-]+))\s*\-\s*(?<data_name>([a-zA-Z0-9._-]+))\=%{GREEDYDATA:data}",
                # Log4j layout: millis separated by comma, level after pid.
                "(?<date>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}\,\d{3})\s*\[(?<current_thread>([a-zA-Z0-9._-]+))\]\s*%{DATA:pid}\s*%{LOGLEVEL:level}\s*\-\s*(?<module_name>([a-zA-Z0-9._-]+))=(?<module>([a-zA-Z0-9._-]+))\s*\-\s*(?<function_name>([a-zA-Z0-9._-]+))\=(?<function>([a-zA-Z0-9._-]+))\s*\-\s*(?<data_name>([a-zA-Z0-9._-]+))\=%{GREEDYDATA:data}",
                # Fallback layout: free-form "info: data" after the class name.
                "(?<date>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}\.\d{3})\s*%{LOGLEVEL:level}\s*%{DATA:pid}\s*\-\-\-\s*\[(?<current_thread>([a-zA-Z0-9._-]+))\]\s*(?<class_info>([a-zA-Z0-9.]+))\s*\:\s*%{DATA:info}\:%{GREEDYDATA:data}"
            ]
        }
        # Drop bookkeeping fields not wanted downstream.
        # FIX: 'path' was listed twice in the original; deduplicated.
        remove_field => ['@timestamp', '@version', 'host', 'path', 'current_thread', 'module_name', 'level', 'pid', 'data_name', 'function_name', 'class_info', 'date']
    }
    ## Drop events that did not match any grok pattern.
    if "_grokparsefailure" in [tags] {
        drop { }
    }
    ## "设备用户绑定" events feed the map-bubble feature and keep their raw
    ## payload; all other events get the JSON payload expanded to top-level
    ## fields so the renames below can see them.
    if [info] !~ "设备用户绑定" {
        json {
            source => "data"
            remove_field => [ "data" ]
        }
    }
    ## device->rrpc
    if "rrpc" == [function] {
        mutate {
            rename => {
                "userId"         => "user_id"
                "rrpcTime"       => "rrpc_time"
                "productKey"     => "product_key"
                "deviceName"     => "device_name"
                "messageContent" => "message_content"
                "rrpcCode"       => "rrpc_code"
                "source"         => "source"
                "deviceId"       => "device_id"
            }
        }
    }
    ## device->configNetWork (network provisioning)
    if "configNetWork" == [function] {
        mutate {
            rename => {
                "userId"          => "user_id"
                "configBeginTime" => "config_begin_time"
                # FIX: was "config_end_ime" (typo). NOTE(review): confirm the
                # DataHub topic schema was not created with the misspelled name.
                "configEndTime"   => "config_end_time"
                "bindBeginTime"   => "bind_begin_time"
                "bindEndTime"     => "bind_end_time"
                "connectResult"   => "connect_result"
                "sisid"           => "sisid"
                "ssid"            => "ssid"
            }
        }
    }
    ## device->unbind (device unbinding)
    if "unbind" == [function] {
        mutate {
            rename => {
                "userId"     => "user_id"
                "deviceId"   => "device_id"
                "unbindTime" => "unbind_time"
                "result"     => "result"
                "unbindType" => "unbind_type"
            }
        }
    }
    ## recipe->cooking (recipe cooking; the original comment mislabeled this
    ## block as device->share)
    if "cooking" == [function] {
        mutate {
            rename => {
                "userId"          => "user_id"
                "cookingTime"     => "cooking_time"
                "recipeId"        => "recipe_id"
                "tag"             => "tag"
                "catalog"         => "catalog"
                "cookingKeepTime" => "cooking_keep_time"
                "recipeTime"      => "recipe_time"
            }
        }
    }
    ## device->bind (device binding)
    if "bind" == [function] {
        mutate {
            rename => {
                "lastBindUserId" => "last_bind_user_id"
                "categoryId"     => "category_id"
                "lan"            => "lan"
                "lng"            => "lng"
                "deviceId"       => "device_id"
                "lastBindTime"   => "last_bind_time"
            }
        }
    }
    ## device->active (device activation)
    if "active" == [function] {
        mutate {
            rename => {
                "deviceId"   => "device_id"
                "categoryId" => "category_id"
                "activeTime" => "active_time"
            }
        }
    }
    ## user->login
    if "login" == [function] {
        mutate {
            rename => {
                "userId"        => "user_id"
                "ip"            => "ip"
                "lastLoginLat"  => "last_login_lat"
                "lastLoginLng"  => "last_login_lng"
                "lastLoginType" => "last_login_type"
                "phoneModel"    => "phone_model"
                # FIX: was "last_login_ime" (typo). NOTE(review): confirm the
                # DataHub topic schema was not created with the misspelled name.
                "lastLoginTime" => "last_login_time"
            }
        }
    }
}

## output: echo to stdout for debugging, route each function to its DataHub
## topic, and forward map-bubble events to Kafka.
output {
    # Console echo of every event; useful while tuning the grok patterns.
    stdout {
        codec => rubydebug
    }
    ## device->rrpc
    if "rrpc" == [function] {
        datahub {
            # NOTE(review): credentials are intentionally blank in the article;
            # fill in before deploying.
            access_id => ""
            access_key => ""
            endpoint => "https://dh-cn-shanghai.aliyuncs.com"
            project_name => "vcoo_veebrain_dev"
            topic_name => "vbrain_device_rrpc"
            retry_times => "10"
            dirty_data_continue => true
            dirty_data_file => "/home/admin/vcoo-app-api/device_rrpc_dirty.data"
            dirty_data_file_max_size => 1000
        }
    }
    ## device->configNetWork
    if "configNetWork" == [function] {
        datahub {
            access_id => ""
            access_key => ""
            endpoint => "https://dh-cn-shanghai.aliyuncs.com"
            project_name => "vcoo_veebrain_dev"
            topic_name => "vbrain_config_network"
            retry_times => "10"
            dirty_data_continue => true
            dirty_data_file => "/home/admin/vcoo-app-api/device_config_network_dirty.data"
            dirty_data_file_max_size => 1000
        }
    }
    ## device->unbind
    if "unbind" == [function] {
        datahub {
            access_id => ""
            access_key => ""
            endpoint => "https://dh-cn-shanghai.aliyuncs.com"
            project_name => "vcoo_veebrain_dev"
            topic_name => "vbrain_device_unbind"
            retry_times => "10"
            dirty_data_continue => true
            dirty_data_file => "/home/admin/vcoo-app-api/device_device_unbind_dirty.data"
            dirty_data_file_max_size => 1000
        }
    }
    ## recipe->cooking
    if "cooking" == [function] {
        datahub {
            access_id => ""
            access_key => ""
            endpoint => "https://dh-cn-shanghai.aliyuncs.com"
            project_name => "vcoo_veebrain_dev"
            topic_name => "vbrain_recipe_cooking"
            retry_times => "10"
            dirty_data_continue => true
            dirty_data_file => "/home/admin/vcoo-app-api/device_recipe_cooking_dirty.data"
            dirty_data_file_max_size => 1000
        }
    }
    ## device->bind
    if "bind" == [function] {
        datahub {
            access_id => ""
            access_key => ""
            endpoint => "https://dh-cn-shanghai.aliyuncs.com"
            project_name => "vcoo_veebrain_dev"
            topic_name => "vbrain_device_bind"
            retry_times => "10"
            dirty_data_continue => true
            dirty_data_file => "/home/admin/vcoo-app-api/device_device_bind_dirty.data"
            dirty_data_file_max_size => 1000
        }
    }
    ## device->active
    if "active" == [function] {
        datahub {
            access_id => ""
            access_key => ""
            endpoint => "https://dh-cn-shanghai.aliyuncs.com"
            project_name => "vcoo_veebrain_dev"
            topic_name => "vbrain_device_active"
            retry_times => "10"
            dirty_data_continue => true
            dirty_data_file => "/home/admin/vcoo-app-api/device_device_active_dirty.data"
            dirty_data_file_max_size => 1000
        }
    }
    ## user->login
    if "login" == [function] {
        datahub {
            access_id => ""
            access_key => ""
            endpoint => "https://dh-cn-shanghai.aliyuncs.com"
            project_name => "vcoo_veebrain_dev"
            topic_name => "vbrain_app_user_info"
            retry_times => "10"
            dirty_data_continue => true
            dirty_data_file => "/home/admin/vcoo-app-api/device_app_user_info_dirty.data"
            dirty_data_file_max_size => 1000
        }
    }
    ## Map-bubble feature: forward the raw JSON payload to Kafka.
    if "设备用户绑定" == [info] {
        kafka {
            bootstrap_servers => "47.101.185.79:9093,106.14.14.0:9093,106.14.177.147:9093"
            #bootstrap_servers => "120.24.43.141:9093,120.25.219.159:9093,112.74.85.153:9093"
            # Emit only the [data] field, not the whole event.
            codec => plain {
                format => "%{data}"
            }
            topic_id => "saas_user_active_device_topic_sit"
            security_protocol => "SASL_SSL"
            sasl_mechanism => "PLAIN"
            #jaas_path => "/data/logstash/config/kafka_client_jaas.conf"
            jaas_path => "/home/admin/kafka-security/kafka_client_jaas.conf"
            ssl_truststore_password => "KafkaOnsClient"
            ssl_truststore_location => "/home/admin/kafka-security/kafka.client.truststore.jks"
            ssl_endpoint_identification_algorithm => ""
        }
    }
}

说明:input主要配置日志文件
filter 配置的是各个ETL组件:
其中grok进行正则匹配,mutate 做数据处理(类型转换,名称变换等)
output 是配置各个输出组件

附录

安装dataHub 插件

你可能感兴趣的:(大数据相关)