vim filebeat.yml
Sample configuration (two inputs, distinguished by fields.document_type):
filebeat.inputs:
- type: log
  paths:
    - /home/algo/discovery_product_log/product.log
  fields.document_type: "product"
- type: log
  paths:
    - /home/algo/doc4index/doc4index/doc4index.log
  fields.document_type: "trumpet"
  # Multiline merge rule: a line starting with a timestamp begins a new event,
  # and following non-matching lines are appended to it
  multiline.pattern: '^[0-9]{4}-[0-9]{2}-[0-9]{2}'
  multiline.negate: true
  multiline.match: after

# Load a custom index template
setup.template.name: "filebeat"
setup.template.pattern: "filebeat-*"
setup.template.settings:
  index.number_of_shards: 3

# Send events to Logstash
output.logstash:
  hosts: ["10.5.31.67:5044"]

# Or write directly to Elasticsearch instead
#output.elasticsearch:
  # Array of hosts to connect to.
  #hosts: ["10.3.17.173:9200"]
  # By default, Filebeat writes events to an index named filebeat-6.4.0-yyyy.MM.dd,
  # where yyyy.MM.dd is the date the event was indexed. To use a different name,
  # set the index option on the Elasticsearch output:
  #index: "%{[fields.log_type]}-%{[beat.version]}-%{+yyyy.MM.dd}"
Start Filebeat:
./filebeat -e -c filebeat.yml
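The multiline settings can be sanity-checked offline before shipping any logs. Below is a minimal Python sketch (not part of the Filebeat setup; the non-timestamp lines are hypothetical) of how negate: true plus match: after groups lines: a line matching the date pattern starts a new event, and subsequent non-matching lines are appended to it.

import re

pattern = re.compile(r'^[0-9]{4}-[0-9]{2}-[0-9]{2}')

lines = [
    "2020-08-13 19:54:01 - *5*result_all_timeout*...",  # matches: starts event 1
    "java.lang.RuntimeException: boom",                  # no match: appended to event 1
    "    at com.example.Foo.bar(Foo.java:42)",           # no match: appended to event 1
    "2020-08-13 19:54:02 - *7*result_all_timeout*...",   # matches: starts event 2
]

events, current = [], []
for line in lines:
    if pattern.match(line):
        if current:                      # a matching line closes the previous event
            events.append("\n".join(current))
        current = [line]
    else:
        current.append(line)
if current:
    events.append("\n".join(current))

print(len(events))  # 2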
For example, create a template named user. The order field sets the template's priority: when several templates match an index they are merged in order, and settings from a higher-order template override those from a lower one (for example the shard count). Under settings, number_of_shards is the number of primary shards and number_of_replicas the number of replica copies per primary. In the mappings, each property can carry parameters such as type (the field's data type), analyzer, index, fielddata and fields; fielddata: true loads a text field into memory so it can be used for sorting and aggregations. aliases holds index aliases (empty here).

curl -XPUT -H 'Content-Type: application/json' http://10.3.17.173:9200/_template/user -d '
{
  "order": 100,
  "index_patterns": [
    "log-*"
  ],
  "settings": {
    "index": {
      "number_of_shards": "3",
      "number_of_replicas": "0"
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "@timestamp": {
          "type": "date"
        },
        "rt": {
          "type": "integer"
        },
        "permid": {
          "type": "text",
          "fielddata": true
        },
        "custid": {
          "type": "text",
          "fielddata": true
        },
        "bucket": {
          "type": "text",
          "fielddata": true
        },
        "requestid": {
          "type": "text",
          "fielddata": true
        },
        "hostname": {
          "type": "text",
          "fielddata": true
        }
      }
    }
  },
  "aliases": { }
}'
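A quick way to confirm the template was registered is to read it back. The following is a small sketch, assuming the Python requests library and the same Elasticsearch host as above:

import requests

resp = requests.get("http://10.3.17.173:9200/_template/user")
resp.raise_for_status()
tmpl = resp.json()["user"]

print(tmpl["order"])           # 100
print(tmpl["index_patterns"])  # ['log-*']
print(sorted(tmpl["mappings"]["doc"]["properties"]))  # ['@timestamp', 'bucket', 'custid', ...]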
Each input source is processed separately and written to its own output; the Logstash pipeline below goes in config/default.conf. A sample trumpet log line to be parsed (a Python sketch of the same split logic follows the sample):
2020-08-13 19:54:01 - *5*result_all_timeout*20191102194411831699555742754112993^253950738^RequestWrapper(request_start_time:1597319641854, request:RequestInfo(20191102194411831699555742754112993,253950738,0,0), ip:, clicked_items:20200813195401854048974,clicked_items:bucket:click_profile)*
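Before reading the Logstash filter it helps to see what splitting on '*' and then on '^' yields for this line. The snippet below is a stand-alone Python sketch of the same extraction (the pipeline itself uses the mutate filters shown next); the sample line is abbreviated.

line = ("2020-08-13 19:54:01 - *5*result_all_timeout*"
        "20191102194411831699555742754112993^253950738^"
        "RequestWrapper(request_start_time:1597319641854, ...)*")

parts = line.split("*")
rt = int(parts[1])                 # 5 -> "rt", converted to integer
user_info = parts[3].split("^")
permid = user_info[0]              # 20191102194411831699555742754112993 -> "permid"
custid = user_info[1]              # 253950738 -> "custid"

print(rt, permid, custid)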
input {
  beats {
    port => 5044
    type => "product"
  }
}

filter {
  if [fields][document_type] == "trumpet" {
    # drop any line that does not contain "result_all_time"
    if ([message] =~ "^((?!result_all_time).)*$") {
      drop {}
    } else {
      # split the line on '*': field 1 is the response time, field 3 the user info
      mutate {
        split => ["message", "*"]
        add_field => {
          "rt"        => "%{[message][1]}"
          "user_info" => "%{[message][3]}"
        }
      }
      mutate {
        convert => ["rt", "integer"]
      }
      # split the user info on '^' and keep only the fields to be indexed
      mutate {
        split => ["user_info", "^"]
        add_field => {
          "permid" => "%{[user_info][0]}"
          "custid" => "%{[user_info][1]}"
          "ip"     => "10.5.31.67"
        }
        remove_field => [ "user_info", "beat", "message", "offset", "tags", "type",
                          "host", "input", "prospector", "source", "@version" ]
      }
    }
  }
  if [fields][document_type] == "product" {
    # drop any line that does not contain "product result all time"
    if ([message] =~ "^((?!product result all time).)*$") {
      drop {}
    } else {
      mutate {
        split => ["message", "*"]
        add_field => {
          "rt"        => "%{[message][1]}"
          "user_info" => "%{[message][3]}"
        }
      }
      mutate {
        convert => ["rt", "integer"]
      }
      mutate {
        split => ["user_info", "^"]
        add_field => {
          "permid"    => "%{[user_info][0]}"
          "custid"    => "%{[user_info][1]}"
          "bucket"    => "%{[user_info][2]}"
          "requestid" => "%{[user_info][3]}"
          "hostname"  => "%{[beat][name]}"
        }
        remove_field => [ "user_info", "beat", "message", "offset", "tags", "type",
                          "host", "input", "prospector", "source", "@version" ]
      }
    }
  }
}

output {
  if [fields][document_type] == "product" {
    elasticsearch {
      hosts           => ["http://10.3.17.173:9200"]
      index           => "log-product-%{+YYYY.MM.dd}"
      manage_template => false
      template_name   => "user"
    }
  }
  if [fields][document_type] == "trumpet" {
    elasticsearch {
      hosts           => ["http://10.3.17.173:9200"]
      index           => "log-trumpet-%{+YYYY.MM.dd}"
      manage_template => false
      template_name   => "user"
    }
  }
}
Start Logstash:
cd bin/
./logstash -f ../config/default.conf
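Once both Filebeat and Logstash are running, a query against the daily index shows whether parsed events are arriving. This is a sketch only, assuming the requests library, today's index name, and the host used in the output section above:

import datetime
import requests

today = datetime.date.today().strftime("%Y.%m.%d")
url = "http://10.3.17.173:9200/log-trumpet-{}/_search".format(today)
resp = requests.get(url, params={"size": 1})
resp.raise_for_status()
hits = resp.json()["hits"]

print(hits["total"])                   # number of trumpet events indexed today
if hits["hits"]:
    print(hits["hits"][0]["_source"])  # one parsed document: rt, permid, custid, ip, ...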
# Keep the most recent N days of indices
KEEP_DAYS=0

# Build the list of days to delete, counting back from KEEP_DAYS+1; run daily,
# e.g. from cron. Widen the seq range (e.g. seq 1 10) to sweep more days at once.
function get_todelete_days()
{
    for i in $(seq 1 1);
    do
        THIS_DAY=$(date -d "$(($KEEP_DAYS+$i)) day ago" +%Y.%m.%d)
        DAY_ARR=( "${DAY_ARR[@]}" $THIS_DAY)
    done
    echo ${DAY_ARR[*]}
}

# Capture the returned list as an array
TO_DELETE_DAYS=(`get_todelete_days`)
for day in "${TO_DELETE_DAYS[@]}"
do
echo "$day will be delete"
curl -XDELETE 'http://127.0.0.1:9200/log-product-'${day}
curl -XDELETE 'http://127.0.0.1:9200/log-discovery-list-'${day}
done
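The same date arithmetic can be dry-run in Python to see which indices a given KEEP_DAYS value would remove before wiring the script into cron. A sketch mirroring the shell logic above; the window size is an assumption matching seq 1 1.

import datetime

KEEP_DAYS = 0
DAYS_TO_DELETE = 1   # mirrors `seq 1 1`; raise it to sweep a wider window

today = datetime.date.today()
for i in range(1, DAYS_TO_DELETE + 1):
    day = (today - datetime.timedelta(days=KEEP_DAYS + i)).strftime("%Y.%m.%d")
    for prefix in ("log-product-", "log-discovery-list-"):
        print("would DELETE http://127.0.0.1:9200/{}{}".format(prefix, day))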