logstash&&ruby

ruby单独文件

https://www.elastic.co/guide/en/logstash/current/plugins-filters-ruby.html

filter {
  # Hand every event to the external script; with percentage = 0.9 the
  # script below cancels roughly 90% of events.
  ruby {
    script_params => { "percentage" => 0.9 }
    path => "/etc/logstash/drop_percentage.rb"
  }
}
The Ruby script file should define the following methods:
- register(params): an optional method that receives the key/value hash passed in the script_params configuration option.
- filter(event): a mandatory method that accepts a Logstash event and must return an array of events.
Below is an example implementation of the drop_percentage.rb Ruby script, which drops a configurable percentage of events:
# Optional setup hook. `params` is the hash that the Logstash configuration
# passes through the `script_params` option, i.e. the values read from
# `params` here are the ones supplied in the pipeline file.
#
# Stores params["percentage"] in @drop_percentage for later use by `filter`.
def register(params)
  configured_ratio = params["percentage"]
  @drop_percentage = configured_ratio
end

# Receives one event and returns the list of events to keep.
# Leaving the event out of the returned array drops it; adding extra
# LogStash::Event instances to the array would emit new events.
#
# The event is kept when a fresh random number is at or above
# @drop_percentage, and cancelled (empty array) otherwise.
def filter(event)
  keep = rand >= @drop_percentage
  keep ? [event] : []
end

ruby嵌入

md5作为_id

input {
    # Consume the topic from the start of the log with 20 consumer threads.
    kafka {
        # bootstrap_servers is a single string ("host1:port1,host2:port2"),
        # not an array; only `topics` takes a list.
        bootstrap_servers => "127.0.0.1:9092"
        topics => ["009C576CD10040448C3E06723A5D5C23"]
        client_id => "wangjinsong3"
        group_id => "wangjinsong3"
        auto_offset_reset => "earliest"
        consumer_threads => 20
        # Attach Kafka metadata to each event.
        decorate_events => true
    }
}
filter {
    # Expand the JSON text in `message` into top-level fields, then drop the raw text.
    json {
        source => "message"
        #target => "doc"
        remove_field => ["message"]
    }
    # Build a deterministic document id:
    #   id = MD5("<part of uri before the first '?'>_<length of the full uri>")
    ruby {
        # `init` runs once when the plugin starts, so the library is loaded a
        # single time instead of on every event. (The previous init only set a
        # local variable that the per-event code could not see.)
        init => "require 'digest/md5'"
        # Events whose `uri` is missing, empty or not a string are tagged
        # instead of raising, so they can be routed away before the
        # elasticsearch output uses a literal "%{id}" as document id.
        code => "
                uri = event.get('uri')
                if uri.is_a?(String) && !uri.empty?
                        path = uri.split('?', 2).first
                        event.set('id', Digest::MD5.hexdigest(path + '_' + uri.length.to_s))
                else
                        event.tag('_uri_missing')
                end
        "
    }
    mutate {
        remove_field => ["@version", "kafka"]
    }

}
output {
        # Print every event to the console for debugging.
        stdout {
                codec => rubydebug
        }
        # Index into Elasticsearch using the MD5 `id` field as the document id.
        elasticsearch {
                hosts => ["127.0.0.1:9201"]
                index => "http_attack_detection"
                # `create` only adds a document when that id does not exist yet.
                action => "create"
                document_id => "%{id}"
                pool_max => 500
                # Install the index template from disk and overwrite any existing one.
                manage_template => true
                template_overwrite => true
                template_name => "temp_access"
                template => "/export/software/logstash/config/access_template.json"
        }
}

ruby判断

input {
        # Read the access log as line events, starting at the top of the file.
        file {
                start_position => "beginning"
                path => "/home/elasticSearch/software/logstash/bin/access.log"
        }
}

filter {
        # Split the raw line into its 20 columns and classify the crawler.
        # Lines that do not have exactly 20 columns are left untouched.
        # NOTE(review): "^A" below is the two-character text caret + A. If the
        # log actually uses the SOH control byte (0x01) as separator, this
        # would need to be "\x01" - confirm against the log format.
        ruby {
                code => '
                        columns = %w[
                                time remote_addr xff request_length status
                                request_method hostname request_uri bytes_sent request_time
                                referer user_agent upstream_cache_status sent_http_content_type sent_http_var_cache
                                content_length upstream_status upstream_response_time event_id target_ip
                        ]
                        crawlers = [
                                ["baiduspider", "baidu"],
                                ["googlebot", "google"],
                                ["360spider", "360"],
                                ["haosouspider", "360"],
                                ["sosospider", "soso"],
                                ["slurp", "yahoo"],
                                ["youdaobot", "youdao"],
                                ["sogou", "sogou"],
                                ["msnbot", "msn"],
                                ["bingbot", "bing"]
                        ]

                        values = event.get("message").split("^A")
                        if values.length == 20
                                columns.each_with_index { |column, idx| event.set(column, values[idx]) }

                                agent = event.get("user_agent").downcase
                                hit = crawlers.find { |needle, _label| agent.include?(needle) }
                                event.set("bot", hit ? hit[1] : "-")
                        end
                        '
        }
        # Parse the `time` column into @timestamp.
        # NOTE(review): the pattern matches "+0800" as literal text while
        # `timezone` is "+00:00", so the wall-clock value is stored as if it
        # were UTC. This may be a deliberate shift to local time - confirm; to
        # store the true instant, use a "Z" pattern letter and drop `timezone`.
        date {
                match => ["time", "dd/MMM/yyyy:HH:mm:ss +0800"]
                timezone => "+00:00"
                locale => "en"
                target => "@timestamp"
        }
        # Parse the User-Agent header into the `useragent` object.
        useragent {
                target => "useragent"
                source => "user_agent"
        }
        # Look up the client address; results go to the default `geoip` field.
        geoip {
                source => "remote_addr"
        }
        # Copy selected useragent/geoip values to top-level fields and rewrite
        # `time` as an ISO-8601 style string ("YYYY-MM-DDTHH:MM:SS+0800").
        ruby {
                # Load the Time parsing extensions once at start-up rather than per event.
                init => 'require "time"'
                # Time.strptime takes the format explicitly. The previous
                # Time.parse call passed a (malformed) format string in the
                # position of its `now` argument, so it only worked through
                # free-form parsing. Events without a `time` field (lines
                # rejected by the splitter) are tagged instead of raising on nil.
                code => '
                        event.set("server", event.get("host"))
                        event.set("os", event.get("[useragent][os]"))
                        event.set("device", event.get("[useragent][device]"))
                        event.set("country", event.get("[geoip][country_name]"))
                        event.set("province", event.get("[geoip][region_name]"))
                        event.set("city", event.get("[geoip][city_name]"))

                        raw_time = event.get("time")
                        if raw_time.nil?
                                event.tag("_access_time_missing")
                        else
                                parsed = Time.strptime(raw_time, "%d/%b/%Y:%H:%M:%S %z")
                                event.set("time", parsed.strftime("%Y-%m-%dT%H:%M:%S%z"))
                        end
                '
        }
        # Give the numeric columns numeric types and drop the raw and
        # intermediate fields that were only needed during enrichment.
        mutate {
                convert => {
                        "request_length" => "integer"
                        "request_time" => "float"
                        "bytes_sent" => "integer"
                }
                remove_field => ["message", "type", "path", "geoip", "useragent", "host"]
        }
}
output {
        # Print every event to the console for debugging.
        stdout {
                codec => rubydebug
        }
        # Write to a daily index named from @timestamp.
        elasticsearch {
                # Fixed: the host string had a stray extra double quote, which
                # is a configuration syntax error.
                hosts => ["127.0.0.1:9202"]
                index => "access2_test-%{+YYYY-MM-dd}"
                pool_max => 20000
                # Install the index template from disk and overwrite any existing one.
                manage_template => true
                template_overwrite => true
                template_name => "temp_access"
                template => "/home/elasticSearch/software/logstash/config/access_template.json"
        }
}

你可能感兴趣的:(logstash&&ruby)