Nginx日志格式

    # Access-log layout "main": the standard combined fields, followed by
    # host/timing/upstream details and the app-specific request headers
    # (device, appversion, openudid, code, networkType, deviceModel, osVersion).
    # Adjacent quoted pieces are concatenated by nginx into one format string.
    log_format main
        '$remote_addr - $remote_user [$time_local] "$request" '
        '$status $body_bytes_sent "$http_referer" '
        '"$http_user_agent" "$http_x_forwarded_for" '
        '"$http_host" "$request_time" "$upstream_response_time" '
        '$http_device $http_appversion $upstream_addr '
        '$http_openudid $http_code $http_networkType '
        '"$http_deviceModel" "$http_osVersion"';

与上述日志格式对应的 Grok 解析表达式为:

%{IPORHOST:Client_IP} (%{WORD:ident}|-) (%{USERNAME:auth}|-) \[%{HTTPDATE:timestamp}\] "%{WORD:Http_Method} %{URIPATHPARAM:Http_Request} HTTP/%{NUMBER:Http_Version}" %{NUMBER:Http_Status_Code} (?:%{NUMBER:Http_Bytes}|-) (?:"(?:%{URI:Http_Referrer}|-)"|%{QS:Http_Referrer}) %{QS:User_Agent} "(?:-|%{DATA:X_Forwarded_For})" "(%{IPORHOST:Site}|-)" "(%{NUMBER:Request_Time}|-)" "(%{NUMBER:Upstream_Response_Time}|-)" (%{WORD:Device}|-) (%{USERNAME:App_Version}|-) (%{HOSTNAME:Upstream_Host}:%{POSINT:Upstram_Port}|-) (%{WORD:Openudid}|-) (%{WORD:Usercode}|-) (%{WORD:NetType}|-) "(%{GREEDYDATA:Device_Name}|-)" "(%{GREEDYDATA:System_Verion}|-)"

logstash配置文件如下:

input {
    # Read events from the Redis list "filebeat:nginx-access"
    # (presumably filled by Filebeat, judging by the key name - confirm against the shipper config).
    redis {
        # Connection
        host => "redis"
        port => 6379
        db => 0
        # password => "123456"

        # Queue to consume
        data_type => "list"
        key => "filebeat:nginx-access"
        # codec => "json"
    }
}
filter {
        # Split the nginx "main" access-log line held in "message" into named fields.
        # The X-Forwarded-For slot is matched as "-" or arbitrary text between the
        # quotes written by nginx; QS must not be used there because QS consumes
        # its own surrounding quotes and would never match a real header value.
        # NOTE(review): the field names "Upstram_Port" and "System_Verion" are
        # misspelled but kept as-is so existing indices/dashboards keep working.
        grok {
                match => {
                        "message" => '%{IPORHOST:Client_IP} (%{WORD:ident}|-) (%{USERNAME:auth}|-) \[%{HTTPDATE:timestamp}\] "%{WORD:Http_Method} %{URIPATHPARAM:Http_Request} HTTP/%{NUMBER:Http_Version}" %{NUMBER:Http_Status_Code} (?:%{NUMBER:Http_Bytes}|-) (?:"(?:%{URI:Http_Referrer}|-)"|%{QS:Http_Referrer}) %{QS:User_Agent} "(?:-|%{DATA:X_Forwarded_For})" "(%{IPORHOST:Site}|-)" "(%{NUMBER:Request_Time}|-)" "(%{NUMBER:Upstream_Response_Time}|-)" (%{WORD:Device}|-) (%{USERNAME:App_Version}|-) (%{HOSTNAME:Upstream_Host}:%{POSINT:Upstram_Port}|-) (%{WORD:Openudid}|-) (%{WORD:Usercode}|-) (%{WORD:NetType}|-) "(%{GREEDYDATA:Device_Name}|-)" "(%{GREEDYDATA:System_Verion}|-)"'
                }
        }
        # Use the request time from the log line as the event timestamp.
        date {
                match => ["timestamp", "dd/MMM/yyyy:HH:mm:ss Z"]
                # nginx writes English month abbreviations; pin the locale so
                # "MMM" parses regardless of the host's default locale.
                locale => "en"
        }
        # Look up the client address in the local GeoLite2 City database.
        geoip {
                database => "/usr/share/logstash/geodb/GeoLite2-City.mmdb"
                source => "Client_IP"
                target => "geoip"
                fields => ["country_name","region_name", "city_name", "ip", "longitude", "latitude", "location"]
                # Two add_field entries with the same key are intended to build a
                # two-element [longitude, latitude] array; it is converted to
                # floats by the mutate filter below.
                add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
                add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}"  ]
        }
        # Break the User-Agent string into browser/OS/device details under "ua".
        # (Declared once; a second identical useragent filter only repeated the work.)
        useragent {
                source => "User_Agent"
                target => "ua"
        }
        # Give numeric fields numeric types and drop fields that are not indexed.
        mutate {
                convert => [ "[geoip][coordinates]", "float" ]
                convert => [ "Http_Status_Code", "integer" ]
                convert => [ "Http_Bytes", "integer" ]
                convert => [ "Request_Time", "float" ]
                convert => [ "Upstream_Response_Time", "float" ]
                # split => ["Http_Request", "?"]
                # add_field => { "Http_URI" => "%{Http_Request[0]}" }
                remove_field => [ "message", "beat", "@version", "auth", "prospector", "source", "offset"]
        }
}
output {
        # Index parsed events into Elasticsearch; the index name carries the
        # event date, so a new index is used for each day.
        elasticsearch {
                index => "nginx-access-%{+YYYY.MM.dd}"
                hosts => ["elasticsearch:9200"]
        }
        # Uncomment to also print events to the console while debugging:
        #stdout {codec => rubydebug}
}