使用Groovy调用第三方地图api更新ElasticSearch日志的ip定位信息

使用Groovy调用腾讯地图api更新ElasticSearch日志的ip定位信息

  • 索引字段约定
  • groovy脚本
  • 加入定时器运行
  • 运行结果
    • 日志输出
    • kibana查看保存的ip地址信息
    • 地图可视化

本次实例软件相关版本
Groovy: 3.0.6
ElasticSearch: 7.9.3
kibana: 7.9.3
第三方地图Api: 腾讯地图 (第三方地图对比)

索引字段约定

  1. 请求日志索引
 PUT /request_log-web
 {
     
    "mappings":{
     
        "properties":{
     
            "location":{
     
                "type":"object",
                "properties":{
     
                    "gps":{
     
                        "type":"geo_point",
                        "ignore_malformed":true
                    },
                    "desc":{
     
                        "type":"keyword",
                        "fields":{
     
                            "wildcard":{
     
                                "type":"wildcard"
                            }
                        }
                    },
                    "ad_info":{
     
                        "type":"object",
                        "properties":{
     
                            "nation":{
     
                                "type":"keyword"
                            },
                            "province":{
     
                                "type":"keyword"
                            },
                            "city":{
     
                                "type":"keyword"
                            },
                            "district":{
     
                                "type":"keyword"
                            },
                            "adcode":{
     
                                "type":"keyword"
                            }
                        }
                    }
                }
            },
            "req_ip":{
     
                "type":"ip"
            }
        }
    }
}
  2. 存储ip地址索引
PUT /ip_address
{
     
  "mappings": {
     
      "properties": {
     
        "ip": {
     
          "type": "ip"
        },
        "createTime": {
     
          "type": "date"
        },
        "location": {
     
          "properties": {
     
            "ad_info": {
     
              "properties": {
     
                "adcode": {
     
                  "type": "keyword"
                },
                "city": {
     
                  "type": "keyword"
                },
                "district": {
     
                  "type": "keyword"
                },
                "nation": {
     
                  "type": "keyword"
                },
                "province": {
     
                  "type": "keyword"
                }
              }
            },
            "gps": {
     
              "type": "geo_point",
              "ignore_malformed": true
            }
          }
        },
        "message": {
     
          "type": "keyword"
        },
        "status": {
     
          "type": "keyword"
        }
      }
  }
}

groovy脚本


import groovy.json.JsonOutput
import groovy.json.JsonSlurperClassic
import groovy.transform.Field;
import java.io.BufferedReader
import java.io.InputStreamReader
import java.io.OutputStreamWriter
import java.net.URL
import java.net.URLConnection

class UpdateGps{
     
    /**
     * Creates an updater with its batch sizes, loop limit, Elasticsearch location and API keys.
     *
     * @param bulk_count_aggrs     number of distinct IPs requested per aggregation query
     * @param bulk_count_ip_insert number of IP documents per bulk insert
     * @param count_for_max        maximum number of loop iterations of run()
     * @param es_addr              Elasticsearch address
     * @param logIndex             index (pattern) that holds the request logs
     * @param ipIndex              index that stores the resolved IP information
     * @param api_keys             Tencent map API keys
     */
    public UpdateGps(int bulk_count_aggrs=1000,int bulk_count_ip_insert=1000,int count_for_max=10,
                     String es_addr="http://192.168.0.202:9200",
                     String logIndex="request_log-*",
                     String ipIndex="ip_address",
                     List<String> api_keys
    ){
        // Elasticsearch location and index names
        this.es_addr = es_addr
        this.logIndex = logIndex
        this.ipIndex = ipIndex

        // third-party API credentials
        this.api_keys = api_keys

        // batch sizes and loop limit
        this.bulk_count_aggrs = bulk_count_aggrs
        this.bulk_count_ip_insert = bulk_count_ip_insert
        this.count_for_max = count_for_max
    }


    // Tencent location API keys
    List<String> api_keys;
// Shared tooling
    static JsonSlurperClassic jsonSlurper = new JsonSlurperClassic()
    static JsonOutput jsonOutput = new JsonOutput()
    static String rowDateFormat = "yyyy-MM-dd HH:mm:ss.SSS"
    static boolean debugInfo=true;  // switch for info logging (logInfo)
    static boolean debugNet=false;  // switch for network logging (logNet)
// Elasticsearch settings.
// NOTE(review): these three are static, yet the constructor assigns them, so the values are shared by every instance.
    static es_addr="http://192.168.0.202:9200"
    static String ipIndex="ip_address"; // index that stores IP information
    static String logIndex="request_log-*"; // log index (pattern)

    int bulk_count_aggrs=1000;  // number of IPs requested per aggregation query
    int bulk_count_ip_insert=1000;  // number of IP documents per bulk insert
    int count_for_max=1;    // maximum number of loop iterations


    // Not read or written by any active code in this class.
    int unknow_ip_size=0;
    // 1-based counter of the current loop iteration in run().
    int count_for_cur=1;
    // Timestamp taken when the instance is created; run() passes it to the log query as the upper time bound.
    String startTime=new Date().format("yyyy-MM-dd HH:mm:ss.SSS");
    int total_ip_insert=0; // total number of IPs added
    int total_ip_update=0; // total number of IPs whose logs were updated
    int total_log_update=0; // total number of log rows updated
    int total_unknow_ip=0; // total number of unknown IPs
// Size of the unknown-IP batch fetched by the latest loop iteration of run().
    int unknow_ip_size_temp=0;

    /**
     * Entry point: repeatedly fetches a batch of log IPs that have no location yet,
     * back-fills the logs from the IP index, and resolves the IPs that index does not
     * know through the third-party API.
     *
     * The loop continues while the last batch was full (exactly bulk_count_aggrs IPs,
     * i.e. more may be waiting) and fewer than count_for_max iterations have run.
     *
     * @return the value of the final logInfo call (not meaningful to callers)
     */
    def run(){
        long beginTime=System.currentTimeMillis();
        println("====================================\n更新${startTime}之前的数据 bulk_count_aggrs=${bulk_count_aggrs},bulk_count_ip_insert=${bulk_count_ip_insert},count_for_max=${count_for_max},\n====================================")
        // Each iteration handles at most bulk_count_aggrs IPs, so that is the factor for the upper bound.
        logInfo("============================================================================================== BEGIN\t更新日志ip信息 本次最多更新${bulk_count_aggrs*count_for_max}个ip")
        do{
            logInfo("---------------------------------------------------------- BEGIN\t第${count_for_cur}次循环 ")
            // IPs of log rows without location information (at most bulk_count_aggrs)
            ArrayList<String> unknowlogIpList = getUnknowlogIpList(startTime,bulk_count_aggrs);
            unknow_ip_size_temp=unknowlogIpList.size()
            // first pass: update logs from the IP index and collect the IPs that index does not contain
            ArrayList<String> unknowAddressIpList = updateLogAddressList(unknowlogIpList)
            // resolve those IPs, store them in the IP index and update their logs
            refreshIpAddress(unknowAddressIpList,api_keys)
            logInfo("---------------------------------------------------------- FINLISH\t第${count_for_cur}次循环 ","未知ip数量=${unknow_ip_size_temp}")
            count_for_cur++;
        }
        while (unknow_ip_size_temp==bulk_count_aggrs && count_for_cur<=count_for_max) // iteration cap
        logInfo("============================================================================================== FINLISH\t更新日志ip信息",
                "耗时:"+(System.currentTimeMillis()-beginTime)/1000+"s,循环次数:${count_for_cur-1},未知ip个数:${total_unknow_ip},新增ip个数:${total_ip_insert},更新ip个数:${total_ip_update},更新日志行数:${total_log_update}\n\n\n\n")
    }
    /**
     * Sends a JSON body with HTTP POST and returns the response body as a single string
     * (line breaks removed).
     *
     * Request and response are encoded/decoded as UTF-8 explicitly; relying on the
     * platform default charset garbles non-ASCII content such as Chinese region names.
     * For status codes >= 400 the error stream is read instead of the input stream.
     * Failures are printed, not thrown: the method then returns whatever was read so
     * far, which may be an empty string.
     *
     * @param url    target URL
     * @param pamare request body; written followed by a line break
     * @return the response body, or "" when nothing could be read
     */
    String sendPost(String url, String pamare) {
        PrintWriter out = null;
        BufferedReader bufferedReader = null;
        StringBuilder result = new StringBuilder();
        try {
            logNet("curl -X POST  -H 'Content-Type: application/json' ${url}\n${pamare}")
            HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
            conn.setRequestMethod("POST");
            conn.setConnectTimeout(4 * 1000);
            conn.setRequestProperty("connection","Keep-Alive");
            conn.setRequestProperty("Accept", "*/*");
            conn.setRequestProperty("Content-Type", "application/json");
            // both flags are needed to send a body and read the reply
            conn.setDoOutput(true);
            conn.setDoInput(true);
            out = new PrintWriter(new OutputStreamWriter(conn.getOutputStream(), "UTF-8"));
            out.println(pamare);
            out.flush();
            InputStream is = conn.getResponseCode() >= 400 ? conn.getErrorStream() : conn.getInputStream();
            // getErrorStream() may return null when the server sent no error body
            if (is != null) {
                bufferedReader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
                String line;
                while ((line = bufferedReader.readLine()) != null) {
                    result.append(line);
                }
            }
            // only pay for pretty-printing when network logging is switched on
            if (debugNet) {
                logNet(jsonOutput.prettyPrint(result.toString()))
            }
        } catch (Exception e) {
            System.out.println("发送 POST 请求出现异常!" + e);
            e.printStackTrace();
        }
        finally {
            // close the writer and the reader
            try {
                if (out != null) {
                    out.close();
                }
                if (bufferedReader != null) {
                    bufferedReader.close();
                }
            } catch (IOException ex) {
                ex.printStackTrace();
            }
        }
        return result.toString();
    }

    /**
     * Sends a request with a JSON body and returns the response body as a single string
     * (line breaks removed).
     *
     * NOTE(review): the method asks for GET but always opens the output stream to send
     * the body; the JDK's HTTP client is known to turn such a request into POST, which
     * is what the curl-style debug line reflects - confirm against the JDK in use.
     *
     * The request body is always written as UTF-8. Unlike sendPost, any failure
     * (including an HTTP error status) is printed and then rethrown.
     *
     * @param url   target URL
     * @param param request body; null is treated as an empty body
     * @param isGBK true to decode the response as GBK, false for UTF-8
     * @return the response body
     * @throws Exception when connecting, sending or reading fails
     */
    static String sendGet(String url, String param, boolean isGBK) throws Exception {
        PrintWriter out = null;
        BufferedReader bufferedReader = null;
        StringBuilder result = new StringBuilder();
        try {
            logNet("curl -X POST  -H 'Content-Type: application/json' ${url}\n${param}")
            HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
            conn.setRequestProperty("Charset", "UTF-8");
            conn.setRequestProperty("accept", "*/*");
            conn.setRequestProperty("connection", "Keep-Alive");
            conn.setRequestProperty("Content-Type", "application/json");
            conn.setRequestProperty("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)");
            conn.setConnectTimeout(30000);
            conn.setReadTimeout(30000);
            conn.setRequestMethod("GET");
            // both flags are needed to send a body and read the reply
            conn.setDoOutput(true);
            conn.setDoInput(true);

            out = new PrintWriter(new OutputStreamWriter(conn.getOutputStream(), "UTF-8"));
            // print(null) would send the literal text "null"
            out.print(param == null ? "" : param);
            out.flush();

            String charSetName = isGBK ? "GBK" : "UTF-8";
            bufferedReader = new BufferedReader(new InputStreamReader(
                    conn.getInputStream(), charSetName));
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                result.append(line);
            }
            // only pay for pretty-printing when network logging is switched on
            if (debugNet) {
                logNet(jsonOutput.prettyPrint(result.toString()))
            }
        } catch (Exception e) {
            System.out.println("发送POST请求出现异常!" + e);
            e.printStackTrace();
            throw e;
        }
        finally {
            // close the writer and the reader
            try {
                if (out != null) {
                    out.close();
                }
                if (bufferedReader != null) {
                    bufferedReader.close();
                }
            } catch (IOException ex) {
                ex.printStackTrace();
            }
        }
        return result.toString();
    }


    /**
     * Prints a timestamped info line when debugInfo is enabled.
     * The messages are joined with commas.
     */
    def static logInfo(Object... msgs){
        if(!debugInfo){
            return
        }
        String stamp = new Date().format(rowDateFormat)
        println(stamp+"\t"+msgs.join(","))
    }
    /**
     * Prints a timestamped network-trace line when debugNet is enabled.
     * The message array is appended as a whole (not joined).
     */
    def static logNet(Object... msgs){
        if(!debugNet){
            return
        }
        String stamp = new Date().format(rowDateFormat)
        println(stamp+"\t"+msgs)
    }

/**
 * Returns the distinct request IPs of log documents that have no location.desc yet.
 *
 * Runs a terms aggregation on req_ip over the log index, restricted to documents
 * whose startTime is not later than the given time.
 *
 * @param startTime upper bound for the documents' startTime
 * @param size      maximum number of IPs (aggregation buckets) to return
 * @return the bucket keys; empty when the response contains no aggregations
 **/
    ArrayList<String> getUnknowlogIpList(startTime,size=1000){
        logInfo("BEGIN\t获取没有地址信息的ip","time=${startTime},size=${size}")
        String searchBody = "{\n" +
                "  \"query\": {\n" +
                "    \"bool\": {\n" +
                "       \"must\": [{\"range\": {\"startTime\": {\"lte\": \"${startTime}\"}}}]," +
                "      \"must_not\": [ {\"exists\": {\"field\": \"location.desc\"}}]" +
                "    }\n" +
                "  }, \n" +
                "  \"size\": 0, \n" +
                "  \"aggs\": {\n" +
                "    \"ip\": {\n" +
                "      \"terms\": {\n" +
                "        \"field\": \"req_ip\",\n" +
                "        \"size\": ${size}\n" +
                "      }\n" +
                "    }\n" +
                "  }\n" +
                "}"
        String response=sendGet("${es_addr}/${logIndex}/_search?filter_path=aggregations.*.buckets.key",searchBody,false)
        Map parsed = jsonSlurper.parseText(response)
        ArrayList<String> ipList= new ArrayList<>()
        Map aggregations = parsed.get("aggregations")
        // filter_path drops the aggregations node entirely when there are no buckets
        if(aggregations!=null){
            Map[] buckets = aggregations.get("ip").get("buckets")
            buckets.each { Map bucket ->
                ipList.add(bucket.get("key"))
            }
        }
        logInfo("FINLISH\t获取没有地址信息的ip","总计${ipList.size()}")
        return ipList
    }
/**
 * Resolves IPs through the Tencent location API, stores the result in the ${ipIndex}
 * index (bulk insert) and back-fills the matching log documents.
 *
 * IPs already present in ${ipIndex} are skipped. Resolved documents are buffered and
 * sent in bulks of bulk_count_ip_insert; whatever is still buffered after the loop is
 * flushed as well, so a skipped or failed last IP no longer drops the buffer.
 *
 * @param ips      IPs to resolve
 * @param api_keys API keys, used round-robin; must not be empty
 * @return a list that is always empty (nothing is added to it); callers get a list, never null
 */
    List<Map> refreshIpAddress(List<String> ips=[],api_keys=[]){
        List addressList = new ArrayList();
        if(ips==null||ips.size()<1){
            return addressList
        }
        // without a key every lookup would fail on i % 0, so stop early with one clear message
        if(api_keys==null||api_keys.size()<1){
            logInfo("ERROR\t第三方工具解析ip","api_keys为空")
            return addressList
        }
        logInfo("BEGIN\t第三方工具解析ip","总计${ips.size()}")
        StringBuffer blukStr = new StringBuffer("");
        long beginTime = System.currentTimeMillis()
        int bulkLimit = Math.max(1, bulk_count_ip_insert)
        int pending = 0                 // documents currently buffered in blukStr
        int skipped = 0                 // IPs that already exist in the IP index
        int total_ip_insert_temp=0;

        // sends the buffered bulk body (if any) and reports a failed bulk request
        def flushBulk = {
            if(blukStr.length()>0){
                String saveResult = sendPost("${es_addr}/_bulk" ,blukStr.toString());
                if(!saveResult||saveResult.contains("\"errors\":true")){
                    logInfo("ERROR\t批量保存ip",saveResult.take(500))
                }
                blukStr.setLength(0)
            }
            pending = 0
        }

        for (int i = 0; i < ips.size(); i++) {
            try{
                Map es_resp = jsonSlurper.parseText(sendGet("${es_addr}/${ipIndex}/_count?q=ip:${ips[i]}","",false))
                if(es_resp.get("count")>0){
                    skipped++
                    continue ;
                }
                // ---- look the IP up
                Map response = jsonSlurper.parseText("https://apis.map.qq.com/ws/location/v1/ip?key=${api_keys[i%api_keys.size()]}&ip=${ips[i]}".toURL().text)
                sleep((long)(1000/5/api_keys.size())) // one key allows 5 qps and 10k calls per day
                // ---- build the index document
                Map ipAddress= new HashMap();
                ipAddress.put("ip",ips[i])
                ipAddress.put("message",response.get("message"))
                ipAddress.put("status",response.get("status"))
                ipAddress.put("createTime",System.currentTimeMillis())
                if(response["status"]==0){
                    // lookup succeeded: keep the location and update the logs with it
                    Map result = response.get("result")
                    Map location = new HashMap();
                    result.get("ad_info").put("adcode",""+result.get("ad_info").get("adcode"))
                    location.put("ad_info",result.get("ad_info"))
                    location.put("gps",result.get("location").getAt("lat")+","+result.get("location").getAt("lng"))
                    ipAddress.put("location",location)
                    updateLogAddress(ipAddress.get("ip"),location.get("gps"),jsonOutput.toJson(location.get("ad_info")),ipAddress.get("message"))
                }else{
                    // lookup failed: only record the message on the logs
                    updateLogAddress(ipAddress.get("ip"),"","",ipAddress.get("message"))
                }
                String ipAddressJson = JsonOutput.toJson(ipAddress)
                blukStr.append("{ \"index\" : { \"_index\" : \"${ipIndex}\"} }\n${ipAddressJson}\n")
                pending++
                total_ip_insert_temp++;
                total_ip_insert++;
                // ---- store the buffered documents once the bulk is full
                if(pending>=bulkLimit){
                    flushBulk()
                }
            }catch(Exception e){
                logInfo("ERROR\t第三方工具解析ip","ip=${ips[i]},err=${e.getMessage()}")
            }
        }
        // documents still buffered when the loop ended
        flushBulk()

        logInfo("FINLISH\t 第三方工具解析ip","耗时"+(System.currentTimeMillis()-beginTime)/1000+"s,总计${ips.size()},成功${total_ip_insert_temp},已存在${skipped},失败${ips.size()-total_ip_insert_temp-skipped}")
        logInfo("刷新索引 ${ipIndex}",sendPost("${es_addr}/${ipIndex}/_refresh" ,""))
        return addressList
    }
/**
 * Writes location information onto every log document of one IP that has no
 * location.desc yet, using _update_by_query on the log index.
 *
 * The request body is built as a map and serialized with JsonOutput, so values such
 * as desc are escaped properly and the body is always valid JSON.
 *
 * @param ip      request IP whose log documents are updated
 * @param gps     "lat,lng"; when empty only location.desc is written
 * @param ad_info JSON text of the administrative-region object; only used when gps is set
 * @param desc    description stored in location.desc (null is stored as the text "null")
 * @return true when Elasticsearch reported an "updated" count, false otherwise
 */
    def boolean updateLogAddress(String ip,String gps,String ad_info,String desc='1'){
        String response = "";
        boolean status=false;
        try{
            Map params = [desc: "${desc}".toString()]
            String source = 'ctx._source["location.desc"]=params.desc;'
            if(gps){
                params.put("gps", gps)
                // ad_info arrives as JSON text; parse it so it is embedded as an object, not a string
                params.put("ad_info", ad_info ? jsonSlurper.parseText(ad_info) : null)
                source = 'ctx._source["location.gps"]=params.gps; ' +
                        'ctx._source["location.desc"]=params.desc; ' +
                        'ctx._source["location.ad_info"]=params.ad_info;'
            }
            Map body = [
                    query : [bool: [
                            must_not: [[exists: [field: "location.desc"]]],
                            must    : [[term: [req_ip: [value: ip]]]]
                    ]],
                    script: [source: source, params: params, lang: "painless"]
            ]
            response = sendPost("${es_addr}/${logIndex}/_update_by_query", JsonOutput.toJson(body))
            Map map = jsonSlurper.parseText(response)
            def updated = map.get("updated")
            // an error reply carries no "updated" field
            if(updated==null){
                throw new IllegalStateException("no 'updated' field in response")
            }
            total_ip_update++
            total_log_update+=updated
            status=true;
        }catch(Exception e){
            logInfo("ERROR\t更新ip信息",ip,response,e.getMessage())
        }
        sleep(50) // throttle update requests to at most 20 per second
        return status;
    }
/**
 * Back-fills the log documents of the given IPs from the IP index and returns the IPs
 * that index does not contain.
 *
 * The lookup asks for as many hits as IPs were passed (capped at 10000,
 * NOTE(review): assumed to be the index's result-window limit - confirm), so batches
 * larger than 1000 are no longer truncated. An IP that is known but whose log update
 * fails is counted as failed, not as unknown.
 *
 * @param ips IPs taken from log documents without location information
 * @return the IPs missing from the IP index; empty when ips is empty
 */
    List updateLogAddressList(ips=[]){
        if(ips==null||ips.size()<1){
            return new ArrayList();
        }
        long beginTime =System.currentTimeMillis()
        logInfo("BEGIN\t更新日志相关ip地址信息,总计${ips.size()}")
        def ipJsonStr=JsonOutput.toJson(ips).toString() // IP list as JSON array text
        Map ipAddressMap = new HashMap(bulk_count_aggrs);   // known IP -> stored IP document
        List unknowIpList = new ArrayList() // IPs missing from the IP index
        // ---- load the stored documents of the given IPs
        int fetchSize = Math.min(ips.size(), 10000)
        String response = sendGet("${es_addr}/${ipIndex}/_search?filter_path=hits.hits._source","{\"size\":${fetchSize},\"query\": {\"terms\": {\"ip\": ${ipJsonStr}}}}",false);
        Map responseMap = jsonSlurper.parseText(response)
        // with filter_path the response is an empty object when nothing matched
        if(responseMap.size()>0){
            List<Map> ipAddressList=responseMap.get("hits").getAt("hits")
            for (ipAddr in ipAddressList){
                ipAddressMap.put(ipAddr.get("_source").getAt("ip"),ipAddr.get("_source"))
            }
        }
        // ---- update the logs IP by IP
        int count_suc=0;
        int count_err=0;
        int count_unknow=0;
        for (ip in ips){
            try{
                if(!ipAddressMap.containsKey(ip)){
                    unknowIpList.add(ip)
                    count_unknow++;
                    total_unknow_ip++;
                    continue
                }
                Map ipAddress= ipAddressMap.get(ip)
                Map location = ipAddress.get("location")
                boolean status
                if(location!=null){
                    // location available: write gps, region and message
                    status = updateLogAddress(ip,location.getAt("gps"),jsonOutput.toJson(location.getAt("ad_info")),ipAddress.get("message"))
                }else{
                    // lookup had failed earlier: only the message is written
                    status = updateLogAddress(ip,"","",ipAddress.get("message"))
                }
                if(status){
                    count_suc++;
                }else{
                    count_err++;
                }
            }catch(Exception e){
                logInfo("ERROR\t更新日志ip地址信息",ip,e.getMessage())
                count_err++;
            }
        }
        logInfo("FINLISH\t更新日志ip地址信息","耗时"+(System.currentTimeMillis()-beginTime)/1000+"s,总计${ips.size()},成功${count_suc},失败:${count_err},未知${count_unknow}")
        logInfo("刷新索引 ${logIndex}",sendPost("${es_addr}/${logIndex}/_refresh" ,""))
        return unknowIpList;
    }
}

/*
Alternative: keep the script running and repeat the update every 10 minutes.
NOTE(review): this call passes no api_keys list although the constructor declares
api_keys without a default value - add the key list before enabling it.
while (true){
    new UpdateGps(1000,1,10,"http://192.168.0.202:9200","request_log-*").run()
    sleep(1000*60*10)
}*/
// Single run. The two empty strings in the last argument are placeholders for Tencent map API keys.
new UpdateGps(500,500,1,"http://192.168.0.202:9200","request_log-*","ip_address",["",""
]).run()

加入定时器运行

编辑定时任务
crontab -e
最下行加入
#每小时运行一次 更新日志ip信息
0 */1 * * * groovy /opt/updateGps.groovy >> /opt/updateGps.log
查看定时任务
crontab -l

运行结果

日志输出

使用Groovy调用第三方地图api更新ElasticSearch日志的ip定位信息_第1张图片

kibana查看保存的ip地址信息

使用Groovy调用第三方地图api更新ElasticSearch日志的ip定位信息_第2张图片

地图可视化

使用Groovy调用第三方地图api更新ElasticSearch日志的ip定位信息_第3张图片

你可能感兴趣的:(ElasticSearch)