ES数据处理方法

由于日志数据存在ES项目里,需要从ES中获取日志进行分析,使用SQL数据进行处理,如下:

select

     traceid--    STRING   COMMENT '流程id',

    ,appnum  --   BIGINT   COMMENT '迭代号',

    ,appversion --STRING   COMMENT 'APP版本',    

    ,appcode   -- STRING   COMMENT '应用编码',

    ,type     --  STRING   COMMENT '类型',

    ,spanid   --  STRING   COMMENT '模块id',  

    ,apptype   -- STRING   COMMENT '应用类型详情见定义',

    ,eventtime -- DATETIME COMMENT '日期',

    ,name      -- STRING   COMMENT '名称',

    ,id        -- STRING   COMMENT 'id',

    ,theid     -- STRING   COMMENT 'theId'  

    ,preid

    -------------data----------------    

    ,GET_JSON_OBJECT(data_tmp,'$.allInOne') AS allInOne  

    ,GET_JSON_OBJECT(data_tmp,'$.class')      AS class

    ,GET_JSON_OBJECT(data_tmp,'$.classCode')      AS classCode

    ,GET_JSON_OBJECT(data_tmp,'$.deviceId')     AS deviceId

    ,GET_JSON_OBJECT(data_tmp,'$.grade')   AS grade

    ,GET_JSON_OBJECT(data_tmp,'$.gradeCode') AS gradeCode

    ,GET_JSON_OBJECT(data_tmp,'$.handleTime') AS handleTime

    ,GET_JSON_OBJECT(data_tmp,'$.heigth') AS heigth

    ,cast(ipint(GET_JSON_OBJECT(json_build,'$.ip')) as string) AS ipAddr

    ,GET_JSON_OBJECT(data_tmp,'$.isSuccess') AS isSuccess     --isSuccess(1.是 0否)

    ,GET_JSON_OBJECT(data_tmp,'$.loginMode') AS loginMode     -- 登陆模式 1 游客登陆 2 账户登陆

    ,GET_JSON_OBJECT(data_tmp,'$.loginType') AS loginType     -- 登陆方式 1:在线登陆 2 离线登陆

    ,GET_JSON_OBJECT(data_tmp,'$.school') AS school

    ,GET_JSON_OBJECT(data_tmp,'$.schoolCode') AS schoolCode  

    ,GET_JSON_OBJECT(data_tmp,'$.width') AS width

    ,GET_JSON_OBJECT(data_tmp,'$.subject') AS subject

    ,GET_JSON_OBJECT(data_tmp,'$.subjectCode') AS subjectCode

    ,GET_JSON_OBJECT(data_tmp,'$.classTime') AS classTime

    ,GET_JSON_OBJECT(data_tmp,'$.reason') AS reason

    ,GET_JSON_OBJECT(data_tmp,'$.operateVersion') AS operateVersion  

    ----------新增---------

    ,CASE WHEN GET_JSON_OBJECT(data_tmp,'$.userId')  is not NULL THEN GET_JSON_OBJECT(data_tmp,'$.userId')

          WHEN GET_JSON_OBJECT(data_tmp,'$.teacherCode') is not null THEN  GET_JSON_OBJECT(data_tmp,'$.teacherCode')

          ELSE  GET_JSON_OBJECT(data_tmp,'$.userId')

     END  AS userId          

    ,GET_JSON_OBJECT(data_tmp,'$.userName') AS userName

    ,GET_JSON_OBJECT(data_tmp,'$.userType') AS userType

    ,GET_JSON_OBJECT(data_tmp,'$.account') AS account

    ,GET_JSON_OBJECT(data_tmp,'$.courseId') AS courseId  

    ,GET_JSON_OBJECT(data_tmp,'$.pageName') AS pageName  

    ,GET_JSON_OBJECT(data_tmp,'$.pageTitle') AS pageTitle  

    ,CASE    WHEN GET_JSON_OBJECT(data_tmp,'$.describe') is not NULL  THEN GET_JSON_OBJECT(data_tmp,'$.describe')

             WHEN GET_JSON_OBJECT(data_tmp,'$.eventDesc') is not NULL  THEN GET_JSON_OBJECT(data_tmp,'$.eventDesc')

             ELSE  ''

             END AS  describes    

    ,GET_JSON_OBJECT(data_tmp,'$.source') AS source      

    ,GET_JSON_OBJECT(data_tmp,'$.topDistance') AS topDistance  

    ,GET_JSON_OBJECT(data_tmp,'$.size') AS sizes  

   ---------------json_build--------------------

    ,GET_JSON_OBJECT(json_build,'$.sysVersion')   AS sysVersion    

    ,GET_JSON_OBJECT(json_build,'$.cpuType')     AS cpuType  

    ,GET_JSON_OBJECT(json_build,'$.memory') AS memory

    ,GET_JSON_OBJECT(json_build,'$.netType') AS netType

    ,GET_JSON_OBJECT(json_build,'$.sysName') AS sysName

    ,GET_JSON_OBJECT(json_build,'$.deviceModel') AS deviceModel

    ,GET_JSON_OBJECT(json_build,'$.deviceNo') AS deviceNo  

   -------------------新增--------------------

   ,GET_JSON_OBJECT(json_build,'$.screenHeight') AS screenHeight  

   ,GET_JSON_OBJECT(json_build,'$.screenWidth') AS screenWidth  

   ,GET_JSON_OBJECT(json_build,'$.browserName') AS browserName

   ,GET_JSON_OBJECT(json_build,'$.browserVersion') AS browserVersion

   ,GET_JSON_OBJECT(json_build,'$.browserWidth') AS browserWidth

   ,GET_JSON_OBJECT(json_build,'$.browserHeight') AS browserHeight

   ,GET_JSON_OBJECT(json_build,'$.ip') AS ip

   ,GET_JSON_OBJECT(json_build,'$.remoteIp') AS remoteIp

   ,GET_JSON_OBJECT(data_tmp,'$.actionName') AS actionName

   ,GET_JSON_OBJECT(data_tmp,'$.finishStatus') AS finishStatus

   ,GET_JSON_OBJECT(data_tmp,'$.isFirst') AS isFirst

   ,GET_JSON_OBJECT(data_tmp,'$.bankType') AS bankType

   ,GET_JSON_OBJECT(data_tmp,'$.book') AS book

   ,GET_JSON_OBJECT(data_tmp,'$.mode') AS mode

   ,GET_JSON_OBJECT(data_tmp,'$.chapter') AS chapter

   ,GET_JSON_OBJECT(data_tmp,'$.result') AS result

   ,GET_JSON_OBJECT(data_tmp,'$.knowledgeCount') AS knowledgeCount

   ,GET_JSON_OBJECT(data_tmp,'$.questCount') AS questCount

   ,GET_JSON_OBJECT(data_tmp,'$.scoreType') AS scoreType

   ,GET_JSON_OBJECT(data_tmp,'$.scoreModule') AS scoreModule

   ,GET_JSON_OBJECT(data_tmp,'$.appName') AS appName

   ,GET_JSON_OBJECT(data_tmp,'$.voteNumber') AS voteNumber

   ,GET_JSON_OBJECT(data_tmp,'$.perVoteNubmer') AS perVoteNubmer

   ,GET_JSON_OBJECT(data_tmp,'$.type') AS attributeType

--- 新增 2022-12-09 ----

   ,GET_JSON_OBJECT(data_tmp,'$.loginTypeName') AS loginTypeName

   ,GET_JSON_OBJECT(data_tmp,'$.name') AS noteName

   ,GET_JSON_OBJECT(data_tmp,'$.notes') AS notes

   ,GET_JSON_OBJECT(data_tmp,'$.pageNum') AS pageNum

   ,GET_JSON_OBJECT(data_tmp,'$.color') AS color  

   ,GET_JSON_OBJECT(data_tmp,'$.event') AS event

   ,GET_JSON_OBJECT(data_tmp,'$.date') AS  switchDate

   ,GET_JSON_OBJECT(data_tmp,'$.input') AS  inputValue  

   ,GET_JSON_OBJECT(data_tmp,'$.title') AS  title

   ,GET_JSON_OBJECT(data_tmp,'$.fileName') AS  fileName   --文件名

-- 1.文档:doc、docx、PDF

-- 2.音频:WAV、ape、AIFF、CD、AU、MP3、WMA、VQF、FLAC、MIDI、Ogg、U-Law、VOC、aac、RA/.RM/.RAM

-- 3.视频:avi、MOV/.QT、MKV、MP4、WMV、MPEG、BD、HDVD、RMVB、PROPER、R5、Watermarks、TS、DAT、SWF、ASF、3GP、FLV、HDRIP、IMAX

-- 4.课件:ppt、pptx、pps、ppsx、ppa、ppam、pot、potx、thmx

-- 5.图片:Webp、BMP、PCX、TIF、GIF、JPEG、TGA、EXIF、FPX、SVG、PSD、CDR、PCD、DXF、UFO、EPS、AI、PNG、HDRI、RAW、WMF、FLIC、EMF、ICO

-- 6.表格:xls、csv、CSS、XPS、xlsm、et、

-- 7.压缩包:RAR、ZIP、ARJ、Z、LZH、JAR

-- 8.其他

   ,GET_JSON_OBJECT(data_tmp,'$.fileId') AS  fileId      

   ,GET_JSON_OBJECT(data_tmp,'$.fileNames') AS  fileNames  

   ,GET_JSON_OBJECT(data_tmp,'$.beginDate') AS  beginDate    

   ,GET_JSON_OBJECT(data_tmp,'$.endDate') AS  endDate  

   ,GET_JSON_OBJECT(data_tmp,'$.questionId') AS  questionId   --题号

   ,GET_JSON_OBJECT(data_tmp,'$.packageName') AS  packageName  

   ,GET_JSON_OBJECT(data_tmp,'$.versionName') AS  versionName    

   ,GET_JSON_OBJECT(data_tmp,'$.versionCode') AS  versionCode

   ,GET_JSON_OBJECT(data_tmp,'$.jobId') AS  jobId

   ,GET_JSON_OBJECT(data_tmp,'$.answer') AS  answer

   ,GET_JSON_OBJECT(data_tmp,'$.wrong') AS  wrong    

   ,GET_JSON_OBJECT(data_tmp,'$.correct') AS  correct

   ,GET_JSON_OBJECT(data_tmp,'$.unanswered') AS  unanswered  

   ,GET_JSON_OBJECT(data_tmp,'$.finishNumber') AS  finishNumber  

   ,GET_JSON_OBJECT(data_tmp,'$.totalNumber') AS  totalNumber  

   ,GET_JSON_OBJECT(data_tmp,'$.word') AS  word  

   ,GET_JSON_OBJECT(data_tmp,'$.msg') AS  msg  

   ,GET_JSON_OBJECT(data_tmp,'$.count') AS  impCount  

   ,GET_JSON_OBJECT(json_build,'$.pageHeight') AS pageHeight   --页面高度

   ,GET_JSON_OBJECT(data_tmp,'$.answers') AS  answers --答题情况

   --新增--

   ,GET_JSON_OBJECT(data_tmp,'$.num') AS  num --题目数量

   ,GET_JSON_OBJECT(data_tmp,'$.op') AS  op --随机选人 选项 op(清除(NULL)、A、B、C) op(不随机(0)、1、2、3)

   ,GET_JSON_OBJECT(data_tmp,'$.leaveTime') AS  leaveTime --收卷倒计时

   ,GET_JSON_OBJECT(data_tmp,'$.examId') AS  examId --考试id

   ,GET_JSON_OBJECT(data_tmp,'$.id') AS  idCode --id,用英文逗号隔开,组code

   ,GET_JSON_OBJECT(data_tmp,'$.code') AS  code --对调学生

   ,GET_JSON_OBJECT(data_tmp,'$.rol') AS  rol --对调学生 位置rol

   ,GET_JSON_OBJECT(data_tmp,'$.col') AS  col --对调学生 位置col

   ,GET_JSON_OBJECT(data_tmp,'$.stage') AS  stage --学段

   ,GET_JSON_OBJECT(data_tmp,'$.version') AS  versions --学段

--    ,GET_JSON_OBJECT(data_tmp,'$.type') AS  见 attributeType --类型 type(1.批注作答 2.画板作答)

  -- ,GET_JSON_OBJECT(data_tmp,'$.actionName') AS actionName      --活动名称

  -- ,GET_JSON_OBJECT(data_tmp,'$.answer') AS  answer    --答案

   --,GET_JSON_OBJECT(data_tmp,'$.color') AS  color --颜色

  -- ,GET_JSON_OBJECT(data_tmp,'$.finishNumber') AS  finishNumber --找到的数量

   --,GET_JSON_OBJECT(data_tmp,'$.totalNumber') AS  totalNumber --总词数

  -- ,GET_JSON_OBJECT(data_tmp,'$.word') AS  word --未答数

  -- `completionStatus` varchar(100) DEFAULT NULL COMMENT '完成情况',

 -- ,GET_JSON_OBJECT(data_tmp,'$.event') AS  event --收起/展开事件

  --,GET_JSON_OBJECT(data_tmp,'$.fileId') AS  fileId --云端文件ID

  --,GET_JSON_OBJECT(data_tmp,'$.fileNames') AS  fileNames --文件名列表

  --,GET_JSON_OBJECT(data_tmp,'$.fileName') AS  fileName --文件名

  --,GET_JSON_OBJECT(data_tmp,'$.date') AS    dates --日期筛选类型

  --,GET_JSON_OBJECT(data_tmp,'$.result') AS  isfinish --完成情况 result:true/false

 -- ,GET_JSON_OBJECT(data_tmp,'$.input') AS  inputValue --输入值

  --,GET_JSON_OBJECT(data_tmp,'$.jobId') AS  jobId --作业ID

  --,GET_JSON_OBJECT(data_tmp,'$.name') AS  name --名称

  --,GET_JSON_OBJECT(data_tmp,'$.questionId') AS  questionId --题目ID

  --`jobType` varchar(50) DEFAULT NULL COMMENT '作业类型',

  --`noteName` varchar(50) DEFAULT NULL COMMENT '笔记本名称',

  --`signName` varchar(100) DEFAULT NULL COMMENT '标签名',  

  --`switchDate` datetime DEFAULT NULL COMMENT '日期切换日期值',

  --`thickNess` varchar(10) DEFAULT NULL COMMENT '粗细值',

  --`timeSlot` varchar(10) DEFAULT NULL COMMENT '时间段',

  --`toolName` varchar(50) DEFAULT NULL COMMENT '工具名称',

  --`wrongBookName` varchar(50) DEFAULT NULL COMMENT '错题本名称',

--  ,GET_JSON_OBJECT(json_build,'$.loginTypeName') AS appcode

--    ,GET_JSON_OBJECT(json_build,'$.appVersion') AS appVersion

,createtime  --创建时间

from (

select  theid,

                    id,

                    name,

                    eventtime,

                    apptype,

                     regexp_replace(regexp_replace(regexp_replace(build,'^\\[',''),'\\]$',''),'},\\{','}|{') AS json_build,

                    spanid,

                    type,

                    appcode,

                    regexp_replace(regexp_replace(regexp_replace(data,'^\\[',''),'\\]$',''),'},\\{','}|{') AS json_data,

                    appversion,

                    appnum,

                    preid,

                    traceid,

                    createtime

                     from  dw_es_action_log_inc_new

                    WHERE

                   --  DATETRUNC(eventtime,'hh') >=  DATETRUNC(dateadd(TO_DATE('${cyctime}','yyyymmddhhmiss'), -1, 'hh'),'hh')     or    

                   

                     DATETRUNC(createtime,'DD') >=  DATETRUNC(TO_DATE('${bizdate}','yyyymmdd'),'DD')  --测试使用            

) a0

lateral view explode(split(json_data,'\\|')) b AS data_tmp;

从中可以发现,针对很多不同格式的 数据,可以进行这种分解处理。

select bh,bjmc,nj,xxbm,xxmc,xnid,xn,xnmc,xd,rnk from (SELECT bh,bjmc,nj,xxbm,xxmc,xnid,xn,xnmc,xd, Row_Number() OVER (partition by bh,xxbm ORDER BY nj desc) rnk

FROM dw_class where  zt='1'  and  bjlxm = '1' and xnid <> '' ) aa where rnk='1'  

同时可以使用Row_Number,进行数据处理,获取最大年级数据。

你可能感兴趣的:(elasticsearch,java,前端)