由于日志数据存在ES项目里,需要从ES中获取日志进行分析,使用SQL数据进行处理,如下:
select
traceid-- STRING COMMENT '流程id',
,appnum -- BIGINT COMMENT '迭代号',
,appversion --STRING COMMENT 'APP版本',
,appcode -- STRING COMMENT '应用编码',
,type -- STRING COMMENT '类型',
,spanid -- STRING COMMENT '模块id',
,apptype -- STRING COMMENT '应用类型详情见定义',
,eventtime -- DATETIME COMMENT '日期',
,name -- STRING COMMENT '名称',
,id -- STRING COMMENT 'id',
,theid -- STRING COMMENT 'theId'
,preid
-------------data----------------
,GET_JSON_OBJECT(data_tmp,'$.allInOne') AS allInOne
,GET_JSON_OBJECT(data_tmp,'$.class') AS class
,GET_JSON_OBJECT(data_tmp,'$.classCode') AS classCode
,GET_JSON_OBJECT(data_tmp,'$.deviceId') AS deviceId
,GET_JSON_OBJECT(data_tmp,'$.grade') AS grade
,GET_JSON_OBJECT(data_tmp,'$.gradeCode') AS gradeCode
,GET_JSON_OBJECT(data_tmp,'$.handleTime') AS handleTime
,GET_JSON_OBJECT(data_tmp,'$.heigth') AS heigth
,cast(ipint(GET_JSON_OBJECT(json_build,'$.ip')) as string) AS ipAddr
,GET_JSON_OBJECT(data_tmp,'$.isSuccess') AS isSuccess --isSuccess(1.是 0否)
,GET_JSON_OBJECT(data_tmp,'$.loginMode') AS loginMode -- 登陆模式 1 游客登陆 2 账户登陆
,GET_JSON_OBJECT(data_tmp,'$.loginType') AS loginType -- 登陆方式 1:在线登陆 2 离线登陆
,GET_JSON_OBJECT(data_tmp,'$.school') AS school
,GET_JSON_OBJECT(data_tmp,'$.schoolCode') AS schoolCode
,GET_JSON_OBJECT(data_tmp,'$.width') AS width
,GET_JSON_OBJECT(data_tmp,'$.subject') AS subject
,GET_JSON_OBJECT(data_tmp,'$.subjectCode') AS subjectCode
,GET_JSON_OBJECT(data_tmp,'$.classTime') AS classTime
,GET_JSON_OBJECT(data_tmp,'$.reason') AS reason
,GET_JSON_OBJECT(data_tmp,'$.operateVersion') AS operateVersion
----------新增---------
,CASE WHEN GET_JSON_OBJECT(data_tmp,'$.userId') is not NULL THEN GET_JSON_OBJECT(data_tmp,'$.userId')
WHEN GET_JSON_OBJECT(data_tmp,'$.teacherCode') is not null THEN GET_JSON_OBJECT(data_tmp,'$.teacherCode')
ELSE GET_JSON_OBJECT(data_tmp,'$.userId')
END AS userId
,GET_JSON_OBJECT(data_tmp,'$.userName') AS userName
,GET_JSON_OBJECT(data_tmp,'$.userType') AS userType
,GET_JSON_OBJECT(data_tmp,'$.account') AS account
,GET_JSON_OBJECT(data_tmp,'$.courseId') AS courseId
,GET_JSON_OBJECT(data_tmp,'$.pageName') AS pageName
,GET_JSON_OBJECT(data_tmp,'$.pageTitle') AS pageTitle
,CASE WHEN GET_JSON_OBJECT(data_tmp,'$.describe') is not NULL THEN GET_JSON_OBJECT(data_tmp,'$.describe')
WHEN GET_JSON_OBJECT(data_tmp,'$.eventDesc') is not NULL THEN GET_JSON_OBJECT(data_tmp,'$.eventDesc')
ELSE ''
END AS describes
,GET_JSON_OBJECT(data_tmp,'$.source') AS source
,GET_JSON_OBJECT(data_tmp,'$.topDistance') AS topDistance
,GET_JSON_OBJECT(data_tmp,'$.size') AS sizes
---------------json_build--------------------
,GET_JSON_OBJECT(json_build,'$.sysVersion') AS sysVersion
,GET_JSON_OBJECT(json_build,'$.cpuType') AS cpuType
,GET_JSON_OBJECT(json_build,'$.memory') AS memory
,GET_JSON_OBJECT(json_build,'$.netType') AS netType
,GET_JSON_OBJECT(json_build,'$.sysName') AS sysName
,GET_JSON_OBJECT(json_build,'$.deviceModel') AS deviceModel
,GET_JSON_OBJECT(json_build,'$.deviceNo') AS deviceNo
-------------------新增--------------------
,GET_JSON_OBJECT(json_build,'$.screenHeight') AS screenHeight
,GET_JSON_OBJECT(json_build,'$.screenWidth') AS screenWidth
,GET_JSON_OBJECT(json_build,'$.browserName') AS browserName
,GET_JSON_OBJECT(json_build,'$.browserVersion') AS browserVersion
,GET_JSON_OBJECT(json_build,'$.browserWidth') AS browserWidth
,GET_JSON_OBJECT(json_build,'$.browserHeight') AS browserHeight
,GET_JSON_OBJECT(json_build,'$.ip') AS ip
,GET_JSON_OBJECT(json_build,'$.remoteIp') AS remoteIp
,GET_JSON_OBJECT(data_tmp,'$.actionName') AS actionName
,GET_JSON_OBJECT(data_tmp,'$.finishStatus') AS finishStatus
,GET_JSON_OBJECT(data_tmp,'$.isFirst') AS isFirst
,GET_JSON_OBJECT(data_tmp,'$.bankType') AS bankType
,GET_JSON_OBJECT(data_tmp,'$.book') AS book
,GET_JSON_OBJECT(data_tmp,'$.mode') AS mode
,GET_JSON_OBJECT(data_tmp,'$.chapter') AS chapter
,GET_JSON_OBJECT(data_tmp,'$.result') AS result
,GET_JSON_OBJECT(data_tmp,'$.knowledgeCount') AS knowledgeCount
,GET_JSON_OBJECT(data_tmp,'$.questCount') AS questCount
,GET_JSON_OBJECT(data_tmp,'$.scoreType') AS scoreType
,GET_JSON_OBJECT(data_tmp,'$.scoreModule') AS scoreModule
,GET_JSON_OBJECT(data_tmp,'$.appName') AS appName
,GET_JSON_OBJECT(data_tmp,'$.voteNumber') AS voteNumber
,GET_JSON_OBJECT(data_tmp,'$.perVoteNubmer') AS perVoteNubmer
,GET_JSON_OBJECT(data_tmp,'$.type') AS attributeType
--- 新增 2022-12-09 ----
,GET_JSON_OBJECT(data_tmp,'$.loginTypeName') AS loginTypeName
,GET_JSON_OBJECT(data_tmp,'$.name') AS noteName
,GET_JSON_OBJECT(data_tmp,'$.notes') AS notes
,GET_JSON_OBJECT(data_tmp,'$.pageNum') AS pageNum
,GET_JSON_OBJECT(data_tmp,'$.color') AS color
,GET_JSON_OBJECT(data_tmp,'$.event') AS event
,GET_JSON_OBJECT(data_tmp,'$.date') AS switchDate
,GET_JSON_OBJECT(data_tmp,'$.input') AS inputValue
,GET_JSON_OBJECT(data_tmp,'$.title') AS title
,GET_JSON_OBJECT(data_tmp,'$.fileName') AS fileName --文件名
-- 1.文档:doc、docx、PDF
-- 2.音频:WAV、ape、AIFF、CD、AU、MP3、WMA、VQF、FLAC、MIDI、Ogg、U-Law、VOC、aac、RA/.RM/.RAM
-- 3.视频:avi、MOV/.QT、MKV、MP4、WMV、MPEG、BD、HDVD、RMVB、PROPER、R5、Watermarks、TS、DAT、SWF、ASF、3GP、FLV、HDRIP、IMAX
-- 4.课件:ppt、pptx、pps、ppsx、ppa、ppam、pot、potx、thmx
-- 5.图片:Webp、BMP、PCX、TIF、GIF、JPEG、TGA、EXIF、FPX、SVG、PSD、CDR、PCD、DXF、UFO、EPS、AI、PNG、HDRI、RAW、WMF、FLIC、EMF、ICO
-- 6.表格:xls、csv、CSS、XPS、xlsm、et、
-- 7.压缩包:RAR、ZIP、ARJ、Z、LZH、JAR
-- 8.其他
,GET_JSON_OBJECT(data_tmp,'$.fileId') AS fileId
,GET_JSON_OBJECT(data_tmp,'$.fileNames') AS fileNames
,GET_JSON_OBJECT(data_tmp,'$.beginDate') AS beginDate
,GET_JSON_OBJECT(data_tmp,'$.endDate') AS endDate
,GET_JSON_OBJECT(data_tmp,'$.questionId') AS questionId --题号
,GET_JSON_OBJECT(data_tmp,'$.packageName') AS packageName
,GET_JSON_OBJECT(data_tmp,'$.versionName') AS versionName
,GET_JSON_OBJECT(data_tmp,'$.versionCode') AS versionCode
,GET_JSON_OBJECT(data_tmp,'$.jobId') AS jobId
,GET_JSON_OBJECT(data_tmp,'$.answer') AS answer
,GET_JSON_OBJECT(data_tmp,'$.wrong') AS wrong
,GET_JSON_OBJECT(data_tmp,'$.correct') AS correct
,GET_JSON_OBJECT(data_tmp,'$.unanswered') AS unanswered
,GET_JSON_OBJECT(data_tmp,'$.finishNumber') AS finishNumber
,GET_JSON_OBJECT(data_tmp,'$.totalNumber') AS totalNumber
,GET_JSON_OBJECT(data_tmp,'$.word') AS word
,GET_JSON_OBJECT(data_tmp,'$.msg') AS msg
,GET_JSON_OBJECT(data_tmp,'$.count') AS impCount
,GET_JSON_OBJECT(json_build,'$.pageHeight') AS pageHeight --页面高度
,GET_JSON_OBJECT(data_tmp,'$.answers') AS answers --答题情况
--新增--
,GET_JSON_OBJECT(data_tmp,'$.num') AS num --题目数量
,GET_JSON_OBJECT(data_tmp,'$.op') AS op --随机选人 选项 op(清除(NULL)、A、B、C) op(不随机(0)、1、2、3)
,GET_JSON_OBJECT(data_tmp,'$.leaveTime') AS leaveTime --收卷倒计时
,GET_JSON_OBJECT(data_tmp,'$.examId') AS examId --考试id
,GET_JSON_OBJECT(data_tmp,'$.id') AS idCode --id,用英文逗号隔开,组code
,GET_JSON_OBJECT(data_tmp,'$.code') AS code --对调学生
,GET_JSON_OBJECT(data_tmp,'$.rol') AS rol --对调学生 位置rol
,GET_JSON_OBJECT(data_tmp,'$.col') AS col --对调学生 位置col
,GET_JSON_OBJECT(data_tmp,'$.stage') AS stage --学段
,GET_JSON_OBJECT(data_tmp,'$.version') AS versions --学段
-- ,GET_JSON_OBJECT(data_tmp,'$.type') AS 见 attributeType --类型 type(1.批注作答 2.画板作答)
-- ,GET_JSON_OBJECT(data_tmp,'$.actionName') AS actionName --活动名称
-- ,GET_JSON_OBJECT(data_tmp,'$.answer') AS answer --答案
--,GET_JSON_OBJECT(data_tmp,'$.color') AS color --颜色
-- ,GET_JSON_OBJECT(data_tmp,'$.finishNumber') AS finishNumber --找到的数量
--,GET_JSON_OBJECT(data_tmp,'$.totalNumber') AS totalNumber --总词数
-- ,GET_JSON_OBJECT(data_tmp,'$.word') AS word --未答数
-- `completionStatus` varchar(100) DEFAULT NULL COMMENT '完成情况',
-- ,GET_JSON_OBJECT(data_tmp,'$.event') AS event --收起/展开事件
--,GET_JSON_OBJECT(data_tmp,'$.fileId') AS fileId --云端文件ID
--,GET_JSON_OBJECT(data_tmp,'$.fileNames') AS fileNames --文件名列表
--,GET_JSON_OBJECT(data_tmp,'$.fileName') AS fileName --文件名
--,GET_JSON_OBJECT(data_tmp,'$.date') AS dates --日期筛选类型
--,GET_JSON_OBJECT(data_tmp,'$.result') AS isfinish --完成情况 result:true/false
-- ,GET_JSON_OBJECT(data_tmp,'$.input') AS inputValue --输入值
--,GET_JSON_OBJECT(data_tmp,'$.jobId') AS jobId --作业ID
--,GET_JSON_OBJECT(data_tmp,'$.name') AS name --名称
--,GET_JSON_OBJECT(data_tmp,'$.questionId') AS questionId --题目ID
--`jobType` varchar(50) DEFAULT NULL COMMENT '作业类型',
--`noteName` varchar(50) DEFAULT NULL COMMENT '笔记本名称',
--`signName` varchar(100) DEFAULT NULL COMMENT '标签名',
--`switchDate` datetime DEFAULT NULL COMMENT '日期切换日期值',
--`thickNess` varchar(10) DEFAULT NULL COMMENT '粗细值',
--`timeSlot` varchar(10) DEFAULT NULL COMMENT '时间段',
--`toolName` varchar(50) DEFAULT NULL COMMENT '工具名称',
--`wrongBookName` varchar(50) DEFAULT NULL COMMENT '错题本名称',
-- ,GET_JSON_OBJECT(json_build,'$.loginTypeName') AS appcode
-- ,GET_JSON_OBJECT(json_build,'$.appVersion') AS appVersion
,createtime --创建时间
from (
select theid,
id,
name,
eventtime,
apptype,
regexp_replace(regexp_replace(regexp_replace(build,'^\\[',''),'\\]$',''),'},\\{','}|{') AS json_build,
spanid,
type,
appcode,
regexp_replace(regexp_replace(regexp_replace(data,'^\\[',''),'\\]$',''),'},\\{','}|{') AS json_data,
appversion,
appnum,
preid,
traceid,
createtime
from dw_es_action_log_inc_new
WHERE
-- DATETRUNC(eventtime,'hh') >= DATETRUNC(dateadd(TO_DATE('${cyctime}','yyyymmddhhmiss'), -1, 'hh'),'hh') or
DATETRUNC(createtime,'DD') >= DATETRUNC(TO_DATE('${bizdate}','yyyymmdd'),'DD') --测试使用
) a0
lateral view explode(split(json_data,'\\|')) b AS data_tmp;
从中可以发现,针对很多不同格式的 数据,可以进行这种分解处理。
select bh,bjmc,nj,xxbm,xxmc,xnid,xn,xnmc,xd,rnk from (SELECT bh,bjmc,nj,xxbm,xxmc,xnid,xn,xnmc,xd, Row_Number() OVER (partition by bh,xxbm ORDER BY nj desc) rnk
FROM dw_class where zt='1' and bjlxm = '1' and xnid <> '' ) aa where rnk='1'
同时可以使用Row_Number,进行数据处理,获取最大年级数据。