DARPA-TC-engagement5-theia部分数据格式分析

转换出来的JSON数据主要分为四大类:Event、Subject、Object、Principal,分别代表系统事件、主体、客体和用户。各大类中子类的数量取决于CDM的版本。ShadeWatcher使用的是e3的数据,采用的是CDM18,而e5默认使用的是CDM20。相较于CDM18,CDM20发生了一些变化,主要是改变了一些字段,增加了一些类型。这些新增类型大多是系统调用,用于细化事件类型。本文以ShadeWatcher和转换出来的小样本数据为参考依据对数据格式进行分析,可能存在遗漏。JSON数据样例(只包含EVENT)可在我的GitHub下载,或是参考之前的博客《DARPA TC-engagement5数据集解析为json格式输出到本地》自行转换。

DARPA-TC-engagement5-theia部分数据格式分析_第1张图片

1. Event

e3数据集采用CDM18进行解析,Event一共有23种类型。ShadeWatcher采用了其中的19种类型,丢弃了4种类型:EVENT_BOOT、EVENT_MMAP、EVENT_OTHER、EVENT_MPROTECT。CDM20新增了五种类型,目测这几个类型的数据占比都不大。ShadeWatcher采用的19种类型及其映射如下:

syscallMap["EVENT_EXECUTE"] = SyscallType_t::Execve;
syscallMap["EVENT_CLONE"] = SyscallType_t::Clone;
syscallMap["EVENT_FORK"] = SyscallType_t::Clone;
syscallMap["EVENT_OPEN"] = SyscallType_t::Open;
syscallMap["EVENT_CLOSE"] = SyscallType_t::Close;
syscallMap["EVENT_CONNECT"] = SyscallType_t::Connect;
syscallMap["EVENT_UNLINK"] = SyscallType_t::Delete;
syscallMap["EVENT_READ"] = SyscallType_t::Read;
syscallMap["EVENT_WRITE"] = SyscallType_t::Write;
syscallMap["EVENT_RECVFROM"] = SyscallType_t::Recvfrom;
syscallMap["EVENT_SENDTO"] = SyscallType_t::Sendto;
syscallMap["EVENT_RECVMSG"] = SyscallType_t::Recvmsg;
syscallMap["EVENT_SENDMSG"] = SyscallType_t::Sendmsg;
syscallMap["EVENT_RENAME"] = SyscallType_t::Rename;
syscallMap["EVENT_READ_SOCKET_PARAMS"] = SyscallType_t::Recv;
syscallMap["EVENT_WRITE_SOCKET_PARAMS"] = SyscallType_t::Send;
syscallMap["EVENT_LOADLIBRARY"] = SyscallType_t::Load;
syscallMap["EVENT_CREATE_OBJECT"] = SyscallType_t::Create;
syscallMap["EVENT_UPDATE"] = SyscallType_t::Update;

一个数据样例如下所示(样例中键为"#"的条目是本文添加的注释,用于说明紧随其后的字段,并非原始数据的一部分):

{
    "CDMVersion": "20",
    "source": "SOURCE_LINUX_THEIA",
    "type": "RECORD_EVENT",
    "#":"会话号,shadewatcher采用了这个字段但是没有采用它的值,而是将一个图视为一个会话",
    "sessionNumber": 5,
    "datum": {
        "com.bbn.tc.schema.avro.cdm20.Event": {
            "#":"第二个对象,update和rename事件会有两个对象(对应两条边),其余事件该字段为全0",
            "predicateObject2": {
                "com.bbn.tc.schema.avro.cdm20.UUID": "00000000-0000-0000-0000-000000000000"
            },
            "predicateObjectPath": null,
            "subject": {
                "com.bbn.tc.schema.avro.cdm20.UUID": "2A266F68-012B-5E22-9CA7-575CE8BEE27C"
            },
            "programPoint": null,
            "properties": {
                "map": {}
            },
            "predicateObject": {
                "com.bbn.tc.schema.avro.cdm20.UUID": "B5AF11CE-7902-5F60-8E72-4ECB30FDAEDA"
            },
            "threadId": {
                "int": 1958
            },
            "predicateObject2Path": null,
            "type": "EVENT_READ",
            "uuid": "FD4496E1-54A8-598C-9408-5E123500A8D4",
            "size": {
                "long": 272
            },
   
            "timestampNanos": "1557235299707",
            "names": null,
            "parameters": null,
            "#":"表示事件相对于同一执行线程中的其他事件的逻辑顺序",
            "sequence": {
                "long": 1
            },
            "location": null
        }
    },
    "hostId": "37345038-89F2-5899-8FD2-B6D0844A7DBF",
    "@timestamp": "2019-05-07T13:21:39.707Z"
}

2. Subject

Subject有且只有一种,那就是进程。

{
    "CDMVersion": "20",
    "source": "SOURCE_LINUX_THEIA",
    "type": "RECORD_SUBJECT",
    "sessionNumber": 5,
    "datum": {
        "com.bbn.tc.schema.avro.cdm20.Subject": {
            "privilegeLevel": null,
            "unitId": null,
            "#":"ppid是父进程的进程号,path为可执行程序的路径,tgid为线程组号,其余字段为进程的各类用户ID/组ID(凭证信息)",
            "properties": {
                "map": {
                    "sgid": "1003",
                    "suid": "1003",
                    "egid": "1003",
                    "gid": "1003",
                    "uid": "1003",
                    "tgid": "1911",
                    "fsgid": "1003",
                    "fsuid": "1003",
                    "euid": "1003",
                    "path": "/usr/lib/gvfs/gvfs-afc-volume-monitor",
                    "ppid": "1"
                }
            },
            "iteration": null,
            "type": "SUBJECT_PROCESS",
            "uuid": "C6A9DF04-D14A-57F2-97C3-3CBE2C0FF4FF",
            "parentSubject": {
                "com.bbn.tc.schema.avro.cdm20.UUID": "DD56B598-9E74-58C3-B3E8-2C623780B8ED"
            },
            "importedLibraries": null,
            "#":"进程号",
            "cid": 1912,
            "localPrincipal": {
                "com.bbn.tc.schema.avro.cdm20.UUID": "991869FF-5610-5CCB-9BA4-346353351B12"
            },
            "startTimestampNanos": {
                "long": 1557235386887758779
            },
            "count": null,
            "#":"进程的命令行,相当于是启动该进程的命令",
            "cmdLine": {
                "string": "/usr/lib/gvfs/gvfs-afc-volume-monitor"
            },
            "exportedLibraries": null
        }
    },
    "hostId": "37345038-89F2-5899-8FD2-B6D0844A7DBF",
    "@timestamp": "2023-08-08T02:36:47.351Z"
}

3. Object

Object一共4种,分别为RECORD_MEMORY_OBJECT、RECORD_IPC_OBJECT、RECORD_FILE_OBJECT、RECORD_NET_FLOW_OBJECT。ShadeWatcher中只采用了后两种。

3.1 memory_object

{
    "CDMVersion": "20",
    "source": "SOURCE_LINUX_THEIA",
    "type": "RECORD_MEMORY_OBJECT",
    "sessionNumber": 5,
    "datum": {
        "com.bbn.tc.schema.avro.cdm20.MemoryObject": {
            "pageNumber": null,
            "baseObject": {
                "epoch": null,
                "properties": {
                    "map": {
                        "rc": "0"
                    }
                },
                "permission": null
            },
            "uuid": "B83AA80F-B1CD-5E10-B8A4-365281753277",
            "memoryAddress": 139867157049344,
            "pageOffset": null,
            "size": {
                "long": 2327040
            }
        }
    },
    "hostId": "37345038-89F2-5899-8FD2-B6D0844A7DBF",
    "@timestamp": "2023-08-08T02:36:40.315Z"
}

3.2 ipc_object

{
    "CDMVersion": "20",
    "source": "SOURCE_LINUX_THEIA",
    "type": "RECORD_IPC_OBJECT",
    "sessionNumber": 5,
    "datum": {
        "com.bbn.tc.schema.avro.cdm20.IpcObject": {
            "uuid1": null,
            "baseObject": {
                "epoch": null,
                "properties": {
                    "map": {
                        "path": "@/tmp/.X11-unix/X0"
                    }
                },
                "permission": null
            },
            "type": "IPC_OBJECT_SOCKET_ABSTRACT",
            "uuid": "B5AF11CE-7902-5F60-8E72-4ECB30FDAEDA",
            "fd1": null,
            "uuid2": null,
            "fd2": null
        }
    },
    "hostId": "37345038-89F2-5899-8FD2-B6D0844A7DBF",
    "@timestamp": "2023-08-08T02:36:40.327Z"
}

3.3 file_object

{
    "CDMVersion": "20",
    "source": "SOURCE_LINUX_THEIA",
    "type": "RECORD_FILE_OBJECT",
    "sessionNumber": 5,
    "datum": {
        "com.bbn.tc.schema.avro.cdm20.FileObject": {
            "fileDescriptor": null,
            "hashes": null,
            "peInfo": null,
            "localPrincipal": {
                "com.bbn.tc.schema.avro.cdm20.UUID": "B6C54489-38A0-5F50-A60A-FD8D76219CAE"
            },
            "baseObject": {
                "epoch": null,
                "properties": {
                    "map": {
                        "uid": "0",
                        "inode": "0x520ca3",
                        "mode": "0",
                        "dev": "0xfd00001",
                        "#":"少部分文件对象不具有这个字段",
                        "filename": "/lib/x86_64-linux-gnu/libutil-2.15.so",
                        "ids": "0/0",
                        "gid": "0"
                    }
                },
                "permission": null
            },
            "type": "FILE_OBJECT_BLOCK",
            "uuid": "0100D00F-A30C-5200-0000-0000BB90005A",
            "size": null
        }
    },
    "hostId": "37345038-89F2-5899-8FD2-B6D0844A7DBF",
    "@timestamp": "2023-08-08T02:36:40.827Z"
}

3.4 netflow_object

{
    "CDMVersion": "20",
    "source": "SOURCE_LINUX_THEIA",
    "type": "RECORD_NET_FLOW_OBJECT",
    "sessionNumber": 5,
    "datum": {
        "com.bbn.tc.schema.avro.cdm20.NetFlowObject": {
            "fileDescriptor": null,
            "localAddress": {
                "string": "10.0.6.60"
            },
            "remoteAddress": {
                "string": "10.0.4.2"
            },
            "localPort": {
                "int": 22
            },
            "remotePort": {
                "int": 36764
            },
            "ipProtocol": null,
            "baseObject": {
                "epoch": null,
                "properties": {
                    "map": {}
                },
                "permission": null
            },
            "uuid": "0A00063C-1600-0A00-0402-9C8F00000000",
            "initTcpSeqNum": null
        }
    },
    "hostId": "37345038-89F2-5899-8FD2-B6D0844A7DBF",
    "@timestamp": "2023-08-08T02:36:40.324Z"
}

你可能感兴趣的:(数据集,DARPA,TC)