Apache Doris 2.0 基于 Apache Doris 向量化 MPP 引擎,增加了倒排索引和半结构化 JSON 数据支持,更好地满足日志存储、检索、分析需求。与基于 Elasticsearch(ES)的日志存储方案相比,有如下优势:
-- Demo setup: create the testdb database and the httplogs access-log table
-- with an inverted index on each searchable column.
CREATE DATABASE testdb;
USE testdb;
CREATE TABLE `httplogs` (
-- Integer timestamp. The sample rows and partition bounds look like Unix epoch
-- seconds from 1998 (TODO confirm the unit with the data source).
`@timestamp` int(11) NULL COMMENT "",
`clientip` varchar(20) NULL COMMENT "",
-- Raw HTTP request line, e.g. GET /images/hm_bg.jpg HTTP/1.0
`request` text NULL COMMENT "",
`status` int(11) NULL COMMENT "",
`size` int(11) NULL COMMENT "",
-- One inverted index per column.
INDEX size_idx (`size`) USING INVERTED COMMENT '',
INDEX status_idx (`status`) USING INVERTED COMMENT '',
-- NOTE(review): INVERTED(NONE) presumably means the value is indexed without
-- tokenization. Confirm against the Doris version in use.
INDEX clientip_idx (`clientip`) USING INVERTED(NONE) COMMENT '',
INDEX request_idx (`request`) USING INVERTED COMMENT '',
-- The three literal dots on the next line are part of the statement, not an
-- omission. Per the note that follows this DDL, they declare that the table
-- detects schema changes automatically. Do not remove them.
...
) ENGINE=OLAP
DUPLICATE KEY(`@timestamp`)
COMMENT "OLAP"
-- Range partitions on @timestamp. Each partition is written as [lower, upper).
-- The first and last bounds are the INT minimum and maximum.
-- Partition names look like week-of-year plus year (p18 1998 ... p25 1998),
-- TODO confirm.
PARTITION BY RANGE(`@timestamp`)
(PARTITION p181998 VALUES [("-2147483648"), ("894225602")),
PARTITION p191998 VALUES [("894225602"), ("894830402")),
PARTITION p201998 VALUES [("894830402"), ("895435201")),
PARTITION p211998 VALUES [("895435201"), ("896040001")),
PARTITION p221998 VALUES [("896040001"), ("896644801")),
PARTITION p231998 VALUES [("896644801"), ("897249601")),
PARTITION p241998 VALUES [("897249601"), ("897854300")),
PARTITION p251998 VALUES [("897854300"), ("2147483647")))
DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 12
PROPERTIES (
"in_memory" = "false",
"storage_format" = "V2",
"compression" = "ZSTD"
);
说明:建表语句中的 `...` 是特殊语法,用于声明该表可以自动感知数据结构的变化。
{"@timestamp": 893964617, "clientip":"40.135.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736}
{"@timestamp": 893964653, "clientip":"232.0.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736}
{"@timestamp": 893964672, "clientip":"26.1.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736}
{"@timestamp": 893964679, "clientip":"247.37.0.0", "request": "GET /french/splash_inet.html HTTP/1.0", "status": 200, "size": 3781}
curl --location-trusted -u root: -H "format: json" -H "auto_commit:true" -H "enable_vectorized_engine:true" -H "read_json_by_line:true" -T logfile.json http://127.0.0.1:8030/api/testdb/httplogs/_stream_load
mysql> select * from httplogs;
+------------+------------+---------------------------------------+--------+-------+
| @timestamp | clientip | request | status | size |
+------------+------------+---------------------------------------+--------+-------+
| 893964653 | 232.0.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 |
| 893964679 | 247.37.0.0 | GET /french/splash_inet.html HTTP/1.0 | 200 | 3781 |
| 893964617 | 40.135.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 |
| 893964672 | 26.1.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 |
+------------+------------+---------------------------------------+--------+-------+
4 rows in set (0.02 sec)
mysql>
vim logfile-1.json
{"@timestamp": 893964617,"log_type":"error", "clientip":"40.135.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736}
curl --location-trusted -u root: -H "format: json" -H "auto_commit:true" -H "enable_vectorized_engine:true" -H "read_json_by_line:true" -T logfile-1.json http://127.0.0.1:8030/api/testdb/httplogs/_stream_load
mysql> select * from httplogs;
+------------+------------+---------------------------------------+--------+-------+----------+
| @timestamp | clientip | request | status | size | log_type |
+------------+------------+---------------------------------------+--------+-------+----------+
| 893964653 | 232.0.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | NULL |
| 893964679 | 247.37.0.0 | GET /french/splash_inet.html HTTP/1.0 | 200 | 3781 | NULL |
| 893964617 | 40.135.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | NULL |
| 893964617 | 40.135.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | error |
| 893964672 | 26.1.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | NULL |
+------------+------------+---------------------------------------+--------+-------+----------+
5 rows in set (0.02 sec)
mysql>
可以看到,新增的 log_type 字段已经被自动识别。
mysql> select * from httplogs where request MATCH_ALL 'images ';
+------------+------------+--------------------------------+--------+-------+----------+
| @timestamp | clientip | request | status | size | log_type |
+------------+------------+--------------------------------+--------+-------+----------+
| 893964653 | 232.0.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | NULL |
| 893964617 | 40.135.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | NULL |
| 893964617 | 40.135.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | error |
| 893964672 | 26.1.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | NULL |
+------------+------------+--------------------------------+--------+-------+----------+
4 rows in set (0.01 sec)
mysql>
mysql> select * from httplogs where request MATCH_ALL 'images test ';
Empty set (0.01 sec)
mysql>
mysql> select * from httplogs where request MATCH_ANY 'images test ';
+------------+------------+--------------------------------+--------+-------+----------+
| @timestamp | clientip | request | status | size | log_type |
+------------+------------+--------------------------------+--------+-------+----------+
| 893964653 | 232.0.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | NULL |
| 893964617 | 40.135.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | NULL |
| 893964617 | 40.135.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | error |
| 893964672 | 26.1.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | NULL |
+------------+------------+--------------------------------+--------+-------+----------+
4 rows in set (0.01 sec)
mysql>
mysql> select * from httplogs where request MATCH 'images test ';
+------------+------------+--------------------------------+--------+-------+----------+
| @timestamp | clientip | request | status | size | log_type |
+------------+------------+--------------------------------+--------+-------+----------+
| 893964617 | 40.135.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | NULL |
| 893964617 | 40.135.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | error |
| 893964672 | 26.1.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | NULL |
| 893964653 | 232.0.0.0 | GET /images/hm_bg.jpg HTTP/1.0 | 200 | 24736 | NULL |
+------------+------------+--------------------------------+--------+-------+----------+
4 rows in set (0.02 sec)