将elasticsearch-hadoop.jar添加到hive的classpath路径中
-- Register the elasticsearch-hadoop connector jar on the Hive classpath from inside a Hive session.
add jar /path/elasticsearch-hadoop.jar;
# Launch the Hive CLI with the connector jar passed through the hive.aux.jars.path setting.
bin/hive -hiveconf hive.aux.jars.path=/path/elasticsearch-hadoop.jar
<property>
  <name>hive.aux.jars.path</name>
  <value>/path/elasticsearch-hadoop.jar</value>
  <description>A comma separated list (with no spaces) of the jar files</description>
</property>
-- Hive external table whose storage is delegated to the Elasticsearch storage handler.
-- "(...)" stands for the column list, which is elided in this example.
CREATE EXTERNAL TABLE artists (...)
    STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
    TBLPROPERTIES (
        'es.resource'          = 'radio/artists',  -- Elasticsearch resource backing the table
        'es.index.auto.create' = 'false'           -- index auto-creation is switched off
    );
默认情况下,elasticsearch-hadoop使用Hive表的schema(对应的列名和类型)来映射Es中的数据。但是有些名字在Hive中可以使用,在Es中却不可以使用,
这种情况下,可以使用es.mapping.names参数进行映射,格式如下:Hive字段名:Elasticsearch字段名(多个映射之间用逗号分隔)
-- Hive external table that renames columns on the Elasticsearch side.
-- "(...)" stands for the column list, which is elided in this example.
CREATE EXTERNAL TABLE artists (...)
    STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
    TBLPROPERTIES (
        'es.resource'      = 'radio/artists',
        -- entries are "hive_column:es_field": date -> @timestamp, url -> url_123
        'es.mapping.names' = 'date:@timestamp, url:url_123'
    );
上面的例子是:Hive中的date列映射到Es中的@timestamp,Hive中的url列映射到Es中的url_123
1、Hive是大小写不敏感,但是Es是大小写敏感,elasticsearch-hadoop会转换Hive列到小写,所以,尽量使用小写列名。
2、Hive将丢失的值设置为NULL值,要在Es中进行测试,看看是否满足查询条件
-- Hive external table backed by the Elasticsearch resource radio/artists.
CREATE EXTERNAL TABLE artists (
    id      BIGINT,
    name    STRING,
    -- A STRUCT column must declare its fields; these mirror the named_struct()
    -- built by the INSERT below. Field types are assumed to be STRING -- adjust
    -- them to the actual types of source.url / source.picture.
    links   STRUCT<url:STRING, picture:STRING>)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES('es.resource' = 'radio/artists');

-- insert data to Elasticsearch from another table called 'source'
INSERT OVERWRITE TABLE artists
    SELECT NULL, s.name, named_struct('url', s.url, 'picture', s.picture)
    FROM source s;
有时需要设置Es中文档的ID,可以使用es.mapping.id参数进行设置。例如,将Hive的id列作为Es中文档的ID。
-- Hive external table whose id column is named in the es.mapping.id property.
-- The "..." markers stand for further columns / properties elided in this example.
CREATE EXTERNAL TABLE artists (
    id      BIGINT,
    ...)
    STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
    TBLPROPERTIES('es.mapping.id' = 'id'...);
-- Hive external table with a single STRING column and es.input.json enabled.
-- The '...' resource value is a placeholder to be filled in.
CREATE EXTERNAL TABLE json (data STRING)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES('es.resource' = '...',
              -- property name must be closed with a single quote, not a backtick
              'es.input.json' = 'yes');
-- Hive external table written to a per-row Elasticsearch resource.
CREATE EXTERNAL TABLE media (
    name    STRING,
    type    STRING,
    -- last column: no trailing comma, and the column list is closed here
    year    STRING)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
-- the {type} placeholder in the resource name refers to the type column above
TBLPROPERTIES('es.resource' = 'my-collection-{type}/doc');
{
"media_type":"music",
"title":"Surfing With The Alien",
"year":"1987"
}
-- Hive external table with a single STRING column and es.input.json enabled.
CREATE EXTERNAL TABLE json (data STRING)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
-- the {media_type} placeholder is presumably resolved from the media_type field
-- of each JSON document -- confirm against the connector documentation
TBLPROPERTIES('es.resource' = 'my-collection-{media_type}/doc',
              -- property name must be closed with a single quote, not a backtick
              'es.input.json' = 'yes');
-- Hive external table that reads from the Elasticsearch resource radio/artists,
-- restricted by the query given in es.query.
CREATE EXTERNAL TABLE artists (
    id      BIGINT,
    name    STRING,
    -- A STRUCT column must declare its fields. url/picture as STRING is an
    -- assumption -- adjust to the fields actually stored under "links".
    links   STRUCT<url:STRING, picture:STRING>)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES('es.resource' = 'radio/artists',
              'es.query' = '?q=me*');

-- stream data from Elasticsearch
SELECT * FROM artists;
Hive类型 | ElasticSearch类型 |
---|---|
void | null |
boolean | boolean |
tinyint | byte |
smallint | short |
int | int |
bigint | long |
double | double |
float | float |
string | string |
binary | binary |
timestamp | date |
struct | map |
map | map |
array | array |
union | 不支持 |
decimal | string |
date | date |
varchar | string |
char | string |