Lucene 迁移到 elasticsearch

项目与邮件相关,需要索引邮件的一些属性。目前使用的 Lucene 版本是 2.1.0,并且对 Lucene 源码做过定制修改(魔改),因此直接升级 Lucene 比较困难,最后领导决定尝试迁移到 Elasticsearch(ES)。目前索引按收件日期的月份存储:收件日期在同一个月的邮件放在同一个索引下,索引名称由年份和月份组成(如:201810)。

ES索引模板

因为需要索引的邮件属性是固定的,所以每个索引的结构也是固定的,可以用索引模板统一定义。

PUT _template/index_template
{
  "index_patterns": ["19*", "20*"],
  "settings": {
    "index": {
      "number_of_shards": 1,
      "number_of_replicas": 1,
      "mapping.coerce": false,
      "refresh_interval": "30s",
      "analysis": {
        "analyzer": {
          "default": {
            "type": "standard",
            "stopwords": "_english_"
          }
        },
        "normalizer": {
          "custom_normalizer": {
            "type": "custom",
            "char_filter": [],
            "filter": ["lowercase", "asciifolding"]
          }
        }
      }
    }
  },
  "mappings": {
    "_doc": {
      "dynamic": false,
      "_source": {
        "excludes": [
          "from_name",
          "from_domain",
          "to_name",
          "to_domain",
          "subject_sort",
          "content",
          "attachment",
          "combinatorial"
        ]
      },
      "_field_names": {
        "enabled": false
      },
      "properties": {
        "host": {
          "type": "keyword",
          "doc_values": false,
          "normalizer": "custom_normalizer"
        },
        "message_id": {
          "type": "keyword",
          "doc_values": false,
          "normalizer": "custom_normalizer"
        },
        "from": {
          "type": "keyword",
          "normalizer": "custom_normalizer",
          "copy_to": ["combinatorial"]
        },
        "from_name": {
          "type": "keyword",
          "doc_values": false,
          "normalizer": "custom_normalizer",
          "copy_to": ["combinatorial"]
        },
        "from_domain": {
          "type": "keyword",
          "doc_values": false,
          "normalizer": "custom_normalizer"
        },
        "to": {
          "type": "keyword",
          "normalizer": "custom_normalizer",
          "copy_to": ["combinatorial"]
        },
        "to_name": {
          "type": "keyword",
          "doc_values": false,
          "normalizer": "custom_normalizer",
          "copy_to": ["combinatorial"]
        },
        "to_domain": {
          "type": "keyword",
          "doc_values": false,
          "normalizer": "custom_normalizer"
        },
        "subject": {
          "type": "text",
          "norms": false,
          "copy_to": ["subject_sort", "combinatorial"]
        },
        "subject_sort": {
          "type": "keyword",
          "normalizer": "custom_normalizer"
        },
        "content": {
          "type": "text",
          "norms": false,
          "copy_to": ["combinatorial"]
        },
        "attachment": {
          "type": "text",
          "norms": false,
          "copy_to": ["combinatorial"]
        },
        "attachment_name": {
          "type": "keyword",
          "doc_values": false,
          "normalizer": "custom_normalizer"
        },
        "attachment_size": {
          "type": "long",
          "doc_values": false
        },
        "combinatorial": {
          "type": "text",
          "norms": false
        },
        "date": {
          "type": "long"
        },
        "size": {
          "type": "long"
        }
      }
    }
  }
}

转载于:https://my.oschina.net/5icode/blog/2253294

你可能感兴趣的:(Lucene 迁移到 elasticsearch)