Elasticsearch 6.7.1 pdf、word、ppt、excel、txt文件查询

  1. 安装插件
    在es根目录下执行以下命令,安装ingest-attachment插件。集群中每个节点都需要安装,安装完成后必须重启Elasticsearch,插件才会生效。
     

    ./bin/elasticsearch-plugin install ingest-attachment
  2. 建立自己的文本抽取管道pipeline。
     

    curl -X PUT "localhost:9200/_ingest/pipeline/attachment" -H 'Content-Type: application/json' -d '{
    	"description": "Extract attachment information",
    	"processors": [
    		{
    			"attachment": {
    				"field": "data",
    				"ignore_missing": true
    			}
    		},
    		{
    			"remove": {
    				"field": "data"
    			}
    		}
    	]
    }'

     

  3. 创建新的索引
     

    curl -X PUT "localhost:9200/zxkfdetail_poc" -H 'Content-Type: application/json' -d '{
    	"settings": {
    		"index": {
    			"number_of_shards": 5,
    			"number_of_replicas": 2
    		}
    	}
    }'
    
    curl -X POST "http://localhost:9200/zxkfdetail_poc/zxkfdetail_poc/_mapping" -H 'Content-Type: application/json' -d '
    {
    	"zxkfdetail_poc": {
    		"properties": {
    			"agentName": {
    				"type": "text",
    				"fields": {
    					"keyword": {
    						"type": "keyword",
    						"ignore_above": 256
    					}
    				}
    			},
    			"agentid": {
    				"type": "long"
    			},
    			"annex": {
    				"type": "text",
    				"fields": {
    					"keyword": {
    						"type": "keyword",
    						"ignore_above": 256
    					}
    				}
    			},
    			"attachment": {
    				"properties": {
    					"author": {
    						"type": "text",
    						"fields": {
    							"keyword": {
    								"type": "keyword",
    								"ignore_above": 256
    							}
    						}
    					},
    					"content": {
    						"type": "text",
    						"fields": {
    							"keyword": {
    								"type": "keyword",
    								"ignore_above": 256
    							}
    						}
    					},
    					"content_length": {
    						"type": "long"
    					},
    					"content_type": {
    						"type": "text",
    						"fields": {
    							"keyword": {
    								"type": "keyword",
    								"ignore_above": 256
    							}
    						}
    					},
    					"date": {
    						"type": "date"
    					},
    					"language": {
    						"type": "text",
    						"fields": {
    							"keyword": {
    								"type": "keyword",
    								"ignore_above": 256
    							}
    						}
    					},
    					"title": {
    						"type": "text",
    						"fields": {
    							"keyword": {
    								"type": "keyword",
    								"ignore_above": 256
    							}
    						}
    					}
    				}
    			},
    			"category": {
    				"type": "text",
    				"fields": {
    					"keyword": {
    						"type": "keyword",
    						"ignore_above": 256
    					}
    				}
    			},
    			"check": {
    				"type": "long"
    			},
    			"content": {
    				"type": "text",
    				"fields": {
    					"keyword": {
    						"type": "keyword",
    						"ignore_above": 256
    					}
    				}
    			},
    			"createtime": {
    				"type": "date"
    			},
    			"filename": {
    				"type": "text",
    				"fields": {
    					"keyword": {
    						"type": "keyword",
    						"ignore_above": 256
    					}
    				}
    			},
    			"genre": {
    				"type": "long"
    			},
    			"id": {
    				"type": "text",
    				"fields": {
    					"keyword": {
    						"type": "keyword",
    						"ignore_above": 256
    					}
    				}
    			},
    			"lasttime": {
    				"type": "date"
    			},
    			"seeScope": {
    				"type": "long"
    			},
    			"similarQuestions": {
    				"type": "text",
    				"fields": {
    					"keyword": {
    						"type": "keyword",
    						"ignore_above": 256
    					}
    				}
    			},
    			"sort": {
    				"type": "long"
    			},
    			"stasts": {
    				"type": "text",
    				"fields": {
    					"keyword": {
    						"type": "keyword",
    						"ignore_above": 256
    					}
    				}
    			},
    			"tendid": {
    				"type": "long"
    			},
    			"title": {
    				"type": "text",
    				"fields": {
    					"keyword": {
    						"type": "keyword",
    						"ignore_above": 256
    					}
    				}
    			},
    			"type": {
    				"type": "long"
    			},
    			"updateman": {
    				"type": "text",
    				"fields": {
    					"keyword": {
    						"type": "keyword",
    						"ignore_above": 256
    					}
    				}
    			},
    			"zxkfId": {
    				"type": "text",
    				"fields": {
    					"keyword": {
    						"type": "keyword",
    						"ignore_above": 256
    					}
    				}
    			}
    		}
    	}
    }
    '

     

  4. 载入数据
     

    curl -X PUT "localhost:9200/zxkfdetail_poc/zxkfdetail_poc/1?pipeline=attachment" -H 'Content-Type: application/json' -d '
    {
       "data":"QmFzZTY057yW56CB6K+05piOCuOAgOOAgEJhc2U2NOe8lueggeimgeaxguaKijPkuKo45L2N5a2X6IqC77yIMyo4PTI077yJ6L2s5YyW5Li6NOS4qjbkvY3nmoTlrZfoioLvvIg0KjY9MjTvvInvvIzkuYvlkI7lnKg25L2N55qE5YmN6Z2i6KGl5Lik5LiqMO+8jOW9ouaIkDjkvY3kuIDkuKrlrZfoioLnmoTlvaLlvI/jgIIg5aaC5p6c5Ymp5LiL55qE5a2X56ym5LiN6LazM+S4quWtl+iKgu+8jOWImeeUqDDloavlhYXvvIzovpPlh7rlrZfnrKbkvb/nlKgnPSfvvIzlm6DmraTnvJbnoIHlkI7ovpPlh7rnmoTmlofmnKzmnKvlsL7lj6/og73kvJrlh7rnjrAx5oiWMuS4qic9J+OAggoK44CA44CA5Li65LqG5L+d6K+B5omA6L6T5Ye655qE57yW56CB5L2N5Y+v6K+75a2X56ym77yMQmFzZTY05Yi25a6a5LqG5LiA5Liq57yW56CB6KGo77yM5Lul5L6/6L+b6KGM57uf5LiA6L2s5o2i44CC57yW56CB6KGo55qE5aSn5bCP5Li6Ml42PTY077yM6L+Z5Lmf5pivQmFzZTY05ZCN56ew55qE55Sx5p2l44CC"
    }'

 

你可能感兴趣的:(elasticsearch)