使用es reindex api 修改和迁移数据

使用es reindex api 修改和迁移数据

Elasticsearch 官网及其他资料中给出的 reindex 用法:

POST _reindex
{
  "source": {
    "index": "old_index",
	"size": 5000
  },
  "dest": {
    "index": "new_index",
    "version_type": "internal",
	"routing": "=cat"
  }
}
POST _reindex
{
	"conflicts": "proceed",	//有冲突继续,默认是有冲突终止
	"size":1000,	//限制处理的文档总数
	"source": {    
	    "index": "twitter", 	//也可以为 ["twitter", "blog"]
		"type": "tweet", 	// 或 ["type1","type2"];可选,用于按类型限制文档范围
		"query": { "term": { "user": "kimchy" } },	//添加查询来限制文档
		"sort": { "date": "desc" }, 	//排序
		"_source": ["user", "tweet"],	//指定字段
		"size": 100	//每批滚动(scroll)处理的文档数,默认为1000,此处改为100
	},  
  
    "dest": {    
		"index": "new_twitter",
		"op_type": "create",	//设置后 _reindex 只在目标索引中创建缺失的文档(create 只插入目标中不存在的数据)
		"version_type": "external",	//未设置 version_type 或设置为 internal 时,会覆盖相同 id 的文档;设置为 external 时,仅当源文档的 version 比目标文档新时才更新相同 id 的文档
		"routing": "=cat",	//将路由设置为cat
		"pipeline": "some_ingest_pipeline"	//指定管道来使用Ingest节点特性
    },
  
	"script": { // 执行脚本 
	   "source": "if (ctx._source.foo == 'bar') {ctx._version++; ctx._source.remove('foo')} ", 	                            					                      							 	
		"lang": "painless" 
	}
}

个人实践:

POST _reindex
{
  "conflicts": "proceed",
  "source": {
    "remote": {
      "host": "http://xxx.xxx.com:80",
      "username": "xxx",
      "password": "xxxxxxxxx",
      "socket_timeout": "1m",
      "connect_timeout": "30s"
    },
    "index": "case_cause",
    "size": 5000
  },
  "dest": {
    "index": "case_cause",
    "op_type": "create",
    "routing": "=cat"
  }
}

操作图

使用es reindex api 修改和迁移数据_第1张图片

验证

使用es reindex api 修改和迁移数据_第2张图片

另附上我个人编写的批量执行脚本

cat import-es-data.sh
本脚本为在本地es集群拉取远端es集群数据

#!/bin/bash
# Runs Elasticsearch _reindex requests against the cluster at ES_URL, one per
# index, with the data source given as a "remote" host in the request body.
# author wxd
#
# Usage: <script> foo

# First command-line argument; selects the action (empty when omitted).
PARAMETER="${1:-}"

# Endpoint and basic-auth credentials of the cluster that receives the
# _reindex requests.
ES_URL="http://172.16.248.21:9200"
ES_USER="xxx"
ES_PASSWORD="xxxxxxxxx"

# Print a colourised usage hint that names this script, then terminate the
# script with exit status 1.
usage() {
    local hint="\033[46;31mUsage\033[0m: Please use \e[0;35m$0 foo \e[0m"
    # %b expands backslash escapes in the argument the same way `echo -e` does.
    printf '%b\n' "$hint"
    exit 1
}

#######################################
# Issue one _reindex request per index to the cluster at ES_URL, copying each
# index from the remote host named in the request body into an index of the
# same name.
# Globals:   ES_URL, ES_USER, ES_PASSWORD (read)
# Arguments: optional index names; when none are given the built-in list
#            below is used
# Outputs:   progress lines and curl's response body on stdout; failure
#            messages on stderr
# Returns:   0 if every curl invocation succeeded, 1 if any of them failed
#######################################
do_reindex() {
    local -a indices=("$@")
    if [ "${#indices[@]}" -eq 0 ]; then
        indices=(
            parse_task case_record parse_check_record adjustment
            code_category law_question red_book law_cloud_category
            department predict_department predict_judge cause_predict
            law_macroforecast case_num_data court_data indicator_data
            standard_value sub_indicator cause_weight cause_weight_backup
            events_weight weight_rate case_edit qid_cache
            law_case_info_business law_search_v3 spider_case code
            inner_case law_doc_template law law_firm institutions
            predict_judge_data_v2 predict_judge_data
        )
    fi

    local index body rc
    local failed=0
    for index in "${indices[@]}"; do
        echo -e "\n$(date '+%Y-%m-%d %H:%M:%S'), 索引\e[0;35m${index}\e[0m  开始reindex"

        # Build the request body; the index name is used for both the
        # source and the destination.
        printf -v body '{
  "conflicts": "proceed",
  "source": {
    "remote": {
      "host": "http://xxx.xxx.com:80",
      "username": "xxxx",
      "password": "xxxxxxxxxxxxx",
      "socket_timeout": "1m",
      "connect_timeout": "30s"
    },
    "index": "%s",
    "size": 5000
  },
  "dest": {
    "index": "%s",
    "op_type": "create",
    "routing": "=cat"
  }
}' "$index" "$index"

        # "-X POST" sets the HTTP method; the former "-POST" was parsed by
        # curl as "-P OST" (an FTP option). -S keeps -s quiet but still
        # shows transport errors.
        if curl -sS -X POST -H "Content-Type: application/json" \
            -u "${ES_USER}:${ES_PASSWORD}" "${ES_URL}/_reindex" -d "$body"; then
            echo -e "\n$(date '+%Y-%m-%d %H:%M:%S'), 索引\e[0;35m${index}\e[0m  reindex完成"
        else
            rc=$?
            # Keep going with the remaining indices, but remember the failure.
            echo -e "\n$(date '+%Y-%m-%d %H:%M:%S'), 索引\e[0;35m${index}\e[0m  reindex失败 (curl exit ${rc})" >&2
            failed=1
        fi
    done

    return "$failed"
}


# Entry point: dispatch on the first command-line argument (PARAMETER).
#   foo    -> run do_reindex and print a completion banner
#   empty  -> report the missing argument and exit with status 1
#   other  -> print the usage hint (usage itself exits with status 1)
case "$PARAMETER" in
    foo)
        echo -e "\033[46;31m Do reindex!!!\033[0m"
        do_reindex
        echo -e "-----------------------------------------------\033[46;31mAll reindex done!!!\033[0m-----------------------------------------------"
        ;;
    "")
        echo -e "\033[46;31merror\033[0m: please input parameter"
        # A missing argument is an error, so do not report success.
        exit 1
        ;;
    *)
        usage
        ;;
esac

二次使用只需填入你的索引名,远端es地址(9200端口),用户名,密码即可直接使用

你可能感兴趣的:(elasticsearch)