# Match every document, with no index restriction on the request path.
GET /_search
{
  "query": { "match_all": {} }
}

# Inspect index templates: all of them, then the crawler template by name.
GET /_index_template
GET /_index_template/yst_crawler_template

# List indices with column headers, then show the crawler index mapping.
GET /_cat/indices?v
GET /yst_crawler/_mapping
# Create the yst_crawler index (1 primary shard, 1 replica) with an explicit
# mapping. The three date fields are numeric epochs: create_at and
# article_time in seconds, milli_at in milliseconds.
PUT /yst_crawler
{
  "settings": { "number_of_shards": 1, "number_of_replicas": 1 },
  "mappings": {
    "properties": {
      "id":           { "type": "long" },
      "cover_url":    { "type": "keyword" },
      "title":        { "type": "text", "analyzer": "english" },
      "create_at":    { "type": "date", "format": "epoch_second" },
      "article_time": { "type": "date", "format": "epoch_second" },
      "milli_at":     { "type": "date", "format": "epoch_millis" }
    }
  }
}

# Confirm the mapping that was applied.
GET /yst_crawler/_mapping

# Destructive: removes the index together with all of its documents.
DELETE /yst_crawler
# Show the tokens the built-in standard analyzer emits for a sample headline.
POST /_analyze
{
  "analyzer": "standard",
  "text": "In a polarized US, how to define a patriot increasingly depends on who’s being asked"
}
# First page (20 hits) of all documents, ordered by milli_at descending.
GET /yst_crawler/_search
{
  "query": { "match_all": {} },
  "sort": [ { "milli_at": "desc" } ],
  "from": 0,
  "size": 20
}

# Exact lookup of a single document by its id field.
GET /yst_crawler/_search
{
  "query": {
    "term": { "id": "1" }
  }
}
# Full-text search on title, limited to the last 30 days (rounded to whole
# days), ordered by milli_at descending.
# The date range is a yes/no condition, so it lives in filter context: it is
# not scored and is eligible for the filter cache. Because hits are sorted by
# milli_at rather than by score, the returned documents and their order are
# unchanged. The _source list no longer repeats "milli_at".
GET /yst_crawler/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "title": "$20 million settlement" } }
      ],
      "filter": [
        { "range": { "milli_at": { "gte": "now-30d/d", "lte": "now/d" } } }
      ]
    }
  },
  "sort": [
    { "milli_at": { "order": "desc" } }
  ],
  "_source": ["id", "title", "milli_at", "s3_url", "article_time", "detail_url", "ref_id", "req_md5", "tags", "category", "content"],
  "from": 0,
  "size": 20
}
# First 10 documents ordered by milli_at descending, returning only the
# listed _source fields.
GET /yst_crawler/_search
{
  "query": { "match_all": {} },
  "sort": [ { "milli_at": "desc" } ],
  "_source": [
    "id", "title", "milli_at", "s3_url", "article_time", "detail_url",
    "ref_id", "req_md5", "tags", "category", "content", "milli_at"
  ],
  "from": 0,
  "size": 10
}
# Full-text search on title, restricted to specific ref_id values and to the
# last 30 days, ordered by milli_at descending.
# Only the title match needs query context. The ref_id membership test and
# the date range are exact yes/no conditions, so they sit in filter context
# (no scoring, cacheable). Hits are sorted by milli_at, so the result set and
# order are unchanged. The _source list no longer repeats "milli_at".
GET /yst_crawler/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "title": "$20 million settlement" } }
      ],
      "filter": [
        { "terms": { "ref_id": ["8fac888ae06647c3a7d6093c60c9d9b5"] } },
        { "range": { "milli_at": { "gte": "now-30d/d", "lte": "now/d" } } }
      ]
    }
  },
  "sort": [
    { "milli_at": { "order": "desc" } }
  ],
  "_source": ["id", "title", "milli_at", "s3_url", "article_time", "detail_url", "ref_id", "req_md5", "tags", "category", "content"],
  "from": 0,
  "size": 20
}
# Look up documents whose id matches 22111, ordered by milli_at descending.
GET /yst_crawler/_search
{
  "query": {
    "match": { "id": "22111" }
  },
  "sort": [ { "milli_at": "desc" } ],
  "_source": [
    "id", "title", "milli_at", "s3_url", "article_time", "detail_url",
    "ref_id", "req_md5", "tags", "category", "content", "milli_at"
  ],
  "from": 0,
  "size": 20
}
# Full-text search on title over the last 30 days, ordered by milli_at
# descending, also returning article_id. Page size is 19.
# The date range is in filter context (unscored, cacheable); with a field
# sort this does not change which hits come back or their order. The _source
# list no longer repeats "milli_at".
GET /yst_crawler/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "title": "$20 million settlement" } }
      ],
      "filter": [
        { "range": { "milli_at": { "gte": "now-30d/d", "lte": "now/d" } } }
      ]
    }
  },
  "sort": [
    { "milli_at": { "order": "desc" } }
  ],
  "_source": ["id", "title", "milli_at", "s3_url", "article_time", "detail_url", "ref_id", "req_md5", "tags", "category", "content", "article_id"],
  "from": 0,
  "size": 19
}
# Feed query: documents with is_status = 0 in category "for_you", with
# milli_at between 30 days ago (rounded to the day) and a fixed epoch-millis
# upper bound, ordered by milli_at descending.
# All three conditions are exact or range checks with no relevance component
# and the hits are sorted by a field, so they belong in filter context:
# Elasticsearch skips scoring and can cache the clauses. The matched
# documents and their order are unchanged. The _source list no longer
# repeats "milli_at".
GET /yst_crawler/_search
{
  "query": {
    "bool": {
      "filter": [
        { "term": { "is_status": 0 } },
        { "term": { "category": "for_you" } },
        { "range": { "milli_at": { "gte": "now-30d/d", "lte": "1690362492000" } } }
      ]
    }
  },
  "_source": ["id", "title", "milli_at", "s3_url", "article_time", "detail_url", "ref_id", "req_md5", "tags", "category", "content", "article_id", "desc", "state"],
  "from": 0,
  "sort": [
    { "milli_at": { "order": "desc" } }
  ],
  "size": 20
}
# Show the current mapping, then add a keyword field named "state" to the
# existing yst_crawler index.
GET /yst_crawler/_mapping

PUT /yst_crawler/_mapping
{
  "properties": {
    "state": { "type": "keyword" }
  }
}
# Create the yst_city index.
# Analysis setup:
#   - comma_analyzer: pattern analyzer that splits on ",".
#   - code_ngram_analyzer: ngram tokenizer emitting 2- to 4-character grams;
#     index.max_ngram_diff is set to 2 to allow that min/max spread.
# The "code" field is indexed with comma_analyzer, and again as the
# "code.ngram" sub-field with code_ngram_analyzer.
PUT /yst_city
{
  "settings": {
    "index.max_ngram_diff": 2,
    "analysis": {
      "analyzer": {
        "comma_analyzer":      { "type": "pattern", "pattern": "," },
        "code_ngram_analyzer": { "tokenizer": "code_ngram_tokenizer" }
      },
      "tokenizer": {
        "code_ngram_tokenizer": { "type": "ngram", "min_gram": 2, "max_gram": 4 }
      }
    }
  },
  "mappings": {
    "properties": {
      "id":      { "type": "integer" },
      "state":   { "type": "keyword" },
      "country": { "type": "keyword" },
      "city":    { "type": "text" },
      "code": {
        "type": "text",
        "analyzer": "comma_analyzer",
        "fields": {
          "ngram": { "type": "text", "analyzer": "code_ngram_analyzer" }
        }
      },
      "create_at":  { "type": "integer" },
      "is_popular": { "type": "byte" },
      "sort":       { "type": "short" },
      "location":   { "type": "geo_point" }
    }
  }
}
# Match a whole code against the comma-split "code" field.
GET /yst_city/_search
{
  "query": {
    "match": { "code": "36310" }
  },
  "_source": ["id", "state", "city", "code", "is_popular", "location"],
  "from": 0,
  "size": 20
}

# Match a code fragment against the ngram sub-field.
GET /yst_city/_search
{
  "query": {
    "match": { "code.ngram": "31" }
  },
  "_source": ["id", "state", "city", "code", "is_popular", "location"],
  "from": 0,
  "size": 20
}
# Cities with is_popular = 1, ordered ascending by the "sort" field.
GET /yst_city/_search
{
  "query": {
    "bool": {
      "filter": [
        { "term": { "is_popular": 1 } }
      ]
    }
  },
  "sort": [ { "sort": "asc" } ],
  "_source": ["id", "state", "city", "code", "is_popular", "location"],
  "from": 0,
  "size": 20
}
# City name search that ranks popular cities higher.
# The earlier form multiplied the text score by sqrt(1.2 * is_popular). For
# any document with is_popular = 0 that factor is 0, so every non-popular
# city ended up with a score of 0 and lost its text-relevance ordering.
# Here a filtered weight function multiplies the score by 1.2 only for
# documents with is_popular = 1; all other documents keep their original
# query score (a function whose filter does not match contributes a neutral
# factor of 1).
# NOTE(review): assumes is_popular is a 0/1 flag — confirm. The popular
# multiplier is now exactly 1.2 instead of sqrt(1.2); tune "weight" if a
# different boost is wanted.
GET /yst_city/_search
{
  "query": {
    "function_score": {
      "query": {
        "query_string": {
          "default_field": "city",
          "query": "e york"
        }
      },
      "functions": [
        {
          "filter": { "term": { "is_popular": 1 } },
          "weight": 1.2
        }
      ],
      "score_mode": "multiply",
      "boost_mode": "multiply"
    }
  },
  "_source": ["id", "state", "city", "is_popular", "location"],
  "from": 0,
  "size": 20
}
# Exact, un-analyzed title lookup through the title.keyword sub-field.
GET /yst_crawler/_search
{
  "query": {
    "term": {
      "title.keyword": "Man kicks in door, assaults ex-girlfriend: Solon Police Blotter"
    }
  }
}
# Search titles for the token "110" within the last 8 days (rounded to whole
# days). No sort is given, so hits come back in relevance order.
GET /yst_crawler/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "title": "110" } },
        { "range": { "milli_at": { "gte": "now-8d/d", "lte": "now/d" } } }
      ]
    }
  },
  "_source": [
    "id", "title", "milli_at", "s3_url", "article_time", "detail_url",
    "ref_id", "req_md5", "tags", "category", "content", "milli_at",
    "article_id", "desc"
  ],
  "from": 0,
  "size": 10
}
# Tokenize a sample string with an index-level analyzer on yst_crawler.
# NOTE(review): "my_custom_analyzer" is not defined anywhere in the visible
# file (the template further down defines "my_new_custom_analyzer") —
# confirm it exists on the index, otherwise this request fails.
GET /yst_crawler/_analyze
{
  "analyzer": "my_custom_analyzer",
  "text": "这里填入你想要分析的文本,例如:110"
}

# Same text through the built-in standard analyzer, for comparison.
GET /yst_crawler/_analyze
{
  "analyzer": "standard",
  "text": "这里填入你想要分析的文本,例如:110"
}

# Ad-hoc analysis chain: standard tokenizer, lowercase filter, then an
# inline stop filter with an explicit stopword list.
POST /_analyze
{
  "tokenizer": "standard",
  "filter": [
    "lowercase",
    {
      "type": "stop",
      "stopwords": [
        "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if",
        "in", "into", "is", "it", "no", "not", "of", "on", "or", "such",
        "that", "the", "their", "then", "there", "these", "they", "this",
        "to", "was", "will", "with"
      ]
    }
  ],
  "text": "hello 110th 10223"
}
# Remove the existing crawler index template.
DELETE /_index_template/yst_crawler_template
# Any index whose name starts with "yst_crawler" is created from this template.
# Composable index template applied to every index matching "yst_crawler*".
# Settings: 1 primary shard, 0 replicas, plus a custom analyzer
# (my_new_custom_analyzer = standard tokenizer + lowercase + my_stop_filter)
# that is used for the title field. title also has a "keyword" sub-field for
# exact matching.
# "content" and "desc" are keyword fields and now carry ignore_above: 8191.
# Without it, a value whose UTF-8 encoding exceeds Lucene's 32766-byte term
# limit makes Elasticsearch reject the whole document. With it, an over-long
# value is simply not indexed while remaining available in _source.
# 8191 = 32766 / 4, the safe character count for 4-byte UTF-8.
# NOTE(review): assumes these fields can hold long article text and are not
# used for exact-match queries on very long values — confirm.
PUT _index_template/yst_crawler_template
{
  "index_patterns": ["yst_crawler*"],
  "template": {
    "settings": {
      "number_of_shards": 1,
      "number_of_replicas": 0,
      "analysis": {
        "filter": {
          "my_stop_filter": {
            "type": "stop",
            "stopwords": [
              "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"
            ]
          }
        },
        "analyzer": {
          "my_new_custom_analyzer": {
            "type": "custom",
            "tokenizer": "standard",
            "filter": [
              "lowercase",
              "my_stop_filter"
            ]
          }
        }
      }
    },
    "mappings": {
      "properties": {
        "id": {
          "type": "long"
        },
        "is_status": {
          "type": "byte"
        },
        "detail_url": {
          "type": "keyword"
        },
        "ref_id": {
          "type": "keyword"
        },
        "article_id": {
          "type": "keyword"
        },
        "tags": {
          "type": "text",
          "analyzer": "english"
        },
        "s3_url": {
          "type": "keyword"
        },
        "title": {
          "type": "text",
          "analyzer": "my_new_custom_analyzer",
          "fields": {
            "keyword": {
              "type": "keyword"
            }
          }
        },
        "article_time": {
          "type": "date",
          "format": "epoch_second"
        },
        "desc": {
          "type": "keyword",
          "ignore_above": 8191
        },
        "content": {
          "type": "keyword",
          "ignore_above": 8191
        },
        "milli_at": {
          "type": "date",
          "format": "epoch_millis"
        },
        "category": {
          "type": "keyword"
        },
        "score": {
          "type": "integer"
        },
        "state": {
          "type": "keyword"
        }
      }
    }
  }
}
# Point the alias yst_crawler_alias at the yst_crawler index.
POST /_aliases
{
  "actions": [
    { "add": { "index": "yst_crawler", "alias": "yst_crawler_alias" } }
  ]
}

# Index statistics for yst_crawler.
GET /yst_crawler/_stats
# Create yst_crawler_v2 with no request body, then inspect the mapping it
# received.
PUT /yst_crawler_v2
GET /yst_crawler_v2/_mapping

# Tokenize a sample string with my_new_custom_analyzer on the new index.
GET /yst_crawler_v2/_analyze
{
  "analyzer": "my_new_custom_analyzer",
  "text": "xxx 110 2323"
}
# Copy all documents from yst_crawler into yst_crawler_v2; the request waits
# for the copy to finish.
POST /_reindex
{
  "source": { "index": "yst_crawler" },
  "dest":   { "index": "yst_crawler_v2" }
}

# Same copy as a background task: 9 slices, batches of 10000 documents, and
# a refresh of the destination when done. With wait_for_completion=false the
# response carries a task id instead of the final result.
POST /_reindex?slices=9&refresh&wait_for_completion=false
{
  "source": { "index": "yst_crawler", "size": 10000 },
  "dest":   { "index": "yst_crawler_v2" }
}

# List running tasks, then poll one task by id.
# NOTE(review): the id below is hard-coded, presumably from an earlier
# reindex response — replace it with the id returned by your own request.
GET /_tasks
GET /_tasks/eiPlKw2_T3iKReTflMHkEQ:39797979
# Move yst_crawler_alias from yst_crawler to yst_crawler_v2. Both actions are
# sent in one _aliases request.
POST /_aliases
{
  "actions": [
    { "remove": { "index": "yst_crawler",    "alias": "yst_crawler_alias" } },
    { "add":    { "index": "yst_crawler_v2", "alias": "yst_crawler_alias" } }
  ]
}
# Mainly check the document count ("total") in the stats output.
# Statistics for the old and the new index.
GET /yst_crawler/_stats
GET /yst_crawler_v2/_stats

# Destructive: delete the old index, then list the remaining indices.
DELETE /yst_crawler
GET /_cat/indices?v
# Title search through the alias, limited to the last 4 days (rounded to
# whole days). No sort is given, so hits come back in relevance order.
GET /yst_crawler_alias/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "title": "Pass State Park" } },
        { "range": { "milli_at": { "gte": "now-4d/d", "lte": "now/d" } } }
      ]
    }
  },
  "_source": [
    "id", "title", "milli_at", "s3_url", "article_time", "detail_url",
    "ref_id", "req_md5", "tags", "category", "content", "milli_at",
    "article_id", "desc"
  ],
  "from": 0,
  "size": 20
}