ES常用命令

你懂了吗？

Sam

2022年5月18日 • 4 min read

概述

Elasticsearch版本：7.13.2

新建

创建索引

PUT 索引名字
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 0,
    "index": {
      "max_result_window": 2147483647
    },
    "analysis": {
      "tokenizer": {
        "ik_pinyin": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_full_pinyin": false,
          "keep_joined_full_pinyin": true,
          "keep_original": true,
          "limit_first_letter_length": 10,
          "keep_none_chinese": true,
          "keep_none_chinese_together": true,
          "none_chinese_pinyin_tokenize": false,
          "keep_none_chinese_in_first_letter": false,
          "lowercase": true,
          "trim_whitespace": false,
          "remove_duplicated_term": true,
          "keep_separate_first_letter": false
        }
      },
      "analyzer": {
        "custom_ik_smart": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "char_filter": [
            "html_strip"
          ]
        },
        "custom_ik_max_word": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "char_filter": [
            "html_strip"
          ]
        },
        "pinyin_analyzer": {
          "tokenizer": "ik_pinyin"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": {
        "type": "keyword"
      },
      "name": {
        "type": "text",
        "analyzer": "custom_ik_max_word",
        "fields": {
          "pinyin": {
            "type": "text",
            "store": false,
            "term_vector": "with_offsets",
            "analyzer": "pinyin_analyzer"
          },
          "keyword": {
            "type": "keyword",
            "ignore_above": 13011
          }
        }
      },
      "sex": {
        "type": "keyword"
      },
      "similarity": {
        "type": "double"
      },
      "createTime": {
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
      }
    }
  }
}

备份索引

POST _reindex
{
  "source": {
    "index": "旧索引名称"
  },
  "dest": {
    "index": "新索引名称"
  }
}

插入数据

插入单条

PUT {索引名称}/_doc/{索引id}
{
  "title":{
    "input": "blow"
  }
}

查询

查询索引mapping

GET 索引名称/_mapping

搜索

1、match查询：会先对搜索词进行分词，比如“白雪公主和苹果”，会分成“白雪”、“公主”、“苹果”。含有相关内容的字段，都会被检索出来。
2、term是代表完全匹配，也就是精确查询，搜索前不会再对搜索词进行分词，所以我们的搜索词必须是文档分词集合中的一个。
3、wildcard查询：使用通配符进行查询，其中 ? 代表任意一个字符，* 代表零个或多个任意字符。

# 查询所有数据
GET 索引名称/_search
{
  "query": {
    "match_all": {}
  }
}

# 查询name为'张'的数据，并根据条件排序，分页
GET 索引名称/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "name": "张"
          }
        }
      ]
    }
  },
  "sort": [
    {
      "recordTime": {
        "order": "desc"
      }
    }
  ],
  "from": 0,
  "size": 20
}

# 范围查询，age大于等于20且小于等于30（gte/lte 包含边界，不含边界请用 gt/lt）
GET 索引名字/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "age": {
              "gte": 20,
              "lte": 30
            }
          }
        }
      ]
    }
  },
  "from": 0,
  "size": 10
}

# 模糊匹配
GET person-record-139/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "wildcard": {
            "name.keyword": {
              "value": "*张*"
            }
          }
        }
      ]
    }
  }
}

# 相似度搜索，min_score表示返回的最低分。cosineSimilarity的取值范围是[-1, 1]，而script_score不允许出现负分（否则报错），所以需要+1
GET 索引名称/_search
{
  "query": {
    "script_score": {
      "query": {
        "match_all": {}
      },
      "script": {
        "source": "cosineSimilarity(params.queryVector, 'featureData') + 1",
        "params": {
          "queryVector": [512维数组]
        }
      }
    }
  },
  "min_score": 0,
  "from": 0,
  "size": 10
}

# 多个字段匹配，minimum_should_match最少匹配几个条件
GET 索引名称/_search
{
  "query": {
    "bool": {
      "minimum_should_match": 1,
      "should": [
        {
          "multi_match": {
            "query": "五楼",
            "fields": [
              "deviceName",
              "XZQH",
              "JKDWLX",
              "deviceTypeName",
              "channelId"
            ]
          }
        }
      ]
    }
  }
}

# 搜索eTime为null
POST 索引名称/_search
{
  "query": {
    "bool": {
      "must_not": [
        {
          "exists": {
            "field": "eTime"
          }
        }
      ]
    }
  }
}

超出一万条数据，如何查询总数？

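es默认限制分页查询的 from + size 不能超过10000（由 index.max_result_window 控制）；
另外出于性能考虑，默认最多只精确统计到10000条命中，所以`hits.total.value` 最大显示10000（此时 hits.total.relation 为 gte，表示实际总数大于等于该值）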

GET 索引名称/_search
{
  "track_total_hits": true
}

可添加参数 track_total_hits，这时候的返回值是准确的。

根据时间段分组统计之date_histogram

对于es保存的数据，需要根据其时间格式或时间戳格式的字段进行分组统计，计算每天或每小时的某字段统计值

GET notice-dev/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "TriggerTime": {
              "gte": "2022-08-10 00:00:00",
              "lte": "2022-12-30 23:59:59"
            }
          }
        },
        {
          "terms": {
            "DispositionCategory": [
              "1"
            ]
          }
        },
        {
          "range": {
            "NoticeSimilarity": {
              "gte": 10
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "dateAgg": {	# 聚合名
      "date_histogram": {
        "field": "TriggerTime",		# 根据哪个字段分组
        "offset": "0h",	# es默认是按照UTC的时间进行查询的，所以需要减掉8小时
        "format": "yyyy-MM-dd",
        "calendar_interval": "1d",
        "extended_bounds": {	# 需要填充0的范围（注意时区问题）
          "min": 1662508800000,
          "max": 1672415999000
        }
      }
    }
  },
  "from": 0,
  "size": 0
}

注意点

offset: -8h：es默认按UTC时间划分时间桶，东八区要按本地自然日分组时需要减掉8小时（上面示例里写的是 "0h"，即不做偏移；按东八区统计时应改为 "-8h"，或者去掉 offset 改用 "time_zone": "+08:00"）
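calendar_interval 可选内容（只接受单个日历单位；下面标注 fixed_interval 的写法需要改用 fixed_interval 参数）
毫秒：1ms 10ms（fixed_interval）
秒： 1s 10s（fixed_interval）
分钟： minute/1m；10m（fixed_interval）
小时： hour/1h；2h（fixed_interval）
天： day/1d
星期： week/1w
月： month/1M
季度： quarter/1q
年： year/1y
"format": "yyyy-MM-dd"：只有date类型可以format，这里分组字段如果是long类型时间戳，format的结果仍然是时间戳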

修改

更新数据

# _id为100，sex修改为1
POST 索引名字/_update/100(_id数据)
{
  "doc": {
    "sex": 1
  }
}

# 根据条件更新
POST 索引名字/_update_by_query
{
	"script": {
		"source": "ctx._source.labels.version = 1"
	},
	"query": {
		"term": {
			"name": "测试"
		}
	}
}

# 整条覆盖写入指定记录（PUT _doc 会用请求体替换整个文档，而不是局部更新；members为nested格式）
PUT 索引名字/_doc/2
{
  "id": 2,
  "name": "Group2",
  "members": [
    {
      "id": 3,
      "name": "王五"
    }
  ]
}

# 在Group1的members（nested数组）中增加一个成员：郑十
POST group/_update/1
{
  "script": {
    "source": """
    	if (ctx._source.members == null) {
    		List ls = new ArrayList();
    		ls.add(params.member);
    		ctx._source.members = ls;
    	} else {
			ctx._source.members.add(params.member);
    	}
    """,
    "lang": "painless",
    "params": {
      "member": {
        "id": 10,
        "name": "郑十"
      }
    }
  }
}

# 在Group1的members（nested数组）中删除一个成员：id=10
POST group/_update/1
{
  "script": {
    "source": "ctx._source.members.removeIf(list_item -> list_item.id == params.member_id)",
    "lang": "painless",
    "params": {
      "member_id": 10
    }
  }
}

# 在Group1的members（nested数组）中更新id=1的member.name为张三1
POST group/_update/1
{
	"script": {
	  "source": "for (item in ctx._source.members) {if(item['id']==params.member_id){item['name']=params.name_new}}",
	  "lang": "painless",
	  "params": {
	    "member_id":1,
	    "name_new": "张三1"
	  }
	}
}

删除

删除索引

DELETE 索引名称,索引2,...

删除范围数据

# 删除recordTime为1970-01-01 08:00:00的数据
POST 索引名称/_delete_by_query
{
  "query": {
    "term": {
      "recordTime": "1970-01-01 08:00:00"
    }
  }
}

删除索引全部数据

POST 索引名称/_delete_by_query
{
  "query": {
    "match_all": {}
  }
}

Java - Elasticsearch 相关

mapping.xml和entity配置

如果entity字段使用@JsonProperty，则mapping对应字段为@JsonProperty的name属性（不是声明的字段名称）。