ES常用命令
你懂了吗?
概述
Elasticsearch版本:7.13.2
新建
- 创建索引
PUT 索引名字
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 0,
"index": {
"max_result_window": 2147483647
},
"analysis": {
"tokenizer": {
"ik_pinyin": {
"type": "pinyin",
"keep_first_letter": true,
"keep_full_pinyin": false,
"keep_joined_full_pinyin": true,
"keep_original": true,
"limit_first_letter_length": 10,
"keep_none_chinese": true,
"keep_none_chinese_together": true,
"none_chinese_pinyin_tokenize": false,
"keep_none_chinese_in_first_letter": false,
"lowercase": true,
"trim_whitespace": false,
"remove_duplicated_term": true,
"keep_separate_first_letter": false
}
},
"analyzer": {
"custom_ik_smart": {
"type": "custom",
"tokenizer": "ik_smart",
"char_filter": [
"html_strip"
]
},
"custom_ik_max_word": {
"type": "custom",
"tokenizer": "ik_max_word",
"char_filter": [
"html_strip"
]
},
"pinyin_analyzer": {
"tokenizer": "ik_pinyin"
}
}
}
},
"mappings": {
"properties": {
"id": {
"type": "keyword"
},
"name": {
"type": "text",
"analyzer": "custom_ik_max_word",
"fields": {
"pinyin": {
"type": "text",
"store": false,
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer"
},
"keyword": {
"type": "keyword",
"ignore_above": 13011
}
}
},
"sex": {
"type": "keyword"
},
"similarity": {
"type": "double"
},
"createTime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
}
}
}
}
- 备份索引
POST _reindex
{
"source": {
"index": "旧索引名称"
},
"dest": {
"index": "新索引名称"
}
}
插入数据
- 插入单条
PUT {索引名称}/_doc/{索引id}
{
"title":{
"input": "blow"
}
}
查询
- 查询索引mapping
GET 索引名称/_mapping
- 搜索
1、match查询:会先对搜索词进行分词,比如“白雪公主和苹果”,会分成“白雪”、“公主”、“苹果”。字段中含有任一分词的文档,都会被检索出来。
2、term是代表完全匹配,也就是精确查询,搜索前不会再对搜索词进行分词,所以我们的搜索词必须是文档分词集合中的一个。
3、wildcard查询:是使用通配符进行查询,其中 ? 代表任意一个字符,* 代表任意的零个或多个字符。
# 查询所有数据
GET 索引名称/_search
{
"query": {
"match_all": {}
}
}
# 查询name为'张'的数据,并根据条件排序,分页
GET 索引名称/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"name": "张"
}
}
]
}
},
"sort": [
{
"recordTime": {
"order": "desc"
}
}
],
"from": 0,
"size": 20
}
# 范围查询,age大于等于20且小于等于30(gte/lte为闭区间;开区间请用gt/lt)
GET 索引名字/_search
{
"query": {
"bool": {
"must": [
{
"range": {
"age": {
"gte": 20,
"lte": 30
}
}
}
]
}
},
"from": 0,
"size": 10
}
# 模糊匹配
GET person-record-139/_search
{
"query": {
"bool": {
"must": [
{
"wildcard": {
"name.keyword": {
"value": "*张*"
}
}
}
]
}
}
}
# 相似度搜索,min_score表示返回的最低分。cosineSimilarity取值范围为[-1,1],而es不允许脚本得分为负数(否则报错),所以需+1
GET 索引名称/_search
{
"query": {
"script_score": {
"query": {
"match_all": {}
},
"script": {
"source": "cosineSimilarity(params.queryVector, 'featureData') + 1",
"params": {
"queryVector": [512维数组]
}
}
}
},
"min_score": 0,
"from": 0,
"size": 10
}
# 多个字段匹配,minimum_should_match最少匹配几个条件
GET 索引名称/_search
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"multi_match": {
"query": "五楼",
"fields": [
"deviceName",
"XZQH",
"JKDWLX",
"deviceTypeName",
"channelId"
]
}
}
]
}
}
}
# 搜索eTime为null(或该字段不存在)的数据
POST 索引名称/_search
{
"query": {
"bool": {
"must_not": [
{
"exists": {
"field": "eTime"
}
}
]
}
}
}
- 超出一万条数据,如何查询总数?
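es官方默认限制 from + size 分页最多只能查询前10000条数据(index.max_result_window),同时命中总数默认也只精确统计到10000(track_total_hits 默认值为10000)
所以命中数超过10000时,`hits.total.value` 最大显示10000(此时 `hits.total.relation` 为 gte)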
GET 索引名称/_search
{
"track_total_hits": true
}
可添加参数 track_total_hits,这时候的返回值是准确的。
- 根据时间段分组统计之date_histogram
对于es保存的数据,需要根据其时间格式或时间戳格式的字段进行分组统计,计算每天或每小时的某字段统计值
GET notice-dev/_search
{
"query": {
"bool": {
"must": [
{
"range": {
"TriggerTime": {
"gte": "2022-08-10 00:00:00",
"lte": "2022-12-30 23:59:59"
}
}
},
{
"terms": {
"DispositionCategory": [
"1"
]
}
},
{
"range": {
"NoticeSimilarity": {
"gte": 10
}
}
}
]
}
},
"aggs": {
"dateAgg": { # 聚合名
"date_histogram": {
"field": "TriggerTime", # 根据哪个字段分组
"offset": "0h", # 分桶偏移量:es默认按UTC时间分桶,若要按东八区自然日分组需设为"-8h"(此处0h表示不偏移)
"format": "yyyy-MM-dd",
"calendar_interval": "1d",
"extended_bounds": { # 需要填充0的范围(注意时区问题)
"min": 1662508800000,
"max": 1672415999000
}
}
}
},
"from": 0,
"size": 0
}
注意点
- offset:es默认是按照UTC的时间进行分桶的,若要按东八区的自然日分组,需要减掉8小时,即设置 "offset": "-8h"(示例中的0h表示不偏移);也可以改用 "time_zone": "+08:00"
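- calendar_interval 可选内容(只支持单个日历单位,如 1d;毫秒、秒以及 10m、2h 这类倍数需改用 fixed_interval)
毫秒:1ms 10ms(仅 fixed_interval 支持)
秒: 1s 10s(仅 fixed_interval 支持)
分钟: minute/1m(10m 需使用 fixed_interval)
小时: hour/1h(2h 需使用 fixed_interval)
天: day/1d
星期: week/1w
月: month/1M
季度: quarter/1q
年: year/1y
- "format": "yyyy-MM-dd":只有date类型可以format,这里分组字段如果是long类型时间戳,format的结果仍是时间戳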
修改
- 更新数据
# _id为100,sex修改为1
POST 索引名字/_update/100(_id数据)
{
"doc": {
"sex": 1
}
}
# 根据条件更新
POST 索引名字/_update_by_query
{
"script": {
"source": "ctx._source.labels.version = 1"
},
"query": {
"term": {
"name": "测试"
}
}
}
# 指定更新某条记录(members为nested格式)。注意:PUT _doc/{id} 会用请求体整体覆盖原文档,未写入的字段会丢失
PUT 索引名字/_doc/2
{
"id": 2,
"name": "Group2",
"members": [
{
"id": 3,
"name": "王五"
}
]
}
# 在Group1的members(nested数组)中增加一个成员:郑十
POST group/_update/1
{
"script": {
"source": """
if (ctx._source.members == null) {
List ls = new ArrayList();
ls.add(params.member);
ctx._source.members = ls;
} else {
ctx._source.members.add(params.member);
}
""",
"lang": "painless",
"params": {
"member": {
"id": 10,
"name": "郑十"
}
}
}
}
# 在Group1的members(nested数组)中删除一个成员:id=10
POST group/_update/1
{
"script": {
"source": "ctx._source.members.removeIf(list_item -> list_item.id == params.member_id)",
"lang": "painless",
"params": {
"member_id": 10
}
}
}
# 在Group1的members(nested数组)中更新id=1的member.name为张三1
POST group/_update/1
{
"script": {
"source": "for (item in ctx._source.members) {if(item['id']==params.member_id){item['name']=params.name_new}}",
"lang": "painless",
"params": {
"member_id":1,
"name_new": "张三1"
}
}
}
删除
- 删除索引
DELETE 索引名称,索引2,...
- 删除范围数据
# 删除recordTime为1970-01-01 08:00:00的数据
POST 索引名称/_delete_by_query
{
"query": {
"term": {
"recordTime": "1970-01-01 08:00:00"
}
}
}
- 删除索引全部数据
POST 索引名称/_delete_by_query
{
"query": {
"match_all": {}
}
}
Java - Elasticsearch 相关
- mapping.xml和entity配置
如果entity字段使用@JsonProperty,则mapping对应字段为@JsonProperty中指定的名称(不是声明的字段名称)。