ES常用命令

你懂了吗?

ES常用命令
概述

Elasticsearch版本:7.13.2

新建
  • 创建索引
PUT 索引名字
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 0,
    "index": {
      "max_result_window": 2147483647
    },
    "analysis": {
      "tokenizer": {
        "ik_pinyin": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_full_pinyin": false,
          "keep_joined_full_pinyin": true,
          "keep_original": true,
          "limit_first_letter_length": 10,
          "keep_none_chinese": true,
          "keep_none_chinese_together": true,
          "none_chinese_pinyin_tokenize": false,
          "keep_none_chinese_in_first_letter": false,
          "lowercase": true,
          "trim_whitespace": false,
          "remove_duplicated_term": true,
          "keep_separate_first_letter": false
        }
      },
      "analyzer": {
        "custom_ik_smart": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "char_filter": [
            "html_strip"
          ]
        },
        "custom_ik_max_word": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "char_filter": [
            "html_strip"
          ]
        },
        "pinyin_analyzer": {
          "tokenizer": "ik_pinyin"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": {
        "type": "keyword"
      },
      "name": {
        "type": "text",
        "analyzer": "custom_ik_max_word",
        "fields": {
          "pinyin": {
            "type": "text",
            "store": false,
            "term_vector": "with_offsets",
            "analyzer": "pinyin_analyzer"
          },
          "keyword": {
            "type": "keyword",
            "ignore_above": 13011
          }
        }
      },
      "sex": {
        "type": "keyword"
      },
      "similarity": {
        "type": "double"
      },
      "createTime": {
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
      }
    }
  }
}
  • 备份索引
POST _reindex
{
  "source": {
    "index": "旧索引名称"
  },
  "dest": {
    "index": "新索引名称"
  }
}

插入数据
  • 插入单条
PUT {索引名称}/_doc/{文档id}
{
  "title":{
    "input": "blow"
  }
}

查询
  • 查询索引mapping
GET 索引名称/_mapping
  • 搜索

1、match查询:会先对搜索词进行分词,比如“白雪公主和苹果”,会分成“白雪”、“公主”、“苹果”。含有相关内容的字段,都会被检索出来。
2、term是代表完全匹配,也就是精确查询,搜索前不会再对搜索词进行分词,所以我们的搜索词必须是文档分词集合中的一个。
3、wildcard查询:是使用通配符进行查询,其中?代表任意一个字符,*代表零个或多个任意字符。

# 查询所有数据
GET 索引名称/_search
{
  "query": {
    "match_all": {}
  }
}

# 查询name为'张'的数据,并根据条件排序,分页
GET 索引名称/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "name": "张"
          }
        }
      ]
    }
  },
  "sort": [
    {
      "recordTime": {
        "order": "desc"
      }
    }
  ],
  "from": 0,
  "size": 20
}

# 范围查询,age大于等于20且小于等于30
GET 索引名字/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "age": {
              "gte": 20,
              "lte": 30
            }
          }
        }
      ]
    }
  },
  "from": 0,
  "size": 10
}

# 模糊匹配
GET person-record-139/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "wildcard": {
            "name.keyword": {
              "value": "*张*"
            }
          }
        }
      ]
    }
  }
}

# 相似度搜索,min_score表示返回的最低分。cosineSimilarity取值范围为[-1,1],而es不允许得分为负数(否则报错),所以需+1
GET 索引名称/_search
{
  "query": {
    "script_score": {
      "query": {
        "match_all": {}
      },
      "script": {
        "source": "cosineSimilarity(params.queryVector, 'featureData') + 1",
        "params": {
          "queryVector": [512维数组]
        }
      }
    }
  },
  "min_score": 0,
  "from": 0,
  "size": 10
}

# 多个字段匹配,minimum_should_match最少匹配几个条件
GET 索引名称/_search
{
  "query": {
    "bool": {
      "minimum_should_match": 1,
      "should": [
        {
          "multi_match": {
            "query": "五楼",
            "fields": [
              "deviceName",
              "XZQH",
              "JKDWLX",
              "deviceTypeName",
              "channelId"
            ]
          }
        }
      ]
    }
  }
}

# 搜索eTime为null
POST 索引名称/_search
{
  "query": {
    "bool": {
      "must_not": [
        {
          "exists": {
            "field": "eTime"
          }
        }
      ]
    }
  }
}
  • 超出一万条数据,如何查询总数?

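es官方默认限制索引查询(from+size)最多只能查询10000条数据(index.max_result_window),同时默认只精确统计前10000条命中(track_total_hits默认值为10000)
所以在`hits.total.value` 最大显示10000(此时`hits.total.relation`为`gte`)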

GET 索引名称/_search
{
  "track_total_hits": true
}

可添加参数 track_total_hits,这时候的返回值是准确的。
  • 根据时间段分组统计之date_histogram

对于es保存的数据,需要根据其时间格式或时间戳格式的字段进行分组统计,计算每天或每小时的某字段统计值

GET notice-dev/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "TriggerTime": {
              "gte": "2022-08-10 00:00:00",
              "lte": "2022-12-30 23:59:59"
            }
          }
        },
        {
          "terms": {
            "DispositionCategory": [
              "1"
            ]
          }
        },
        {
          "range": {
            "NoticeSimilarity": {
              "gte": 10
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "dateAgg": {	# 聚合名
      "date_histogram": {
        "field": "TriggerTime",		# 根据哪个字段分组
        "offset": "0h",	# es默认是按照UTC的时间进行查询的,所以需要减掉8小时
        "format": "yyyy-MM-dd",
        "calendar_interval": "1d",
        "extended_bounds": {	# 需要填充0的范围(注意时区问题)
          "min": 1662508800000,
          "max": 1672415999000
        }
      }
    }
  },
  "from": 0,
  "size": 0
}

注意点

  • offset:es默认是按照UTC的时间进行分桶的,若要按东八区的自然日统计,需要减掉8小时,即设置为 -8h(上面示例中写的是 0h,即不偏移);也可以改用 "time_zone": "+08:00"
  • calendar_interval 可选内容(calendar_interval 只支持单个日历单位,如 1m、1h、1d;毫秒、秒以及 10m、2h 这类倍数需改用 fixed_interval)
    毫秒:1ms 10ms
    秒: second/1s 10s
    分钟: minute/1m 10m
    小时: hour/1h 2h
    天: day/1d
    星期: week/1w
    月: month/1M
    季度: quarter/1q
    年: year/1y
  • "format": "yyyy-MM-dd":只有date类型可以format,这里分组字段如果是long类型时间戳,format的结果仍然是时间戳

修改
  • 更新数据
# _id为100,sex修改为1
POST 索引名字/_update/100(_id数据)
{
  "doc": {
    "sex": 1
  }
}

# 根据条件更新
POST 索引名字/_update_by_query
{
	"script": {
		"source": "ctx._source.labels.version = 1"
	},
	"query": {
		"term": {
			"name": "测试"
		}
	}
}

# 指定id整体覆盖某条记录(PUT会替换整个文档,members为nested格式)
PUT 索引名字/_doc/2
{
  "id": 2,
  "name": "Group2",
  "members": [
    {
      "id": 3,
      "name": "王五"
    }
  ]
}

# 在Group1的members(nested数组)中增加一个成员:郑十
POST group/_update/1
{
  "script": {
    "source": """
    	if (ctx._source.members == null) {
    		List ls = new ArrayList();
    		ls.add(params.member);
    		ctx._source.members = ls;
    	} else {
			ctx._source.members.add(params.member);
    	}
    """,
    "lang": "painless",
    "params": {
      "member": {
        "id": 10,
        "name": "郑十"
      }
    }
  }
}

# 在Group1的members(nested数组)中删除一个成员:id=10
POST group/_update/1
{
  "script": {
    "source": "ctx._source.members.removeIf(list_item -> list_item.id == params.member_id)",
    "lang": "painless",
    "params": {
      "member_id": 10
    }
  }
}

# 在Group1的members(nested数组)中更新id=1的member.name为张三1
POST group/_update/1
{
	"script": {
	  "source": "for (item in ctx._source.members) {if(item['id']==params.member_id){item['name']=params.name_new}}",
	  "lang": "painless",
	  "params": {
	    "member_id":1,
	    "name_new": "张三1"
	  }
	}
}
删除
  • 删除索引
DELETE 索引名称,索引2,...
  • 删除范围数据
# 删除recordTime为1970-01-01 08:00:00的数据
POST 索引名称/_delete_by_query
{
  "query": {
    "term": {
      "recordTime": "1970-01-01 08:00:00"
    }
  }
}
  • 删除索引全部数据
POST 索引名称/_delete_by_query
{
  "query": {
    "match_all": {}
  }
}

Java - Elasticsearch 相关
  • mapping.xml和entity配置

如果entity字段使用@JsonProperty,则mapping对应字段为@JsonProperty的value属性指定的名称(不是声明的字段名称)。