ES 7.0.0的简单使用操作

山小杰 10月 01, 2019

本文是基于ES 7.0.0的简单使用操作。
大部分是一些RESTful API的使用，内容比较基础且杂乱，仅仅是一个记录。

ES 7.0.0版本开始弃用类型type的概念（将在8.0版本中彻底移除），每个index下只使用一个默认类型_doc

创建索引示例：
创建一个名为laws的索引，指定默认分词器为ik_max_word，并过滤掉文档中的html标签，同时定义文档结构mapping

  PUT /laws
  {
    "settings": {
      "number_of_shards" : 1,
      "number_of_replicas" : 0,
      "analysis.analyzer.default.type":"ik_max_word",
      "analysis.char_filter":["html_strip"]
    },
    "mappings": {
      "properties": {
        "title":{
          "type": "text"
        },
        "DocNo":{
          "type": "text"
        },
        "unit":{
          "type": "text"
        },
        "content":{
          "type": "text"
        }
      }
    }
  }

为每个字段设置分词器analyzer以及搜索分词器search_analyzer

  PUT /laws20190718
  {
    "settings": {
      "number_of_shards" : 1,
      "number_of_replicas" : 0,
      "analysis.char_filter":["html_strip"]
    },
    "mappings": {
      "properties": {
        "id":{
          "type":"keyword"
        },
        "title":{
          "type": "text",
          "analyzer": "ik_max_word",
          "search_analyzer": "ik_smart"
        },
        "strs":{
          "type": "text",
          "analyzer": "ik_max_word",
          "search_analyzer": "ik_smart"
        },
        "category":{
          "type": "text",
          "analyzer": "ik_max_word",
          "search_analyzer": "ik_smart"
        },
        "date":{
          "type":"date"
        }
      }
    }
  }

写入单条数据，文档内容含有英文双引号的，需要使用三引号 `"""` 包围在文档内容两侧（该写法仅在 Kibana Dev Tools 控制台中可用，并非标准JSON）
```
  PUT laws/_doc/1
  {
    "tid" : "1",
    "content" : """<p align="center">2019</p>"""
  }
```

批量写入文档数据，文档内容含有英文双引号的，需要使用三引号 `"""` 包围在文档内容两侧（该写法仅在 Kibana Dev Tools 控制台中可用，并非标准JSON）

  PUT /laws/_bulk
  {"index":{"_id" : 2}}
  {"tid" : "1","content" : """<p align="center">2019</p>"""}
  {"index":{"_id" : 3}}
  {"tid" : "1","content" : """<p align="center">2020</p>"""}
  {"index":{"_id" : 4}}
  {"tid" : "1","content" : """<p align="center">2021</p>"""}

简单搜索match
参数：from 从指定的偏移量中提取搜索结果，默认为 0
参数：size 返回搜索结果条数，默认为 10
```
  GET /laws/_search?from=0&size=10
  {
    "query": {
      "match": {
        "content": "采购"
      }
    }
  }
```
ik分词模式

- ik_smart 最粗粒度的拆分
- ik_max_word 将文本做最细粒度的拆分，会穷尽各种可能的词语组合

结巴分词模式

- jieba_search 倾向于完整、顺序的切分，类似于ik_smart
- jieba_index 倾向于分出更多可能的词，类似于ik_max_word

测试分词引擎

  GET _analyze
  {
    "text" : "我爱中华人民共和国",
    "analyzer": "ik_smart"
  }
  ik_smart返回结果
  {
    "tokens" : [
      {
        "token" : "我",
        "start_offset" : 0,
        "end_offset" : 1,
        "type" : "CN_CHAR",
        "position" : 0
      },
      {
        "token" : "爱",
        "start_offset" : 1,
        "end_offset" : 2,
        "type" : "CN_CHAR",
        "position" : 1
      },
      {
        "token" : "中华人民共和国",
        "start_offset" : 2,
        "end_offset" : 9,
        "type" : "CN_WORD",
        "position" : 2
      }
    ]
  }

  GET _analyze
  {
    "text" : "我爱中华人民共和国",
    "analyzer": "ik_max_word"
  }
  ik_max_word返回结果
  {
    "tokens" : [
      {
        "token" : "我",
        "start_offset" : 0,
        "end_offset" : 1,
        "type" : "CN_CHAR",
        "position" : 0
      },
      {
        "token" : "爱",
        "start_offset" : 1,
        "end_offset" : 2,
        "type" : "CN_CHAR",
        "position" : 1
      },
      {
        "token" : "中华人民共和国",
        "start_offset" : 2,
        "end_offset" : 9,
        "type" : "CN_WORD",
        "position" : 2
      },
      {
        "token" : "中华人民",
        "start_offset" : 2,
        "end_offset" : 6,
        "type" : "CN_WORD",
        "position" : 3
      },
      {
        "token" : "中华",
        "start_offset" : 2,
        "end_offset" : 4,
        "type" : "CN_WORD",
        "position" : 4
      },
      {
        "token" : "华人",
        "start_offset" : 3,
        "end_offset" : 5,
        "type" : "CN_WORD",
        "position" : 5
      },
      {
        "token" : "人民共和国",
        "start_offset" : 4,
        "end_offset" : 9,
        "type" : "CN_WORD",
        "position" : 6
      },
      {
        "token" : "人民",
        "start_offset" : 4,
        "end_offset" : 6,
        "type" : "CN_WORD",
        "position" : 7
      },
      {
        "token" : "共和国",
        "start_offset" : 6,
        "end_offset" : 9,
        "type" : "CN_WORD",
        "position" : 8
      },
      {
        "token" : "共和",
        "start_offset" : 6,
        "end_offset" : 8,
        "type" : "CN_WORD",
        "position" : 9
      },
      {
        "token" : "国",
        "start_offset" : 8,
        "end_offset" : 9,
        "type" : "CN_CHAR",
        "position" : 10
      }
    ]
  }

  GET _analyze
  {
    "text" : "我爱中华人民共和国",
    "analyzer": "jieba_index"
  }
  jieba_index返回结果
  {
    "tokens" : [
      {
        "token" : "我爱",
        "start_offset" : 0,
        "end_offset" : 2,
        "type" : "word",
        "position" : 0
      },
      {
        "token" : "中华",
        "start_offset" : 2,
        "end_offset" : 4,
        "type" : "word",
        "position" : 1
      },
      {
        "token" : "中华人民共和国",
        "start_offset" : 2,
        "end_offset" : 9,
        "type" : "word",
        "position" : 1
      },
      {
        "token" : "华人",
        "start_offset" : 3,
        "end_offset" : 5,
        "type" : "word",
        "position" : 1
      },
      {
        "token" : "人民",
        "start_offset" : 4,
        "end_offset" : 6,
        "type" : "word",
        "position" : 2
      },
      {
        "token" : "共和",
        "start_offset" : 6,
        "end_offset" : 8,
        "type" : "word",
        "position" : 3
      },
      {
        "token" : "共和国",
        "start_offset" : 6,
        "end_offset" : 9,
        "type" : "word",
        "position" : 3
      }
    ]
  }

  GET _analyze
  {
    "text" : "我爱中华人民共和国",
    "analyzer": "jieba_search"
  }
  jieba_search返回结果
  {
    "tokens" : [
      {
        "token" : "我爱",
        "start_offset" : 0,
        "end_offset" : 2,
        "type" : "word",
        "position" : 0
      },
      {
        "token" : "中华人民共和国",
        "start_offset" : 2,
        "end_offset" : 9,
        "type" : "word",
        "position" : 1
      }
    ]
  }

多字段检索multi_match

  GET /lawss/_search
  {
    "query": {
      "multi_match": {
        "query": "spark",
        "fields": ["title","strs"]
      }
    }
  }

结果关键词高亮

  GET /lawss/_search
  {
    "query": {
      "multi_match": {
        "query": "数据仓库",
        "fields": ["title","strs"]
      }
    },
    "highlight": {
      "pre_tags": ["<b>"],
      "post_tags": ["</b>"],
      "fields": {
        "title": {},
        "strs": {}
      }
    }
  }

简单SQL查询

  POST /_sql
  {
    "query": "SELECT title,category FROM lawss WHERE date > '2018-01-01'"
  }

  # 加入format=txt参数可以将json结果转为表格形式
  # 支持的返回格式：csv、json（默认）、tsv、txt、yaml、cbor（二进制）、smile（二进制）
  # 设置“fetch_size”数值可控制返回记录数（可在SQL语句中添加LIMIT控制返回记录数）
  # 默认每次请求提取1000条记录
  POST /_sql?format=txt
  {
    "query": "SELECT title,category FROM lawss WHERE date > '2018-01-01'",
    "fetch_size" : 5
  }

索引别名，重建索引时可不影响现有业务正常运行。参见[官方文档](https://www.elastic.co/guide/en/elasticsearch/reference/7.0/indices-aliases.html)

  # 添加别名
  POST /_aliases
  {
    "actions": [
      {
        "add": {
          "index": "lawss",
          "alias": "laws"
        }
      }
    ]
  }
  # 删除别名
  POST /_aliases
  {
    "actions": [
      {
        "remove": {
          "index": "lawss",
          "alias": "laws"
        }
      }
    ]
  }

重建索引reindex
需要预先创建好新索引的设置以及映射等。参见[官方文档](https://www.elastic.co/guide/en/elasticsearch/reference/7.0/docs-reindex.html)

  # 将索引"lawss"重建至索引"laws20190718"
  POST _reindex
  {
    "source": {
      "index": "lawss"
    },
    "dest": {
      "index": "laws20190718"
    }
  }

自定义检索返回字段
如下所示，只返回ID、NAME、TYPE这三个字段的数据

  GET /audit_law/_search
  {
    "query": {
      "multi_match": {
        "query": "项目",
        "fields": []
      }
    },
    "_source": ["ID","NAME","TYPE"]
  }

多条件组合查询

  GET /audit_law/_search
  {
    "query": {
      "bool": {
        "must": [
          {"multi_match": {"query": "专项资金","fields": []}}
        ],
        "filter": [
          {"term": {"RELEASE_ORGAN": {"value": "财政"}}},
          {"term": {"REGION_NAME": {"value": "全国"}}},
          {"term": {"NAME": {"value": "财政"}}},
          {"term": {"DOCUMENT_NO": {"value": "2018年12月28日"}}}    ,
          {"term": {"INDUSTRY_NAME": "教育"}},
          {"terms": {"LAW_TYPE_CODE": ["10805","10303"]}}
        ]
      }
    },
    "sort": [
      {
        "PUBLISH_TIME": {
          "order": "desc"
        }
      }
    ], 
    "_source": ["ID","NAME","TYPE","INDUSTRY_NAME",    "RELEASE_ORGAN","DOCUMENT_NO","REGION_NAME"]
  }