1 搜索分离
通过 Search Template实现代码调用与语句优化的分离
POST _scripts/tmdb
{
"script": {
"lang": "mustache",
"source": {
"_source": [
"title","overview"
],
"size": 20,
"query": {
"multi_match": {
"query": "{{q}}",
"fields": ["title","overview"]
}
}
}
}
}
POST tmdb/_search/template
{
"id":"tmdb",
"params": {
"q": "basketball with cartoon aliens"
}
}
#程序传入查询条件。其他的参数配置都在_scripts/tmdb中配置复制
2 搜索优化
功能
Complete & Context Suggester
Term & Phrase Suggester
搜索推荐
自动补全
比较
Completion > Phrase > Term
Term > Phrase > Completion
Completion > Phrase > Term
精准度
召回率
性能
2.1 搜索推荐
将输入的文本分解为Token,然后在索引的字典里查找相似的Term并返回
DELETE articles
PUT articles
{
"mappings": {
"properties": {
"title_completion":{
"type": "completion"
}
}
}
}
POST articles/_bulk
{ "index" : { } }
{ "title_completion": "lucene is very cool"}
{ "index" : { } }
{ "title_completion": "Elasticsearch builds on top of lucene"}
{ "index" : { } }
{ "title_completion": "Elasticsearch rocks"}
{ "index" : { } }
{ "title_completion": "elastic is the company behind ELK stack"}
{ "index" : { } }
{ "title_completion": "Elk stack rocks"}
{ "index" : {} }
POST articles/_search?pretty
{
"size": 0,
"suggest": {
"article-suggester": {
"prefix": "elk ",
"completion": {
"field": "title_completion"
}
}
}
}
DELETE articles
POST articles/_bulk
{ "index" : { } }
{ "body": "lucene is very cool"}
{ "index" : { } }
{ "body": "Elasticsearch builds on top of lucene"}
{ "index" : { } }
{ "body": "Elasticsearch rocks"}
{ "index" : { } }
{ "body": "elastic is the company behind ELK stack"}
{ "index" : { } }
{ "body": "Elk stack rocks"}
{ "index" : {} }
{ "body": "elasticsearch is rock solid"}
POST _analyze
{
"analyzer": "standard",
"text": ["Elk stack rocks rock"]
}
POST /articles/_search
{
"size": 1,
"query": {
"match": {
"body": "lucen rock"
}
},
"suggest": {
"term-suggestion": {
"text": "lucen rock",
"term": {
"suggest_mode": "missing",
"field": "body"
}
}
}
}
#到字段body中搜索,当无法搜索到结果时(missing),返回建议的词
#suggest_mode支持:missing,如果索引中已经存在,则不提供建议;popular 推荐出现频次更高的词;always 无论是否存在,都提供建议
POST /articles/_search
{
"suggest": {
"term-suggestion": {
"text": "lucen rock",
"term": {
"suggest_mode": "popular",
"field": "body"
}
}
}
}
POST /articles/_search
{
"suggest": {
"term-suggestion": {
"text": "lucen rock",
"term": {
"suggest_mode": "always",
"field": "body",
}
}
}
}
POST /articles/_search
{
"suggest": {
"term-suggestion": {
"text": "lucen hocks",
"term": {
"suggest_mode": "always",
"field": "body",
"prefix_length":0,
"sort": "frequency"
}
}
}
}
#sort用来设置排序,也可以按照频率(frequency)
#prefix_length 默认首字母不一样不会匹配推荐,但是若该参数为0则会推荐匹配
POST /articles/_search
{
"suggest": {
"my-suggestion": {
"text": "lucne and elasticsear rock hello world ",
"phrase": {
"field": "body",
"max_errors":2,
"confidence":0,
"direct_generator":[{
"field":"body",
"suggest_mode":"always"
}],
"highlight": {
"pre_tag": "<em>",
"post_tag": "</em>"
}
}
}
}
}
#Phrase Suggester相较于Term Suggester增加一些逻辑
#max_errors 最多可以拼错的Terms
#confidence 限制返回结果数,默认为1复制
2.2 自动补全
Completion Suggester提供了"自动完成"的功能。用户每输入一个字符,就需要即时发送一个查询请求到后端查找匹配项
对性能要求比较苛刻。Elasticsearch采用了不同的数据结构,并非通过倒排索引来完成。而是将Analyer的数据编码成FST和索引一起存放。FST会被ES整个加载进内存。速度很快
FST只能用于前缀查找
DELETE articles
PUT articles
{
"mappings": {
"properties": {
"title_completion":{
"type": "completion"
}
}
}
}
#事先需要在mapping中设置字段类型为completion
POST articles/_bulk
{ "index" : { } }
{ "title_completion": "lucene is very cool"}
{ "index" : { } }
{ "title_completion": "Elasticsearch builds on top of lucene"}
{ "index" : { } }
{ "title_completion": "Elasticsearch rocks"}
{ "index" : { } }
{ "title_completion": "elastic is the company behind ELK stack"}
{ "index" : { } }
{ "title_completion": "Elk stack rocks"}
{ "index" : {} }
POST articles/_search?pretty
{
"size": 0,
"suggest": {
"article-suggester": {
"prefix": "elk ",
"completion": {
"field": "title_completion"
}
}
}
}
#查询时指定字段及前缀
DELETE comments
PUT comments
PUT comments/_mapping
{
"properties": {
"comment_autocomplete":{
"type": "completion",
"contexts":[{
"type":"category",
"name":"comment_category"
}]
}
}
}
#contexts用于指定上下午,方便针对不同场景查询不同上下文。如同样是Star根据场景不同分别查询咖啡相关(Starbucks)、电影相关(star wars)
#contexts中的类型支持category(任意字符串)、Geo(地理位置信息)
POST comments/_doc
{
"comment":"I love the star war movies",
"comment_autocomplete":{
"input":["star wars"],
"contexts":{
"comment_category":"movies"
}
}
}
POST comments/_doc
{
"comment":"Where can I find a Starbucks",
"comment_autocomplete":{
"input":["starbucks"],
"contexts":{
"comment_category":"coffee"
}
}
}
POST comments/_search
{
"suggest": {
"MY_SUGGESTION": {
"prefix": "sta",
"completion":{
"field":"comment_autocomplete",
"contexts":{
"comment_category":"coffee"
}
}
}
}
}复制
文章转载自lin在路上,如果涉嫌侵权,请发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。
评论
相关阅读
2025年4月中国数据库流行度排行榜:OB高分复登顶,崖山稳驭撼十强
墨天轮编辑部
2453次阅读
2025-04-09 15:33:27
数据库国产化替代深化:DBA的机遇与挑战
代晓磊
1128次阅读
2025-04-27 16:53:22
2025年3月国产数据库中标情况一览:TDSQL大单622万、GaussDB大单581万……
通讯员
811次阅读
2025-04-10 15:35:48
2025年4月国产数据库中标情况一览:4个千万元级项目,GaussDB与OceanBase大放异彩!
通讯员
642次阅读
2025-04-30 15:24:06
数据库,没有关税却有壁垒
多明戈教你玩狼人杀
563次阅读
2025-04-11 09:38:42
天津市政府数据库框采结果公布,7家数据库产品入选!
通讯员
546次阅读
2025-04-10 12:32:35
国产数据库需要扩大场景覆盖面才能在竞争中更有优势
白鳝的洞穴
526次阅读
2025-04-14 09:40:20
最近我为什么不写评论国产数据库的文章了
白鳝的洞穴
499次阅读
2025-04-07 09:44:54
【活动】分享你的压箱底干货文档,三篇解锁进阶奖励!
墨天轮编辑部
451次阅读
2025-04-17 17:02:24
一页概览:Oracle GoldenGate
甲骨文云技术
447次阅读
2025-04-30 12:17:56