mirror of
https://github.com/dillonzq/LoveIt.git
synced 2024-11-14 02:46:16 +01:00
feat(search): improve search index by chunking content at h2 and h3 headings (#290)
This commit is contained in:
parent
108679e137
commit
3096ff6235
8 changed files with 32 additions and 25 deletions
2
assets/js/theme.min.js
vendored
2
assets/js/theme.min.js
vendored
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -116,7 +116,7 @@ enableEmoji = true
|
||||||
# type of search engine ("lunr", "algolia")
|
# type of search engine ("lunr", "algolia")
|
||||||
type = "algolia"
|
type = "algolia"
|
||||||
# max index length of the chunked content
|
# max index length of the chunked content
|
||||||
contentLength = 5000
|
contentLength = 4000
|
||||||
# placeholder of the search bar
|
# placeholder of the search bar
|
||||||
placeholder = ""
|
placeholder = ""
|
||||||
# max number of results length
|
# max number of results length
|
||||||
|
@ -310,7 +310,7 @@ enableEmoji = true
|
||||||
# 搜索引擎的类型 ("lunr", "algolia")
|
# 搜索引擎的类型 ("lunr", "algolia")
|
||||||
type = "algolia"
|
type = "algolia"
|
||||||
# 文章内容最长索引长度
|
# 文章内容最长索引长度
|
||||||
contentLength = 5000
|
contentLength = 4000
|
||||||
# 搜索框的占位提示语
|
# 搜索框的占位提示语
|
||||||
placeholder = ""
|
placeholder = ""
|
||||||
# 最大结果数目
|
# 最大结果数目
|
||||||
|
@ -500,7 +500,7 @@ enableEmoji = true
|
||||||
# type of search engine ("lunr", "algolia")
|
# type of search engine ("lunr", "algolia")
|
||||||
type = "algolia"
|
type = "algolia"
|
||||||
# max index length of the chunked content
|
# max index length of the chunked content
|
||||||
contentLength = 5000
|
contentLength = 4000
|
||||||
# placeholder of the search bar
|
# placeholder of the search bar
|
||||||
placeholder = ""
|
placeholder = ""
|
||||||
# max number of results length
|
# max number of results length
|
||||||
|
|
|
@ -222,7 +222,7 @@ Please open the code block below to view the complete sample configuration :(far
|
||||||
# type of search engine ("lunr", "algolia")
|
# type of search engine ("lunr", "algolia")
|
||||||
type = "lunr"
|
type = "lunr"
|
||||||
# max index length of the chunked content
|
# max index length of the chunked content
|
||||||
contentLength = 5000
|
contentLength = 4000
|
||||||
# placeholder of the search bar
|
# placeholder of the search bar
|
||||||
placeholder = ""
|
placeholder = ""
|
||||||
# {{< version 0.2.1 >}} max number of results length
|
# {{< version 0.2.1 >}} max number of results length
|
||||||
|
@ -972,7 +972,7 @@ Here is the search configuration in your [site configuration](#site-configuratio
|
||||||
# type of search engine ("lunr", "algolia")
|
# type of search engine ("lunr", "algolia")
|
||||||
type = "lunr"
|
type = "lunr"
|
||||||
# max index length of the chunked content
|
# max index length of the chunked content
|
||||||
contentLength = 5000
|
contentLength = 4000
|
||||||
# placeholder of the search bar
|
# placeholder of the search bar
|
||||||
placeholder = ""
|
placeholder = ""
|
||||||
# {{< version 0.2.1 >}} max number of results length
|
# {{< version 0.2.1 >}} max number of results length
|
||||||
|
@ -992,8 +992,8 @@ The following is a comparison of two search engines:
|
||||||
but high bandwidth and low performance (Especially for Chinese which needs a large segmentit library)
|
but high bandwidth and low performance (Especially for Chinese which needs a large segmentit library)
|
||||||
* `algolia`: high performance and low bandwidth, but need to synchronize `index.json` and limit for `contentLength`
|
* `algolia`: high performance and low bandwidth, but need to synchronize `index.json` and limit for `contentLength`
|
||||||
|
|
||||||
{{< version 0.2.1 >}} The content of the post is separated by `h2` HTML tag to improve query performance and basically implement full-text search.
|
{{< version 0.2.3 >}} The content of the post is separated by `h2` and `h3` HTML tags to improve query performance and basically implement full-text search.
|
||||||
`contentLength` is used to limit the max index length of the part starting with `h2` HTML tag.
|
`contentLength` is used to limit the max index length of each part starting with an `h2` or `h3` HTML tag.
|
||||||
{{< /admonition >}}
|
{{< /admonition >}}
|
||||||
|
|
||||||
{{< admonition tip "Tips about algolia" >}}
|
{{< admonition tip "Tips about algolia" >}}
|
||||||
|
|
|
@ -227,7 +227,7 @@ Please open the code block below to view the complete sample configuration :(far
|
||||||
# type of search engine ("lunr", "algolia")
|
# type of search engine ("lunr", "algolia")
|
||||||
type = "lunr"
|
type = "lunr"
|
||||||
# max index length of the chunked content
|
# max index length of the chunked content
|
||||||
contentLength = 5000
|
contentLength = 4000
|
||||||
# placeholder of the search bar
|
# placeholder of the search bar
|
||||||
placeholder = ""
|
placeholder = ""
|
||||||
# {{< version 0.2.1 >}} max number of results length
|
# {{< version 0.2.1 >}} max number of results length
|
||||||
|
@ -977,7 +977,7 @@ Here is the search configuration in your [site configuration](#site-configuratio
|
||||||
# type of search engine ("lunr", "algolia")
|
# type of search engine ("lunr", "algolia")
|
||||||
type = "lunr"
|
type = "lunr"
|
||||||
# max index length of the chunked content
|
# max index length of the chunked content
|
||||||
contentLength = 5000
|
contentLength = 4000
|
||||||
# placeholder of the search bar
|
# placeholder of the search bar
|
||||||
placeholder = ""
|
placeholder = ""
|
||||||
# {{< version 0.2.1 >}} max number of results length
|
# {{< version 0.2.1 >}} max number of results length
|
||||||
|
@ -997,8 +997,8 @@ The following is a comparison of two search engines:
|
||||||
but high bandwidth and low performance (Especially for Chinese which needs a large segmentit library)
|
but high bandwidth and low performance (Especially for Chinese which needs a large segmentit library)
|
||||||
* `algolia`: high performance and low bandwidth, but need to synchronize `index.json` and limit for `contentLength`
|
* `algolia`: high performance and low bandwidth, but need to synchronize `index.json` and limit for `contentLength`
|
||||||
|
|
||||||
{{< version 0.2.1 >}} The content of the post is separated by `h2` HTML tag to improve query performance and basically implement full-text search.
|
{{< version 0.2.3 >}} The content of the post is separated by `h2` and `h3` HTML tags to improve query performance and basically implement full-text search.
|
||||||
`contentLength` is used to limit the max index length of the part starting with `h2` HTML tag.
|
`contentLength` is used to limit the max index length of each part starting with an `h2` or `h3` HTML tag.
|
||||||
{{< /admonition >}}
|
{{< /admonition >}}
|
||||||
|
|
||||||
{{< admonition tip "Tips about algolia" >}}
|
{{< admonition tip "Tips about algolia" >}}
|
||||||
|
|
|
@ -225,7 +225,7 @@ hugo
|
||||||
# 搜索引擎的类型 ("lunr", "algolia")
|
# 搜索引擎的类型 ("lunr", "algolia")
|
||||||
type = "lunr"
|
type = "lunr"
|
||||||
# 文章内容最长索引长度
|
# 文章内容最长索引长度
|
||||||
contentLength = 5000
|
contentLength = 4000
|
||||||
# 搜索框的占位提示语
|
# 搜索框的占位提示语
|
||||||
placeholder = ""
|
placeholder = ""
|
||||||
# 最大结果数目
|
# 最大结果数目
|
||||||
|
@ -977,7 +977,7 @@ defaultContentLanguage = "zh-cn"
|
||||||
# 搜索引擎的类型 ("lunr", "algolia")
|
# 搜索引擎的类型 ("lunr", "algolia")
|
||||||
type = "lunr"
|
type = "lunr"
|
||||||
# 文章内容最长索引长度
|
# 文章内容最长索引长度
|
||||||
contentLength = 5000
|
contentLength = 4000
|
||||||
# 搜索框的占位提示语
|
# 搜索框的占位提示语
|
||||||
placeholder = ""
|
placeholder = ""
|
||||||
# 最大结果数目
|
# 最大结果数目
|
||||||
|
@ -996,8 +996,8 @@ defaultContentLanguage = "zh-cn"
|
||||||
* `lunr`: 简单, 无需同步 `index.json`, 没有 `contentLength` 的限制, 但占用带宽大且性能低 (特别是中文需要一个较大的分词依赖库)
|
* `lunr`: 简单, 无需同步 `index.json`, 没有 `contentLength` 的限制, 但占用带宽大且性能低 (特别是中文需要一个较大的分词依赖库)
|
||||||
* `algolia`: 高性能并且占用带宽低, 但需要同步 `index.json` 且有 `contentLength` 的限制
|
* `algolia`: 高性能并且占用带宽低, 但需要同步 `index.json` 且有 `contentLength` 的限制
|
||||||
|
|
||||||
{{< version 0.2.1 >}} 文章内容被 `h2` HTML 标签切分来提供查询效果并且基本实现全文搜索.
|
{{< version 0.2.3 >}} 文章内容被 `h2` 和 `h3` HTML 标签切分来提高查询效果并且基本实现全文搜索.
|
||||||
`contentLength` 用来限制 `h2` HTML 标签开头的内容部分的最大长度.
|
`contentLength` 用来限制 `h2` 和 `h3` HTML 标签开头的内容部分的最大长度.
|
||||||
{{< /admonition >}}
|
{{< /admonition >}}
|
||||||
|
|
||||||
{{< admonition tip "关于 algolia 的使用技巧" >}}
|
{{< admonition tip "关于 algolia 的使用技巧" >}}
|
||||||
|
|
|
@ -14,16 +14,23 @@
|
||||||
{{- end -}}
|
{{- end -}}
|
||||||
{{- $params := .Params | merge $.Site.Params.page -}}
|
{{- $params := .Params | merge $.Site.Params.page -}}
|
||||||
{{- $content := dict "content" .Content "ruby" $params.ruby "fraction" $params.fraction "fontawesome" $params.fontawesome | partial "function/content.html" -}}
|
{{- $content := dict "content" .Content "ruby" $params.ruby "fraction" $params.fraction "fontawesome" $params.fontawesome | partial "function/content.html" -}}
|
||||||
{{- range $i, $chunked := split $content "<h2 id=" -}}
|
{{- range $i, $contenti := split $content "<h2 id=" -}}
|
||||||
{{- if gt $i 0 -}}
|
{{- if gt $i 0 -}}
|
||||||
{{- $chunked = printf "<h2 id=%s" $chunked -}}
|
{{- $contenti = printf "<h2 id=%s" $contenti -}}
|
||||||
{{- end -}}
|
{{- end -}}
|
||||||
{{- $chunked = $chunked | plainify | htmlUnescape | replace "\n" " " | replace "\t" " " | replaceRE " +" " " -}}
|
{{- range $j, $contentj := split $contenti "<h3 id=" -}}
|
||||||
{{- if gt $.Site.Params.search.contentLength 0 -}}
|
{{- if gt $j 0 -}}
|
||||||
{{- $chunked = substr $chunked 0 $.Site.Params.search.contentLength -}}
|
{{- $contentj = printf "<h3 id=%s" $contentj -}}
|
||||||
|
{{- end -}}
|
||||||
|
{{- $contentj = $contentj | plainify | htmlUnescape | replaceRE `[\n\t ]+` " " -}}
|
||||||
|
{{- if gt $.Site.Params.search.contentLength 0 -}}
|
||||||
|
{{- $contentj = substr $contentj 0 $.Site.Params.search.contentLength -}}
|
||||||
|
{{- end -}}
|
||||||
|
{{- if $contentj | and (ne $contentj " ") -}}
|
||||||
|
{{- $one := printf "%s:%d:%d" $uri $i $j | dict "content" $contentj "objectID" | merge $meta -}}
|
||||||
|
{{- $index = $index | append $one -}}
|
||||||
|
{{- end -}}
|
||||||
{{- end -}}
|
{{- end -}}
|
||||||
{{- $one := printf "%s:%d" $uri $i | dict "content" $chunked "objectID" | merge $meta -}}
|
|
||||||
{{- $index = $index | append $one -}}
|
|
||||||
{{- end -}}
|
{{- end -}}
|
||||||
{{- end -}}
|
{{- end -}}
|
||||||
|
|
||||||
|
|
|
@ -237,7 +237,7 @@ class Theme {
|
||||||
this._algoliaIndex
|
this._algoliaIndex
|
||||||
.search(query, {
|
.search(query, {
|
||||||
offset: 0,
|
offset: 0,
|
||||||
length: searchConfig.maxResultLength * 3,
|
length: searchConfig.maxResultLength * 10,
|
||||||
attributesToHighlight: ['title'],
|
attributesToHighlight: ['title'],
|
||||||
attributesToSnippet: ['content:30'],
|
attributesToSnippet: ['content:30'],
|
||||||
highlightPreTag: `<${searchConfig.highlightTag}>`,
|
highlightPreTag: `<${searchConfig.highlightTag}>`,
|
||||||
|
|
Loading…
Reference in a new issue