mirror of
https://github.com/mastodon/mastodon.git
synced 2024-12-22 21:06:01 +01:00
Full-text search for authorized statuses (#6423)
* Add full-text search for authorized statuses
  - Search API will return statuses that match the query
  - Only for logged-in users
  - Only if you are the author of the status,
  - Or you were mentioned in it
  - Or you favourited or reblogged it
  - Configuration over `ES_ENABLED`, `ES_HOST`, `ES_PORT`, `ES_PREFIX`
  - Run `rails chewy:deploy` to create & populate index

  Fix #5880 Fix #4293 Fix #1152
* Add commented out docker-compose configuration for ES container
* Optimize index import, filter search results
* Add basic normalization to the index
* Add better stemming and normalization to the index
* Skip webfinger request if search query includes both @ and a space
* Fix code style
* Visually separate search result sections
* Fix code style issues
This commit is contained in:
parent
235c14c79d
commit
3ebc0ad4d3
13 changed files with 230 additions and 5 deletions
|
@ -9,6 +9,10 @@ DB_USER=postgres
|
||||||
DB_NAME=postgres
|
DB_NAME=postgres
|
||||||
DB_PASS=
|
DB_PASS=
|
||||||
DB_PORT=5432
|
DB_PORT=5432
|
||||||
|
# Optional ElasticSearch configuration
|
||||||
|
# ES_ENABLED=true
|
||||||
|
# ES_HOST=localhost
|
||||||
|
# ES_PORT=9200
|
||||||
|
|
||||||
# Federation
|
# Federation
|
||||||
# Note: Changing LOCAL_DOMAIN at a later time will cause unwanted side effects, including breaking all existing federation.
|
# Note: Changing LOCAL_DOMAIN at a later time will cause unwanted side effects, including breaking all existing federation.
|
||||||
|
|
1
Gemfile
1
Gemfile
|
@ -27,6 +27,7 @@ gem 'bootsnap'
|
||||||
gem 'browser'
|
gem 'browser'
|
||||||
gem 'charlock_holmes', '~> 0.7.5'
|
gem 'charlock_holmes', '~> 0.7.5'
|
||||||
gem 'iso-639'
|
gem 'iso-639'
|
||||||
|
gem 'chewy', '~> 0.10', git: 'https://github.com/toptal/chewy.git'
|
||||||
gem 'cld3', '~> 3.2.0'
|
gem 'cld3', '~> 3.2.0'
|
||||||
gem 'devise', '~> 4.4'
|
gem 'devise', '~> 4.4'
|
||||||
gem 'devise-two-factor', '~> 3.0'
|
gem 'devise-two-factor', '~> 3.0'
|
||||||
|
|
22
Gemfile.lock
22
Gemfile.lock
|
@ -1,3 +1,12 @@
|
||||||
|
GIT
|
||||||
|
remote: https://github.com/toptal/chewy.git
|
||||||
|
revision: a7d21eb4b0bd7415533ef134bb6d31b2df309701
|
||||||
|
specs:
|
||||||
|
chewy (0.10.1)
|
||||||
|
activesupport (>= 4.0)
|
||||||
|
elasticsearch (>= 2.0.0)
|
||||||
|
elasticsearch-dsl
|
||||||
|
|
||||||
GEM
|
GEM
|
||||||
remote: https://rubygems.org/
|
remote: https://rubygems.org/
|
||||||
specs:
|
specs:
|
||||||
|
@ -154,6 +163,15 @@ GEM
|
||||||
json
|
json
|
||||||
thread
|
thread
|
||||||
thread_safe
|
thread_safe
|
||||||
|
elasticsearch (6.0.1)
|
||||||
|
elasticsearch-api (= 6.0.1)
|
||||||
|
elasticsearch-transport (= 6.0.1)
|
||||||
|
elasticsearch-api (6.0.1)
|
||||||
|
multi_json
|
||||||
|
elasticsearch-dsl (0.1.5)
|
||||||
|
elasticsearch-transport (6.0.1)
|
||||||
|
faraday
|
||||||
|
multi_json
|
||||||
encryptor (3.0.0)
|
encryptor (3.0.0)
|
||||||
erubi (1.7.0)
|
erubi (1.7.0)
|
||||||
et-orbi (1.0.8)
|
et-orbi (1.0.8)
|
||||||
|
@ -163,6 +181,8 @@ GEM
|
||||||
fabrication (2.18.0)
|
fabrication (2.18.0)
|
||||||
faker (1.8.4)
|
faker (1.8.4)
|
||||||
i18n (~> 0.5)
|
i18n (~> 0.5)
|
||||||
|
faraday (0.14.0)
|
||||||
|
multipart-post (>= 1.2, < 3)
|
||||||
fast_blank (1.0.0)
|
fast_blank (1.0.0)
|
||||||
ffi (1.9.18)
|
ffi (1.9.18)
|
||||||
fog-core (1.45.0)
|
fog-core (1.45.0)
|
||||||
|
@ -291,6 +311,7 @@ GEM
|
||||||
minitest (5.11.3)
|
minitest (5.11.3)
|
||||||
msgpack (1.1.0)
|
msgpack (1.1.0)
|
||||||
multi_json (1.12.2)
|
multi_json (1.12.2)
|
||||||
|
multipart-post (2.0.0)
|
||||||
net-scp (1.2.1)
|
net-scp (1.2.1)
|
||||||
net-ssh (>= 2.6.5)
|
net-ssh (>= 2.6.5)
|
||||||
net-ssh (4.2.0)
|
net-ssh (4.2.0)
|
||||||
|
@ -583,6 +604,7 @@ DEPENDENCIES
|
||||||
capistrano-yarn (~> 2.0)
|
capistrano-yarn (~> 2.0)
|
||||||
capybara (~> 2.15)
|
capybara (~> 2.15)
|
||||||
charlock_holmes (~> 0.7.5)
|
charlock_holmes (~> 0.7.5)
|
||||||
|
chewy (~> 0.10)!
|
||||||
cld3 (~> 3.2.0)
|
cld3 (~> 3.2.0)
|
||||||
climate_control (~> 0.2)
|
climate_control (~> 0.2)
|
||||||
devise (~> 4.4)
|
devise (~> 4.4)
|
||||||
|
|
61
app/chewy/statuses_index.rb
Normal file
61
app/chewy/statuses_index.rb
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# Chewy index definition for statuses (reblogs excluded), backing the
# full-text status search in SearchService.
class StatusesIndex < Chewy::Index
  # The index is refreshed every 15 minutes (Elasticsearch
  # `refresh_interval`). The custom `content` analyzer tokenizes with
  # `uax_url_email` and then applies the filters below in the listed order.
  settings index: { refresh_interval: '15m' }, analysis: {
    filter: {
      english_stop: {
        type: 'stop',
        stopwords: '_english_',
      },
      english_stemmer: {
        type: 'stemmer',
        language: 'english',
      },
      english_possessive_stemmer: {
        type: 'stemmer',
        language: 'possessive_english',
      },
    },
    analyzer: {
      content: {
        tokenizer: 'uax_url_email',
        filter: %w(
          english_possessive_stemmer
          lowercase
          asciifolding
          cjk_width
          english_stop
          english_stemmer
        ),
      },
    },
  }

  define_type ::Status.without_reblogs do
    # Each crutch receives the collection of statuses being imported and
    # returns a hash of status id => [account ids], so that
    # Status#searchable_by can look the ids up instead of querying per status.

    # status id => ids of the accounts mentioned in that status
    crutch :mentions do |collection|
      pairs = ::Mention.where(status_id: collection.map(&:id)).pluck(:status_id, :account_id)
      pairs.each_with_object({}) { |(status_id, account_id), grouped| (grouped[status_id] ||= []).push(account_id) }
    end

    # status id => ids of the accounts that favourited that status
    crutch :favourites do |collection|
      pairs = ::Favourite.where(status_id: collection.map(&:id)).pluck(:status_id, :account_id)
      pairs.each_with_object({}) { |(status_id, account_id), grouped| (grouped[status_id] ||= []).push(account_id) }
    end

    # status id => ids of the accounts that reblogged that status
    crutch :reblogs do |collection|
      pairs = ::Status.where(reblog_of_id: collection.map(&:id)).pluck(:reblog_of_id, :account_id)
      pairs.each_with_object({}) { |(status_id, account_id), grouped| (grouped[status_id] ||= []).push(account_id) }
    end

    root date_detection: false do
      field :account_id, type: 'long'

      # Indexed text is the spoiler text and the plain-text body joined by a
      # blank line; `text.stemmed` indexes the same value with the `content`
      # analyzer defined above.
      field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].join("\n\n") } do
        field :stemmed, type: 'text', analyzer: 'content'
      end

      # Account ids allowed to find this status (see Status#searchable_by).
      field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
      field :created_at, type: 'date'
    end
  end
end
|
|
@ -22,6 +22,8 @@ export default class SearchResults extends ImmutablePureComponent {
|
||||||
count += results.get('accounts').size;
|
count += results.get('accounts').size;
|
||||||
accounts = (
|
accounts = (
|
||||||
<div className='search-results__section'>
|
<div className='search-results__section'>
|
||||||
|
<h5><FormattedMessage id='search_results.accounts' defaultMessage='People' /></h5>
|
||||||
|
|
||||||
{results.get('accounts').map(accountId => <AccountContainer key={accountId} id={accountId} />)}
|
{results.get('accounts').map(accountId => <AccountContainer key={accountId} id={accountId} />)}
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
|
@ -31,6 +33,8 @@ export default class SearchResults extends ImmutablePureComponent {
|
||||||
count += results.get('statuses').size;
|
count += results.get('statuses').size;
|
||||||
statuses = (
|
statuses = (
|
||||||
<div className='search-results__section'>
|
<div className='search-results__section'>
|
||||||
|
<h5><FormattedMessage id='search_results.statuses' defaultMessage='Toots' /></h5>
|
||||||
|
|
||||||
{results.get('statuses').map(statusId => <StatusContainer key={statusId} id={statusId} />)}
|
{results.get('statuses').map(statusId => <StatusContainer key={statusId} id={statusId} />)}
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
|
@ -40,6 +44,8 @@ export default class SearchResults extends ImmutablePureComponent {
|
||||||
count += results.get('hashtags').size;
|
count += results.get('hashtags').size;
|
||||||
hashtags = (
|
hashtags = (
|
||||||
<div className='search-results__section'>
|
<div className='search-results__section'>
|
||||||
|
<h5><FormattedMessage id='search_results.hashtags' defaultMessage='Hashtags' /></h5>
|
||||||
|
|
||||||
{results.get('hashtags').map(hashtag => (
|
{results.get('hashtags').map(hashtag => (
|
||||||
<Link key={hashtag} className='search-results__hashtag' to={`/timelines/tag/${hashtag}`}>
|
<Link key={hashtag} className='search-results__hashtag' to={`/timelines/tag/${hashtag}`}>
|
||||||
#{hashtag}
|
#{hashtag}
|
||||||
|
|
|
@ -1786,7 +1786,7 @@
|
||||||
flex: 1;
|
flex: 1;
|
||||||
min-height: 47px;
|
min-height: 47px;
|
||||||
|
|
||||||
> img {
|
> img {
|
||||||
display: block;
|
display: block;
|
||||||
object-fit: contain;
|
object-fit: contain;
|
||||||
object-position: bottom left;
|
object-position: bottom left;
|
||||||
|
@ -3229,6 +3229,43 @@
|
||||||
font-weight: 500;
|
font-weight: 500;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// One section of the search results column (people, toots or hashtags).
.search-results__section {
  margin-bottom: 20px;

  // Section heading: a horizontal rule with the label sitting on top of it.
  h5 {
    position: relative;

    // The rule itself, vertically centred behind the label.
    &::before {
      content: "";
      display: block;
      position: absolute;
      left: 0;
      right: 0;
      top: 50%;
      width: 100%;
      height: 0;
      border-top: 1px solid lighten($ui-base-color, 8%);
    }

    // The label; its background and z-index cover the rule behind it.
    span {
      display: inline-block;
      background: $ui-base-color;
      color: $ui-primary-color;
      font-size: 14px;
      font-weight: 500;
      padding: 10px;
      position: relative;
      z-index: 1;
      cursor: default;
    }
  }

  // No bottom border on the last entry of a section.
  .account:last-child,
  & > div:last-child .status {
    border-bottom: 0;
  }
}
|
||||||
|
|
||||||
.search-results__hashtag {
|
.search-results__hashtag {
|
||||||
display: block;
|
display: block;
|
||||||
padding: 10px;
|
padding: 10px;
|
||||||
|
|
|
@ -9,6 +9,7 @@ class StatusFilter
|
||||||
end
|
end
|
||||||
|
|
||||||
def filtered?
|
def filtered?
|
||||||
|
return false if !account.nil? && account.id == status.account_id
|
||||||
blocked_by_policy? || (account_present? && filtered_status?) || silenced_account?
|
blocked_by_policy? || (account_present? && filtered_status?) || silenced_account?
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,8 @@
|
||||||
class Favourite < ApplicationRecord
|
class Favourite < ApplicationRecord
|
||||||
include Paginable
|
include Paginable
|
||||||
|
|
||||||
|
update_index('statuses#status', :status) if Chewy.enabled?
|
||||||
|
|
||||||
belongs_to :account, inverse_of: :favourites
|
belongs_to :account, inverse_of: :favourites
|
||||||
belongs_to :status, inverse_of: :favourites, counter_cache: true
|
belongs_to :status, inverse_of: :favourites, counter_cache: true
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,8 @@ class Status < ApplicationRecord
|
||||||
include Cacheable
|
include Cacheable
|
||||||
include StatusThreadingConcern
|
include StatusThreadingConcern
|
||||||
|
|
||||||
|
update_index('statuses#status', :proper) if Chewy.enabled?
|
||||||
|
|
||||||
enum visibility: [:public, :unlisted, :private, :direct], _suffix: :visibility
|
enum visibility: [:public, :unlisted, :private, :direct], _suffix: :visibility
|
||||||
|
|
||||||
belongs_to :application, class_name: 'Doorkeeper::Application', optional: true
|
belongs_to :application, class_name: 'Doorkeeper::Application', optional: true
|
||||||
|
@ -78,6 +80,22 @@ class Status < ApplicationRecord
|
||||||
|
|
||||||
delegate :domain, to: :account, prefix: true
|
delegate :domain, to: :account, prefix: true
|
||||||
|
|
||||||
|
# Returns the unique ids of the accounts that may find this status through
# full-text search: the author plus every account that was mentioned in it,
# favourited it or reblogged it.
#
# @param preloaded [#mentions, #favourites, #reblogs, nil] optional object
#   whose three readers each return a hash of status id => [account ids]
#   (the crutches built by StatusesIndex). When nil, the ids are plucked
#   from this status' own associations instead.
# @return [Array<Integer>] account ids, author first, without duplicates
def searchable_by(preloaded = nil)
  ids = [account_id]

  if preloaded.nil?
    ids += mentions.pluck(:account_id)
    ids += favourites.pluck(:account_id)
    ids += reblogs.pluck(:account_id)
  else
    # A status missing from a preloaded hash simply has no such interactions.
    ids += preloaded.mentions[id] || []
    ids += preloaded.favourites[id] || []
    ids += preloaded.reblogs[id] || []
  end

  ids.uniq
end
|
||||||
|
|
||||||
def reply?
|
def reply?
|
||||||
!in_reply_to_id.nil? || attributes['reply']
|
!in_reply_to_id.nil? || attributes['reply']
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,21 +1,43 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
class SearchService < BaseService
|
class SearchService < BaseService
|
||||||
attr_accessor :query
|
attr_accessor :query, :account, :limit, :resolve
|
||||||
|
|
||||||
def call(query, limit, resolve = false, account = nil)
|
def call(query, limit, resolve = false, account = nil)
|
||||||
@query = query
|
@query = query
|
||||||
|
@account = account
|
||||||
|
@limit = limit
|
||||||
|
@resolve = resolve
|
||||||
|
|
||||||
default_results.tap do |results|
|
default_results.tap do |results|
|
||||||
if url_query?
|
if url_query?
|
||||||
results.merge!(url_resource_results) unless url_resource.nil?
|
results.merge!(url_resource_results) unless url_resource.nil?
|
||||||
elsif query.present?
|
elsif query.present?
|
||||||
results[:accounts] = AccountSearchService.new.call(query, limit, account, resolve: resolve)
|
results[:accounts] = perform_accounts_search! if account_searchable?
|
||||||
results[:hashtags] = Tag.search_for(query.gsub(/\A#/, ''), limit) unless query.start_with?('@')
|
results[:statuses] = perform_statuses_search! if full_text_searchable?
|
||||||
|
results[:hashtags] = perform_hashtags_search! if hashtag_searchable?
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
# Delegates the account part of the search to AccountSearchService, passing
# along the query, limit, searching account and the resolve flag stored by
# #call.
def perform_accounts_search!
  AccountSearchService.new.call(query, limit, account, resolve: resolve)
end
|
||||||
|
|
||||||
|
# Runs the full-text status search for the current account.
#
# Only documents whose `searchable_by` field contains the account id are
# considered; the query must match all terms (`operator: 'and'`) across the
# plain and stemmed text fields. Results are then passed through
# StatusFilter so statuses the account should not see are dropped.
#
# @return [Array<Status>] at most `limit` statuses (fewer when some are
#   filtered out after loading)
def perform_statuses_search!
  statuses = StatusesIndex.filter(term: { searchable_by: account.id })
                          .query(multi_match: { type: 'most_fields', query: query, operator: 'and', fields: %w(text text.stemmed) })
                          .limit(limit)
                          .objects
                          .compact # index hits whose database record is gone load as nil

  statuses.reject { |status| StatusFilter.new(status, account).filtered? }
end
|
||||||
|
|
||||||
|
# Searches tags for the query, ignoring a single leading '#'.
def perform_hashtags_search!
  # The pattern is anchored at the start of the string, so it can match at
  # most once; `sub` is sufficient.
  Tag.search_for(query.sub(/\A#/, ''), limit)
end
|
||||||
|
|
||||||
def default_results
|
def default_results
|
||||||
{ accounts: [], hashtags: [], statuses: [] }
|
{ accounts: [], hashtags: [], statuses: [] }
|
||||||
end
|
end
|
||||||
|
@ -35,4 +57,17 @@ class SearchService < BaseService
|
||||||
def url_resource_symbol
|
def url_resource_symbol
|
||||||
url_resource.class.name.downcase.pluralize.to_sym
|
url_resource.class.name.downcase.pluralize.to_sym
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Whether the full-text status search should run for this request.
#
# Requires Chewy to be enabled and a searching account to be present. A
# query without spaces that starts with '#' or contains '@' is skipped.
#
# @return [Boolean]
def full_text_searchable?
  return false unless Chewy.enabled?
  return false if account.nil?

  single_token = !query.include?(' ')
  tag_or_handle = query.start_with?('#') || query.include?('@')

  !(single_token && tag_or_handle)
end
|
||||||
|
|
||||||
|
# Whether the account search should run: it is skipped only when the query
# contains both an '@' and a space.
#
# @return [Boolean]
def account_searchable?
  !(query.include?('@') && query.include?(' '))
end
|
||||||
|
|
||||||
|
# Whether the hashtag search should run: it is skipped when the query
# contains an '@' anywhere.
#
# @return [Boolean]
def hashtag_searchable?
  !query.include?('@')
end
|
||||||
end
|
end
|
||||||
|
|
22
config/initializers/chewy.rb
Normal file
22
config/initializers/chewy.rb
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
# Chewy (Elasticsearch) configuration, driven by environment variables:
#   ES_ENABLED - search is enabled only when this is exactly "true"
#   ES_HOST    - defaults to "localhost"
#   ES_PORT    - defaults to 9200
#   ES_PREFIX  - index name prefix; falls back to REDIS_NAMESPACE, else nil
enabled         = ENV['ES_ENABLED'] == 'true'
host            = ENV.fetch('ES_HOST', 'localhost')
port            = ENV.fetch('ES_PORT', 9200)
fallback_prefix = ENV.fetch('REDIS_NAMESPACE', nil)
prefix          = ENV.fetch('ES_PREFIX', fallback_prefix)

Chewy.settings = {
  host: "#{host}:#{port}",
  prefix: prefix,
  enabled: enabled,
  journal: false,
}

# When search is disabled, index updates are bypassed entirely; otherwise
# they go through the :sidekiq strategy.
Chewy.root_strategy = enabled ? :sidekiq : :bypass

# Adds Chewy.enabled?, which the models and SearchService use to guard
# anything that touches the index.
module Chewy
  class << self
    # @return [Boolean] the `enabled` flag stored in Chewy.settings above
    def enabled?
      settings[:enabled]
    end
  end
end
|
|
@ -19,6 +19,17 @@ services:
|
||||||
# volumes:
|
# volumes:
|
||||||
# - ./redis:/data
|
# - ./redis:/data
|
||||||
|
|
||||||
|
# es:
|
||||||
|
# restart: always
|
||||||
|
# image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.1.3
|
||||||
|
# environment:
|
||||||
|
# - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
|
||||||
|
# networks:
|
||||||
|
# - internal_network
|
||||||
|
#### Uncomment to enable ES persistence
|
||||||
|
## volumes:
|
||||||
|
## - ./elasticsearch:/usr/share/elasticsearch/data
|
||||||
|
|
||||||
web:
|
web:
|
||||||
build: .
|
build: .
|
||||||
image: gargron/mastodon
|
image: gargron/mastodon
|
||||||
|
@ -33,6 +44,7 @@ services:
|
||||||
depends_on:
|
depends_on:
|
||||||
- db
|
- db
|
||||||
- redis
|
- redis
|
||||||
|
# - es
|
||||||
volumes:
|
volumes:
|
||||||
- ./public/assets:/mastodon/public/assets
|
- ./public/assets:/mastodon/public/assets
|
||||||
- ./public/packs:/mastodon/public/packs
|
- ./public/packs:/mastodon/public/packs
|
||||||
|
|
|
@ -25,6 +25,10 @@ RSpec.configure do |config|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
config.before :suite do
|
||||||
|
Chewy.strategy(:bypass)
|
||||||
|
end
|
||||||
|
|
||||||
config.after :suite do
|
config.after :suite do
|
||||||
gc_counter = 0
|
gc_counter = 0
|
||||||
FileUtils.rm_rf(Dir["#{Rails.root}/spec/test_files/"])
|
FileUtils.rm_rf(Dir["#{Rails.root}/spec/test_files/"])
|
||||||
|
|
Loading…
Reference in a new issue