~cytrogen/masto-fe

72423bc8f69ea54faf29eefe6aff32f6d27c217e — Eugen Rochko 2 years ago 60fbb0f
Change account search tokenizer and queries (#26378)

2 files changed, 144 insertions(+), 89 deletions(-)

M app/chewy/accounts_index.rb
M app/services/account_search_service.rb
M app/chewy/accounts_index.rb => app/chewy/accounts_index.rb +1 -1
@@ 33,7 33,7 @@ class AccountsIndex < Chewy::Index
      },

      verbatim: {
        tokenizer: 'whitespace',
        tokenizer: 'standard',
        filter: %w(lowercase asciifolding cjk_width),
      },


M app/services/account_search_service.rb => app/services/account_search_service.rb +143 -88
@@ 8,6 8,143 @@ class AccountSearchService < BaseService
  # Min. number of characters to look for non-exact matches
  MIN_QUERY_LENGTH = 5

  # Base builder for the Elasticsearch account search request.
  #
  # It wraps a text-matching clause in a `function_score` query (reputation,
  # follower count and recency of the last status) and, depending on the
  # :following option, either restricts or boosts the accounts followed by the
  # searching account.
  #
  # This class never defines `core_query`; the subclasses are expected to
  # provide it as a private method returning the text-matching clause.
  class QueryBuilder
    # @param query [String] search terms, stored for use by `core_query`
    # @param account [Object, nil] the searching account; when nil no
    #   following-based filter or boost is added
    # @param options [Hash] only the :following key is read by this class
    def initialize(query, account, options = {})
      @query = query
      @account = account
      @options = options
    end

    # Assembles the full query and hands it to AccountsIndex.query.
    #
    # @return whatever AccountsIndex.query returns (presumably a chainable
    #   Chewy request — confirm against the index class)
    def build
      AccountsIndex.query(
        bool: {
          must: {
            function_score: {
              query: {
                bool: {
                  must: must_clauses,
                },
              },

              functions: [
                reputation_score_function,
                followers_score_function,
                time_distance_function,
              ],
            },
          },

          should: should_clauses,
        }
      )
    end

    private

    # Clauses every hit has to satisfy: the core text query, plus the
    # following-only filter when an account is given and :following is set.
    def must_clauses
      if @account && @options[:following]
        [core_query, only_following_query]
      else
        [core_query]
      end
    end

    # Optional clauses: the following boost is only added when an account is
    # given and results are not already restricted to followed accounts.
    def should_clauses
      if @account && !@options[:following]
        [boost_following_query]
      else
        []
      end
    end

    # This function limits results to only the accounts the user is following
    def only_following_query
      {
        terms: {
          id: following_ids,
        },
      }
    end

    # This function promotes accounts the user is following
    def boost_following_query
      {
        terms: {
          id: following_ids,
          boost: 100,
        },
      }
    end

    # This function deranks accounts that follow more people than follow them
    def reputation_score_function
      {
        script_score: {
          script: {
            # `+ 0.0` turns the numerator into a floating-point value so the
            # division is not truncated.
            source: "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + Math.max(doc['following_count'].value, 0) + 1)",
          },
        },
      }
    end

    # This function promotes accounts that have more followers
    def followers_score_function
      {
        script_score: {
          script: {
            # `+ 0.0` forces floating-point division, mirroring
            # reputation_score_function. Without it, x / (x + 1) on an
            # integer-typed field truncates to 0 for every x (assumes
            # followers_count is mapped as an integer type — the mapping is
            # not visible here; the change is harmless otherwise).
            source: "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + 1)",
          },
        },
      }
    end

    # This function deranks accounts that haven't posted in a long time
    def time_distance_function
      {
        gauss: {
          last_status_at: {
            scale: '30d',
            offset: '30d',
            decay: 0.3,
          },
        },
      }
    end

    # IDs targeted by the account's active relationships plus the account's
    # own ID, memoized so the lookup runs at most once per builder.
    def following_ids
      @following_ids ||= @account.active_relationships.pluck(:target_account_id) + [@account.id]
    end
  end

  # Query builder whose core clause is a `bool_prefix` multi_match over the
  # username and display name fields.
  class AutocompleteQueryBuilder < QueryBuilder
    private

    # Text-matching clause consumed by QueryBuilder#must_clauses.
    def core_query
      searched_fields = %w(username username.* display_name display_name.*)

      { multi_match: { query: @query, type: 'bool_prefix', fields: searched_fields } }
    end
  end

  # Query builder whose core clause is a `most_fields` multi_match over the
  # username, display name and text fields, with username and display name
  # weighted by 2 and every term required (`and` operator).
  class FullQueryBuilder < QueryBuilder
    private

    # Text-matching clause consumed by QueryBuilder#must_clauses.
    def core_query
      searched_fields = %w(username^2 display_name^2 text text.*)

      { multi_match: { query: @query, type: 'most_fields', fields: searched_fields, operator: 'and' } }
    end
  end

  def call(query, account = nil, options = {})
    @query   = query&.strip&.gsub(/\A@/, '')
    @limit   = options[:limit].to_i


@@ 71,27 208,15 @@ class AccountSearchService < BaseService
  end

  def from_elasticsearch
    must_clauses   = must_clause
    should_clauses = should_clause

    if account
      return [] if options[:following] && following_ids.empty?

      if options[:following]
        must_clauses << { terms: { id: following_ids } }
      elsif following_ids.any?
        should_clauses << { terms: { id: following_ids, boost: 100 } }
    query_builder = begin
      if options[:use_searchable_text]
        FullQueryBuilder.new(terms_for_query, account, options.slice(:following))
      else
        AutocompleteQueryBuilder.new(terms_for_query, account, options.slice(:following))
      end
    end

    query     = { bool: { must: must_clauses, should: should_clauses } }
    functions = [reputation_score_function, followers_score_function, time_distance_function]

    records = AccountsIndex.query(function_score: { query: query, functions: functions })
                           .limit(limit_for_non_exact_results)
                           .offset(offset)
                           .objects
                           .compact
    records = query_builder.build.limit(limit_for_non_exact_results).offset(offset).objects.compact

    ActiveRecord::Associations::Preloader.new(records: records, associations: :account_stat)



@@ 100,76 225,6 @@ class AccountSearchService < BaseService
    nil
  end

  # Score function entry computing followers / (followers + following + 1),
  # with negative counts clamped to zero and `+ 0.0` forcing a floating-point
  # numerator.
  def reputation_score_function
    ratio_script = "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + Math.max(doc['following_count'].value, 0) + 1)"

    { script_score: { script: { source: ratio_script } } }
  end

  # Score function entry computing log10(followers + 2), with a negative
  # follower count clamped to zero.
  def followers_score_function
    log_script = "Math.log10(Math.max(doc['followers_count'].value, 0) + 2)"

    { script_score: { script: { source: log_script } } }
  end

  # Score function entry applying a gauss decay on `last_status_at`
  # (scale 30d, offset 30d, decay 0.3).
  def time_distance_function
    decay_settings = { scale: '30d', offset: '30d', decay: 0.3 }

    { gauss: { last_status_at: decay_settings } }
  end

  # Builds the mandatory `best_fields` multi_match clause (operator `or`).
  #
  # The searched fields depend on the options:
  # - :start_with_hashtag      -> only the text fields
  # - :use_searchable_text     -> name fields plus the text fields
  # - neither                  -> only the username / display name fields
  def must_clause
    searched_fields =
      if options[:start_with_hashtag]
        %w(text text.*)
      elsif options[:use_searchable_text]
        %w(username username.* display_name display_name.* text text.*)
      else
        %w(username username.* display_name display_name.*)
      end

    match = {
      query: terms_for_query,
      fields: searched_fields,
      type: 'best_fields',
      operator: 'or',
    }

    [{ multi_match: match }]
  end

  # Builds the optional clause: a `best_fields` multi_match over the username
  # and display name fields that requires all terms (`and`) and carries a
  # boost of 10.
  def should_clause
    name_match = {
      query: terms_for_query,
      fields: %w(username username.* display_name display_name.*),
      type: 'best_fields',
      operator: 'and',
      boost: 10,
    }

    [{ multi_match: name_match }]
  end

  # IDs targeted by the account's active relationships followed by the
  # account's own ID; computed once and then served from @following_ids.
  def following_ids
    return @following_ids if @following_ids

    followed = account.active_relationships.pluck(:target_account_id)
    @following_ids = followed + [account.id]
  end

  def limit_for_non_exact_results
    return 0 if @account.nil? && query.size < MIN_QUERY_LENGTH