~cytrogen/masto-fe

38b2974a83a0fcbfffb01f87c332d70dd4f49ef9 — Matt Jankowski 2 years ago f1c1dd0
Extract AccountSearch concern from Account (#24716)

3 files changed, 141 insertions(+), 99 deletions(-)

M .rubocop_todo.yml
M app/models/account.rb
A app/models/concerns/account_search.rb
M .rubocop_todo.yml => .rubocop_todo.yml +0 -2
@@ 1994,7 1994,6 @@ Style/HashAsLastArrayItem:
  Exclude:
    - 'app/controllers/admin/statuses_controller.rb'
    - 'app/controllers/api/v1/statuses_controller.rb'
    - 'app/models/account.rb'
    - 'app/models/concerns/account_counters.rb'
    - 'app/models/concerns/status_threading_concern.rb'
    - 'app/models/status.rb'


@@ 2068,7 2067,6 @@ Style/MapToHash:
# SupportedStyles: literals, strict
Style/MutableConstant:
  Exclude:
    - 'app/models/account.rb'
    - 'app/models/tag.rb'
    - 'app/services/delete_account_service.rb'
    - 'config/initializers/twitter_regex.rb'

M app/models/account.rb => app/models/account.rb +1 -97
@@ 78,6 78,7 @@ class Account < ApplicationRecord
  include DomainNormalizable
  include DomainMaterializable
  include AccountMerging
  include AccountSearch

  enum protocol: { ostatus: 0, activitypub: 1 }
  enum suspension_origin: { local: 0, remote: 1 }, _prefix: true


@@ 410,14 411,6 @@ class Account < ApplicationRecord
  end

  class << self
    # Characters that generate_query_for_search replaces with spaces before the
    # terms are embedded in a quoted tsquery string.
    DISALLOWED_TSQUERY_CHARACTERS = /['?\\:‘’]/
    # Weighted search document: display name (weight A), username (weight B)
    # and domain (weight C, NULL treated as an empty string).
    TEXTSEARCH = "(setweight(to_tsvector('simple', accounts.display_name), 'A') || setweight(to_tsvector('simple', accounts.username), 'B') || setweight(to_tsvector('simple', coalesce(accounts.domain, '')), 'C'))"

    # The fragments below read from the `account_stats` join, which every query
    # using them must alias as `s`.
    # followers / (following + 1), with negative or NULL counters clamped to 0.
    REPUTATION_SCORE_FUNCTION = '(greatest(0, coalesce(s.followers_count, 0)) / (greatest(0, coalesce(s.following_count, 0)) + 1.0))'
    # log(followers + 2), with negative or NULL counters clamped to 0.
    FOLLOWERS_SCORE_FUNCTION  = 'log(greatest(0, coalesce(s.followers_count, 0)) + 2)'
    # 0 when last_status_at is NULL; otherwise a Gaussian-shaped decay
    # exp(-(max(0, d - 30)^2) / (2 * v)) with v = -(30^2) / (2 * ln(0.3)),
    # where d = abs(extract(DAY FROM age(last_status_at))).
    # NOTE(review): extract(DAY FROM <interval>) on an age() result looks like
    # it yields only the day field, not the total number of days - confirm.
    TIME_DISTANCE_FUNCTION    = '(case when s.last_status_at is null then 0 else exp(-1.0 * ((greatest(0, abs(extract(DAY FROM age(s.last_status_at))) - 30.0)^2) / (2.0 * ((-1.0 * 30^2) / (2.0 * ln(0.3)))))) end)'
    # Arithmetic mean of the three scores above; multiplied with the text rank.
    BOOST                     = "((#{REPUTATION_SCORE_FUNCTION} + #{FOLLOWERS_SCORE_FUNCTION} + #{TIME_DISTANCE_FUNCTION}) / 3.0)"

    # Returns the inherited read-only attribute list with the three counter
    # columns removed.
    def readonly_attributes
      counter_columns = %w(statuses_count following_count followers_count)
      super - counter_columns
    end


@@ 427,37 420,6 @@ class Account < ApplicationRecord
      DeliveryFailureTracker.without_unavailable(urls)
    end

    # Full-text account search without a viewer context.
    #
    # Matches `terms` against TEXTSEARCH, ranks rows by BOOST * ts_rank_cd and
    # skips suspended accounts, moved accounts, and accounts with a NULL domain
    # whose user is not both approved and confirmed.
    #
    # @param terms [String] raw search terms
    # @param limit [Integer] maximum number of rows
    # @param offset [Integer] number of rows to skip
    # @return [Array] records returned by find_by_sql, with :account_stat preloaded
    def search_for(terms, limit: 10, offset: 0)
      tsquery_text = generate_query_for_search(terms)

      statement = <<-SQL.squish
        SELECT
          accounts.*,
          #{BOOST} * ts_rank_cd(#{TEXTSEARCH}, to_tsquery('simple', :tsquery), 32) AS rank
        FROM accounts
        LEFT JOIN users ON accounts.id = users.account_id
        LEFT JOIN account_stats AS s ON accounts.id = s.account_id
        WHERE to_tsquery('simple', :tsquery) @@ #{TEXTSEARCH}
          AND accounts.suspended_at IS NULL
          AND accounts.moved_to_account_id IS NULL
          AND (accounts.domain IS NOT NULL OR (users.approved = TRUE AND users.confirmed_at IS NOT NULL))
        ORDER BY rank DESC
        LIMIT :limit OFFSET :offset
      SQL

      bindings = { limit: limit, offset: offset, tsquery: tsquery_text }

      find_by_sql([statement, bindings]).tap do |matches|
        ActiveRecord::Associations::Preloader.new.preload(matches, :account_stat)
      end
    end

    # Full-text account search from the point of view of `account`.
    #
    # @param terms [String] raw search terms
    # @param account [#id] the account whose id is bound as :id in the query
    # @param limit [Integer] maximum number of rows
    # @param following [Boolean] selects which SQL template is used
    # @param offset [Integer] number of rows to skip
    # @return [Array] records returned by find_by_sql, with :account_stat preloaded
    def advanced_search_for(terms, account, limit: 10, following: false, offset: 0)
      tsquery_text = generate_query_for_search(terms)
      template     = advanced_search_for_sql_template(following)
      bindings     = { id: account.id, limit: limit, offset: offset, tsquery: tsquery_text }

      find_by_sql([template, bindings]).tap do |matches|
        ActiveRecord::Associations::Preloader.new.preload(matches, :account_stat)
      end
    end

    def from_text(text)
      return [] if text.blank?



@@ 471,64 433,6 @@ class Account < ApplicationRecord
        EntityCache.instance.mention(username, domain)
      end
    end

    private

    # Builds the quoted prefix-search string that is bound as :tsquery.
    #
    # Every character matched by DISALLOWED_TSQUERY_CHARACTERS is replaced by
    # a space, then the result is wrapped as `' <terms> ':*`.
    #
    # The final ":*" requests prefix matching. The trailing space does not
    # seem to serve any purpose, but `to_tsquery` behaves differently with and
    # without the leading space when the terms start with `./`, `../`, or
    # `.. `. The reason is not understood, so the query is kept as it is.
    def generate_query_for_search(unsanitized_terms)
      sanitized = unsanitized_terms.gsub(DISALLOWED_TSQUERY_CHARACTERS, ' ')

      format("' %s ':*", sanitized)
    end

    # Returns the squished SQL text used by advanced_search_for.
    #
    # Both templates expect the named binds :id, :tsquery, :limit and :offset.
    #
    # @param following [Boolean] truthy selects the template restricted to the
    #   accounts followed by :id; falsy selects the unrestricted template
    # @return [String] a single-line SQL statement
    def advanced_search_for_sql_template(following)
      if following
        # Candidates are limited to the `first_degree` set: every account that
        # :id follows, plus :id itself. The rank is multiplied by
        # (count(f.id) + 1), where `f` joins follow rows going from the
        # candidate to :id.
        <<-SQL.squish
          WITH first_degree AS (
            SELECT target_account_id
            FROM follows
            WHERE account_id = :id
            UNION ALL
            SELECT :id
          )
          SELECT
            accounts.*,
            (count(f.id) + 1) * #{BOOST} * ts_rank_cd(#{TEXTSEARCH}, to_tsquery('simple', :tsquery), 32) AS rank
          FROM accounts
          LEFT OUTER JOIN follows AS f ON (accounts.id = f.account_id AND f.target_account_id = :id)
          LEFT JOIN account_stats AS s ON accounts.id = s.account_id
          WHERE accounts.id IN (SELECT * FROM first_degree)
            AND to_tsquery('simple', :tsquery) @@ #{TEXTSEARCH}
            AND accounts.suspended_at IS NULL
            AND accounts.moved_to_account_id IS NULL
          GROUP BY accounts.id, s.id
          ORDER BY rank DESC
          LIMIT :limit OFFSET :offset
        SQL
      else
        # No restriction to followed accounts. `followed` counts follow rows
        # between the candidate and :id in either direction and is the primary
        # sort key; `rank` breaks ties. Accounts with a NULL domain must have
        # an approved and confirmed user.
        <<-SQL.squish
          SELECT
            accounts.*,
            #{BOOST} * ts_rank_cd(#{TEXTSEARCH}, to_tsquery('simple', :tsquery), 32) AS rank,
            count(f.id) AS followed
          FROM accounts
          LEFT OUTER JOIN follows AS f ON (accounts.id = f.account_id AND f.target_account_id = :id) OR (accounts.id = f.target_account_id AND f.account_id = :id)
          LEFT JOIN users ON accounts.id = users.account_id
          LEFT JOIN account_stats AS s ON accounts.id = s.account_id
          WHERE to_tsquery('simple', :tsquery) @@ #{TEXTSEARCH}
            AND accounts.suspended_at IS NULL
            AND accounts.moved_to_account_id IS NULL
            AND (accounts.domain IS NOT NULL OR (users.approved = TRUE AND users.confirmed_at IS NOT NULL))
          GROUP BY accounts.id, s.id
          ORDER BY followed DESC, rank DESC
          LIMIT :limit OFFSET :offset
        SQL
      end
    end
  end

  def emojis

A app/models/concerns/account_search.rb => app/models/concerns/account_search.rb +140 -0
@@ 0,0 1,140 @@
# frozen_string_literal: true

# Full-text account search. The including model gains the class methods
# `search_for` and `advanced_search_for`, which run raw SQL through
# `find_by_sql` and preload `:account_stat` on the returned records.
module AccountSearch
  extend ActiveSupport::Concern

  # Characters replaced with spaces before the terms are embedded in a quoted
  # tsquery string (see generate_query_for_search).
  DISALLOWED_TSQUERY_CHARACTERS = /['?\\:‘’]/

  # Weighted search document: display name (weight A), username (weight B)
  # and domain (weight C, NULL treated as an empty string).
  TEXT_SEARCH_RANKS = <<~SQL.squish.freeze
    (
        setweight(to_tsvector('simple', accounts.display_name), 'A') ||
        setweight(to_tsvector('simple', accounts.username), 'B') ||
        setweight(to_tsvector('simple', coalesce(accounts.domain, '')), 'C')
    )
  SQL

  # The score fragments below read from the `account_stats` join, which every
  # query using them must alias as `s`.

  # followers / (following + 1), with negative or NULL counters clamped to 0.
  REPUTATION_SCORE_FUNCTION = <<~SQL.squish.freeze
    (
        greatest(0, coalesce(s.followers_count, 0)) / (
            greatest(0, coalesce(s.following_count, 0)) + 1.0
        )
    )
  SQL

  # log(followers + 2), with negative or NULL counters clamped to 0.
  FOLLOWERS_SCORE_FUNCTION = <<~SQL.squish.freeze
    log(
        greatest(0, coalesce(s.followers_count, 0)) + 2
    )
  SQL

  # 0 when last_status_at is NULL; otherwise a Gaussian-shaped decay
  #   exp(-(max(0, d - 30)^2) / (2 * v))   with   v = -(30^2) / (2 * ln(0.3))
  # where d = abs(extract(DAY FROM age(last_status_at))). The score is 1.0
  # while d <= 30 and drops to 0.3 at d = 60.
  #
  # NOTE(review): extract(DAY FROM <interval>) on an age() result looks like
  # it yields only the day field of the interval rather than the total number
  # of days, in which case d would rarely exceed 30 - confirm against the
  # PostgreSQL documentation before relying on the decay.
  TIME_DISTANCE_FUNCTION = <<~SQL.squish.freeze
    (
        case
            when s.last_status_at is null then 0
            else exp(
                -1.0 * (
                    (greatest(0, abs(extract(DAY FROM age(s.last_status_at))) - 30.0)^2) /
                    (2.0 * ((-1.0 * 30^2) / (2.0 * ln(0.3))))
                )
            )
        end
    )
  SQL

  # Arithmetic mean of the three scores above; multiplied with the text rank.
  BOOST = <<~SQL.squish.freeze
    (
        (#{REPUTATION_SCORE_FUNCTION} + #{FOLLOWERS_SCORE_FUNCTION} + #{TIME_DISTANCE_FUNCTION}) / 3.0
    )
  SQL

  # Search without a viewer. Binds: :tsquery, :limit, :offset.
  # Skips suspended accounts, moved accounts, and accounts with a NULL domain
  # whose user is not both approved and confirmed.
  BASIC_SEARCH_SQL = <<~SQL.squish.freeze
    SELECT
      accounts.*,
      #{BOOST} * ts_rank_cd(#{TEXT_SEARCH_RANKS}, to_tsquery('simple', :tsquery), 32) AS rank
    FROM accounts
    LEFT JOIN users ON accounts.id = users.account_id
    LEFT JOIN account_stats AS s ON accounts.id = s.account_id
    WHERE to_tsquery('simple', :tsquery) @@ #{TEXT_SEARCH_RANKS}
      AND accounts.suspended_at IS NULL
      AND accounts.moved_to_account_id IS NULL
      AND (accounts.domain IS NOT NULL OR (users.approved = TRUE AND users.confirmed_at IS NOT NULL))
    ORDER BY rank DESC
    LIMIT :limit OFFSET :offset
  SQL

  # Search restricted to the accounts :id follows, plus :id itself
  # (`first_degree`). Binds: :id, :tsquery, :limit, :offset.
  # The rank is multiplied by (count(f.id) + 1), where `f` joins follow rows
  # going from the candidate account to :id.
  ADVANCED_SEARCH_WITH_FOLLOWING = <<~SQL.squish.freeze
    WITH first_degree AS (
      SELECT target_account_id
      FROM follows
      WHERE account_id = :id
      UNION ALL
      SELECT :id
    )
    SELECT
      accounts.*,
      (count(f.id) + 1) * #{BOOST} * ts_rank_cd(#{TEXT_SEARCH_RANKS}, to_tsquery('simple', :tsquery), 32) AS rank
    FROM accounts
    LEFT OUTER JOIN follows AS f ON (accounts.id = f.account_id AND f.target_account_id = :id)
    LEFT JOIN account_stats AS s ON accounts.id = s.account_id
    WHERE accounts.id IN (SELECT * FROM first_degree)
      AND to_tsquery('simple', :tsquery) @@ #{TEXT_SEARCH_RANKS}
      AND accounts.suspended_at IS NULL
      AND accounts.moved_to_account_id IS NULL
    GROUP BY accounts.id, s.id
    ORDER BY rank DESC
    LIMIT :limit OFFSET :offset
  SQL

  # Search over all accounts from the point of view of :id.
  # Binds: :id, :tsquery, :limit, :offset.
  # `followed` counts follow rows between the candidate and :id in either
  # direction and is the primary sort key; `rank` breaks ties.
  ADVANCED_SEARCH_WITHOUT_FOLLOWING = <<~SQL.squish.freeze
    SELECT
      accounts.*,
      #{BOOST} * ts_rank_cd(#{TEXT_SEARCH_RANKS}, to_tsquery('simple', :tsquery), 32) AS rank,
      count(f.id) AS followed
    FROM accounts
    LEFT OUTER JOIN follows AS f ON
      (accounts.id = f.account_id AND f.target_account_id = :id) OR (accounts.id = f.target_account_id AND f.account_id = :id)
    LEFT JOIN users ON accounts.id = users.account_id
    LEFT JOIN account_stats AS s ON accounts.id = s.account_id
    WHERE to_tsquery('simple', :tsquery) @@ #{TEXT_SEARCH_RANKS}
      AND accounts.suspended_at IS NULL
      AND accounts.moved_to_account_id IS NULL
      AND (accounts.domain IS NOT NULL OR (users.approved = TRUE AND users.confirmed_at IS NOT NULL))
    GROUP BY accounts.id, s.id
    ORDER BY followed DESC, rank DESC
    LIMIT :limit OFFSET :offset
  SQL

  class_methods do
    # Full-text account search without a viewer context.
    #
    # @param terms [String] raw search terms
    # @param limit [Integer] maximum number of rows
    # @param offset [Integer] number of rows to skip
    # @return [Array] records returned by find_by_sql, with :account_stat preloaded
    def search_for(terms, limit: 10, offset: 0)
      tsquery = generate_query_for_search(terms)

      search_with_account_stats(BASIC_SEARCH_SQL, { limit: limit, offset: offset, tsquery: tsquery })
    end

    # Full-text account search from the point of view of `account`.
    #
    # @param terms [String] raw search terms
    # @param account [#id] the account whose id is bound as :id in the query
    # @param limit [Integer] maximum number of rows
    # @param following [Boolean] when truthy, only accounts followed by
    #   `account` (and `account` itself) are searched
    # @param offset [Integer] number of rows to skip
    # @return [Array] records returned by find_by_sql, with :account_stat preloaded
    def advanced_search_for(terms, account, limit: 10, following: false, offset: 0)
      tsquery = generate_query_for_search(terms)
      sql_template = following ? ADVANCED_SEARCH_WITH_FOLLOWING : ADVANCED_SEARCH_WITHOUT_FOLLOWING

      search_with_account_stats(sql_template, { id: account.id, limit: limit, offset: offset, tsquery: tsquery })
    end

    private

    # Runs `sql_template` with the named `bindings` and preloads
    # :account_stat on the resulting records before returning them.
    def search_with_account_stats(sql_template, bindings)
      find_by_sql([sql_template, bindings]).tap do |records|
        ActiveRecord::Associations::Preloader.new.preload(records, :account_stat)
      end
    end

    # Builds the quoted prefix-search string that is bound as :tsquery.
    # Every character matched by DISALLOWED_TSQUERY_CHARACTERS is replaced by
    # a space, then the result is wrapped as `' <terms> ':*`.
    def generate_query_for_search(unsanitized_terms)
      terms = unsanitized_terms.gsub(DISALLOWED_TSQUERY_CHARACTERS, ' ')

      # The final ":*" requests prefix matching.
      # The terms are padded with a space on both sides inside the quotes. The
      # trailing space does not seem to serve any purpose, but `to_tsquery`
      # behaves differently with and without the leading space when the terms
      # start with `./`, `../`, or `.. `. The reason is not understood, so the
      # query is kept exactly as it is.
      "' #{terms} ':*"
    end
  end
end