~cytrogen/masto-fe

501d6197c4a32172e2340c90379b9c3fdb925c08 — Claire 2 years ago fbb4de3
Change automatic post deletion thresholds and load detection (#24614)

1 files changed, 25 insertions(+), 22 deletions(-)

M app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb
M app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb => app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb +25 -22
@@ 7,28 7,30 @@ class Scheduler::AccountsStatusesCleanupScheduler
  # This limit is mostly to be nice to the fediverse at large and not
  # generate too much traffic.
  # This also helps limit the running time of the scheduler itself.
  MAX_BUDGET         = 150
  MAX_BUDGET         = 300

  # This is an attempt to spread the load across instances, as various
  # accounts are likely to have various followers.
  # This is an attempt to spread the load across remote servers, as
  # spreading deletions across diverse accounts is likely to spread
  # the deletion across diverse followers. It also helps each individual
  # user see some effect sooner.
  PER_ACCOUNT_BUDGET = 5

  # This is an attempt to limit the workload generated by status removal
  # jobs to something the particular instance can handle.
  PER_THREAD_BUDGET  = 6

  # Those avoid loading an instance that is already under load
  MAX_DEFAULT_SIZE    = 200
  MAX_DEFAULT_LATENCY = 5
  MAX_PUSH_SIZE       = 500
  MAX_PUSH_LATENCY    = 10

  # 'pull' queue has lower priority jobs, and it's unlikely that pushing
  # deletes would cause much issues with this queue if it didn't cause issues
  # with default and push. Yet, do not enqueue deletes if the instance is
  # lagging behind too much.
  MAX_PULL_SIZE       = 10_000
  MAX_PULL_LATENCY    = 5.minutes.to_i
  # jobs to something the particular server can handle.
  PER_THREAD_BUDGET  = 5

  # These are latency limits on various queues above which a server is
  # considered to be under load, causing the auto-deletion to be entirely
  # skipped for that run.
  LOAD_LATENCY_THRESHOLDS = {
    default: 5,
    push: 10,
    # The `pull` queue has lower priority jobs, and it's unlikely that
    # pushing deletes would cause many issues with this queue if it didn't
    # cause issues with `default` and `push`. Yet, do not enqueue deletes
    # if the instance is lagging behind too much.
    pull: 5.minutes.to_i,
  }.freeze

  sidekiq_options retry: 0, lock: :until_executed, lock_ttl: 1.day.to_i



@@ 62,19 64,20 @@ class Scheduler::AccountsStatusesCleanupScheduler
  end

  # Computes how many post deletions may be enqueued during one run.
  #
  # Every deletion is a `RemovalWorker` job on the `default` queue, and each
  # of those may fan out into many `ActivityPub::DeliveryWorker` jobs on the
  # `push` queue. The budget is therefore scaled by the combined concurrency
  # of the Sidekiq processes serving `push`, and is capped at MAX_BUDGET.
  def compute_budget
    push_processes = Sidekiq::ProcessSet.new.select { |process| process['queues'].include?('push') }
    push_threads = push_processes.sum { |process| process['concurrency'] }

    scaled_budget = PER_THREAD_BUDGET * push_threads
    scaled_budget < MAX_BUDGET ? scaled_budget : MAX_BUDGET
  end

  # Reports whether the queues are considered to be under load.
  #
  # NOTE(review): this span is taken from a diff and shows two versions of
  # the check. The first expression compares the `default`, `push` and `pull`
  # queues against the MAX_*_SIZE / MAX_*_LATENCY constants; the second asks
  # queue_under_load? about every entry of LOAD_LATENCY_THRESHOLDS. As
  # written, only the last expression supplies the return value.
  def under_load?
    queue_under_load?('default', MAX_DEFAULT_SIZE, MAX_DEFAULT_LATENCY) || queue_under_load?('push', MAX_PUSH_SIZE, MAX_PUSH_LATENCY) || queue_under_load?('pull', MAX_PULL_SIZE, MAX_PULL_LATENCY)
    LOAD_LATENCY_THRESHOLDS.any? { |queue, max_latency| queue_under_load?(queue, max_latency) }
  end

  private

  def queue_under_load?(name, max_size, max_latency)
    queue = Sidekiq::Queue.new(name)
    queue.size > max_size || queue.latency > max_latency
  def queue_under_load?(name, max_latency)
    Sidekiq::Queue.new(name).latency > max_latency
  end

  def last_processed_id