Refactor raw SQL queries to use Arel

This commit is contained in:
Eugen Rochko 2021-03-12 07:00:05 +01:00
parent b5057c4717
commit 19a64c9687
7 changed files with 301 additions and 211 deletions

View file

@ -26,6 +26,7 @@ gem 'streamio-ffmpeg', '~> 3.0'
gem 'blurhash', '~> 0.1' gem 'blurhash', '~> 0.1'
gem 'active_model_serializers', '~> 0.10' gem 'active_model_serializers', '~> 0.10'
gem 'activerecord-cte', '~> 0.1'
gem 'addressable', '~> 2.7' gem 'addressable', '~> 2.7'
gem 'bootsnap', '~> 1.6.0', require: false gem 'bootsnap', '~> 1.6.0', require: false
gem 'browser' gem 'browser'

View file

@ -54,6 +54,8 @@ GEM
activemodel (= 5.2.4.5) activemodel (= 5.2.4.5)
activesupport (= 5.2.4.5) activesupport (= 5.2.4.5)
arel (>= 9.0) arel (>= 9.0)
activerecord-cte (0.1.1)
activerecord
activestorage (5.2.4.5) activestorage (5.2.4.5)
actionpack (= 5.2.4.5) actionpack (= 5.2.4.5)
activerecord (= 5.2.4.5) activerecord (= 5.2.4.5)
@ -695,6 +697,7 @@ PLATFORMS
DEPENDENCIES DEPENDENCIES
active_model_serializers (~> 0.10) active_model_serializers (~> 0.10)
active_record_query_trace (~> 1.8) active_record_query_trace (~> 1.8)
activerecord-cte (~> 0.1)
addressable (~> 2.7) addressable (~> 2.7)
annotate (~> 3.1) annotate (~> 3.1)
aws-sdk-s3 (~> 1.89) aws-sdk-s3 (~> 1.89)

View file

@ -0,0 +1,188 @@
# frozen_string_literal: true
class AccountSearchQueryBuilder
DISALLOWED_TSQUERY_CHARACTERS = /['?\\:]/.freeze
LANGUAGE = Arel::Nodes.build_quoted('simple').freeze
EMPTY_STRING = Arel::Nodes.build_quoted('').freeze
WEIGHT_A = Arel::Nodes.build_quoted('A').freeze
WEIGHT_B = Arel::Nodes.build_quoted('B').freeze
WEIGHT_C = Arel::Nodes.build_quoted('C').freeze
FIELDS = {
display_name: { weight: WEIGHT_A }.freeze,
username: { weight: WEIGHT_B }.freeze,
domain: { weight: WEIGHT_C, nullable: true }.freeze,
}.freeze
RANK_NORMALIZATION = 32
DEFAULT_OPTIONS = {
limit: 10,
only_following: false,
}.freeze
# @param [String] terms
# @param [Hash] options
# @option [Account] :account
# @option [Boolean] :only_following
# @option [Integer] :limit
# @option [Integer] :offset
def initialize(terms, options = {})
@terms = terms
@options = DEFAULT_OPTIONS.merge(options)
end
# @return [ActiveRecord::Relation]
def build
search_scope.tap do |scope|
scope.merge!(personalization_scope) if with_account?
if with_account? && only_following?
scope.merge!(only_following_scope)
scope.with!(first_degree_definition) # `merge!` does not handle `with`
end
end
end
# @return [Array<Account>]
def results
build.to_a
end
private
def search_scope
Account.select(projections)
.where(match_condition)
.searchable
.includes(:account_stat)
.order(rank: :desc)
.limit(limit)
.offset(offset)
end
def personalization_scope
join_condition = accounts_table.join(follows_table, Arel::Nodes::OuterJoin)
.on(accounts_table.grouping(accounts_table[:id].eq(follows_table[:account_id]).and(follows_table[:target_account_id].eq(account.id))).or(accounts_table.grouping(accounts_table[:id].eq(follows_table[:target_account_id]).and(follows_table[:account_id].eq(account.id)))))
.join_sources
Account.joins(join_condition)
.group(accounts_table[:id])
end
def only_following_scope
Account.where(accounts_table[:id].in(first_degree_table.project('*')))
end
def first_degree_definition
target_account_ids_query = follows_table.project(follows_table[:target_account_id]).where(follows_table[:account_id].eq(account.id))
account_id_query = Arel::SelectManager.new.project(account.id)
Arel::Nodes::As.new(
first_degree_table,
target_account_ids_query.union(:all, account_id_query)
)
end
def projections
rank_column = begin
if with_account?
weighted_tsrank_template.as('rank')
else
tsrank_template.as('rank')
end
end
[all_columns, rank_column]
end
def all_columns
accounts_table[Arel.star]
end
def match_condition
Arel::Nodes::InfixOperation.new('@@', tsvector_template, tsquery_template)
end
def tsrank_template
@tsrank_template ||= Arel::Nodes::NamedFunction.new('ts_rank_cd', [tsvector_template, tsquery_template, RANK_NORMALIZATION])
end
def weighted_tsrank_template
@weighted_tsrank_template ||= Arel::Nodes::Multiplication.new(weight, tsrank_template)
end
def weight
Arel::Nodes::Addition.new(follows_table[:id].count, 1)
end
def tsvector_template
return @tsvector_template if defined?(@tsvector_template)
vectors = FIELDS.keys.map do |column|
options = FIELDS[column]
vector = accounts_table[column]
vector = Arel::Nodes::NamedFunction.new('coalesce', [vector, EMPTY_STRING]) if options[:nullable]
vector = Arel::Nodes::NamedFunction.new('to_tsvector', [LANGUAGE, vector])
Arel::Nodes::NamedFunction.new('setweight', [vector, options[:weight]])
end
@tsvector_template = Arel::Nodes::Grouping.new(vectors.reduce { |memo, vector| Arel::Nodes::Concat.new(memo, vector) })
end
def query_vector
@query_vector ||= Arel::Nodes::NamedFunction.new('to_tsquery', [LANGUAGE, tsquery_template])
end
def sanitized_terms
@sanitized_terms ||= @terms.gsub(DISALLOWED_TSQUERY_CHARACTERS, ' ')
end
def tsquery_template
return @tsquery_template if defined?(@tsquery_template)
terms = [
Arel::Nodes.build_quoted("' "),
Arel::Nodes.build_quoted(sanitized_terms),
Arel::Nodes.build_quoted(" '"),
Arel::Nodes.build_quoted(':*'),
]
@tsquery_template = Arel::Nodes::NamedFunction.new('to_tsquery', [LANGUAGE, terms.reduce { |memo, term| Arel::Nodes::Concat.new(memo, term) }])
end
def account
@options[:account]
end
def with_account?
account.present?
end
def limit
@options[:limit]
end
def offset
@options[:offset]
end
def only_following?
@options[:only_following]
end
def accounts_table
Account.arel_table
end
def follows_table
Follow.arel_table
end
def first_degree_table
Arel::Table.new(:first_degree)
end
end

View file

@ -432,75 +432,6 @@ class Account < ApplicationRecord
DeliveryFailureTracker.without_unavailable(urls) DeliveryFailureTracker.without_unavailable(urls)
end end
def search_for(terms, limit = 10, offset = 0)
textsearch, query = generate_query_for_search(terms)
sql = <<-SQL.squish
SELECT
accounts.*,
ts_rank_cd(#{textsearch}, #{query}, 32) AS rank
FROM accounts
WHERE #{query} @@ #{textsearch}
AND accounts.suspended_at IS NULL
AND accounts.moved_to_account_id IS NULL
ORDER BY rank DESC
LIMIT ? OFFSET ?
SQL
records = find_by_sql([sql, limit, offset])
ActiveRecord::Associations::Preloader.new.preload(records, :account_stat)
records
end
def advanced_search_for(terms, account, limit = 10, following = false, offset = 0)
textsearch, query = generate_query_for_search(terms)
if following
sql = <<-SQL.squish
WITH first_degree AS (
SELECT target_account_id
FROM follows
WHERE account_id = ?
UNION ALL
SELECT ?
)
SELECT
accounts.*,
(count(f.id) + 1) * ts_rank_cd(#{textsearch}, #{query}, 32) AS rank
FROM accounts
LEFT OUTER JOIN follows AS f ON (accounts.id = f.account_id AND f.target_account_id = ?)
WHERE accounts.id IN (SELECT * FROM first_degree)
AND #{query} @@ #{textsearch}
AND accounts.suspended_at IS NULL
AND accounts.moved_to_account_id IS NULL
GROUP BY accounts.id
ORDER BY rank DESC
LIMIT ? OFFSET ?
SQL
records = find_by_sql([sql, account.id, account.id, account.id, limit, offset])
else
sql = <<-SQL.squish
SELECT
accounts.*,
(count(f.id) + 1) * ts_rank_cd(#{textsearch}, #{query}, 32) AS rank
FROM accounts
LEFT OUTER JOIN follows AS f ON (accounts.id = f.account_id AND f.target_account_id = ?) OR (accounts.id = f.target_account_id AND f.account_id = ?)
WHERE #{query} @@ #{textsearch}
AND accounts.suspended_at IS NULL
AND accounts.moved_to_account_id IS NULL
GROUP BY accounts.id
ORDER BY rank DESC
LIMIT ? OFFSET ?
SQL
records = find_by_sql([sql, account.id, account.id, limit, offset])
end
ActiveRecord::Associations::Preloader.new.preload(records, :account_stat)
records
end
def from_text(text) def from_text(text)
return [] if text.blank? return [] if text.blank?
@ -512,19 +443,10 @@ class Account < ApplicationRecord
TagManager.instance.normalize_domain(domain) TagManager.instance.normalize_domain(domain)
end end
end end
EntityCache.instance.mention(username, domain) EntityCache.instance.mention(username, domain)
end end
end end
private
def generate_query_for_search(terms)
terms = Arel.sql(connection.quote(terms.gsub(/['?\\:]/, ' ')))
textsearch = "(setweight(to_tsvector('simple', accounts.display_name), 'A') || setweight(to_tsvector('simple', accounts.username), 'B') || setweight(to_tsvector('simple', coalesce(accounts.domain, '')), 'C'))"
query = "to_tsquery('simple', ''' ' || #{terms} || ' ''' || ':*')"
[textsearch, query]
end
end end
def emojis def emojis

View file

@ -53,19 +53,13 @@ class AccountSearchService < BaseService
end end
def from_database def from_database
if account AccountSearchQueryBuilder.new(
advanced_search_results terms_for_query,
else account: account,
simple_search_results only_following: options[:following],
end limit: limit_for_non_exact_results,
end offset: offset
).results
def advanced_search_results
Account.advanced_search_for(terms_for_query, account, limit_for_non_exact_results, options[:following], offset)
end
def simple_search_results
Account.search_for(terms_for_query, limit_for_non_exact_results, offset)
end end
def from_elasticsearch def from_elasticsearch

View file

@ -0,0 +1,101 @@
# frozen_string_literal: true
require 'rails_helper'
describe AccountSearchQueryBuilder do
before do
Fabricate(
:account,
display_name: "Missing",
username: "missing",
domain: "missing.com"
)
end
context 'without account' do
it 'accepts ?, \, : and space as delimiter' do
needle = Fabricate(
:account,
display_name: 'A & l & i & c & e',
username: 'username',
domain: 'example.com'
)
results = described_class.new('A?l\i:c e').build.to_a
expect(results).to eq [needle]
end
it 'finds accounts with matching display_name' do
needle = Fabricate(
:account,
display_name: "Display Name",
username: "username",
domain: "example.com"
)
results = described_class.new("display").build.to_a
expect(results).to eq [needle]
end
it 'finds accounts with matching username' do
needle = Fabricate(
:account,
display_name: "Display Name",
username: "username",
domain: "example.com"
)
results = described_class.new("username").build.to_a
expect(results).to eq [needle]
end
it 'finds accounts with matching domain' do
needle = Fabricate(
:account,
display_name: "Display Name",
username: "username",
domain: "example.com"
)
results = described_class.new("example").build.to_a
expect(results).to eq [needle]
end
it 'limits by 10 by default' do
11.times.each { Fabricate(:account, display_name: "Display Name") }
results = described_class.new("display").build.to_a
expect(results.size).to eq 10
end
it 'accepts arbitrary limits' do
2.times.each { Fabricate(:account, display_name: "Display Name") }
results = described_class.new("display", limit: 1).build.to_a
expect(results.size).to eq 1
end
it 'ranks multiple matches higher' do
needles = [
{ username: "username", display_name: "username" },
{ display_name: "Display Name", username: "username", domain: "example.com" },
].map(&method(:Fabricate).curry(2).call(:account))
results = described_class.new("username").build.to_a
expect(results).to eq needles
end
end
context 'with account' do
let(:account) { Fabricate(:account) }
it 'ranks followed accounts higher' do
needle = Fabricate(:account, username: "Matching")
followed_needle = Fabricate(:account, username: "Matcher")
account.follow!(followed_needle)
results = described_class.new("match", account: account).build.to_a
expect(results).to eq [followed_needle, needle]
expect(results.first.rank).to be > results.last.rank
end
end
end

View file

@ -309,125 +309,6 @@ RSpec.describe Account, type: :model do
end end
end end
describe '.search_for' do
before do
_missing = Fabricate(
:account,
display_name: "Missing",
username: "missing",
domain: "missing.com"
)
end
it 'accepts ?, \, : and space as delimiter' do
match = Fabricate(
:account,
display_name: 'A & l & i & c & e',
username: 'username',
domain: 'example.com'
)
results = Account.search_for('A?l\i:c e')
expect(results).to eq [match]
end
it 'finds accounts with matching display_name' do
match = Fabricate(
:account,
display_name: "Display Name",
username: "username",
domain: "example.com"
)
results = Account.search_for("display")
expect(results).to eq [match]
end
it 'finds accounts with matching username' do
match = Fabricate(
:account,
display_name: "Display Name",
username: "username",
domain: "example.com"
)
results = Account.search_for("username")
expect(results).to eq [match]
end
it 'finds accounts with matching domain' do
match = Fabricate(
:account,
display_name: "Display Name",
username: "username",
domain: "example.com"
)
results = Account.search_for("example")
expect(results).to eq [match]
end
it 'limits by 10 by default' do
11.times.each { Fabricate(:account, display_name: "Display Name") }
results = Account.search_for("display")
expect(results.size).to eq 10
end
it 'accepts arbitrary limits' do
2.times.each { Fabricate(:account, display_name: "Display Name") }
results = Account.search_for("display", 1)
expect(results.size).to eq 1
end
it 'ranks multiple matches higher' do
matches = [
{ username: "username", display_name: "username" },
{ display_name: "Display Name", username: "username", domain: "example.com" },
].map(&method(:Fabricate).curry(2).call(:account))
results = Account.search_for("username")
expect(results).to eq matches
end
end
describe '.advanced_search_for' do
it 'accepts ?, \, : and space as delimiter' do
account = Fabricate(:account)
match = Fabricate(
:account,
display_name: 'A & l & i & c & e',
username: 'username',
domain: 'example.com'
)
results = Account.advanced_search_for('A?l\i:c e', account)
expect(results).to eq [match]
end
it 'limits by 10 by default' do
11.times { Fabricate(:account, display_name: "Display Name") }
results = Account.search_for("display")
expect(results.size).to eq 10
end
it 'accepts arbitrary limits' do
2.times { Fabricate(:account, display_name: "Display Name") }
results = Account.search_for("display", 1)
expect(results.size).to eq 1
end
it 'ranks followed accounts higher' do
account = Fabricate(:account)
match = Fabricate(:account, username: "Matching")
followed_match = Fabricate(:account, username: "Matcher")
Fabricate(:follow, account: account, target_account: followed_match)
results = Account.advanced_search_for("match", account)
expect(results).to eq [followed_match, match]
expect(results.first.rank).to be > results.last.rank
end
end
describe '#statuses_count' do describe '#statuses_count' do
subject { Fabricate(:account) } subject { Fabricate(:account) }