From e754083e8a33778b5dd8d43efc5604ed50efb0e4 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Fri, 1 Sep 2023 09:43:12 +0200 Subject: [PATCH] Fix unmatched quotes and prefixes causing search to fail (#26701) --- app/lib/search_query_parser.rb | 4 +- app/lib/search_query_transformer.rb | 101 +++++++++++----------- spec/lib/search_query_parser_spec.rb | 98 +++++++++++++++++++++ spec/lib/search_query_transformer_spec.rb | 57 ++++++++++-- 4 files changed, 200 insertions(+), 60 deletions(-) create mode 100644 spec/lib/search_query_parser_spec.rb diff --git a/app/lib/search_query_parser.rb b/app/lib/search_query_parser.rb index 5d6ffbf29d..1c57b9b024 100644 --- a/app/lib/search_query_parser.rb +++ b/app/lib/search_query_parser.rb @@ -6,10 +6,10 @@ class SearchQueryParser < Parslet::Parser rule(:colon) { str(':') } rule(:space) { match('\s').repeat(1) } rule(:operator) { (str('+') | str('-')).as(:operator) } - rule(:prefix) { (term >> colon).as(:prefix) } + rule(:prefix) { term >> colon } rule(:shortcode) { (colon >> term >> colon.maybe).as(:shortcode) } rule(:phrase) { (quote >> (term >> space.maybe).repeat >> quote).as(:phrase) } - rule(:clause) { (operator.maybe >> prefix.maybe >> (phrase | term | shortcode)).as(:clause) } + rule(:clause) { (operator.maybe >> prefix.maybe.as(:prefix) >> (phrase | term | shortcode)).as(:clause) | prefix.as(:clause) | quote.as(:junk) } rule(:query) { (clause >> space.maybe).repeat.as(:query) } root(:query) end diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb index f10ccfb283..e81c0c081e 100644 --- a/app/lib/search_query_transformer.rb +++ b/app/lib/search_query_transformer.rb @@ -1,50 +1,32 @@ # frozen_string_literal: true class SearchQueryTransformer < Parslet::Transform + SUPPORTED_PREFIXES = %w( + has + is + language + from + before + after + during + ).freeze + class Query - attr_reader :should_clauses, :must_not_clauses, :must_clauses, :filter_clauses + attr_reader :must_not_clauses, :must_clauses, :filter_clauses def initialize(clauses) - grouped = clauses.chunk(&:operator).to_h - @should_clauses = grouped.fetch(:should, []) + grouped = clauses.compact.chunk(&:operator).to_h @must_not_clauses = grouped.fetch(:must_not, []) @must_clauses = grouped.fetch(:must, []) @filter_clauses = grouped.fetch(:filter, []) end def apply(search) - should_clauses.each { |clause| search = search.query.should(clause_to_query(clause)) } - must_clauses.each { |clause| search = search.query.must(clause_to_query(clause)) } - must_not_clauses.each { |clause| search = search.query.must_not(clause_to_query(clause)) } - filter_clauses.each { |clause| search = search.filter(**clause_to_filter(clause)) } + must_clauses.each { |clause| search = search.query.must(clause.to_query) } + must_not_clauses.each { |clause| search = search.query.must_not(clause.to_query) } + filter_clauses.each { |clause| search = search.filter(**clause.to_query) } search.query.minimum_should_match(1) end - - private - - def clause_to_query(clause) - case clause - when TermClause - { multi_match: { type: 'most_fields', query: clause.term, fields: ['text', 'text.stemmed'], operator: 'and' } } - when PhraseClause - { match_phrase: { text: { query: clause.phrase } } } - else - raise "Unexpected clause type: #{clause}" - end - end - - def clause_to_filter(clause) - case clause - when PrefixClause - if clause.negated? - { bool: { must_not: { clause.type => { clause.filter => clause.term } } } } - else - { clause.type => { clause.filter => clause.term } } - end - else - raise "Unexpected clause type: #{clause}" - end - end end class Operator @@ -63,31 +45,38 @@ class SearchQueryTransformer < Parslet::Transform end class TermClause - attr_reader :prefix, :operator, :term + attr_reader :operator, :term - def initialize(prefix, operator, term) - @prefix = prefix + def initialize(operator, term) @operator = Operator.symbol(operator) @term = term end + + def to_query + { multi_match: { type: 'most_fields', query: @term, fields: ['text', 'text.stemmed'], operator: 'and' } } + end end class PhraseClause - attr_reader :prefix, :operator, :phrase + attr_reader :operator, :phrase - def initialize(prefix, operator, phrase) - @prefix = prefix + def initialize(operator, phrase) @operator = Operator.symbol(operator) @phrase = phrase end + + def to_query + { match_phrase: { text: { query: @phrase } } } + end end class PrefixClause - attr_reader :type, :filter, :operator, :term + attr_reader :operator, :prefix, :term def initialize(prefix, operator, term, options = {}) - @negated = operator == '-' - @options = options + @prefix = prefix + @negated = operator == '-' + @options = options @operator = :filter case prefix @@ -116,12 +105,16 @@ class SearchQueryTransformer < Parslet::Transform @type = :range @term = { gte: term, lte: term, time_zone: @options[:current_account]&.user_time_zone || 'UTC' } else - raise Mastodon::SyntaxError + raise "Unknown prefix: #{prefix}" end end - def negated? - @negated + def to_query + if @negated + { bool: { must_not: { @type => { @filter => @term } } } } + else + { @type => { @filter => @term } } + end end private @@ -159,18 +152,26 @@ class SearchQueryTransformer < Parslet::Transform prefix = clause[:prefix][:term].to_s if clause[:prefix] operator = clause[:operator]&.to_s - if clause[:prefix] + if clause[:prefix] && SUPPORTED_PREFIXES.include?(prefix) PrefixClause.new(prefix, operator, clause[:term].to_s, current_account: current_account) + elsif clause[:prefix] + TermClause.new(operator, "#{prefix} #{clause[:term]}") elsif clause[:term] - TermClause.new(prefix, operator, clause[:term].to_s) + TermClause.new(operator, clause[:term].to_s) elsif clause[:shortcode] - TermClause.new(prefix, operator, ":#{clause[:term]}:") + TermClause.new(operator, ":#{clause[:term]}:") elsif clause[:phrase] - PhraseClause.new(prefix, operator, clause[:phrase].is_a?(Array) ? clause[:phrase].map { |p| p[:term].to_s }.join(' ') : clause[:phrase].to_s) + PhraseClause.new(operator, clause[:phrase].is_a?(Array) ? clause[:phrase].map { |p| p[:term].to_s }.join(' ') : clause[:phrase].to_s) else raise "Unexpected clause type: #{clause}" end end - rule(query: sequence(:clauses)) { Query.new(clauses) } + rule(junk: subtree(:junk)) do + nil + end + + rule(query: sequence(:clauses)) do + Query.new(clauses) + end end diff --git a/spec/lib/search_query_parser_spec.rb b/spec/lib/search_query_parser_spec.rb new file mode 100644 index 0000000000..66b0e8f9e2 --- /dev/null +++ b/spec/lib/search_query_parser_spec.rb @@ -0,0 +1,98 @@ +# frozen_string_literal: true + +require 'rails_helper' +require 'parslet/rig/rspec' + +describe SearchQueryParser do + let(:parser) { described_class.new } + + context 'with term' do + it 'consumes "hello"' do + expect(parser.term).to parse('hello') + end + end + + context 'with prefix' do + it 'consumes "foo:"' do + expect(parser.prefix).to parse('foo:') + end + end + + context 'with operator' do + it 'consumes "+"' do + expect(parser.operator).to parse('+') + end + + it 'consumes "-"' do + expect(parser.operator).to parse('-') + end + end + + context 'with shortcode' do + it 'consumes ":foo:"' do + expect(parser.shortcode).to parse(':foo:') + end + end + + context 'with phrase' do + it 'consumes "hello world"' do + expect(parser.phrase).to parse('"hello world"') + end + end + + context 'with clause' do + it 'consumes "foo"' do + expect(parser.clause).to parse('foo') + end + + it 'consumes "-foo"' do + expect(parser.clause).to parse('-foo') + end + + it 'consumes "foo:bar"' do + expect(parser.clause).to parse('foo:bar') + end + + it 'consumes "-foo:bar"' do + expect(parser.clause).to parse('-foo:bar') + end + + it 'consumes \'foo:"hello world"\'' do + expect(parser.clause).to parse('foo:"hello world"') + end + + it 'consumes \'-foo:"hello world"\'' do + expect(parser.clause).to parse('-foo:"hello world"') + end + + it 'consumes "foo:"' do + expect(parser.clause).to parse('foo:') + end + + it 'consumes \'"\'' do + expect(parser.clause).to parse('"') + end + end + + context 'with query' do + it 'consumes "hello -world"' do + expect(parser.query).to parse('hello -world') + end + + it 'consumes \'foo "hello world"\'' do + expect(parser.query).to parse('foo "hello world"') + end + + it 'consumes "foo:bar hello"' do + expect(parser.query).to parse('foo:bar hello') + end + + it 'consumes \'"hello" world "\'' do + expect(parser.query).to parse('"hello" world "') + end + + it 'consumes "foo:bar bar: hello"' do + expect(parser.query).to parse('foo:bar bar: hello') + end + end +end diff --git a/spec/lib/search_query_transformer_spec.rb b/spec/lib/search_query_transformer_spec.rb index 953f9acb2f..17f06d2833 100644 --- a/spec/lib/search_query_transformer_spec.rb +++ b/spec/lib/search_query_transformer_spec.rb @@ -3,16 +3,57 @@ require 'rails_helper' describe SearchQueryTransformer do - describe 'initialization' do - let(:parser) { SearchQueryParser.new.parse('query') } + subject { described_class.new.apply(parser, current_account: nil) } - it 'sets attributes' do - transformer = described_class.new.apply(parser) + let(:parser) { SearchQueryParser.new.parse(query) } - expect(transformer.should_clauses.first).to be_nil - expect(transformer.must_clauses.first).to be_a(SearchQueryTransformer::TermClause) - expect(transformer.must_not_clauses.first).to be_nil - expect(transformer.filter_clauses.first).to be_nil + context 'with "hello world"' do + let(:query) { 'hello world' } + + it 'transforms clauses' do + expect(subject.must_clauses.map(&:term)).to match_array %w(hello world) + expect(subject.must_not_clauses).to be_empty + expect(subject.filter_clauses).to be_empty + end + end + + context 'with "hello -world"' do + let(:query) { 'hello -world' } + + it 'transforms clauses' do + expect(subject.must_clauses.map(&:term)).to match_array %w(hello) + expect(subject.must_not_clauses.map(&:term)).to match_array %w(world) + expect(subject.filter_clauses).to be_empty + end + end + + context 'with "hello is:reply"' do + let(:query) { 'hello is:reply' } + + it 'transforms clauses' do + expect(subject.must_clauses.map(&:term)).to match_array %w(hello) + expect(subject.must_not_clauses).to be_empty + expect(subject.filter_clauses.map(&:term)).to match_array %w(reply) + end + end + + context 'with "foo: bar"' do + let(:query) { 'foo: bar' } + + it 'transforms clauses' do + expect(subject.must_clauses.map(&:term)).to match_array %w(foo bar) + expect(subject.must_not_clauses).to be_empty + expect(subject.filter_clauses).to be_empty + end + end + + context 'with "foo:bar"' do + let(:query) { 'foo:bar' } + + it 'transforms clauses' do + expect(subject.must_clauses.map(&:term)).to contain_exactly('foo bar') + expect(subject.must_not_clauses).to be_empty + expect(subject.filter_clauses).to be_empty end end end