Compare commits

...

2 Commits

Author SHA1 Message Date
David Yip
a640c322c1 Escape metacharacters in non-whole-word keyword mutes. Fixes #533.
Also addresses #463.
2018-06-05 02:49:28 -05:00
David Yip
37d495eeeb keyword mute: Store keywords as a list
This has a couple of advantages over the regex approach:

- Keywords are individually addressable, which makes it easier to gather
  statistics (#363)
- Keywords can be individually applied to different feeds, e.g. skipping
  mentions (#454)

It *does* end up creating many more Regexp objects.  I'm not yet sure if
the difference is significant.
2018-06-03 18:12:55 -05:00
2 changed files with 49 additions and 37 deletions

View File

@@ -33,68 +33,74 @@ class Glitch::KeywordMute < ApplicationRecord
Rails.cache.delete(TagMatcher.cache_key(account_id)) Rails.cache.delete(TagMatcher.cache_key(account_id))
end end
class RegexpMatcher class CachedKeywordMute
attr_reader :account_id attr_reader :keyword
attr_reader :regex attr_reader :whole_word
def initialize(account_id) def initialize(keyword, whole_word)
@account_id = account_id @keyword = keyword
regex_text = Rails.cache.fetch(self.class.cache_key(account_id)) { make_regex_text } @whole_word = whole_word
@regex = /#{regex_text}/
end end
protected def boundary_regex_for_keyword
# Returns one [whole_word, keyword] pair for every keyword mute that
# belongs to this matcher's account.
def keywords
  account_mutes = Glitch::KeywordMute.where(account_id: account_id)
  account_mutes.pluck(:whole_word, :keyword)
end
def boundary_regex_for_keyword(keyword)
sb = keyword =~ /\A[[:word:]]/ ? '\b' : '' sb = keyword =~ /\A[[:word:]]/ ? '\b' : ''
eb = keyword =~ /[[:word:]]\Z/ ? '\b' : '' eb = keyword =~ /[[:word:]]\Z/ ? '\b' : ''
/(?mix:#{sb}#{Regexp.escape(keyword)}#{eb})/ /(?mix:#{sb}#{Regexp.escape(keyword)}#{eb})/
end end
# Reports whether +str+ contains this muted keyword.
#
# Whole-word mutes are checked against the pattern built by
# boundary_regex_for_keyword. All other mutes are checked as a
# case-insensitive substring search; the keyword is passed through
# Regexp.escape first so that metacharacters in it are taken literally.
#
# Returns true or false (never a match offset). A nil +str+ yields false.
def matches?(str)
  pattern = whole_word ? boundary_regex_for_keyword : /#{Regexp.escape(keyword)}/i

  # match? gives a strict boolean and does not set $~ / $1.
  pattern.match?(str)
end
end end
class TextMatcher < RegexpMatcher class Matcher
attr_reader :account_id
attr_reader :words
# Builds a matcher for one account.
#
# The keyword list is read through Rails.cache under the key supplied by
# the concrete class's cache_key(account_id); the block passed to fetch
# obtains the list from fetch_keywords.
def initialize(account_id)
  @account_id = account_id

  key = self.class.cache_key(account_id)
  @words = Rails.cache.fetch(key) { fetch_keywords }
end
protected
# Loads this account's keyword mutes and wraps each one in a
# CachedKeywordMute. Every keyword goes through transform_keyword before
# it is wrapped.
def fetch_keywords
  rows = Glitch::KeywordMute.where(account_id: account_id).pluck(:whole_word, :keyword)

  rows.map { |whole_word, keyword| CachedKeywordMute.new(transform_keyword(keyword), whole_word) }
end
# Hook applied by fetch_keywords to each stored keyword before it is wrapped
# in a CachedKeywordMute. This base version returns the keyword unchanged;
# TagMatcher defines its own version that extracts the Tag::HASHTAG_RE capture.
def transform_keyword(keyword)
keyword
end
end
class TextMatcher < Matcher
def self.cache_key(account_id) def self.cache_key(account_id)
format('keyword_mutes:regex:text:%s', account_id) format('keyword_mutes:regex:text:%s', account_id)
end end
def matches?(str) def matches?(str)
!!(regex =~ str) words.any? { |w| w.matches?(str) }
end
private
def make_regex_text
kws = keywords.map! do |whole_word, keyword|
whole_word ? boundary_regex_for_keyword(keyword) : /(?i:#{Regexp.escape(keyword)})/
end
Regexp.union(kws).source
end end
end end
class TagMatcher < RegexpMatcher class TagMatcher < Matcher
def self.cache_key(account_id) def self.cache_key(account_id)
format('keyword_mutes:regex:tag:%s', account_id) format('keyword_mutes:regex:tag:%s', account_id)
end end
def matches?(tags) def matches?(tags)
tags.pluck(:name).any? { |n| regex =~ n } tags.pluck(:name).any? do |n|
words.any? { |w| w.matches?(n) }
end
end end
private protected
def make_regex_text def transform_keyword(kw)
kws = keywords.map! do |whole_word, keyword| Tag::HASHTAG_RE =~ kw ? $1 : kw
term = (Tag::HASHTAG_RE =~ keyword) ? $1 : keyword
whole_word ? boundary_regex_for_keyword(term) : term
end
Regexp.union(kws).source
end end
end end
end end

View File

@@ -79,12 +79,18 @@ RSpec.describe Glitch::KeywordMute, type: :model do
expect(matcher.matches?('(hot take)')).to be_truthy expect(matcher.matches?('(hot take)')).to be_truthy
end end
it 'escapes metacharacters in keywords' do it 'escapes metacharacters in whole-word keywords' do
Glitch::KeywordMute.create!(account: alice, keyword: '(hot take)') Glitch::KeywordMute.create!(account: alice, keyword: '(hot take)')
expect(matcher.matches?('(hot take)')).to be_truthy expect(matcher.matches?('(hot take)')).to be_truthy
end end
# Covers the whole_word: false path. The keyword '(-' contains an unbalanced
# parenthesis, so it can only match if it is escaped before being placed in a
# Regexp; interpolated raw it would not be a valid pattern.
# NOTE(review): `alice` and `matcher` are defined outside this excerpt.
it 'escapes metacharacters in non-whole-word keywords' do
Glitch::KeywordMute.create!(account: alice, keyword: '(-', whole_word: false)
expect(matcher.matches?('bad (-)')).to be_truthy
end
it 'uses case-folding rules appropriate for more than just English' do it 'uses case-folding rules appropriate for more than just English' do
Glitch::KeywordMute.create!(account: alice, keyword: 'großeltern') Glitch::KeywordMute.create!(account: alice, keyword: 'großeltern')