| Left: | ||
| Right: |
| LEFT | RIGHT |
|---|---|
| 1 /* | 1 /* |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
| 3 * Copyright (C) 2006-present eyeo GmbH | 3 * Copyright (C) 2006-present eyeo GmbH |
| 4 * | 4 * |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
| 8 * | 8 * |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. |
| 13 * | 13 * |
| 14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License |
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 16 */ | 16 */ |
| 17 | 17 |
| 18 #include "Matcher.h" | 18 #include "Matcher.h" |
| 19 #include "RegExpFilter.h" | 19 #include "RegExpFilter.h" |
| 20 #include "../library.h" | 20 #include "../library.h" |
| 21 | 21 |
| 22 const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000; | |
| 23 | |
| 24 OwnedString CombinedMatcher::FindKeyword(const FilterPtr& filter) | |
|
sergei
2017/10/02 12:02:35
It's not important but still it would be better to
sergei
2017/10/02 12:02:36
the method should be const.
hub
2017/10/03 19:33:13
Done.
hub
2017/10/03 19:33:13
Done.
| |
| 25 { | |
| 26 if (filter->mType == Filter::Type::WHITELIST) | |
|
sergei
2017/10/02 12:02:33
What do you think about having an inline function
hub
2017/10/03 19:33:11
Done.
sergei
2017/10/04 08:54:32
I meant that the code of CombinedMatcher::SomeMeth
hub
2017/10/06 13:49:17
Done.
| |
| 27 return mWhitelist.FindKeyword(filter); | |
| 28 return mBlacklist.FindKeyword(filter); | |
| 29 } | |
| 30 | |
| 31 void CombinedMatcher::ResetCache() | |
| 32 { | |
| 33 mResultCache.clear(); | |
| 34 } | |
| 35 | |
| 36 void CombinedMatcher::Add(const FilterPtr& filter) | |
|
sergei
2017/10/02 12:02:36
Should the argument be `Filter&`?
hub
2017/10/03 19:33:11
Done.
| |
| 37 { | |
| 38 if (filter->mType == Filter::Type::WHITELIST) | |
| 39 mWhitelist.Add(filter); | |
| 40 else | |
| 41 mBlacklist.Add(filter); | |
| 42 | |
| 43 ResetCache(); | |
| 44 } | |
| 45 | |
| 46 void CombinedMatcher::Remove(const FilterPtr& filter) | |
|
sergei
2017/10/02 12:02:37
Should the argument be `const Filter&`?
hub
2017/10/03 19:33:09
Done.
| |
| 47 { | |
| 48 if (filter->mType == Filter::Type::WHITELIST) | |
| 49 mWhitelist.Remove(filter); | |
| 50 else | |
| 51 mBlacklist.Remove(filter); | |
| 52 | |
| 53 ResetCache(); | |
| 54 } | |
| 55 | |
| 56 void CombinedMatcher::Clear() | |
| 57 { | |
| 58 mBlacklist.Clear(); | |
| 59 mWhitelist.Clear(); | |
| 60 ResetCache(); | |
| 61 } | |
| 62 | |
| 63 bool CombinedMatcher::HasFilter(const FilterPtr& filter) const | |
|
sergei
2017/10/02 12:02:34
Should the argument be `const Filter&`?
hub
2017/10/03 19:33:11
Done.
| |
| 64 { | |
| 65 if (filter->mType == Filter::Type::WHITELIST) | |
| 66 return mWhitelist.HasFilter(filter); | |
| 67 return mBlacklist.HasFilter(filter); | |
| 68 } | |
| 69 | |
| 70 const String& CombinedMatcher::GetKeywordForFilter(const FilterPtr& filter) | |
|
sergei
2017/10/02 12:02:35
Should the argument be `const Filter&`?
sergei
2017/10/02 12:02:36
the method should be const.
hub
2017/10/03 19:33:11
Done.
hub
2017/10/03 19:33:12
Done.
| |
| 71 { | |
| 72 if (filter->mType == Filter::Type::WHITELIST) | |
| 73 return mWhitelist.GetKeywordForFilter(filter); | |
| 74 return mBlacklist.GetKeywordForFilter(filter); | |
| 75 } | |
| 76 | |
| 77 FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location, | |
|
sergei
2017/10/02 12:02:34
the method should be const if it's possible.
hub
2017/10/03 19:33:10
Done.
| |
| 78 int typeMask, DependentString& docDomain, bool thirdParty, | |
| 79 const String& sitekey, bool specificOnly) | |
| 80 { | |
| 81 ReMatchResults reResult; | |
| 82 OwnedString text(location); | |
| 83 text.toLower(); | |
| 84 auto match_re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true); | |
|
sergei
2017/10/02 12:02:36
It should be in anonymous namespace, otherwise a n
sergei
2017/10/04 08:54:31
This is not addressed.
hub
2017/10/06 13:49:17
Done.
| |
| 85 text.match(match_re_id, &reResult); | |
|
sergei
2017/10/02 12:02:35
Although it seems it does work here, I think for p
hub
2017/10/03 19:33:12
Done.
| |
| 86 | |
| 87 auto& candidates = reResult.candidates; | |
| 88 candidates.push_back(OwnedString()); | |
| 89 | |
| 90 FilterPtr blacklistHit; | |
| 91 for (auto substr : candidates) | |
| 92 { | |
| 93 if (mWhitelist.mFilterByKeyword.find(substr)) | |
|
sergei
2017/10/02 12:02:36
It's already changed in the master, do you mind to
hub
2017/10/03 19:33:13
Done.
| |
| 94 { | |
| 95 auto result = mWhitelist.CheckEntryMatch( | |
| 96 substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly ); | |
| 97 if (result) | |
| 98 return result; | |
| 99 } | |
| 100 if (mBlacklist.mFilterByKeyword.find(substr) && !blacklistHit) | |
| 101 { | |
| 102 blacklistHit = mBlacklist.CheckEntryMatch( | |
| 103 substr, location, typeMask, docDomain, thirdParty, sitekey, | |
| 104 specificOnly); | |
| 105 } | |
| 106 } | |
| 107 return blacklistHit; | |
| 108 } | |
| 109 | |
| 110 Filter* CombinedMatcher::MatchesAny(const String& location, | |
| 111 int typeMask, DependentString& docDomain, bool thirdParty, | |
| 112 const String& sitekey, bool specificOnly) | |
|
sergei
2017/10/02 12:02:34
The method should be const if it's possible.
hub
2017/10/03 19:33:11
sadly the use of the cache makes it non-const. I c
| |
| 113 { | |
| 114 OwnedString key(location); | |
| 115 key.append(u" "_str); | |
| 116 key.append(typeMask); | |
| 117 key.append(u" "_str); | |
| 118 key.append(docDomain); | |
| 119 key.append(u" "_str); | |
| 120 key.append(thirdParty); | |
| 121 key.append(u" "_str); | |
| 122 key.append(sitekey); | |
| 123 key.append(u" "_str); | |
| 124 key.append(specificOnly); | |
| 125 | |
| 126 FilterPtr result; | |
| 127 | |
| 128 auto cachedResult = mResultCache.find(key); | |
| 129 if (cachedResult) | |
| 130 result = cachedResult->second; | |
| 131 else | |
| 132 { | |
| 133 result = MatchesAnyInternal(location, typeMask, docDomain, | |
| 134 thirdParty, sitekey, specificOnly); | |
| 135 | |
| 136 if (mResultCache.size() >= MAX_CACHE_ENTRIES) | |
| 137 ResetCache(); | |
| 138 | |
| 139 mResultCache[key] = result; | |
| 140 } | |
| 141 | |
| 142 result->AddRef(); | |
| 143 return result.get(); | |
|
sergei
2017/10/02 12:02:34
It would be better to `return result.release();`.
hub
2017/10/03 19:33:10
Done.
| |
| 144 } | |
| 145 | |
| 146 namespace { | 22 namespace { |
| 147 const DependentString regexpRegExp = | 23 const DependentString regexpRegExp = |
| 148 u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_str; | 24 u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_str; |
| 149 const DependentString optionsRegExp = | 25 const DependentString optionsRegExp = |
| 150 u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str; | 26 u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str; |
| 151 const DependentString candidateRegExp = | 27 const DependentString candidateRegExp = |
| 152 u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str; | 28 u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str; |
| 153 } | 29 const DependentString matchRegExp = u"[a-z0-9%]{3,}"_str; |
| 154 | 30 } |
| 155 OwnedString Matcher::FindKeyword(const FilterPtr& filter) | 31 |
|
sergei
2017/10/02 12:02:34
Should the argument be `const Filter&`?
sergei
2017/10/02 12:02:36
should it be a const method?
hub
2017/10/03 19:33:11
Done.
hub
2017/10/03 19:33:12
Done.
| |
| 156 { | 32 Matcher::Matcher() |
| 157 OwnedString result(u""_str); | 33 : mFilterByKeyword(1024), mKeywordByFilter(1024), |
| 158 OwnedString text(filter->GetText()); | 34 mReId(-1), mOptionsReId(-1), mCandidatesReId(-1) |
| 159 auto re_id = GenerateRegExp(DependentString(regexpRegExp), true, false); | 35 { |
|
sergei
2017/10/02 12:02:37
It and all other regexps below should be in the an
hub
2017/10/03 19:33:14
The mistake here is that I create a new DependentS
sergei
2017/10/04 08:54:32
Each call of GenerateRegExp increases global _rege
hub
2017/10/06 13:49:16
Done.
| |
| 160 if (TestRegExp(re_id, text)) | 36 mReId = GenerateRegExp(regexpRegExp, true, false); |
| 37 mOptionsReId = GenerateRegExp(optionsRegExp, true, false); | |
| 38 mCandidatesReId = GenerateRegExp(candidateRegExp, true, true); | |
| 39 mMatchReId = GenerateRegExp(matchRegExp, true, true); | |
| 40 } | |
| 41 | |
| 42 void Matcher::Add(Filter& filter) | |
| 43 { | |
| 44 if (mKeywordByFilter.find(filter.GetText())) | |
| 45 return; | |
| 46 | |
| 47 auto keyword = FindKeyword(filter); | |
| 48 | |
| 49 mFilterByKeyword[keyword].push_back(FilterPtr(&filter)); | |
|
sergei
2017/10/11 09:55:16
Although the review is already closed I think it's
| |
| 50 mKeywordByFilter[filter.GetText()] = | |
| 51 FilterKeyword(std::move(keyword), filter); | |
| 52 } | |
| 53 | |
| 54 void Matcher::Remove(Filter& filter) | |
| 55 { | |
| 56 auto entry = mKeywordByFilter.find(filter.GetText()); | |
| 57 if (!entry) | |
| 58 return; | |
| 59 | |
| 60 auto& keyword = static_cast<const String&>(entry->second); | |
| 61 auto list = mFilterByKeyword[keyword]; | |
| 62 if (list.size() == 1) | |
| 63 mFilterByKeyword.erase(keyword); | |
| 64 else | |
| 65 list.erase(std::find(list.cbegin(), list.cend(), FilterPtr(&filter))); | |
| 66 | |
| 67 mKeywordByFilter.erase(filter.GetText()); | |
| 68 } | |
| 69 | |
| 70 void Matcher::Clear() | |
| 71 { | |
| 72 mFilterByKeyword.clear(); | |
| 73 mKeywordByFilter.clear(); | |
| 74 } | |
| 75 | |
| 76 bool Matcher::HasFilter(const Filter& filter) const | |
| 77 { | |
| 78 return mKeywordByFilter.find(filter.GetText()); | |
| 79 } | |
| 80 | |
| 81 namespace | |
| 82 { | |
| 83 DependentString emptyString = u""_str; | |
| 84 } | |
| 85 | |
| 86 const String& Matcher::GetKeywordForFilter(const Filter& filter) const | |
| 87 { | |
| 88 auto entry = mKeywordByFilter.find(filter.GetText()); | |
| 89 if (entry) | |
| 90 return static_cast<const String&>(entry->second); | |
| 91 return emptyString; | |
| 92 } | |
| 93 | |
| 94 Filter* Matcher::MatchesAny(const String& location, | |
| 95 int typeMask, DependentString& docDomain, bool thirdParty, | |
| 96 const String& sitekey, bool specificOnly) const | |
| 97 { | |
| 98 OwnedString text(location); | |
| 99 text.toLower(); | |
| 100 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); | |
| 101 if (text.match(mMatchReId, *reResult)) | |
| 102 { | |
| 103 auto& candidates = reResult->candidates; | |
| 104 candidates.push_back(OwnedString()); | |
| 105 for (auto candidate : candidates) | |
| 106 { | |
| 107 auto result = CheckEntryMatch(candidate, location, typeMask, docDomain, | |
| 108 thirdParty, sitekey, specificOnly); | |
| 109 if (result) | |
| 110 return result.release(); | |
| 111 } | |
| 112 } | |
|
Wladimir Palant
2017/10/09 08:39:47
As mentioned in the issue description, we should n
sergei
2017/10/09 15:27:53
Although it merely converts the existing JS code a
Wladimir Palant
2017/10/10 07:39:05
I strongly disagree. Landing crappy code is always
| |
| 113 return nullptr; | |
| 114 } | |
| 115 | |
| 116 OwnedString Matcher::FindKeyword(const Filter& filter) const | |
| 117 { | |
| 118 OwnedString result; | |
| 119 OwnedString text(filter.GetText()); | |
| 120 if (TestRegExp(mReId, text)) | |
| 161 return result; | 121 return result; |
| 162 | 122 |
| 163 // Remove options | 123 // Remove options |
| 164 auto options_re_id = GenerateRegExp(DependentString(optionsRegExp), true, false); | 124 auto index = ExecRegExp(mOptionsReId, text); |
| 165 auto index = ExecRegExp(options_re_id, text); | 125 if (index != String::npos) |
| 166 if (index != -1) | 126 text = DependentString(text, 0, index); |
|
sergei
2017/10/02 12:02:34
It would be better to use String::npos than -1.
hub
2017/10/03 19:33:13
Done.
| |
| 167 text = text.substr(0, index); | |
| 168 | 127 |
| 169 // Remove whitelist marker | 128 // Remove whitelist marker |
| 170 if (text[0] == '@' && text[1] == '@') | 129 if (text.length() >= 2 && text[0] == '@' && text[1] == '@') |
|
sergei
2017/10/02 12:02:37
Firstly we should check the length of the `text`.
hub
2017/10/03 19:33:11
Done.
| |
| 171 text = text.substr(2); | 130 text = DependentString(text, 2); |
| 172 | 131 |
| 173 text.toLower(); | 132 text.toLower(); |
| 174 ReMatchResults keywords; | 133 intrusive_ptr<ReMatchResults> keywords(new ReMatchResults, false); |
| 175 auto candidates_re_id = GenerateRegExp(candidateRegExp, true, true); | 134 auto match = text.match(mCandidatesReId, *keywords); |
| 176 auto match = text.match(candidates_re_id, &keywords); | |
| 177 if (!match) | 135 if (!match) |
| 178 return result; | 136 return result; |
| 179 | 137 |
| 180 auto& candidates = keywords.candidates; | 138 auto& candidates = keywords->candidates; |
| 181 | 139 |
| 182 auto& hash = mFilterByKeyword; | 140 uint32_t resultCount = 0xffffff; |
| 183 uint32_t resultCount = 0xffffffff; | |
| 184 uint32_t resultLength = 0; | 141 uint32_t resultLength = 0; |
| 185 for (auto substr : candidates) | 142 for (auto substr : candidates) |
| 186 { | 143 { |
| 187 auto candidate = DependentString(substr).substr(1); | 144 if (substr.empty()) |
| 188 auto count = (hash.find(candidate) ? hash[candidate].size() : 0); | 145 continue; |
|
sergei
2017/10/02 12:02:35
Basically braces are not needed here.
sergei
2017/10/02 12:02:37
It seems it could be optimized by
auto ii_hash = h
hub
2017/10/03 19:33:12
I have to do that for to make the function `const`
hub
2017/10/03 19:33:13
Done.
sergei
2017/10/04 08:54:32
It's just a side effect of the present code, there
hub
2017/10/06 13:49:16
I addressed that. Just as I said making this const
| |
| 146 | |
| 147 auto candidate = DependentString(substr, 1); | |
| 148 auto entry = mFilterByKeyword.find(candidate); | |
| 149 auto count = entry ? entry->second.size() : 0; | |
| 189 if (count < resultCount || | 150 if (count < resultCount || |
| 190 (count == resultCount && candidate.length() > resultLength)) | 151 (count == resultCount && candidate.length() > resultLength)) |
| 191 { | 152 { |
| 192 result = candidate; | 153 result = candidate; |
| 193 resultCount = count; | 154 resultCount = count; |
| 194 resultLength = candidate.length(); | 155 resultLength = candidate.length(); |
| 195 } | 156 } |
| 196 } | 157 } |
| 197 | |
| 198 return result; | 158 return result; |
| 199 } | |
| 200 | |
| 201 void Matcher::Add(const FilterPtr& filter) | |
|
sergei
2017/10/02 12:02:36
What about passing `Filter&`?
hub
2017/10/03 19:33:10
Done.
| |
| 202 { | |
| 203 if (mKeywordByFilter.find(filter->GetText())) | |
| 204 return; | |
| 205 | |
| 206 auto keyword = FindKeyword(filter); | |
| 207 auto oldEntry = mFilterByKeyword.find(keyword); | |
| 208 if (!oldEntry) | |
| 209 mFilterByKeyword[keyword] = std::vector<FilterPtr>{filter}; | |
| 210 else | |
| 211 mFilterByKeyword[keyword].push_back(filter); | |
|
sergei
2017/10/02 12:02:37
StringMap::operator[](const String& key) creates a
hub
2017/10/03 19:33:09
Done.
| |
| 212 mKeywordByFilter[filter->GetText()] = keyword; | |
|
sergei
2017/10/02 12:02:34
mKeywordByFilter stores DependentString, what if t
sergei
2017/10/04 08:54:32
What about having some
struct FilterKeyword
{
Fi
hub
2017/10/06 13:49:17
Sounds like a good idea. Done.
| |
| 213 } | |
| 214 | |
| 215 void Matcher::Remove(const FilterPtr& filter) | |
|
sergei
2017/10/02 12:02:37
It seems the argument can be a const reference.
hub
2017/10/03 19:33:09
Done.
| |
| 216 { | |
| 217 if (!mKeywordByFilter.find(filter->GetText())) | |
| 218 return; | |
| 219 | |
| 220 auto keyword = mKeywordByFilter[filter->GetText()]; | |
|
sergei
2017/10/02 12:02:37
There is also no need for double looking up.
hub
2017/10/03 19:33:12
Done.
| |
| 221 auto list = mFilterByKeyword[keyword]; | |
| 222 if (list.size() == 1) | |
| 223 mFilterByKeyword.erase(keyword); | |
| 224 else | |
| 225 { | |
| 226 auto iter = std::find(list.cbegin(), list.cend(), filter); | |
| 227 list.erase(iter); | |
|
sergei
2017/10/02 12:02:35
It can be one line but it does not matter.
hub
2017/10/03 19:33:10
Done.
| |
| 228 } | |
| 229 mKeywordByFilter.erase(filter->GetText()); | |
| 230 } | |
| 231 | |
| 232 void Matcher::Clear() | |
| 233 { | |
| 234 mFilterByKeyword.clear(); | |
| 235 mKeywordByFilter.clear(); | |
| 236 } | |
| 237 | |
| 238 bool Matcher::HasFilter(const FilterPtr& filter) const | |
|
sergei
2017/10/02 12:02:35
the argument should be a const reference.
hub
2017/10/03 19:33:09
Done.
| |
| 239 { | |
| 240 return mKeywordByFilter.find(filter->GetText()); | |
| 241 } | |
| 242 | |
| 243 static DependentString emptyString = u""_str; | |
|
sergei
2017/10/02 12:02:37
Although static in the compilation unit achieves t
hub
2017/10/03 19:33:10
Done.
| |
| 244 | |
| 245 const String& Matcher::GetKeywordForFilter(const FilterPtr& filter) | |
|
sergei
2017/10/02 12:02:36
the argument should be a const reference and the m
hub
2017/10/03 19:33:12
Done.
| |
| 246 { | |
| 247 if (mKeywordByFilter.find(filter->GetText())) | |
| 248 return mKeywordByFilter[filter->GetText()]; | |
| 249 return emptyString; | |
|
sergei
2017/10/02 12:02:37
There is also no need for double looking up.
hub
2017/10/03 19:33:13
Done (needed for making the method `const`)
| |
| 250 } | 159 } |
| 251 | 160 |
| 252 FilterPtr Matcher::CheckEntryMatch(const String& keyword, | 161 FilterPtr Matcher::CheckEntryMatch(const String& keyword, |
| 253 const String& location, | 162 const String& location, |
| 254 int typeMask, DependentString& docDomain, bool thirdParty, | 163 int typeMask, DependentString& docDomain, bool thirdParty, |
| 255 const String& sitekey, bool specificOnly) | 164 const String& sitekey, bool specificOnly) const |
|
sergei
2017/10/02 12:02:34
basically this method and the one below do not mod
hub
2017/10/03 19:33:10
Done.
| |
| 256 { | 165 { |
| 257 auto list = mFilterByKeyword[keyword]; | 166 auto entry = mFilterByKeyword.find(keyword); |
| 258 for (auto filter : list) { | 167 if (!entry) |
| 168 return FilterPtr(); | |
| 169 | |
| 170 auto filters = entry->second; | |
| 171 for (auto filter : filters) | |
| 172 { | |
| 259 auto activeFilter = static_cast<ActiveFilter*>(filter.get()); | 173 auto activeFilter = static_cast<ActiveFilter*>(filter.get()); |
|
sergei
2017/10/02 12:02:35
opening brace { should be on the new line.
hub
2017/10/03 19:33:14
Done.
| |
| 260 if (specificOnly && activeFilter->IsGeneric() && | 174 if (specificOnly && activeFilter->IsGeneric() && |
| 261 !(activeFilter->mType != Filter::Type::WHITELIST)) | 175 (activeFilter->mType != Filter::Type::WHITELIST)) |
| 262 continue; | 176 continue; |
| 263 | 177 |
| 264 auto reFilter = static_cast<RegExpFilter*>(activeFilter); | 178 auto reFilter = static_cast<RegExpFilter*>(activeFilter); |
| 265 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) | 179 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) |
| 266 return filter; | 180 return filter; |
| 267 } | 181 } |
| 182 | |
| 268 return FilterPtr(); | 183 return FilterPtr(); |
| 269 } | 184 } |
| 270 | 185 |
| 271 Filter* Matcher::MatchesAny(const String& location, | 186 const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000; |
| 187 | |
| 188 CombinedMatcher::CombinedMatcher() | |
| 189 : mResultCache(1024), mMatchReId(-1) | |
| 190 { | |
| 191 mMatchReId = GenerateRegExp(matchRegExp, true, true); | |
| 192 } | |
| 193 | |
| 194 void CombinedMatcher::Add(Filter& filter) | |
| 195 { | |
| 196 GetMatcher(filter).Add(filter); | |
| 197 ResetCache(); | |
| 198 } | |
| 199 | |
| 200 void CombinedMatcher::Remove(Filter& filter) | |
| 201 { | |
| 202 GetMatcher(filter).Remove(filter); | |
| 203 ResetCache(); | |
| 204 } | |
| 205 | |
| 206 void CombinedMatcher::Clear() | |
| 207 { | |
| 208 mBlacklist.Clear(); | |
| 209 mWhitelist.Clear(); | |
| 210 ResetCache(); | |
| 211 } | |
| 212 | |
| 213 bool CombinedMatcher::HasFilter(const Filter& filter) const | |
| 214 { | |
| 215 return GetMatcher(filter).HasFilter(filter); | |
| 216 } | |
| 217 | |
| 218 const String& CombinedMatcher::GetKeywordForFilter(const Filter& filter) const | |
| 219 { | |
| 220 return GetMatcher(filter).GetKeywordForFilter(filter); | |
| 221 } | |
| 222 | |
| 223 Filter* CombinedMatcher::MatchesAny(const String& location, | |
| 272 int typeMask, DependentString& docDomain, bool thirdParty, | 224 int typeMask, DependentString& docDomain, bool thirdParty, |
| 273 const String& sitekey, bool specificOnly) | 225 const String& sitekey, bool specificOnly) |
| 274 { | 226 { |
| 275 ReMatchResults reResult; | 227 OwnedString key(location); |
| 276 auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true); | 228 key.append(u" "_str); |
| 229 key.append(typeMask); | |
| 230 key.append(u" "_str); | |
| 231 key.append(docDomain); | |
| 232 key.append(u" "_str); | |
| 233 key.append(thirdParty); | |
| 234 key.append(u" "_str); | |
| 235 key.append(sitekey); | |
| 236 key.append(u" "_str); | |
| 237 key.append(specificOnly); | |
| 238 | |
| 239 FilterPtr result; | |
| 240 | |
| 241 auto cachedResult = mResultCache.find(key); | |
| 242 if (cachedResult) | |
| 243 result = cachedResult->second.filter(); | |
| 244 else | |
| 245 { | |
| 246 result = MatchesAnyInternal(location, typeMask, docDomain, | |
| 247 thirdParty, sitekey, specificOnly); | |
| 248 | |
| 249 if (mResultCache.size() >= MAX_CACHE_ENTRIES) | |
| 250 ResetCache(); | |
| 251 | |
| 252 CacheEntry cache(std::move(key), result); | |
| 253 mResultCache[cache.key()] = cache; | |
| 254 } | |
| 255 | |
| 256 return result.release(); | |
| 257 } | |
| 258 | |
| 259 OwnedString CombinedMatcher::FindKeyword(const Filter& filter) const | |
| 260 { | |
| 261 return GetMatcher(filter).FindKeyword(filter); | |
| 262 } | |
| 263 | |
| 264 void CombinedMatcher::ResetCache() | |
| 265 { | |
| 266 mResultCache.clear(); | |
| 267 } | |
| 268 | |
| 269 FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location, | |
| 270 int typeMask, DependentString& docDomain, bool thirdParty, | |
| 271 const String& sitekey, bool specificOnly) const | |
| 272 { | |
| 277 OwnedString text(location); | 273 OwnedString text(location); |
| 278 text.toLower(); | 274 text.toLower(); |
| 279 MatchRegExp(re_id, text, &reResult); | 275 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); |
| 280 auto& candidates = reResult.candidates; | 276 text.match(mMatchReId, *reResult); |
| 277 | |
| 278 auto& candidates = reResult->candidates; | |
| 281 candidates.push_back(OwnedString()); | 279 candidates.push_back(OwnedString()); |
| 280 | |
| 281 FilterPtr blacklistHit; | |
| 282 for (auto substr : candidates) | 282 for (auto substr : candidates) |
| 283 if (mFilterByKeyword.find(substr)) | 283 { |
| 284 { | 284 auto result = mWhitelist.CheckEntryMatch( |
| 285 auto result = CheckEntryMatch(substr, location, typeMask, docDomain, | 285 substr, location, typeMask, docDomain, thirdParty, sitekey, false); |
| 286 thirdParty, sitekey, specificOnly); | 286 if (result) |
| 287 if (result) | 287 return result; |
| 288 { | 288 |
| 289 result->AddRef(); | 289 if (!blacklistHit) |
| 290 return result.get(); | 290 blacklistHit = mBlacklist.CheckEntryMatch( |
|
sergei
2017/10/02 12:02:36
just return `result.release();`
hub
2017/10/03 19:33:12
Done.
| |
| 291 } | 291 substr, location, typeMask, docDomain, thirdParty, sitekey, |
| 292 } | 292 specificOnly); |
| 293 | 293 } |
| 294 return nullptr; | 294 return blacklistHit; |
| 295 } | 295 } |
| LEFT | RIGHT |