| Left: | ||
| Right: | 
| LEFT | RIGHT | 
|---|---|
| 1 /* | 1 /* | 
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 
| 3 * Copyright (C) 2006-present eyeo GmbH | 3 * Copyright (C) 2006-present eyeo GmbH | 
| 4 * | 4 * | 
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify | 
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as | 
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. | 
| 8 * | 8 * | 
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, | 
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
| 12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. | 
| 13 * | 13 * | 
| 14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License | 
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 
| 16 */ | 16 */ | 
| 17 | 17 | 
| 18 #include "Matcher.h" | 18 #include "Matcher.h" | 
| 19 #include "RegExpFilter.h" | 19 #include "RegExpFilter.h" | 
| 20 #include "../library.h" | 20 #include "../library.h" | 
| 21 | 21 | 
| 22 namespace { | 22 namespace { | 
| 23 const DependentString regexpRegExp = | 23 const DependentString regexpRegExp = | 
| 24 u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_str; | 24 u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_str; | 
| 25 const DependentString optionsRegExp = | 25 const DependentString optionsRegExp = | 
| 26 u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str; | 26 u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str; | 
| 27 const DependentString candidateRegExp = | 27 const DependentString candidateRegExp = | 
| 28 u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str; | 28 u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str; | 
| 29 const DependentString matchRegExp = u"[a-z0-9%]{3,}"_str; | |
| 30 } | |
| 31 | |
| 32 Matcher::Matcher() | |
| 33 : mFilterByKeyword(1024), mKeywordByFilter(1024), | |
| 34 mReId(-1), mOptionsReId(-1), mCandidatesReId(-1) | |
| 35 { | |
| 36 mReId = GenerateRegExp(regexpRegExp, true, false); | |
| 37 mOptionsReId = GenerateRegExp(optionsRegExp, true, false); | |
| 38 mCandidatesReId = GenerateRegExp(candidateRegExp, true, true); | |
| 39 mMatchReId = GenerateRegExp(matchRegExp, true, true); | |
| 29 } | 40 } | 
| 30 | 41 | 
| 31 void Matcher::Add(Filter& filter) | 42 void Matcher::Add(Filter& filter) | 
| 32 { | 43 { | 
| 33 if (mKeywordByFilter.find(filter.GetText())) | 44 if (mKeywordByFilter.find(filter.GetText())) | 
| 34 return; | 45 return; | 
| 35 | 46 | 
| 36 auto keyword = FindKeyword(filter); | 47 auto keyword = FindKeyword(filter); | 
| 37 | 48 | 
| 38 mFilterByKeyword[keyword].push_back(FilterPtr(&filter)); | 49 mFilterByKeyword[keyword].push_back(FilterPtr(&filter)); | 
| 
 
sergei
2017/10/11 09:55:16
Although the review is already closed I think it's
 
 | |
| 39 mKeywordByFilter[filter.GetText()] = keyword; | 50 mKeywordByFilter[filter.GetText()] = | 
| 51 FilterKeyword(std::move(keyword), filter); | |
| 40 } | 52 } | 
| 41 | 53 | 
| 42 void Matcher::Remove(Filter& filter) | 54 void Matcher::Remove(Filter& filter) | 
| 43 { | 55 { | 
| 44 auto entry = mKeywordByFilter.find(filter.GetText()); | 56 auto entry = mKeywordByFilter.find(filter.GetText()); | 
| 45 if (!entry) | 57 if (!entry) | 
| 46 return; | 58 return; | 
| 47 | 59 | 
| 48 auto keyword = entry->second; | 60 auto& keyword = static_cast<const String&>(entry->second); | 
| 49 auto list = mFilterByKeyword[keyword]; | 61 auto list = mFilterByKeyword[keyword]; | 
| 50 if (list.size() == 1) | 62 if (list.size() == 1) | 
| 51 mFilterByKeyword.erase(keyword); | 63 mFilterByKeyword.erase(keyword); | 
| 52 else | 64 else | 
| 53 list.erase(std::find(list.cbegin(), list.cend(), FilterPtr(&filter))); | 65 list.erase(std::find(list.cbegin(), list.cend(), FilterPtr(&filter))); | 
| 54 | 66 | 
| 55 mKeywordByFilter.erase(filter.GetText()); | 67 mKeywordByFilter.erase(filter.GetText()); | 
| 56 } | 68 } | 
| 57 | 69 | 
| 58 void Matcher::Clear() | 70 void Matcher::Clear() | 
| 59 { | 71 { | 
| 60 mFilterByKeyword.clear(); | 72 mFilterByKeyword.clear(); | 
| 61 mKeywordByFilter.clear(); | 73 mKeywordByFilter.clear(); | 
| 62 } | 74 } | 
| 63 | 75 | 
| 64 bool Matcher::HasFilter(const Filter& filter) const | 76 bool Matcher::HasFilter(const Filter& filter) const | 
| 65 { | 77 { | 
| 66 return mKeywordByFilter.find(filter.GetText()); | 78 return mKeywordByFilter.find(filter.GetText()); | 
| 67 } | 79 } | 
| 68 | 80 | 
| 69 namespace | 81 namespace | 
| 70 { | 82 { | 
| 71 DependentString emptyString = u""_str; | 83 DependentString emptyString = u""_str; | 
| 72 } | 84 } | 
| 73 | 85 | 
| 74 const String& Matcher::GetKeywordForFilter(const Filter& filter) const | 86 const String& Matcher::GetKeywordForFilter(const Filter& filter) const | 
| 75 { | 87 { | 
| 76 auto entry = mKeywordByFilter.find(filter.GetText()); | 88 auto entry = mKeywordByFilter.find(filter.GetText()); | 
| 77 if (entry) | 89 if (entry) | 
| 78 return entry->second; | 90 return static_cast<const String&>(entry->second); | 
| 79 return emptyString; | 91 return emptyString; | 
| 80 } | 92 } | 
| 81 | 93 | 
| 82 Filter* Matcher::MatchesAny(const String& location, | 94 Filter* Matcher::MatchesAny(const String& location, | 
| 83 int typeMask, DependentString& docDomain, bool thirdParty, | 95 int typeMask, DependentString& docDomain, bool thirdParty, | 
| 84 const String& sitekey, bool specificOnly) const | 96 const String& sitekey, bool specificOnly) const | 
| 85 { | 97 { | 
| 86 auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true); | |
| 87 OwnedString text(location); | 98 OwnedString text(location); | 
| 88 text.toLower(); | 99 text.toLower(); | 
| 89 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); | 100 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); | 
| 90 MatchRegExp(re_id, text, reResult.get()); | 101 if (text.match(mMatchReId, *reResult)) | 
| 91 auto& candidates = reResult->candidates; | 102 { | 
| 92 candidates.push_back(OwnedString()); | 103 auto& candidates = reResult->candidates; | 
| 93 for (auto substr : candidates) | 104 candidates.push_back(OwnedString()); | 
| 94 { | 105 for (auto candidate : candidates) | 
| 95 if (mFilterByKeyword.find(substr)) | |
| 96 { | 106 { | 
| 97 auto result = CheckEntryMatch(substr, location, typeMask, docDomain, | 107 auto result = CheckEntryMatch(candidate, location, typeMask, docDomain, | 
| 98 thirdParty, sitekey, specificOnly); | 108 thirdParty, sitekey, specificOnly); | 
| 99 if (result) | 109 if (result) | 
| 100 return result.release(); | 110 return result.release(); | 
| 101 } | 111 } | 
| 102 } | 112 } | 
| 
 
Wladimir Palant
2017/10/09 08:39:47
As mentioned in the issue description, we should n
 
sergei
2017/10/09 15:27:53
Although it merely converts the existing JS code a
 
Wladimir Palant
2017/10/10 07:39:05
I strongly disagree. Landing crappy code is always
 
 | |
| 103 return nullptr; | 113 return nullptr; | 
| 104 } | 114 } | 
| 105 | 115 | 
| 106 OwnedString Matcher::FindKeyword(const Filter& filter) const | 116 OwnedString Matcher::FindKeyword(const Filter& filter) const | 
| 107 { | 117 { | 
| 108 OwnedString result(u""_str); | 118 OwnedString result; | 
| 109 OwnedString text(filter.GetText()); | 119 OwnedString text(filter.GetText()); | 
| 110 auto re_id = GenerateRegExp(regexpRegExp, true, false); | 120 if (TestRegExp(mReId, text)) | 
| 111 if (TestRegExp(re_id, text)) | |
| 112 return result; | 121 return result; | 
| 113 | 122 | 
| 114 // Remove options | 123 // Remove options | 
| 115 auto options_re_id = GenerateRegExp(optionsRegExp, true, false); | 124 auto index = ExecRegExp(mOptionsReId, text); | 
| 116 auto index = ExecRegExp(options_re_id, text); | |
| 117 if (index != String::npos) | 125 if (index != String::npos) | 
| 118 text = text.substr(0, index); | 126 text = DependentString(text, 0, index); | 
| 119 | 127 | 
| 120 // Remove whitelist marker | 128 // Remove whitelist marker | 
| 121 if (text.length() >= 2 && text[0] == '@' && text[1] == '@') | 129 if (text.length() >= 2 && text[0] == '@' && text[1] == '@') | 
| 122 text = text.substr(2); | 130 text = DependentString(text, 2); | 
| 123 | 131 | 
| 124 text.toLower(); | 132 text.toLower(); | 
| 125 intrusive_ptr<ReMatchResults> keywords(new ReMatchResults, false); | 133 intrusive_ptr<ReMatchResults> keywords(new ReMatchResults, false); | 
| 126 auto candidates_re_id = GenerateRegExp(candidateRegExp, true, true); | 134 auto match = text.match(mCandidatesReId, *keywords); | 
| 127 auto match = text.match(candidates_re_id, keywords.get()); | |
| 128 if (!match) | 135 if (!match) | 
| 129 return result; | 136 return result; | 
| 130 | 137 | 
| 131 auto& candidates = keywords->candidates; | 138 auto& candidates = keywords->candidates; | 
| 132 | 139 | 
| 133 auto& hash = mFilterByKeyword; | 140 uint32_t resultCount = 0xffffff; | 
| 134 uint32_t resultCount = 0xffffffff; | |
| 135 uint32_t resultLength = 0; | 141 uint32_t resultLength = 0; | 
| 136 for (auto substr : candidates) | 142 for (auto substr : candidates) | 
| 137 { | 143 { | 
| 138 if (substr.empty()) | 144 if (substr.empty()) | 
| 139 continue; | 145 continue; | 
| 140 | 146 | 
| 141 auto candidate = substr.substr(1); | 147 auto candidate = DependentString(substr, 1); | 
| 142 auto entry = hash.find(candidate); | 148 auto entry = mFilterByKeyword.find(candidate); | 
| 143 auto count = entry ? entry->second.size() : 0; | 149 auto count = entry ? entry->second.size() : 0; | 
| 144 if (count < resultCount || | 150 if (count < resultCount || | 
| 145 (count == resultCount && candidate.length() > resultLength)) | 151 (count == resultCount && candidate.length() > resultLength)) | 
| 146 { | 152 { | 
| 147 result = candidate; | 153 result = candidate; | 
| 148 resultCount = count; | 154 resultCount = count; | 
| 149 resultLength = candidate.length(); | 155 resultLength = candidate.length(); | 
| 150 } | 156 } | 
| 151 } | 157 } | 
| 152 return result; | 158 return result; | 
| 153 } | 159 } | 
| 154 | 160 | 
| 155 FilterPtr Matcher::CheckEntryMatch(const String& keyword, | 161 FilterPtr Matcher::CheckEntryMatch(const String& keyword, | 
| 156 const String& location, | 162 const String& location, | 
| 157 int typeMask, DependentString& docDomain, bool thirdParty, | 163 int typeMask, DependentString& docDomain, bool thirdParty, | 
| 158 const String& sitekey, bool specificOnly) const | 164 const String& sitekey, bool specificOnly) const | 
| 159 { | 165 { | 
| 160 auto entry = mFilterByKeyword.find(keyword); | 166 auto entry = mFilterByKeyword.find(keyword); | 
| 161 if (entry) | 167 if (!entry) | 
| 162 { | 168 return FilterPtr(); | 
| 
 
sergei
2017/10/04 08:54:33
Earlier return would be better here, in my opinion
 
hub
2017/10/06 13:49:19
Done.
 
 | |
| 163 auto list = entry->second; | 169 | 
| 164 for (auto filter : list) | 170 auto filters = entry->second; | 
| 165 { | 171 for (auto filter : filters) | 
| 166 auto activeFilter = static_cast<ActiveFilter*>(filter.get()); | 172 { | 
| 167 if (specificOnly && activeFilter->IsGeneric() && | 173 auto activeFilter = static_cast<ActiveFilter*>(filter.get()); | 
| 168 !(activeFilter->mType != Filter::Type::WHITELIST)) | 174 if (specificOnly && activeFilter->IsGeneric() && | 
| 169 continue; | 175 (activeFilter->mType != Filter::Type::WHITELIST)) | 
| 170 | 176 continue; | 
| 171 auto reFilter = static_cast<RegExpFilter*>(activeFilter); | 177 | 
| 172 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) | 178 auto reFilter = static_cast<RegExpFilter*>(activeFilter); | 
| 173 return filter; | 179 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) | 
| 174 } | 180 return filter; | 
| 175 } | 181 } | 
| 182 | |
| 176 return FilterPtr(); | 183 return FilterPtr(); | 
| 177 } | 184 } | 
| 178 | 185 | 
| 179 const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000; | 186 const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000; | 
| 180 | 187 | 
| 188 CombinedMatcher::CombinedMatcher() | |
| 189 : mResultCache(1024), mMatchReId(-1) | |
| 190 { | |
| 191 mMatchReId = GenerateRegExp(matchRegExp, true, true); | |
| 192 } | |
| 193 | |
| 181 void CombinedMatcher::Add(Filter& filter) | 194 void CombinedMatcher::Add(Filter& filter) | 
| 182 { | 195 { | 
| 183 if (filter.mType == Filter::Type::WHITELIST) | 196 GetMatcher(filter).Add(filter); | 
| 184 mWhitelist.Add(filter); | |
| 185 else | |
| 186 mBlacklist.Add(filter); | |
| 187 | |
| 188 ResetCache(); | 197 ResetCache(); | 
| 189 } | 198 } | 
| 190 | 199 | 
| 191 void CombinedMatcher::Remove(Filter& filter) | 200 void CombinedMatcher::Remove(Filter& filter) | 
| 192 { | 201 { | 
| 193 if (filter.mType == Filter::Type::WHITELIST) | 202 GetMatcher(filter).Remove(filter); | 
| 194 mWhitelist.Remove(filter); | |
| 195 else | |
| 196 mBlacklist.Remove(filter); | |
| 197 | |
| 198 ResetCache(); | 203 ResetCache(); | 
| 199 } | 204 } | 
| 200 | 205 | 
| 201 void CombinedMatcher::Clear() | 206 void CombinedMatcher::Clear() | 
| 202 { | 207 { | 
| 203 mBlacklist.Clear(); | 208 mBlacklist.Clear(); | 
| 204 mWhitelist.Clear(); | 209 mWhitelist.Clear(); | 
| 205 ResetCache(); | 210 ResetCache(); | 
| 206 } | 211 } | 
| 207 | 212 | 
| 208 bool CombinedMatcher::HasFilter(const Filter& filter) const | 213 bool CombinedMatcher::HasFilter(const Filter& filter) const | 
| 209 { | 214 { | 
| 210 return filter.mType == Filter::Type::WHITELIST ? | 215 return GetMatcher(filter).HasFilter(filter); | 
| 211 mWhitelist.HasFilter(filter) : mBlacklist.HasFilter(filter); | |
| 212 } | 216 } | 
| 213 | 217 | 
| 214 const String& CombinedMatcher::GetKeywordForFilter(const Filter& filter) const | 218 const String& CombinedMatcher::GetKeywordForFilter(const Filter& filter) const | 
| 215 { | 219 { | 
| 216 return filter.mType == Filter::Type::WHITELIST ? | 220 return GetMatcher(filter).GetKeywordForFilter(filter); | 
| 217 mWhitelist.GetKeywordForFilter(filter) : mBlacklist.GetKeywordForFilter(filter); | |
| 218 } | 221 } | 
| 219 | 222 | 
| 220 Filter* CombinedMatcher::MatchesAny(const String& location, | 223 Filter* CombinedMatcher::MatchesAny(const String& location, | 
| 221 int typeMask, DependentString& docDomain, bool thirdParty, | 224 int typeMask, DependentString& docDomain, bool thirdParty, | 
| 222 const String& sitekey, bool specificOnly) | 225 const String& sitekey, bool specificOnly) | 
| 223 { | 226 { | 
| 224 OwnedString key(location); | 227 OwnedString key(location); | 
| 225 key.append(u" "_str); | 228 key.append(u" "_str); | 
| 226 key.append(typeMask); | 229 key.append(typeMask); | 
| 227 key.append(u" "_str); | 230 key.append(u" "_str); | 
| 228 key.append(docDomain); | 231 key.append(docDomain); | 
| 229 key.append(u" "_str); | 232 key.append(u" "_str); | 
| 230 key.append(thirdParty); | 233 key.append(thirdParty); | 
| 231 key.append(u" "_str); | 234 key.append(u" "_str); | 
| 232 key.append(sitekey); | 235 key.append(sitekey); | 
| 233 key.append(u" "_str); | 236 key.append(u" "_str); | 
| 234 key.append(specificOnly); | 237 key.append(specificOnly); | 
| 235 | 238 | 
| 236 FilterPtr result; | 239 FilterPtr result; | 
| 237 | 240 | 
| 238 auto cachedResult = mResultCache.find(key); | 241 auto cachedResult = mResultCache.find(key); | 
| 239 if (cachedResult) | 242 if (cachedResult) | 
| 240 result = cachedResult->second; | 243 result = cachedResult->second.filter(); | 
| 241 else | 244 else | 
| 242 { | 245 { | 
| 243 result = MatchesAnyInternal(location, typeMask, docDomain, | 246 result = MatchesAnyInternal(location, typeMask, docDomain, | 
| 244 thirdParty, sitekey, specificOnly); | 247 thirdParty, sitekey, specificOnly); | 
| 245 | 248 | 
| 246 if (mResultCache.size() >= MAX_CACHE_ENTRIES) | 249 if (mResultCache.size() >= MAX_CACHE_ENTRIES) | 
| 247 ResetCache(); | 250 ResetCache(); | 
| 248 | 251 | 
| 249 mResultCache[key] = result; | 252 CacheEntry cache(std::move(key), result); | 
| 253 mResultCache[cache.key()] = cache; | |
| 250 } | 254 } | 
| 251 | 255 | 
| 252 return result.release(); | 256 return result.release(); | 
| 253 } | 257 } | 
| 254 | 258 | 
| 255 OwnedString CombinedMatcher::FindKeyword(const Filter& filter) const | 259 OwnedString CombinedMatcher::FindKeyword(const Filter& filter) const | 
| 256 { | 260 { | 
| 257 return filter.mType == Filter::Type::WHITELIST ? | 261 return GetMatcher(filter).FindKeyword(filter); | 
| 258 mWhitelist.FindKeyword(filter) : mBlacklist.FindKeyword(filter); | |
| 259 } | 262 } | 
| 260 | 263 | 
| 261 void CombinedMatcher::ResetCache() | 264 void CombinedMatcher::ResetCache() | 
| 262 { | 265 { | 
| 263 mResultCache.clear(); | 266 mResultCache.clear(); | 
| 264 } | 267 } | 
| 265 | 268 | 
| 266 FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location, | 269 FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location, | 
| 267 int typeMask, DependentString& docDomain, bool thirdParty, | 270 int typeMask, DependentString& docDomain, bool thirdParty, | 
| 268 const String& sitekey, bool specificOnly) const | 271 const String& sitekey, bool specificOnly) const | 
| 269 { | 272 { | 
| 270 OwnedString text(location); | 273 OwnedString text(location); | 
| 271 text.toLower(); | 274 text.toLower(); | 
| 272 auto match_re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true); | |
| 273 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); | 275 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); | 
| 274 text.match(match_re_id, reResult.get()); | 276 text.match(mMatchReId, *reResult); | 
| 275 | 277 | 
| 276 auto& candidates = reResult->candidates; | 278 auto& candidates = reResult->candidates; | 
| 277 candidates.push_back(OwnedString()); | 279 candidates.push_back(OwnedString()); | 
| 278 | 280 | 
| 279 FilterPtr blacklistHit; | 281 FilterPtr blacklistHit; | 
| 280 for (auto substr : candidates) | 282 for (auto substr : candidates) | 
| 281 { | 283 { | 
| 282 auto result = mWhitelist.CheckEntryMatch( | 284 auto result = mWhitelist.CheckEntryMatch( | 
| 283 substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly); | 285 substr, location, typeMask, docDomain, thirdParty, sitekey, false); | 
| 284 if (result) | 286 if (result) | 
| 285 return result; | 287 return result; | 
| 286 | 288 | 
| 287 if (!blacklistHit) | 289 if (!blacklistHit) | 
| 288 blacklistHit = mBlacklist.CheckEntryMatch( | 290 blacklistHit = mBlacklist.CheckEntryMatch( | 
| 289 substr, location, typeMask, docDomain, thirdParty, sitekey, | 291 substr, location, typeMask, docDomain, thirdParty, sitekey, | 
| 290 specificOnly); | 292 specificOnly); | 
| 291 } | 293 } | 
| 292 return blacklistHit; | 294 return blacklistHit; | 
| 293 } | 295 } | 
| LEFT | RIGHT |