| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| (Empty) | |
| 1 /* | |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | |
| 3 * Copyright (C) 2006-present eyeo GmbH | |
| 4 * | |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | |
| 6 * it under the terms of the GNU General Public License version 3 as | |
| 7 * published by the Free Software Foundation. | |
| 8 * | |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 12 * GNU General Public License for more details. | |
| 13 * | |
| 14 * You should have received a copy of the GNU General Public License | |
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
| 16 */ | |
| 17 | |
| 18 #include "Matcher.h" | |
| 19 #include "RegExpFilter.h" | |
| 20 #include "../library.h" | |
| 21 | |
| 22 namespace { | |
| 23 const DependentString regexpRegExp = | |
| 24 u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_s tr; | |
| 25 const DependentString optionsRegExp = | |
| 26 u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str; | |
| 27 const DependentString candidateRegExp = | |
| 28 u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str; | |
| 29 const DependentString matchRegExp = u"[a-z0-9%]{3,}"_str; | |
| 30 } | |
| 31 | |
| 32 Matcher::Matcher() | |
| 33 : mFilterByKeyword(1024), mKeywordByFilter(1024), | |
| 34 mReId(-1), mOptionsReId(-1), mCandidatesReId(-1) | |
| 35 { | |
| 36 mReId = GenerateRegExp(regexpRegExp, true, false); | |
| 37 mOptionsReId = GenerateRegExp(optionsRegExp, true, false); | |
| 38 mCandidatesReId = GenerateRegExp(candidateRegExp, true, true); | |
| 39 mMatchReId = GenerateRegExp(matchRegExp, true, true); | |
| 40 } | |
| 41 | |
| 42 void Matcher::Add(Filter& filter) | |
| 43 { | |
| 44 if (mKeywordByFilter.find(filter.GetText())) | |
| 45 return; | |
| 46 | |
| 47 auto keyword = FindKeyword(filter); | |
| 48 | |
| 49 mFilterByKeyword[keyword].push_back(FilterPtr(&filter)); | |
|
sergei
2017/10/11 09:55:16
Although the review is already closed I think it's
| |
| 50 mKeywordByFilter[filter.GetText()] = | |
| 51 FilterKeyword(std::move(keyword), filter); | |
| 52 } | |
| 53 | |
| 54 void Matcher::Remove(Filter& filter) | |
| 55 { | |
| 56 auto entry = mKeywordByFilter.find(filter.GetText()); | |
| 57 if (!entry) | |
| 58 return; | |
| 59 | |
| 60 auto& keyword = static_cast<const String&>(entry->second); | |
| 61 auto list = mFilterByKeyword[keyword]; | |
| 62 if (list.size() == 1) | |
| 63 mFilterByKeyword.erase(keyword); | |
| 64 else | |
| 65 list.erase(std::find(list.cbegin(), list.cend(), FilterPtr(&filter))); | |
| 66 | |
| 67 mKeywordByFilter.erase(filter.GetText()); | |
| 68 } | |
| 69 | |
| 70 void Matcher::Clear() | |
| 71 { | |
| 72 mFilterByKeyword.clear(); | |
| 73 mKeywordByFilter.clear(); | |
| 74 } | |
| 75 | |
| 76 bool Matcher::HasFilter(const Filter& filter) const | |
| 77 { | |
| 78 return mKeywordByFilter.find(filter.GetText()); | |
| 79 } | |
| 80 | |
| 81 namespace | |
| 82 { | |
| 83 DependentString emptyString = u""_str; | |
| 84 } | |
| 85 | |
| 86 const String& Matcher::GetKeywordForFilter(const Filter& filter) const | |
| 87 { | |
| 88 auto entry = mKeywordByFilter.find(filter.GetText()); | |
| 89 if (entry) | |
| 90 return static_cast<const String&>(entry->second); | |
| 91 return emptyString; | |
| 92 } | |
| 93 | |
| 94 Filter* Matcher::MatchesAny(const String& location, | |
| 95 int typeMask, DependentString& docDomain, bool thirdParty, | |
| 96 const String& sitekey, bool specificOnly) const | |
| 97 { | |
| 98 OwnedString text(location); | |
| 99 text.toLower(); | |
| 100 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); | |
| 101 if (text.match(mMatchReId, *reResult)) | |
| 102 { | |
| 103 auto& candidates = reResult->candidates; | |
| 104 candidates.push_back(OwnedString()); | |
| 105 for (auto candidate : candidates) | |
| 106 { | |
| 107 auto result = CheckEntryMatch(candidate, location, typeMask, docDomain, | |
| 108 thirdParty, sitekey, specificOnly); | |
| 109 if (result) | |
| 110 return result.release(); | |
| 111 } | |
| 112 } | |
|
Wladimir Palant
2017/10/09 08:39:47
As mentioned in the issue description, we should n
sergei
2017/10/09 15:27:53
Although it merely converts the existing JS code a
Wladimir Palant
2017/10/10 07:39:05
I strongly disagree. Landing crappy code is always
| |
| 113 return nullptr; | |
| 114 } | |
| 115 | |
| 116 OwnedString Matcher::FindKeyword(const Filter& filter) const | |
| 117 { | |
| 118 OwnedString result; | |
| 119 OwnedString text(filter.GetText()); | |
| 120 if (TestRegExp(mReId, text)) | |
| 121 return result; | |
| 122 | |
| 123 // Remove options | |
| 124 auto index = ExecRegExp(mOptionsReId, text); | |
| 125 if (index != String::npos) | |
| 126 text = DependentString(text, 0, index); | |
| 127 | |
| 128 // Remove whitelist marker | |
| 129 if (text.length() >= 2 && text[0] == '@' && text[1] == '@') | |
| 130 text = DependentString(text, 2); | |
| 131 | |
| 132 text.toLower(); | |
| 133 intrusive_ptr<ReMatchResults> keywords(new ReMatchResults, false); | |
| 134 auto match = text.match(mCandidatesReId, *keywords); | |
| 135 if (!match) | |
| 136 return result; | |
| 137 | |
| 138 auto& candidates = keywords->candidates; | |
| 139 | |
| 140 uint32_t resultCount = 0xffffff; | |
| 141 uint32_t resultLength = 0; | |
| 142 for (auto substr : candidates) | |
| 143 { | |
| 144 if (substr.empty()) | |
| 145 continue; | |
| 146 | |
| 147 auto candidate = DependentString(substr, 1); | |
| 148 auto entry = mFilterByKeyword.find(candidate); | |
| 149 auto count = entry ? entry->second.size() : 0; | |
| 150 if (count < resultCount || | |
| 151 (count == resultCount && candidate.length() > resultLength)) | |
| 152 { | |
| 153 result = candidate; | |
| 154 resultCount = count; | |
| 155 resultLength = candidate.length(); | |
| 156 } | |
| 157 } | |
| 158 return result; | |
| 159 } | |
| 160 | |
| 161 FilterPtr Matcher::CheckEntryMatch(const String& keyword, | |
| 162 const String& location, | |
| 163 int typeMask, DependentString& docDomain, bool thirdParty, | |
| 164 const String& sitekey, bool specificOnly) const | |
| 165 { | |
| 166 auto entry = mFilterByKeyword.find(keyword); | |
| 167 if (!entry) | |
| 168 return FilterPtr(); | |
| 169 | |
| 170 auto filters = entry->second; | |
| 171 for (auto filter : filters) | |
| 172 { | |
| 173 auto activeFilter = static_cast<ActiveFilter*>(filter.get()); | |
| 174 if (specificOnly && activeFilter->IsGeneric() && | |
| 175 (activeFilter->mType != Filter::Type::WHITELIST)) | |
| 176 continue; | |
| 177 | |
| 178 auto reFilter = static_cast<RegExpFilter*>(activeFilter); | |
| 179 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) | |
| 180 return filter; | |
| 181 } | |
| 182 | |
| 183 return FilterPtr(); | |
| 184 } | |
| 185 | |
| 186 const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000; | |
| 187 | |
| 188 CombinedMatcher::CombinedMatcher() | |
| 189 : mResultCache(1024), mMatchReId(-1) | |
| 190 { | |
| 191 mMatchReId = GenerateRegExp(matchRegExp, true, true); | |
| 192 } | |
| 193 | |
| 194 void CombinedMatcher::Add(Filter& filter) | |
| 195 { | |
| 196 GetMatcher(filter).Add(filter); | |
| 197 ResetCache(); | |
| 198 } | |
| 199 | |
| 200 void CombinedMatcher::Remove(Filter& filter) | |
| 201 { | |
| 202 GetMatcher(filter).Remove(filter); | |
| 203 ResetCache(); | |
| 204 } | |
| 205 | |
| 206 void CombinedMatcher::Clear() | |
| 207 { | |
| 208 mBlacklist.Clear(); | |
| 209 mWhitelist.Clear(); | |
| 210 ResetCache(); | |
| 211 } | |
| 212 | |
| 213 bool CombinedMatcher::HasFilter(const Filter& filter) const | |
| 214 { | |
| 215 return GetMatcher(filter).HasFilter(filter); | |
| 216 } | |
| 217 | |
| 218 const String& CombinedMatcher::GetKeywordForFilter(const Filter& filter) const | |
| 219 { | |
| 220 return GetMatcher(filter).GetKeywordForFilter(filter); | |
| 221 } | |
| 222 | |
| 223 Filter* CombinedMatcher::MatchesAny(const String& location, | |
| 224 int typeMask, DependentString& docDomain, bool thirdParty, | |
| 225 const String& sitekey, bool specificOnly) | |
| 226 { | |
| 227 OwnedString key(location); | |
| 228 key.append(u" "_str); | |
| 229 key.append(typeMask); | |
| 230 key.append(u" "_str); | |
| 231 key.append(docDomain); | |
| 232 key.append(u" "_str); | |
| 233 key.append(thirdParty); | |
| 234 key.append(u" "_str); | |
| 235 key.append(sitekey); | |
| 236 key.append(u" "_str); | |
| 237 key.append(specificOnly); | |
| 238 | |
| 239 FilterPtr result; | |
| 240 | |
| 241 auto cachedResult = mResultCache.find(key); | |
| 242 if (cachedResult) | |
| 243 result = cachedResult->second.filter(); | |
| 244 else | |
| 245 { | |
| 246 result = MatchesAnyInternal(location, typeMask, docDomain, | |
| 247 thirdParty, sitekey, specificOnly); | |
| 248 | |
| 249 if (mResultCache.size() >= MAX_CACHE_ENTRIES) | |
| 250 ResetCache(); | |
| 251 | |
| 252 CacheEntry cache(std::move(key), result); | |
| 253 mResultCache[cache.key()] = cache; | |
| 254 } | |
| 255 | |
| 256 return result.release(); | |
| 257 } | |
| 258 | |
| 259 OwnedString CombinedMatcher::FindKeyword(const Filter& filter) const | |
| 260 { | |
| 261 return GetMatcher(filter).FindKeyword(filter); | |
| 262 } | |
| 263 | |
| 264 void CombinedMatcher::ResetCache() | |
| 265 { | |
| 266 mResultCache.clear(); | |
| 267 } | |
| 268 | |
| 269 FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location, | |
| 270 int typeMask, DependentString& docDomain, bool thirdParty, | |
| 271 const String& sitekey, bool specificOnly) const | |
| 272 { | |
| 273 OwnedString text(location); | |
| 274 text.toLower(); | |
| 275 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); | |
| 276 text.match(mMatchReId, *reResult); | |
| 277 | |
| 278 auto& candidates = reResult->candidates; | |
| 279 candidates.push_back(OwnedString()); | |
| 280 | |
| 281 FilterPtr blacklistHit; | |
| 282 for (auto substr : candidates) | |
| 283 { | |
| 284 auto result = mWhitelist.CheckEntryMatch( | |
| 285 substr, location, typeMask, docDomain, thirdParty, sitekey, false); | |
| 286 if (result) | |
| 287 return result; | |
| 288 | |
| 289 if (!blacklistHit) | |
| 290 blacklistHit = mBlacklist.CheckEntryMatch( | |
| 291 substr, location, typeMask, docDomain, thirdParty, sitekey, | |
| 292 specificOnly); | |
| 293 } | |
| 294 return blacklistHit; | |
| 295 } | |
| OLD | NEW |