 Issue 29556737:
  Issue 5141 - Convert filter match to C++  (Closed) 
  Base URL: https://hg.adblockplus.org/adblockpluscore/
    
  
    Issue 29556737:
  Issue 5141 - Convert filter match to C++  (Closed) 
  Base URL: https://hg.adblockplus.org/adblockpluscore/

| Left: | ||
| Right: | 
| OLD | NEW | 
|---|---|
| (Empty) | |
| 1 /* | |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | |
| 3 * Copyright (C) 2006-present eyeo GmbH | |
| 4 * | |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | |
| 6 * it under the terms of the GNU General Public License version 3 as | |
| 7 * published by the Free Software Foundation. | |
| 8 * | |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 12 * GNU General Public License for more details. | |
| 13 * | |
| 14 * You should have received a copy of the GNU General Public License | |
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
| 16 */ | |
| 17 | |
| 18 #include "Matcher.h" | |
| 19 #include "RegExpFilter.h" | |
| 20 #include "../library.h" | |
| 21 | |
| 22 class CombinedMatcher : public MatcherBase | |
| 23 { | |
| 24 private: | |
| 25 StringMap<Filter*> mResultCache; | |
| 
hub
2017/09/26 21:49:00
I wanted to use FilterPtr in there, but it didn't
 | |
| 26 static const size_t MAX_CACHE_ENTRIES = 1000; | |
| 27 int mMatchReId; | |
| 28 public: | |
| 29 Matcher mBlacklist; | |
| 30 Matcher mWhitelist; | |
| 31 | |
| 32 CombinedMatcher() | |
| 33 : mMatchReId(GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true)) | |
| 34 { | |
| 35 } | |
| 36 | |
| 37 ~CombinedMatcher() | |
| 38 { | |
| 39 DeleteRegExp(mMatchReId); | |
| 40 } | |
| 41 | |
| 42 void ResetCache() | |
| 43 { | |
| 44 for (auto filter : mResultCache) | |
| 45 filter.second->ReleaseRef(); | |
| 
hub
2017/09/26 21:49:00
See above: if we could have the FilterPtr as the d
 | |
| 46 mResultCache.clear(); | |
| 47 } | |
| 48 | |
| 49 void Add(const FilterPtr& filter) override | |
| 50 { | |
| 51 if (filter->mType == Filter::Type::WHITELIST) | |
| 52 mWhitelist.Add(filter); | |
| 53 else | |
| 54 mBlacklist.Add(filter); | |
| 55 | |
| 56 ResetCache(); | |
| 57 } | |
| 58 | |
| 59 void Remove(const FilterPtr& filter) override | |
| 60 { | |
| 61 if (filter->mType == Filter::Type::WHITELIST) | |
| 62 mWhitelist.Remove(filter); | |
| 63 else | |
| 64 mBlacklist.Remove(filter); | |
| 65 | |
| 66 ResetCache(); | |
| 67 } | |
| 68 | |
| 69 void Clear() override | |
| 70 { | |
| 71 mBlacklist.Clear(); | |
| 72 mWhitelist.Clear(); | |
| 73 ResetCache(); | |
| 74 } | |
| 75 | |
| 76 OwnedString FindKeyword(const FilterPtr& filter) override | |
| 77 { | |
| 78 if (filter->mType == Filter::Type::WHITELIST) | |
| 79 return mWhitelist.FindKeyword(filter); | |
| 80 return mBlacklist.FindKeyword(filter); | |
| 81 } | |
| 82 | |
| 83 bool HasFilter(const FilterPtr& filter) const override | |
| 84 { | |
| 85 if (filter->mType == Filter::Type::WHITELIST) | |
| 86 return mWhitelist.HasFilter(filter); | |
| 87 return mBlacklist.HasFilter(filter); | |
| 88 } | |
| 89 | |
| 90 const String& GetKeywordForFilter(const FilterPtr& filter) override | |
| 91 { | |
| 92 if (filter->mType == Filter::Type::WHITELIST) | |
| 93 return mWhitelist.GetKeywordForFilter(filter); | |
| 94 return mBlacklist.GetKeywordForFilter(filter); | |
| 95 } | |
| 96 | |
| 97 Filter* MatchesAnyInternal(const String& location, | |
| 98 int typeMask, DependentString& docDomain, bool thirdParty, | |
| 99 const String& sitekey, bool specificOnly) | |
| 100 { | |
| 101 ReMatchResults reResult; | |
| 102 OwnedString text(location); | |
| 103 text.toLower(); | |
| 104 text.match(mMatchReId, &reResult); | |
| 105 | |
| 106 auto& candidates = reResult.candidates; | |
| 107 candidates.push_back(OwnedString()); | |
| 108 | |
| 109 Filter* blacklistHit = nullptr; | |
| 110 for (size_t i = 0, l = candidates.size(); i < l; i++) | |
| 111 { | |
| 112 auto substr = candidates[i]; | |
| 113 if (mWhitelist.mFilterByKeyword.find(substr)) | |
| 114 { | |
| 115 auto result = mWhitelist._CheckEntryMatch( | |
| 116 substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly); | |
| 117 if (result) | |
| 118 return result; | |
| 119 } | |
| 120 if (mBlacklist.mFilterByKeyword.find(substr) && !blacklistHit) | |
| 121 { | |
| 122 blacklistHit = mBlacklist._CheckEntryMatch( | |
| 123 substr, location, typeMask, docDomain, thirdParty, sitekey, | |
| 124 specificOnly); | |
| 125 } | |
| 126 } | |
| 127 return blacklistHit; | |
| 128 } | |
| 129 | |
| 130 Filter* MatchesAny(const String& location, | |
| 131 int typeMask, DependentString& docDomain, bool thirdParty, | |
| 132 const String& sitekey, bool specificOnly) override | |
| 133 { | |
| 134 OwnedString key(location); | |
| 135 key.append(u" "_str); | |
| 136 key.append(typeMask); | |
| 137 key.append(u" "_str); | |
| 138 key.append(docDomain); | |
| 139 key.append(u" "_str); | |
| 140 key.append(thirdParty); | |
| 141 key.append(u" "_str); | |
| 142 key.append(sitekey); | |
| 143 key.append(u" "_str); | |
| 144 key.append(specificOnly); | |
| 145 | |
| 146 auto cachedResult = mResultCache.find(key); | |
| 147 if (cachedResult) | |
| 148 { | |
| 149 cachedResult->second->AddRef(); | |
| 150 return cachedResult->second; | |
| 151 } | |
| 152 | |
| 153 Filter* result = MatchesAnyInternal(location, typeMask, docDomain, | |
| 154 thirdParty, sitekey, specificOnly); | |
| 155 | |
| 156 if (mResultCache.size() >= MAX_CACHE_ENTRIES) | |
| 157 ResetCache(); | |
| 158 | |
| 159 result->AddRef(); | |
| 160 mResultCache[key] = result; | |
| 161 | |
| 162 result->AddRef(); | |
| 163 return result; | |
| 164 } | |
| 165 }; | |
| 166 | |
| 167 MatcherBase* MatcherBase::mInstance = new CombinedMatcher; | |
| 168 | |
| 169 Matcher::Matcher() | |
| 170 : mFilterReId(GenerateRegExp(DependentString(Filter::regexpRegExp), true, false)) | |
| 171 , mOptionsReId(GenerateRegExp(DependentString(Filter::optionsRegExp), true, false)) | |
| 172 , mCandidatesReId(GenerateRegExp(u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str, true, true)) | |
| 173 { | |
| 174 } | |
| 175 | |
| 176 Matcher::~Matcher() | |
| 177 { | |
| 178 DeleteRegExp(mFilterReId); | |
| 179 DeleteRegExp(mOptionsReId); | |
| 180 DeleteRegExp(mCandidatesReId); | |
| 181 } | |
| 182 | |
| 183 OwnedString Matcher::FindKeyword(const FilterPtr& filter) | |
| 184 { | |
| 185 OwnedString result(u""_str); | |
| 186 OwnedString text(filter->GetText()); | |
| 187 if (TestRegExp(mFilterReId, text)) | |
| 188 return result; | |
| 189 | |
| 190 // Remove options | |
| 191 auto index = ExecRegExp(mOptionsReId, text); | |
| 192 if (index != -1) | |
| 193 text = text.substr(0, index); | |
| 194 | |
| 195 // Remove whitelist marker | |
| 196 if (text[0] == '@' && text[1] == '@') | |
| 197 text = text.substr(2); | |
| 198 | |
| 199 text.toLower(); | |
| 200 ReMatchResults keywords; | |
| 201 auto match = text.match(mCandidatesReId, &keywords); | |
| 202 if (!match) | |
| 203 return result; | |
| 204 | |
| 205 auto& candidates = keywords.candidates; | |
| 206 | |
| 207 auto& hash = mFilterByKeyword; | |
| 208 uint32_t resultCount = 0xffffffff; | |
| 209 uint32_t resultLength = 0; | |
| 210 for (uint32_t i = 0, l = candidates.size(); i < l; i++) | |
| 211 { | |
| 212 auto candidate = DependentString(candidates[i]).substr(1); | |
| 213 auto count = (hash.find(candidate) ? hash[candidate].size() : 0); | |
| 214 if (count < resultCount || | |
| 215 (count == resultCount && candidate.length() > resultLength)) | |
| 216 { | |
| 217 result = candidate; | |
| 218 resultCount = count; | |
| 219 resultLength = candidate.length(); | |
| 220 } | |
| 221 } | |
| 222 | |
| 223 return result; | |
| 224 } | |
| 225 | |
| 226 void Matcher::Add(const FilterPtr& filter) | |
| 227 { | |
| 228 if (mKeywordByFilter.find(filter->GetText())) | |
| 229 return; | |
| 230 | |
| 231 auto keyword = FindKeyword(filter); | |
| 232 auto oldEntry = mFilterByKeyword.find(keyword); | |
| 233 if (!oldEntry) | |
| 234 mFilterByKeyword[keyword] = std::vector<FilterPtr>{filter}; | |
| 235 else | |
| 236 mFilterByKeyword[keyword].push_back(filter); | |
| 237 mKeywordByFilter[filter->GetText()] = keyword; | |
| 238 } | |
| 239 | |
| 240 void Matcher::Remove(const FilterPtr& filter) | |
| 241 { | |
| 242 if (!mKeywordByFilter.find(filter->GetText())) | |
| 243 return; | |
| 244 | |
| 245 auto keyword = mKeywordByFilter[filter->GetText()]; | |
| 246 auto list = mFilterByKeyword[keyword]; | |
| 247 if (list.size() == 1) | |
| 248 mFilterByKeyword.erase(keyword); | |
| 249 else | |
| 250 { | |
| 251 auto iter = std::find(list.cbegin(), list.cend(), filter); | |
| 252 list.erase(iter); | |
| 253 } | |
| 254 mKeywordByFilter.erase(filter->GetText()); | |
| 255 } | |
| 256 | |
| 257 void Matcher::Clear() | |
| 258 { | |
| 259 mFilterByKeyword.clear(); | |
| 260 mKeywordByFilter.clear(); | |
| 261 } | |
| 262 | |
| 263 bool Matcher::HasFilter(const FilterPtr& filter) const | |
| 264 { | |
| 265 return mKeywordByFilter.find(filter->GetText()); | |
| 266 } | |
| 267 | |
| 268 static DependentString emptyString = u""_str; | |
| 269 | |
| 270 const String& Matcher::GetKeywordForFilter(const FilterPtr& filter) | |
| 271 { | |
| 272 if (mKeywordByFilter.find(filter->GetText())) | |
| 273 return mKeywordByFilter[filter->GetText()]; | |
| 274 return emptyString; | |
| 275 } | |
| 276 | |
| 277 Filter* Matcher::_CheckEntryMatch(const String& keyword, | |
| 278 const String& location, | |
| 279 int typeMask, DependentString& docDomain, bool thirdParty, | |
| 280 const String& sitekey, bool specificOnly) | |
| 281 { | |
| 282 auto list = mFilterByKeyword[keyword]; | |
| 283 for (auto filter : list) { | |
| 284 auto activeFilter = static_cast<ActiveFilter*>(filter.get()); | |
| 
hub
2017/09/26 21:49:00
This is done without checking. And it is ugly. And
 
sergei
2017/10/02 12:02:33
Although we don't pass other filters here, what do
 | |
| 285 if (specificOnly && activeFilter->IsGeneric() && | |
| 286 !(activeFilter->mType != Filter::Type::WHITELIST)) | |
| 287 continue; | |
| 288 auto reFilter = static_cast<RegExpFilter*>(activeFilter); | |
| 
hub
2017/09/26 21:49:00
Similarly to the above: this is unchecked.
 | |
| 289 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) | |
| 290 { | |
| 291 return filter.get(); | |
| 292 } | |
| 293 } | |
| 294 return nullptr; | |
| 295 } | |
| 296 | |
| 297 Filter* Matcher::MatchesAny(const String& location, | |
| 298 int typeMask, DependentString& docDomain, bool thirdParty, | |
| 299 const String& sitekey, bool specificOnly) | |
| 300 { | |
| 301 ReMatchResults reResult; | |
| 302 auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true); | |
| 303 OwnedString text(location); | |
| 304 text.toLower(); | |
| 305 MatchRegExp(re_id, text, &reResult); | |
| 306 auto& candidates = reResult.candidates; | |
| 307 candidates.push_back(OwnedString()); | |
| 308 for (size_t i = 0, l = candidates.size(); i < l; i++) | |
| 309 { | |
| 310 auto substr = candidates[i]; | |
| 311 if (mFilterByKeyword.find(substr)) | |
| 312 { | |
| 313 auto result = _CheckEntryMatch(substr, location, typeMask, docDomain, | |
| 314 thirdParty, sitekey, specificOnly); | |
| 315 if (result) | |
| 316 { | |
| 317 result->AddRef(); | |
| 318 return result; | |
| 319 } | |
| 320 } | |
| 321 } | |
| 322 | |
| 323 return nullptr; | |
| 324 } | |
| OLD | NEW |