OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
| 3 * Copyright (C) 2006-present eyeo GmbH |
| 4 * |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify |
| 6 * it under the terms of the GNU General Public License version 3 as |
| 7 * published by the Free Software Foundation. |
| 8 * |
| 9 * Adblock Plus is distributed in the hope that it will be useful, |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 * GNU General Public License for more details. |
| 13 * |
| 14 * You should have received a copy of the GNU General Public License |
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 16 */ |
| 17 |
| 18 #include "Matcher.h" |
| 19 #include "RegExpFilter.h" |
| 20 #include "../library.h" |
| 21 |
| 22 class CombinedMatcher : public MatcherBase |
| 23 { |
| 24 private: |
| 25 StringMap<Filter*> mResultCache; |
| 26 static const size_t MAX_CACHE_ENTRIES = 1000; |
| 27 int mMatchReId; |
| 28 Matcher mBlacklist; |
| 29 Matcher mWhitelist; |
| 30 |
| 31 protected: |
| 32 OwnedString FindKeyword(const FilterPtr& filter) override |
| 33 { |
| 34 if (filter->mType == Filter::Type::WHITELIST) |
| 35 return mWhitelist.FindKeyword(filter); |
| 36 return mBlacklist.FindKeyword(filter); |
| 37 } |
| 38 |
| 39 public: |
| 40 CombinedMatcher() |
| 41 : mMatchReId(GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true)) |
| 42 { |
| 43 } |
| 44 |
| 45 ~CombinedMatcher() |
| 46 { |
| 47 DeleteRegExp(mMatchReId); |
| 48 } |
| 49 |
| 50 void ResetCache() |
| 51 { |
| 52 for (auto filter : mResultCache) |
| 53 filter.second->ReleaseRef(); |
| 54 mResultCache.clear(); |
| 55 } |
| 56 |
| 57 void Add(const FilterPtr& filter) override |
| 58 { |
| 59 if (filter->mType == Filter::Type::WHITELIST) |
| 60 mWhitelist.Add(filter); |
| 61 else |
| 62 mBlacklist.Add(filter); |
| 63 |
| 64 ResetCache(); |
| 65 } |
| 66 |
| 67 void Remove(const FilterPtr& filter) override |
| 68 { |
| 69 if (filter->mType == Filter::Type::WHITELIST) |
| 70 mWhitelist.Remove(filter); |
| 71 else |
| 72 mBlacklist.Remove(filter); |
| 73 |
| 74 ResetCache(); |
| 75 } |
| 76 |
| 77 void Clear() override |
| 78 { |
| 79 mBlacklist.Clear(); |
| 80 mWhitelist.Clear(); |
| 81 ResetCache(); |
| 82 } |
| 83 |
| 84 bool HasFilter(const FilterPtr& filter) const override |
| 85 { |
| 86 if (filter->mType == Filter::Type::WHITELIST) |
| 87 return mWhitelist.HasFilter(filter); |
| 88 return mBlacklist.HasFilter(filter); |
| 89 } |
| 90 |
| 91 const String& GetKeywordForFilter(const FilterPtr& filter) override |
| 92 { |
| 93 if (filter->mType == Filter::Type::WHITELIST) |
| 94 return mWhitelist.GetKeywordForFilter(filter); |
| 95 return mBlacklist.GetKeywordForFilter(filter); |
| 96 } |
| 97 |
| 98 private: |
| 99 Filter* MatchesAnyInternal(const String& location, |
| 100 int typeMask, DependentString& docDomain, bool thirdParty, |
| 101 const String& sitekey, bool specificOnly) |
| 102 { |
| 103 ReMatchResults reResult; |
| 104 OwnedString text(location); |
| 105 text.toLower(); |
| 106 text.match(mMatchReId, &reResult); |
| 107 |
| 108 auto& candidates = reResult.candidates; |
| 109 candidates.push_back(OwnedString()); |
| 110 |
| 111 Filter* blacklistHit = nullptr; |
| 112 for (auto substr : candidates) |
| 113 { |
| 114 if (mWhitelist.mFilterByKeyword.find(substr)) |
| 115 { |
| 116 auto result = mWhitelist.CheckEntryMatch( |
| 117 substr, location, typeMask, docDomain, thirdParty, sitekey, specificOn
ly); |
| 118 if (result) |
| 119 return result; |
| 120 } |
| 121 if (mBlacklist.mFilterByKeyword.find(substr) && !blacklistHit) |
| 122 { |
| 123 blacklistHit = mBlacklist.CheckEntryMatch( |
| 124 substr, location, typeMask, docDomain, thirdParty, sitekey, |
| 125 specificOnly); |
| 126 } |
| 127 } |
| 128 return blacklistHit; |
| 129 } |
| 130 |
| 131 public: |
| 132 Filter* MatchesAny(const String& location, |
| 133 int typeMask, DependentString& docDomain, bool thirdParty, |
| 134 const String& sitekey, bool specificOnly) override |
| 135 { |
| 136 OwnedString key(location); |
| 137 key.append(u" "_str); |
| 138 key.append(typeMask); |
| 139 key.append(u" "_str); |
| 140 key.append(docDomain); |
| 141 key.append(u" "_str); |
| 142 key.append(thirdParty); |
| 143 key.append(u" "_str); |
| 144 key.append(sitekey); |
| 145 key.append(u" "_str); |
| 146 key.append(specificOnly); |
| 147 |
| 148 auto cachedResult = mResultCache.find(key); |
| 149 if (cachedResult) |
| 150 { |
| 151 cachedResult->second->AddRef(); |
| 152 return cachedResult->second; |
| 153 } |
| 154 |
| 155 Filter* result = MatchesAnyInternal(location, typeMask, docDomain, |
| 156 thirdParty, sitekey, specificOnly); |
| 157 |
| 158 if (mResultCache.size() >= MAX_CACHE_ENTRIES) |
| 159 ResetCache(); |
| 160 |
| 161 result->AddRef(); |
| 162 mResultCache[key] = result; |
| 163 |
| 164 result->AddRef(); |
| 165 return result; |
| 166 } |
| 167 }; |
| 168 |
| 169 MatcherBase* MatcherBase::mInstance = new CombinedMatcher; |
| 170 |
| 171 Matcher::Matcher() |
| 172 : mFilterReId(GenerateRegExp(DependentString(Filter::regexpRegExp), true, fals
e)) |
| 173 , mOptionsReId(GenerateRegExp(DependentString(Filter::optionsRegExp), true, fa
lse)) |
| 174 , mCandidatesReId(GenerateRegExp(u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_st
r, true, true)) |
| 175 { |
| 176 } |
| 177 |
| 178 Matcher::~Matcher() |
| 179 { |
| 180 DeleteRegExp(mFilterReId); |
| 181 DeleteRegExp(mOptionsReId); |
| 182 DeleteRegExp(mCandidatesReId); |
| 183 } |
| 184 |
| 185 OwnedString Matcher::FindKeyword(const FilterPtr& filter) |
| 186 { |
| 187 OwnedString result(u""_str); |
| 188 OwnedString text(filter->GetText()); |
| 189 if (TestRegExp(mFilterReId, text)) |
| 190 return result; |
| 191 |
| 192 // Remove options |
| 193 auto index = ExecRegExp(mOptionsReId, text); |
| 194 if (index != -1) |
| 195 text = text.substr(0, index); |
| 196 |
| 197 // Remove whitelist marker |
| 198 if (text[0] == '@' && text[1] == '@') |
| 199 text = text.substr(2); |
| 200 |
| 201 text.toLower(); |
| 202 ReMatchResults keywords; |
| 203 auto match = text.match(mCandidatesReId, &keywords); |
| 204 if (!match) |
| 205 return result; |
| 206 |
| 207 auto& candidates = keywords.candidates; |
| 208 |
| 209 auto& hash = mFilterByKeyword; |
| 210 uint32_t resultCount = 0xffffffff; |
| 211 uint32_t resultLength = 0; |
| 212 for (auto substr : candidates) |
| 213 { |
| 214 auto candidate = DependentString(substr).substr(1); |
| 215 auto count = (hash.find(candidate) ? hash[candidate].size() : 0); |
| 216 if (count < resultCount || |
| 217 (count == resultCount && candidate.length() > resultLength)) |
| 218 { |
| 219 result = candidate; |
| 220 resultCount = count; |
| 221 resultLength = candidate.length(); |
| 222 } |
| 223 } |
| 224 |
| 225 return result; |
| 226 } |
| 227 |
| 228 void Matcher::Add(const FilterPtr& filter) |
| 229 { |
| 230 if (mKeywordByFilter.find(filter->GetText())) |
| 231 return; |
| 232 |
| 233 auto keyword = FindKeyword(filter); |
| 234 auto oldEntry = mFilterByKeyword.find(keyword); |
| 235 if (!oldEntry) |
| 236 mFilterByKeyword[keyword] = std::vector<FilterPtr>{filter}; |
| 237 else |
| 238 mFilterByKeyword[keyword].push_back(filter); |
| 239 mKeywordByFilter[filter->GetText()] = keyword; |
| 240 } |
| 241 |
| 242 void Matcher::Remove(const FilterPtr& filter) |
| 243 { |
| 244 if (!mKeywordByFilter.find(filter->GetText())) |
| 245 return; |
| 246 |
| 247 auto keyword = mKeywordByFilter[filter->GetText()]; |
| 248 auto list = mFilterByKeyword[keyword]; |
| 249 if (list.size() == 1) |
| 250 mFilterByKeyword.erase(keyword); |
| 251 else |
| 252 { |
| 253 auto iter = std::find(list.cbegin(), list.cend(), filter); |
| 254 list.erase(iter); |
| 255 } |
| 256 mKeywordByFilter.erase(filter->GetText()); |
| 257 } |
| 258 |
| 259 void Matcher::Clear() |
| 260 { |
| 261 mFilterByKeyword.clear(); |
| 262 mKeywordByFilter.clear(); |
| 263 } |
| 264 |
| 265 bool Matcher::HasFilter(const FilterPtr& filter) const |
| 266 { |
| 267 return mKeywordByFilter.find(filter->GetText()); |
| 268 } |
| 269 |
| 270 static DependentString emptyString = u""_str; |
| 271 |
| 272 const String& Matcher::GetKeywordForFilter(const FilterPtr& filter) |
| 273 { |
| 274 if (mKeywordByFilter.find(filter->GetText())) |
| 275 return mKeywordByFilter[filter->GetText()]; |
| 276 return emptyString; |
| 277 } |
| 278 |
| 279 Filter* Matcher::CheckEntryMatch(const String& keyword, |
| 280 const String& location, |
| 281 int typeMask, DependentString& docDomain, bool thirdParty, |
| 282 const String& sitekey, bool specificOnly) |
| 283 { |
| 284 auto list = mFilterByKeyword[keyword]; |
| 285 for (auto filter : list) { |
| 286 auto activeFilter = static_cast<ActiveFilter*>(filter.get()); |
| 287 if (specificOnly && activeFilter->IsGeneric() && |
| 288 !(activeFilter->mType != Filter::Type::WHITELIST)) |
| 289 continue; |
| 290 |
| 291 auto reFilter = static_cast<RegExpFilter*>(activeFilter); |
| 292 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) |
| 293 return filter.get(); |
| 294 } |
| 295 return nullptr; |
| 296 } |
| 297 |
| 298 Filter* Matcher::MatchesAny(const String& location, |
| 299 int typeMask, DependentString& docDomain, bool thirdParty, |
| 300 const String& sitekey, bool specificOnly) |
| 301 { |
| 302 ReMatchResults reResult; |
| 303 auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true); |
| 304 OwnedString text(location); |
| 305 text.toLower(); |
| 306 MatchRegExp(re_id, text, &reResult); |
| 307 auto& candidates = reResult.candidates; |
| 308 candidates.push_back(OwnedString()); |
| 309 for (auto substr : candidates) |
| 310 if (mFilterByKeyword.find(substr)) |
| 311 { |
| 312 auto result = CheckEntryMatch(substr, location, typeMask, docDomain, |
| 313 thirdParty, sitekey, specificOnly); |
| 314 if (result) |
| 315 { |
| 316 result->AddRef(); |
| 317 return result; |
| 318 } |
| 319 } |
| 320 |
| 321 return nullptr; |
| 322 } |
OLD | NEW |