OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
| 3 * Copyright (C) 2006-present eyeo GmbH |
| 4 * |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify |
| 6 * it under the terms of the GNU General Public License version 3 as |
| 7 * published by the Free Software Foundation. |
| 8 * |
| 9 * Adblock Plus is distributed in the hope that it will be useful, |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 * GNU General Public License for more details. |
| 13 * |
| 14 * You should have received a copy of the GNU General Public License |
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 16 */ |
| 17 |
| 18 #include "Matcher.h" |
| 19 #include "RegExpFilter.h" |
| 20 #include "../library.h" |
| 21 |
| 22 const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000; |
| 23 |
| 24 OwnedString CombinedMatcher::FindKeyword(const FilterPtr& filter) |
| 25 { |
| 26 if (filter->mType == Filter::Type::WHITELIST) |
| 27 return mWhitelist.FindKeyword(filter); |
| 28 return mBlacklist.FindKeyword(filter); |
| 29 } |
| 30 |
| 31 CombinedMatcher::CombinedMatcher() |
| 32 : mMatchReId(GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true)) |
| 33 { |
| 34 } |
| 35 |
| 36 CombinedMatcher::~CombinedMatcher() |
| 37 { |
| 38 DeleteRegExp(mMatchReId); |
| 39 } |
| 40 |
| 41 void CombinedMatcher::ResetCache() |
| 42 { |
| 43 mResultCache.clear(); |
| 44 } |
| 45 |
| 46 void CombinedMatcher::Add(const FilterPtr& filter) |
| 47 { |
| 48 if (filter->mType == Filter::Type::WHITELIST) |
| 49 mWhitelist.Add(filter); |
| 50 else |
| 51 mBlacklist.Add(filter); |
| 52 |
| 53 ResetCache(); |
| 54 } |
| 55 |
| 56 void CombinedMatcher::Remove(const FilterPtr& filter) |
| 57 { |
| 58 if (filter->mType == Filter::Type::WHITELIST) |
| 59 mWhitelist.Remove(filter); |
| 60 else |
| 61 mBlacklist.Remove(filter); |
| 62 |
| 63 ResetCache(); |
| 64 } |
| 65 |
| 66 void CombinedMatcher::Clear() |
| 67 { |
| 68 mBlacklist.Clear(); |
| 69 mWhitelist.Clear(); |
| 70 ResetCache(); |
| 71 } |
| 72 |
| 73 bool CombinedMatcher::HasFilter(const FilterPtr& filter) const |
| 74 { |
| 75 if (filter->mType == Filter::Type::WHITELIST) |
| 76 return mWhitelist.HasFilter(filter); |
| 77 return mBlacklist.HasFilter(filter); |
| 78 } |
| 79 |
| 80 const String& CombinedMatcher::GetKeywordForFilter(const FilterPtr& filter) |
| 81 { |
| 82 if (filter->mType == Filter::Type::WHITELIST) |
| 83 return mWhitelist.GetKeywordForFilter(filter); |
| 84 return mBlacklist.GetKeywordForFilter(filter); |
| 85 } |
| 86 |
| 87 FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location, |
| 88 int typeMask, DependentString& docDomain, bool thirdParty, |
| 89 const String& sitekey, bool specificOnly) |
| 90 { |
| 91 ReMatchResults reResult; |
| 92 OwnedString text(location); |
| 93 text.toLower(); |
| 94 text.match(mMatchReId, &reResult); |
| 95 |
| 96 auto& candidates = reResult.candidates; |
| 97 candidates.push_back(OwnedString()); |
| 98 |
| 99 FilterPtr blacklistHit; |
| 100 for (auto substr : candidates) |
| 101 { |
| 102 if (mWhitelist.mFilterByKeyword.find(substr)) |
| 103 { |
| 104 auto result = mWhitelist.CheckEntryMatch( |
| 105 substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly
); |
| 106 if (result) |
| 107 return result; |
| 108 } |
| 109 if (mBlacklist.mFilterByKeyword.find(substr) && !blacklistHit) |
| 110 { |
| 111 blacklistHit = mBlacklist.CheckEntryMatch( |
| 112 substr, location, typeMask, docDomain, thirdParty, sitekey, |
| 113 specificOnly); |
| 114 } |
| 115 } |
| 116 return blacklistHit; |
| 117 } |
| 118 |
| 119 Filter* CombinedMatcher::MatchesAny(const String& location, |
| 120 int typeMask, DependentString& docDomain, bool thirdParty, |
| 121 const String& sitekey, bool specificOnly) |
| 122 { |
| 123 OwnedString key(location); |
| 124 key.append(u" "_str); |
| 125 key.append(typeMask); |
| 126 key.append(u" "_str); |
| 127 key.append(docDomain); |
| 128 key.append(u" "_str); |
| 129 key.append(thirdParty); |
| 130 key.append(u" "_str); |
| 131 key.append(sitekey); |
| 132 key.append(u" "_str); |
| 133 key.append(specificOnly); |
| 134 |
| 135 FilterPtr result; |
| 136 |
| 137 auto cachedResult = mResultCache.find(key); |
| 138 if (cachedResult) |
| 139 result = cachedResult->second; |
| 140 else |
| 141 { |
| 142 result = MatchesAnyInternal(location, typeMask, docDomain, |
| 143 thirdParty, sitekey, specificOnly); |
| 144 |
| 145 if (mResultCache.size() >= MAX_CACHE_ENTRIES) |
| 146 ResetCache(); |
| 147 |
| 148 mResultCache[key] = result; |
| 149 } |
| 150 |
| 151 result->AddRef(); |
| 152 return result.get(); |
| 153 } |
| 154 |
| 155 namespace { |
| 156 const DependentString regexpRegExp = |
| 157 u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_s
tr; |
| 158 const DependentString optionsRegExp = |
| 159 u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str; |
| 160 const DependentString candidateRegExp = |
| 161 u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str; |
| 162 } |
| 163 |
| 164 OwnedString Matcher::FindKeyword(const FilterPtr& filter) |
| 165 { |
| 166 OwnedString result(u""_str); |
| 167 OwnedString text(filter->GetText()); |
| 168 auto re_id = GenerateRegExp(DependentString(regexpRegExp), true, false); |
| 169 if (TestRegExp(re_id, text)) |
| 170 return result; |
| 171 |
| 172 // Remove options |
| 173 auto options_re_id = GenerateRegExp(DependentString(optionsRegExp), true, fals
e); |
| 174 auto index = ExecRegExp(options_re_id, text); |
| 175 if (index != -1) |
| 176 text = text.substr(0, index); |
| 177 |
| 178 // Remove whitelist marker |
| 179 if (text[0] == '@' && text[1] == '@') |
| 180 text = text.substr(2); |
| 181 |
| 182 text.toLower(); |
| 183 ReMatchResults keywords; |
| 184 auto candidates_re_id = GenerateRegExp(candidateRegExp, true, true); |
| 185 auto match = text.match(candidates_re_id, &keywords); |
| 186 if (!match) |
| 187 return result; |
| 188 |
| 189 auto& candidates = keywords.candidates; |
| 190 |
| 191 auto& hash = mFilterByKeyword; |
| 192 uint32_t resultCount = 0xffffffff; |
| 193 uint32_t resultLength = 0; |
| 194 for (auto substr : candidates) |
| 195 { |
| 196 auto candidate = DependentString(substr).substr(1); |
| 197 auto count = (hash.find(candidate) ? hash[candidate].size() : 0); |
| 198 if (count < resultCount || |
| 199 (count == resultCount && candidate.length() > resultLength)) |
| 200 { |
| 201 result = candidate; |
| 202 resultCount = count; |
| 203 resultLength = candidate.length(); |
| 204 } |
| 205 } |
| 206 |
| 207 return result; |
| 208 } |
| 209 |
| 210 void Matcher::Add(const FilterPtr& filter) |
| 211 { |
| 212 if (mKeywordByFilter.find(filter->GetText())) |
| 213 return; |
| 214 |
| 215 auto keyword = FindKeyword(filter); |
| 216 auto oldEntry = mFilterByKeyword.find(keyword); |
| 217 if (!oldEntry) |
| 218 mFilterByKeyword[keyword] = std::vector<FilterPtr>{filter}; |
| 219 else |
| 220 mFilterByKeyword[keyword].push_back(filter); |
| 221 mKeywordByFilter[filter->GetText()] = keyword; |
| 222 } |
| 223 |
| 224 void Matcher::Remove(const FilterPtr& filter) |
| 225 { |
| 226 if (!mKeywordByFilter.find(filter->GetText())) |
| 227 return; |
| 228 |
| 229 auto keyword = mKeywordByFilter[filter->GetText()]; |
| 230 auto list = mFilterByKeyword[keyword]; |
| 231 if (list.size() == 1) |
| 232 mFilterByKeyword.erase(keyword); |
| 233 else |
| 234 { |
| 235 auto iter = std::find(list.cbegin(), list.cend(), filter); |
| 236 list.erase(iter); |
| 237 } |
| 238 mKeywordByFilter.erase(filter->GetText()); |
| 239 } |
| 240 |
| 241 void Matcher::Clear() |
| 242 { |
| 243 mFilterByKeyword.clear(); |
| 244 mKeywordByFilter.clear(); |
| 245 } |
| 246 |
| 247 bool Matcher::HasFilter(const FilterPtr& filter) const |
| 248 { |
| 249 return mKeywordByFilter.find(filter->GetText()); |
| 250 } |
| 251 |
| 252 static DependentString emptyString = u""_str; |
| 253 |
| 254 const String& Matcher::GetKeywordForFilter(const FilterPtr& filter) |
| 255 { |
| 256 if (mKeywordByFilter.find(filter->GetText())) |
| 257 return mKeywordByFilter[filter->GetText()]; |
| 258 return emptyString; |
| 259 } |
| 260 |
| 261 FilterPtr Matcher::CheckEntryMatch(const String& keyword, |
| 262 const String& location, |
| 263 int typeMask, DependentString& docDomain, bool thirdParty, |
| 264 const String& sitekey, bool specificOnly) |
| 265 { |
| 266 auto list = mFilterByKeyword[keyword]; |
| 267 for (auto filter : list) { |
| 268 auto activeFilter = static_cast<ActiveFilter*>(filter.get()); |
| 269 if (specificOnly && activeFilter->IsGeneric() && |
| 270 !(activeFilter->mType != Filter::Type::WHITELIST)) |
| 271 continue; |
| 272 |
| 273 auto reFilter = static_cast<RegExpFilter*>(activeFilter); |
| 274 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) |
| 275 return filter; |
| 276 } |
| 277 return FilterPtr(); |
| 278 } |
| 279 |
| 280 Filter* Matcher::MatchesAny(const String& location, |
| 281 int typeMask, DependentString& docDomain, bool thirdParty, |
| 282 const String& sitekey, bool specificOnly) |
| 283 { |
| 284 ReMatchResults reResult; |
| 285 auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true); |
| 286 OwnedString text(location); |
| 287 text.toLower(); |
| 288 MatchRegExp(re_id, text, &reResult); |
| 289 auto& candidates = reResult.candidates; |
| 290 candidates.push_back(OwnedString()); |
| 291 for (auto substr : candidates) |
| 292 if (mFilterByKeyword.find(substr)) |
| 293 { |
| 294 auto result = CheckEntryMatch(substr, location, typeMask, docDomain, |
| 295 thirdParty, sitekey, specificOnly); |
| 296 if (result) |
| 297 { |
| 298 result->AddRef(); |
| 299 return result.get(); |
| 300 } |
| 301 } |
| 302 |
| 303 return nullptr; |
| 304 } |
OLD | NEW |