| Index: compiled/filter/Matcher.cpp |
| =================================================================== |
| new file mode 100644 |
| --- /dev/null |
| +++ b/compiled/filter/Matcher.cpp |
| @@ -0,0 +1,304 @@ |
| +/* |
| + * This file is part of Adblock Plus <https://adblockplus.org/>, |
| + * Copyright (C) 2006-present eyeo GmbH |
| + * |
| + * Adblock Plus is free software: you can redistribute it and/or modify |
| + * it under the terms of the GNU General Public License version 3 as |
| + * published by the Free Software Foundation. |
| + * |
| + * Adblock Plus is distributed in the hope that it will be useful, |
| + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| + * GNU General Public License for more details. |
| + * |
| + * You should have received a copy of the GNU General Public License |
| + * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| + */ |
| + |
| +#include "Matcher.h" |
| +#include "RegExpFilter.h" |
| +#include "../library.h" |
| + |
| +// Upper bound on the number of entries kept in mResultCache: once the cache |
| +// has reached this size, MatchesAny() empties it before storing a new result. |
| +const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000; |
| + |
| +// Forwards keyword selection to the matcher that handles the filter's type: |
| +// the whitelist matcher for WHITELIST filters, the blacklist matcher for |
| +// everything else. |
| +OwnedString CombinedMatcher::FindKeyword(const FilterPtr& filter) |
| +{ |
| +  const bool isException = (filter->mType == Filter::Type::WHITELIST); |
| +  return isException ? mWhitelist.FindKeyword(filter) |
| +                     : mBlacklist.FindKeyword(filter); |
| +} |
| + |
| +// Creates, once per CombinedMatcher, the expression that MatchesAnyInternal() |
| +// uses to pull keyword candidates out of a URL: every run of three or more |
| +// characters from [a-z0-9%]. The handle is released in the destructor. |
| +// NOTE(review): the meaning of the two boolean arguments is defined by |
| +// GenerateRegExp(), which is not visible in this file - confirm in library.h. |
| +CombinedMatcher::CombinedMatcher() |
| + : mMatchReId(GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true)) |
| +{ |
| +} |
| + |
| +// Releases the regular expression handle obtained in the constructor. |
| +CombinedMatcher::~CombinedMatcher() |
| +{ |
| + DeleteRegExp(mMatchReId); |
| +} |
| + |
| +// Drops every memoized MatchesAny() result. Add(), Remove() and Clear() call |
| +// this after touching the filter set so that answers computed against the |
| +// previous set are not served from the cache. |
| +void CombinedMatcher::ResetCache() |
| +{ |
| + mResultCache.clear(); |
| +} |
| + |
| +// Registers the filter with the matcher for its type (whitelist for WHITELIST |
| +// filters, blacklist otherwise) and then invalidates the result cache. |
| +void CombinedMatcher::Add(const FilterPtr& filter) |
| +{ |
| +  auto& target = |
| +      (filter->mType == Filter::Type::WHITELIST) ? mWhitelist : mBlacklist; |
| +  target.Add(filter); |
| + |
| +  ResetCache(); |
| +} |
| + |
| +// Unregisters the filter from the matcher for its type (whitelist for |
| +// WHITELIST filters, blacklist otherwise) and then invalidates the result |
| +// cache. |
| +void CombinedMatcher::Remove(const FilterPtr& filter) |
| +{ |
| +  const bool isException = (filter->mType == Filter::Type::WHITELIST); |
| +  (isException ? mWhitelist : mBlacklist).Remove(filter); |
| + |
| +  ResetCache(); |
| +} |
| + |
| +// Empties both underlying matchers and discards all cached match results. |
| +void CombinedMatcher::Clear() |
| +{ |
| +  // The two matchers are cleared independently of each other; the cache is |
| +  // reset last, after both filter sets are gone. |
| +  mWhitelist.Clear(); |
| +  mBlacklist.Clear(); |
| + |
| +  ResetCache(); |
| +} |
| + |
| +// Reports whether the filter is registered, asking only the matcher that |
| +// handles the filter's type. |
| +bool CombinedMatcher::HasFilter(const FilterPtr& filter) const |
| +{ |
| +  if (filter->mType != Filter::Type::WHITELIST) |
| +    return mBlacklist.HasFilter(filter); |
| + |
| +  return mWhitelist.HasFilter(filter); |
| +} |
| + |
| +// Returns the keyword recorded for the filter by the matcher that handles its |
| +// type (whitelist for WHITELIST filters, blacklist otherwise). |
| +const String& CombinedMatcher::GetKeywordForFilter(const FilterPtr& filter) |
| +{ |
| +  auto& owner = |
| +      (filter->mType == Filter::Type::WHITELIST) ? mWhitelist : mBlacklist; |
| +  return owner.GetKeywordForFilter(filter); |
| +} |
| + |
| +// Uncached lookup behind MatchesAny(). |
| +// |
| +// The lower-cased location is split into keyword candidates with mMatchReId, |
| +// and the empty keyword is appended so that filters indexed under "" are |
| +// always examined. Candidates are visited in order: |
| +//  - a matching whitelist filter is returned immediately; |
| +//  - the first matching blacklist filter is remembered and returned only if |
| +//    no whitelist filter matches for any candidate. |
| +// Returns an empty FilterPtr when nothing matches. |
| +FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location, |
| +    int typeMask, DependentString& docDomain, bool thirdParty, |
| +    const String& sitekey, bool specificOnly) |
| +{ |
| +  ReMatchResults matches; |
| +  OwnedString lowered(location); |
| +  lowered.toLower(); |
| +  lowered.match(mMatchReId, &matches); |
| + |
| +  auto& keywords = matches.candidates; |
| +  keywords.push_back(OwnedString()); |
| + |
| +  FilterPtr firstBlockingHit; |
| +  for (const auto& keyword : keywords) |
| +  { |
| +    if (mWhitelist.mFilterByKeyword.find(keyword)) |
| +    { |
| +      auto exception = mWhitelist.CheckEntryMatch(keyword, location, |
| +          typeMask, docDomain, thirdParty, sitekey, specificOnly); |
| +      if (exception) |
| +        return exception; |
| +    } |
| + |
| +    if (mBlacklist.mFilterByKeyword.find(keyword)) |
| +    { |
| +      // Keep scanning after a blacklist hit: a later keyword may still |
| +      // produce a whitelist match, which takes precedence. |
| +      if (!firstBlockingHit) |
| +      { |
| +        firstBlockingHit = mBlacklist.CheckEntryMatch(keyword, location, |
| +            typeMask, docDomain, thirdParty, sitekey, specificOnly); |
| +      } |
| +    } |
| +  } |
| + |
| +  return firstBlockingHit; |
| +} |
| + |
| +// Cached front end for MatchesAnyInternal(). |
| +// |
| +// All arguments are folded into a single space-separated cache key. On a |
| +// cache miss the result is computed and stored; when the cache has already |
| +// reached MAX_CACHE_ENTRIES it is emptied first. "No match" results are |
| +// cached too, as empty FilterPtr values. |
| +// |
| +// Returns nullptr when no filter matches. Otherwise returns a raw pointer to |
| +// the matching filter with one extra reference added via AddRef(), so the |
| +// object stays referenced after the local FilterPtr goes out of scope. |
| +// NOTE(review): presumably the caller takes over that reference and has to |
| +// release it - confirm against the binding layer. |
| +Filter* CombinedMatcher::MatchesAny(const String& location, |
| +    int typeMask, DependentString& docDomain, bool thirdParty, |
| +    const String& sitekey, bool specificOnly) |
| +{ |
| +  OwnedString key(location); |
| +  key.append(u" "_str); |
| +  key.append(typeMask); |
| +  key.append(u" "_str); |
| +  key.append(docDomain); |
| +  key.append(u" "_str); |
| +  key.append(thirdParty); |
| +  key.append(u" "_str); |
| +  key.append(sitekey); |
| +  key.append(u" "_str); |
| +  key.append(specificOnly); |
| + |
| +  FilterPtr result; |
| + |
| +  auto cachedResult = mResultCache.find(key); |
| +  if (cachedResult) |
| +    result = cachedResult->second; |
| +  else |
| +  { |
| +    result = MatchesAnyInternal(location, typeMask, docDomain, |
| +        thirdParty, sitekey, specificOnly); |
| + |
| +    if (mResultCache.size() >= MAX_CACHE_ENTRIES) |
| +      ResetCache(); |
| + |
| +    mResultCache[key] = result; |
| +  } |
| + |
| +  // MatchesAnyInternal() yields an empty FilterPtr when nothing matches (and |
| +  // such a result may come back from the cache); calling AddRef() through it |
| +  // would dereference a null pointer. |
| +  if (!result) |
| +    return nullptr; |
| + |
| +  result->AddRef(); |
| +  return result.get(); |
| +} |
| + |
| +// Sources of the regular expressions used by Matcher::FindKeyword(). |
| +namespace { |
| + // Whole filter text is a regular-expression filter: an optional "@@", then |
| + // "/.../", then an optional "$option[=value][,option[=value]]..." tail. |
| + const DependentString regexpRegExp = |
| + u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_str; |
| + // Trailing "$option[=value][,option[=value]]..." list at the end of a filter. |
| + const DependentString optionsRegExp = |
| + u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str; |
| + // Keyword candidate: one character outside [a-z0-9%*], then three or more |
| + // characters from [a-z0-9%], followed (lookahead only) by another character |
| + // outside [a-z0-9%*]. The leading character is part of the match. |
| + const DependentString candidateRegExp = |
| + u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str; |
| +} |
| + |
| +// Chooses the keyword under which a filter should be indexed. |
| +// |
| +// Returns the empty string when the filter text is a regular-expression |
| +// filter (see regexpRegExp) or when it contains no keyword candidate. |
| +// Otherwise the options tail and a leading "@@" marker are stripped, the text |
| +// is lower-cased, and among all candidates the one with the fewest filters |
| +// already stored in mFilterByKeyword is returned; ties go to the longer |
| +// candidate. |
| +// |
| +// Every regular expression generated here is released with DeleteRegExp() |
| +// right after its last use, so no handle outlives the call. |
| +OwnedString Matcher::FindKeyword(const FilterPtr& filter) |
| +{ |
| +  OwnedString result(u""_str); |
| +  OwnedString text(filter->GetText()); |
| + |
| +  auto re_id = GenerateRegExp(DependentString(regexpRegExp), true, false); |
| +  const auto isRegExpFilter = TestRegExp(re_id, text); |
| +  DeleteRegExp(re_id); |
| +  if (isRegExpFilter) |
| +    return result; |
| + |
| +  // Remove options |
| +  auto options_re_id = |
| +      GenerateRegExp(DependentString(optionsRegExp), true, false); |
| +  auto index = ExecRegExp(options_re_id, text); |
| +  DeleteRegExp(options_re_id); |
| +  if (index != -1) |
| +    text = text.substr(0, index); |
| + |
| +  // Remove whitelist marker; the length check keeps the two reads inside |
| +  // the string for texts shorter than two characters. |
| +  if (text.length() >= 2 && text[0] == '@' && text[1] == '@') |
| +    text = text.substr(2); |
| + |
| +  text.toLower(); |
| +  ReMatchResults keywords; |
| +  auto candidates_re_id = GenerateRegExp(candidateRegExp, true, true); |
| +  const auto matched = text.match(candidates_re_id, &keywords); |
| +  DeleteRegExp(candidates_re_id); |
| +  if (!matched) |
| +    return result; |
| + |
| +  auto& candidates = keywords.candidates; |
| + |
| +  auto& hash = mFilterByKeyword; |
| +  uint32_t resultCount = 0xffffffff; |
| +  uint32_t resultLength = 0; |
| +  for (auto& substr : candidates) |
| +  { |
| +    // Each match starts with the single non-keyword character required by |
| +    // candidateRegExp; drop it to get the keyword itself. |
| +    auto candidate = DependentString(substr).substr(1); |
| +    auto count = (hash.find(candidate) ? hash[candidate].size() : 0); |
| +    if (count < resultCount || |
| +        (count == resultCount && candidate.length() > resultLength)) |
| +    { |
| +      result = candidate; |
| +      resultCount = count; |
| +      resultLength = candidate.length(); |
| +    } |
| +  } |
| + |
| +  return result; |
| +} |
| + |
| +// Indexes the filter under the keyword chosen by FindKeyword() and records |
| +// that keyword under the filter's text. A filter whose text is already |
| +// recorded is left untouched. |
| +void Matcher::Add(const FilterPtr& filter) |
| +{ |
| +  const bool alreadyKnown = |
| +      mKeywordByFilter.find(filter->GetText()) ? true : false; |
| +  if (alreadyKnown) |
| +    return; |
| + |
| +  auto keyword = FindKeyword(filter); |
| +  if (mFilterByKeyword.find(keyword)) |
| +  { |
| +    // Keyword already has a bucket: append to it. |
| +    mFilterByKeyword[keyword].push_back(filter); |
| +  } |
| +  else |
| +  { |
| +    // First filter for this keyword: start a new bucket. |
| +    mFilterByKeyword[keyword] = std::vector<FilterPtr>{filter}; |
| +  } |
| + |
| +  mKeywordByFilter[filter->GetText()] = keyword; |
| +} |
| + |
| +// Removes the filter from the keyword index and forgets its recorded keyword. |
| +// Does nothing when the filter's text is not recorded. |
| +// |
| +// When the filter is the only entry stored under its keyword, the whole |
| +// keyword entry is erased; otherwise just this filter is taken out of the |
| +// keyword's list. |
| +void Matcher::Remove(const FilterPtr& filter) |
| +{ |
| +  if (!mKeywordByFilter.find(filter->GetText())) |
| +    return; |
| + |
| +  auto keyword = mKeywordByFilter[filter->GetText()]; |
| + |
| +  // Bind by reference: erasing from a copy of the vector would leave the |
| +  // filter in the stored list, where it could still be matched. |
| +  auto& list = mFilterByKeyword[keyword]; |
| +  if (list.size() == 1) |
| +  { |
| +    // `list` is not used again after this erase. |
| +    mFilterByKeyword.erase(keyword); |
| +  } |
| +  else |
| +  { |
| +    auto iter = std::find(list.cbegin(), list.cend(), filter); |
| +    // Passing the end iterator to erase() is undefined behaviour. |
| +    if (iter != list.cend()) |
| +      list.erase(iter); |
| +  } |
| +  mKeywordByFilter.erase(filter->GetText()); |
| +} |
| + |
| +// Forgets every filter: empties both the filter-text -> keyword map and the |
| +// keyword -> filters index. |
| +void Matcher::Clear() |
| +{ |
| +  mKeywordByFilter.clear(); |
| +  mFilterByKeyword.clear(); |
| +} |
| + |
| +// Reports whether a keyword has been recorded for the filter's text, i.e. |
| +// whether the filter was added and not removed since. |
| +bool Matcher::HasFilter(const FilterPtr& filter) const |
| +{ |
| +  if (mKeywordByFilter.find(filter->GetText())) |
| +    return true; |
| + |
| +  return false; |
| +} |
| + |
| +// Returned by reference from Matcher::GetKeywordForFilter() for filters that |
| +// have no recorded keyword; it lives at file scope so that the returned |
| +// reference remains valid after the call. |
| +static DependentString emptyString = u""_str; |
| + |
| +// Returns the keyword recorded for the filter's text, or a reference to the |
| +// file-level empty string when the filter is not registered. |
| +const String& Matcher::GetKeywordForFilter(const FilterPtr& filter) |
| +{ |
| +  const bool known = mKeywordByFilter.find(filter->GetText()) ? true : false; |
| +  if (!known) |
| +    return emptyString; |
| + |
| +  return mKeywordByFilter[filter->GetText()]; |
| +} |
| + |
| +// Tests the filters indexed under `keyword` against a request and returns the |
| +// first one whose Matches() accepts it, or an empty FilterPtr if none does. |
| +// |
| +// When `specificOnly` is set, generic filters are skipped unless they are |
| +// whitelist filters. |
| +// |
| +// Note: the bucket is fetched with operator[], as before; callers in this |
| +// file check mFilterByKeyword.find(keyword) first. |
| +FilterPtr Matcher::CheckEntryMatch(const String& keyword, |
| +    const String& location, |
| +    int typeMask, DependentString& docDomain, bool thirdParty, |
| +    const String& sitekey, bool specificOnly) |
| +{ |
| +  // Bind by reference: there is no need to copy the vector or to touch the |
| +  // reference count of every filter just to iterate. |
| +  const auto& list = mFilterByKeyword[keyword]; |
| +  for (const auto& filter : list) |
| +  { |
| +    auto activeFilter = static_cast<ActiveFilter*>(filter.get()); |
| +    // Skip generic blocking filters in specific-only mode. The previous |
| +    // condition, !(mType != WHITELIST), had the test inverted and skipped |
| +    // generic whitelist filters instead. |
| +    if (specificOnly && activeFilter->IsGeneric() && |
| +        activeFilter->mType != Filter::Type::WHITELIST) |
| +      continue; |
| + |
| +    auto reFilter = static_cast<RegExpFilter*>(activeFilter); |
| +    if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) |
| +      return filter; |
| +  } |
| +  return FilterPtr(); |
| +} |
| + |
| +// Looks for a filter in this matcher that matches the request. |
| +// |
| +// The lower-cased location is split into keyword candidates (runs of three or |
| +// more characters from [a-z0-9%]); the empty keyword is appended so filters |
| +// indexed under "" are always examined. Candidates are checked in order and |
| +// the first matching filter wins. |
| +// |
| +// Returns nullptr when nothing matches. Otherwise returns a raw pointer with |
| +// one extra reference added via AddRef(), so the object stays referenced |
| +// after the local FilterPtr goes out of scope. |
| +// NOTE(review): presumably the caller takes over that reference and has to |
| +// release it - confirm against the binding layer. |
| +Filter* Matcher::MatchesAny(const String& location, |
| +    int typeMask, DependentString& docDomain, bool thirdParty, |
| +    const String& sitekey, bool specificOnly) |
| +{ |
| +  ReMatchResults reResult; |
| +  auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true); |
| +  OwnedString text(location); |
| +  text.toLower(); |
| +  MatchRegExp(re_id, text, &reResult); |
| +  // The expression is only needed for the call above; release it here so |
| +  // that repeated lookups do not accumulate regexp handles. |
| +  DeleteRegExp(re_id); |
| + |
| +  auto& candidates = reResult.candidates; |
| +  candidates.push_back(OwnedString()); |
| +  for (const auto& substr : candidates) |
| +  { |
| +    if (!mFilterByKeyword.find(substr)) |
| +      continue; |
| + |
| +    auto result = CheckEntryMatch(substr, location, typeMask, docDomain, |
| +        thirdParty, sitekey, specificOnly); |
| +    if (result) |
| +    { |
| +      result->AddRef(); |
| +      return result.get(); |
| +    } |
| +  } |
| + |
| +  return nullptr; |
| +} |