Index: compiled/filter/Matcher.cpp |
=================================================================== |
new file mode 100644 |
--- /dev/null |
+++ b/compiled/filter/Matcher.cpp |
@@ -0,0 +1,324 @@ |
+/* |
+ * This file is part of Adblock Plus <https://adblockplus.org/>, |
+ * Copyright (C) 2006-present eyeo GmbH |
+ * |
+ * Adblock Plus is free software: you can redistribute it and/or modify |
+ * it under the terms of the GNU General Public License version 3 as |
+ * published by the Free Software Foundation. |
+ * |
+ * Adblock Plus is distributed in the hope that it will be useful, |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+ * GNU General Public License for more details. |
+ * |
+ * You should have received a copy of the GNU General Public License |
+ * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
+ */ |
+ |
+#include "Matcher.h" |
+#include "RegExpFilter.h" |
+#include "../library.h" |
+ |
+class CombinedMatcher : public MatcherBase |
+{ |
+private: |
+ StringMap<Filter*> mResultCache; |
hub
2017/09/26 21:49:00
I wanted to use FilterPtr in there, but it didn't
|
+ static const size_t MAX_CACHE_ENTRIES = 1000; |
+ int mMatchReId; |
+public: |
+ Matcher mBlacklist; |
+ Matcher mWhitelist; |
+ |
+ CombinedMatcher() |
+ : mMatchReId(GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true)) |
+ { |
+ } |
+ |
+ ~CombinedMatcher() |
+ { |
+ DeleteRegExp(mMatchReId); |
+ } |
+ |
+ void ResetCache() |
+ { |
+ for (auto filter : mResultCache) |
+ filter.second->ReleaseRef(); |
hub
2017/09/26 21:49:00
See above: if we could have the FilterPtr as the d
|
+ mResultCache.clear(); |
+ } |
+ |
+ void Add(const FilterPtr& filter) override |
+ { |
+ if (filter->mType == Filter::Type::WHITELIST) |
+ mWhitelist.Add(filter); |
+ else |
+ mBlacklist.Add(filter); |
+ |
+ ResetCache(); |
+ } |
+ |
+ void Remove(const FilterPtr& filter) override |
+ { |
+ if (filter->mType == Filter::Type::WHITELIST) |
+ mWhitelist.Remove(filter); |
+ else |
+ mBlacklist.Remove(filter); |
+ |
+ ResetCache(); |
+ } |
+ |
+ void Clear() override |
+ { |
+ mBlacklist.Clear(); |
+ mWhitelist.Clear(); |
+ ResetCache(); |
+ } |
+ |
+ OwnedString FindKeyword(const FilterPtr& filter) override |
+ { |
+ if (filter->mType == Filter::Type::WHITELIST) |
+ return mWhitelist.FindKeyword(filter); |
+ return mBlacklist.FindKeyword(filter); |
+ } |
+ |
+ bool HasFilter(const FilterPtr& filter) const override |
+ { |
+ if (filter->mType == Filter::Type::WHITELIST) |
+ return mWhitelist.HasFilter(filter); |
+ return mBlacklist.HasFilter(filter); |
+ } |
+ |
+ const String& GetKeywordForFilter(const FilterPtr& filter) override |
+ { |
+ if (filter->mType == Filter::Type::WHITELIST) |
+ return mWhitelist.GetKeywordForFilter(filter); |
+ return mBlacklist.GetKeywordForFilter(filter); |
+ } |
+ |
+ Filter* MatchesAnyInternal(const String& location, |
+ int typeMask, DependentString& docDomain, bool thirdParty, |
+ const String& sitekey, bool specificOnly) |
+ { |
+ ReMatchResults reResult; |
+ OwnedString text(location); |
+ text.toLower(); |
+ text.match(mMatchReId, &reResult); |
+ |
+ auto& candidates = reResult.candidates; |
+ candidates.push_back(OwnedString()); |
+ |
+ Filter* blacklistHit = nullptr; |
+ for (size_t i = 0, l = candidates.size(); i < l; i++) |
+ { |
+ auto substr = candidates[i]; |
+ if (mWhitelist.mFilterByKeyword.find(substr)) |
+ { |
+ auto result = mWhitelist._CheckEntryMatch( |
+ substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly); |
+ if (result) |
+ return result; |
+ } |
+ if (mBlacklist.mFilterByKeyword.find(substr) && !blacklistHit) |
+ { |
+ blacklistHit = mBlacklist._CheckEntryMatch( |
+ substr, location, typeMask, docDomain, thirdParty, sitekey, |
+ specificOnly); |
+ } |
+ } |
+ return blacklistHit; |
+ } |
+ |
+ Filter* MatchesAny(const String& location, |
+ int typeMask, DependentString& docDomain, bool thirdParty, |
+ const String& sitekey, bool specificOnly) override |
+ { |
+ OwnedString key(location); |
+ key.append(u" "_str); |
+ key.append(typeMask); |
+ key.append(u" "_str); |
+ key.append(docDomain); |
+ key.append(u" "_str); |
+ key.append(thirdParty); |
+ key.append(u" "_str); |
+ key.append(sitekey); |
+ key.append(u" "_str); |
+ key.append(specificOnly); |
+ |
+ auto cachedResult = mResultCache.find(key); |
+ if (cachedResult) |
+ { |
+ cachedResult->second->AddRef(); |
+ return cachedResult->second; |
+ } |
+ |
+ Filter* result = MatchesAnyInternal(location, typeMask, docDomain, |
+ thirdParty, sitekey, specificOnly); |
+ |
+ if (mResultCache.size() >= MAX_CACHE_ENTRIES) |
+ ResetCache(); |
+ |
+ result->AddRef(); |
+ mResultCache[key] = result; |
+ |
+ result->AddRef(); |
+ return result; |
+ } |
+}; |
+ |
+MatcherBase* MatcherBase::mInstance = new CombinedMatcher; |
+ |
+Matcher::Matcher() |
+ : mFilterReId(GenerateRegExp(DependentString(Filter::regexpRegExp), true, false)) |
+ , mOptionsReId(GenerateRegExp(DependentString(Filter::optionsRegExp), true, false)) |
+ , mCandidatesReId(GenerateRegExp(u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str, true, true)) |
+{ |
+} |
+ |
+Matcher::~Matcher() |
+{ |
+ DeleteRegExp(mFilterReId); |
+ DeleteRegExp(mOptionsReId); |
+ DeleteRegExp(mCandidatesReId); |
+} |
+ |
+OwnedString Matcher::FindKeyword(const FilterPtr& filter) |
+{ |
+ OwnedString result(u""_str); |
+ OwnedString text(filter->GetText()); |
+ if (TestRegExp(mFilterReId, text)) |
+ return result; |
+ |
+ // Remove options |
+ auto index = ExecRegExp(mOptionsReId, text); |
+ if (index != -1) |
+ text = text.substr(0, index); |
+ |
+ // Remove whitelist marker |
+ if (text[0] == '@' && text[1] == '@') |
+ text = text.substr(2); |
+ |
+ text.toLower(); |
+ ReMatchResults keywords; |
+ auto match = text.match(mCandidatesReId, &keywords); |
+ if (!match) |
+ return result; |
+ |
+ auto& candidates = keywords.candidates; |
+ |
+ auto& hash = mFilterByKeyword; |
+ uint32_t resultCount = 0xffffffff; |
+ uint32_t resultLength = 0; |
+ for (uint32_t i = 0, l = candidates.size(); i < l; i++) |
+ { |
+ auto candidate = DependentString(candidates[i]).substr(1); |
+ auto count = (hash.find(candidate) ? hash[candidate].size() : 0); |
+ if (count < resultCount || |
+ (count == resultCount && candidate.length() > resultLength)) |
+ { |
+ result = candidate; |
+ resultCount = count; |
+ resultLength = candidate.length(); |
+ } |
+ } |
+ |
+ return result; |
+} |
+ |
+void Matcher::Add(const FilterPtr& filter) |
+{ |
+ if (mKeywordByFilter.find(filter->GetText())) |
+ return; |
+ |
+ auto keyword = FindKeyword(filter); |
+ auto oldEntry = mFilterByKeyword.find(keyword); |
+ if (!oldEntry) |
+ mFilterByKeyword[keyword] = std::vector<FilterPtr>{filter}; |
+ else |
+ mFilterByKeyword[keyword].push_back(filter); |
+ mKeywordByFilter[filter->GetText()] = keyword; |
+} |
+ |
+void Matcher::Remove(const FilterPtr& filter) |
+{ |
+ if (!mKeywordByFilter.find(filter->GetText())) |
+ return; |
+ |
+ auto keyword = mKeywordByFilter[filter->GetText()]; |
+ auto list = mFilterByKeyword[keyword]; |
+ if (list.size() == 1) |
+ mFilterByKeyword.erase(keyword); |
+ else |
+ { |
+ auto iter = std::find(list.cbegin(), list.cend(), filter); |
+ list.erase(iter); |
+ } |
+ mKeywordByFilter.erase(filter->GetText()); |
+} |
+ |
+void Matcher::Clear() |
+{ |
+ mFilterByKeyword.clear(); |
+ mKeywordByFilter.clear(); |
+} |
+ |
+bool Matcher::HasFilter(const FilterPtr& filter) const |
+{ |
+ return mKeywordByFilter.find(filter->GetText()); |
+} |
+ |
+static DependentString emptyString = u""_str; |
+ |
+const String& Matcher::GetKeywordForFilter(const FilterPtr& filter) |
+{ |
+ if (mKeywordByFilter.find(filter->GetText())) |
+ return mKeywordByFilter[filter->GetText()]; |
+ return emptyString; |
+} |
+ |
+Filter* Matcher::_CheckEntryMatch(const String& keyword, |
+ const String& location, |
+ int typeMask, DependentString& docDomain, bool thirdParty, |
+ const String& sitekey, bool specificOnly) |
+{ |
+ auto list = mFilterByKeyword[keyword]; |
+ for (auto filter : list) { |
+ auto activeFilter = static_cast<ActiveFilter*>(filter.get()); |
hub
2017/09/26 21:49:00
This is done without checking. And it is ugly. And
sergei
2017/10/02 12:02:33
Although we don't pass other filters here, what do
|
+ if (specificOnly && activeFilter->IsGeneric() && |
+ !(activeFilter->mType != Filter::Type::WHITELIST)) |
+ continue; |
+ auto reFilter = static_cast<RegExpFilter*>(activeFilter); |
hub
2017/09/26 21:49:00
SImilarly as above: this is unchecked.
|
+ if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) |
+ { |
+ return filter.get(); |
+ } |
+ } |
+ return nullptr; |
+} |
+ |
+Filter* Matcher::MatchesAny(const String& location, |
+ int typeMask, DependentString& docDomain, bool thirdParty, |
+ const String& sitekey, bool specificOnly) |
+{ |
+ ReMatchResults reResult; |
+ auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true); |
+ OwnedString text(location); |
+ text.toLower(); |
+ MatchRegExp(re_id, text, &reResult); |
+ auto& candidates = reResult.candidates; |
+ candidates.push_back(OwnedString()); |
+ for (size_t i = 0, l = candidates.size(); i < l; i++) |
+ { |
+ auto substr = candidates[i]; |
+ if (mFilterByKeyword.find(substr)) |
+ { |
+ auto result = _CheckEntryMatch(substr, location, typeMask, docDomain, |
+ thirdParty, sitekey, specificOnly); |
+ if (result) |
+ { |
+ result->AddRef(); |
+ return result; |
+ } |
+ } |
+ } |
+ |
+ return nullptr; |
+} |