Index: compiled/filter/Matcher.cpp |
=================================================================== |
new file mode 100644 |
--- /dev/null |
+++ b/compiled/filter/Matcher.cpp |
@@ -0,0 +1,295 @@ |
+/* |
+ * This file is part of Adblock Plus <https://adblockplus.org/>, |
+ * Copyright (C) 2006-present eyeo GmbH |
+ * |
+ * Adblock Plus is free software: you can redistribute it and/or modify |
+ * it under the terms of the GNU General Public License version 3 as |
+ * published by the Free Software Foundation. |
+ * |
+ * Adblock Plus is distributed in the hope that it will be useful, |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+ * GNU General Public License for more details. |
+ * |
+ * You should have received a copy of the GNU General Public License |
+ * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
+ */ |
+ |
+#include "Matcher.h" |
+#include "RegExpFilter.h" |
+#include "../library.h" |
+ |
+namespace { |
+ // Pattern for filter text that is itself a regular expression: an optional |
+ // "@@" prefix, a "/.../" body and an optional trailing "$option,..." list. |
+ const DependentString regexpRegExp = |
+ u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_str; |
+ // Pattern for the trailing "$option,option=value" list of a filter. |
+ const DependentString optionsRegExp = |
+ u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str; |
+ // Pattern for a keyword candidate inside filter text: one delimiter |
+ // character, then at least three characters from [a-z0-9%], followed |
+ // (lookahead only) by another delimiter. '*' does not count as a delimiter. |
+ const DependentString candidateRegExp = |
+ u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str; |
+ // Pattern for a keyword inside a location: at least three characters from |
+ // [a-z0-9%]. |
+ const DependentString matchRegExp = u"[a-z0-9%]{3,}"_str; |
+} |
+ |
+// Sizes both lookup tables and registers the regular expressions used by |
+// this matcher. |
+Matcher::Matcher() |
+  : mFilterByKeyword(1024), |
+    mKeywordByFilter(1024), |
+    mReId(-1), |
+    mOptionsReId(-1), |
+    mCandidatesReId(-1) |
+{ |
+  // Three ids start out as -1 above and receive their real value here; |
+  // mMatchReId is only ever assigned in this body. |
+  // NOTE(review): the meaning of the two boolean flags is defined by |
+  // GenerateRegExp, which is not visible in this file - confirm there. |
+  mReId = GenerateRegExp(regexpRegExp, true, false); |
+  mOptionsReId = GenerateRegExp(optionsRegExp, true, false); |
+  mCandidatesReId = GenerateRegExp(candidateRegExp, true, true); |
+  mMatchReId = GenerateRegExp(matchRegExp, true, true); |
+} |
+ |
+// Registers a filter with this matcher. Does nothing if an entry for the |
+// filter's text already exists. |
+void Matcher::Add(Filter& filter) |
+{ |
+ if (mKeywordByFilter.find(filter.GetText())) |
+ return; |
+ |
+ auto keyword = FindKeyword(filter); |
+ |
+ // Index the filter under its keyword first: keyword is moved from below. |
+ mFilterByKeyword[keyword].push_back(FilterPtr(&filter)); |
sergei
2017/10/11 09:55:16
Although the review is already closed I think it's
|
+ mKeywordByFilter[filter.GetText()] = |
+ FilterKeyword(std::move(keyword), filter); |
+} |
+ |
+// Unregisters a filter from both lookup tables. Does nothing if the filter |
+// has no entry in mKeywordByFilter. |
+void Matcher::Remove(Filter& filter) |
+{ |
+  auto entry = mKeywordByFilter.find(filter.GetText()); |
+  if (!entry) |
+    return; |
+ |
+  auto& keyword = static_cast<const String&>(entry->second); |
+  // Bind by reference: a plain `auto` would copy the list, the erase below |
+  // would act on the copy and the filter would stay registered. |
+  auto& list = mFilterByKeyword[keyword]; |
+  if (list.size() == 1) |
+    mFilterByKeyword.erase(keyword); |
+  else |
+  { |
+    auto position = std::find(list.cbegin(), list.cend(), FilterPtr(&filter)); |
+    // Erasing end() is undefined, so tolerate the two tables being out of sync. |
+    if (position != list.cend()) |
+      list.erase(position); |
+  } |
+ |
+  // keyword was obtained from this entry, so the entry is dropped last. |
+  mKeywordByFilter.erase(filter.GetText()); |
+} |
+ |
+// Empties both lookup tables of this matcher. |
+void Matcher::Clear() |
+{ |
+ mFilterByKeyword.clear(); |
+ mKeywordByFilter.clear(); |
+} |
+ |
+// Tells whether mKeywordByFilter holds an entry for the filter's text. |
+bool Matcher::HasFilter(const Filter& filter) const |
+{ |
+ return mKeywordByFilter.find(filter.GetText()); |
+} |
+ |
+namespace |
+{ |
+  // Shared empty keyword that Matcher::GetKeywordForFilter returns by |
+  // reference for unknown filters. Declared const, like the other file-local |
+  // string constants, so the shared instance cannot be modified by accident. |
+  const DependentString emptyString = u""_str; |
+} |
+ |
+// Returns the keyword stored for the filter, or the shared empty string if |
+// the filter has no entry in mKeywordByFilter. |
+const String& Matcher::GetKeywordForFilter(const Filter& filter) const |
+{ |
+  auto stored = mKeywordByFilter.find(filter.GetText()); |
+  if (!stored) |
+    return emptyString; |
+ |
+  return static_cast<const String&>(stored->second); |
+} |
+ |
+// Returns the first filter of this matcher that matches the request, or |
+// nullptr if there is none. The lower-cased location is split into keyword |
+// candidates and each candidate's filters are tried via CheckEntryMatch(). |
+// The returned pointer is the one released from the matching FilterPtr. |
+Filter* Matcher::MatchesAny(const String& location, |
+    int typeMask, DependentString& docDomain, bool thirdParty, |
+    const String& sitekey, bool specificOnly) const |
+{ |
+  OwnedString text(location); |
+  text.toLower(); |
+  intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); |
+  // The outcome of match() is intentionally not used as a gate: filters |
+  // stored under the empty keyword have to be checked even if the location |
+  // contains no keyword at all. CombinedMatcher::MatchesAnyInternal handles |
+  // the candidate list the same way. |
+  text.match(mMatchReId, *reResult); |
+ |
+  auto& candidates = reResult->candidates; |
+  // The empty keyword goes last so that keyword-less filters are tried too. |
+  candidates.push_back(OwnedString()); |
+  // Iterate by reference; copying each candidate string is not needed. |
+  for (const auto& candidate : candidates) |
+  { |
+    auto result = CheckEntryMatch(candidate, location, typeMask, docDomain, |
+                                  thirdParty, sitekey, specificOnly); |
+    if (result) |
+      return result.release(); |
+  } |
Wladimir Palant
2017/10/09 08:39:47
As mentioned in the issue description, we should n
sergei
2017/10/09 15:27:53
Although it merely converts the existing JS code a
Wladimir Palant
2017/10/10 07:39:05
I strongly disagree. Landing crappy code is always
|
+  return nullptr; |
+} |
+ |
+// Chooses the keyword under which a filter gets indexed. Filter text that |
+// matches the regular-expression pattern gets the empty keyword. Otherwise |
+// options and a leading "@@" are stripped, the text is lower-cased and, among |
+// all keyword candidates, the one with the fewest filters already registered |
+// wins; ties go to the longer candidate. |
+OwnedString Matcher::FindKeyword(const Filter& filter) const |
+{ |
+  OwnedString bestKeyword; |
+  OwnedString filterText(filter.GetText()); |
+  if (TestRegExp(mReId, filterText)) |
+    return bestKeyword; |
+ |
+  // Cut off the trailing option list, if there is one. |
+  auto optionsStart = ExecRegExp(mOptionsReId, filterText); |
+  if (optionsStart != String::npos) |
+    filterText = DependentString(filterText, 0, optionsStart); |
+ |
+  // Cut off the "@@" whitelist marker. |
+  if (filterText.length() >= 2 && filterText[0] == '@' && filterText[1] == '@') |
+    filterText = DependentString(filterText, 2); |
+ |
+  filterText.toLower(); |
+  intrusive_ptr<ReMatchResults> matches(new ReMatchResults, false); |
+  if (!filterText.match(mCandidatesReId, *matches)) |
+    return bestKeyword; |
+ |
+  uint32_t bestCount = 0xffffff; |
+  uint32_t bestLength = 0; |
+  for (auto rawCandidate : matches->candidates) |
+  { |
+    if (rawCandidate.empty()) |
+      continue; |
+ |
+    // Each match starts with the delimiter character; skip it. |
+    auto keyword = DependentString(rawCandidate, 1); |
+    auto bucket = mFilterByKeyword.find(keyword); |
+    auto bucketSize = bucket ? bucket->second.size() : 0; |
+ |
+    const bool lessUsed = bucketSize < bestCount; |
+    const bool equallyUsedButLonger = |
+        bucketSize == bestCount && keyword.length() > bestLength; |
+    if (!lessUsed && !equallyUsedButLonger) |
+      continue; |
+ |
+    bestKeyword = keyword; |
+    bestCount = bucketSize; |
+    bestLength = keyword.length(); |
+  } |
+  return bestKeyword; |
+} |
+ |
+// Tries every filter registered under the given keyword against the request |
+// and returns the first one whose Matches() succeeds. Returns an empty |
+// FilterPtr if the keyword is unknown or none of its filters matches. With |
+// specificOnly set, generic filters are skipped unless they are of type |
+// WHITELIST. |
+FilterPtr Matcher::CheckEntryMatch(const String& keyword, |
+    const String& location, |
+    int typeMask, DependentString& docDomain, bool thirdParty, |
+    const String& sitekey, bool specificOnly) const |
+{ |
+  auto entry = mFilterByKeyword.find(keyword); |
+  if (!entry) |
+    return FilterPtr(); |
+ |
+  // Look at the stored list in place. Copying the list and every FilterPtr |
+  // in it on each lookup is not needed; only the returned hit is copied. |
+  const auto& filters = entry->second; |
+  for (const auto& filter : filters) |
+  { |
+    auto activeFilter = static_cast<ActiveFilter*>(filter.get()); |
+    if (specificOnly && activeFilter->IsGeneric() && |
+        (activeFilter->mType != Filter::Type::WHITELIST)) |
+      continue; |
+ |
+    auto reFilter = static_cast<RegExpFilter*>(activeFilter); |
+    if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) |
+      return filter; |
+  } |
+ |
+  return FilterPtr(); |
+} |
+ |
+// Cache size at which CombinedMatcher::MatchesAny resets the result cache |
+// before storing a new entry. |
+const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000; |
+ |
+// Sizes the result cache and registers the regular expression used to split |
+// a location into keyword candidates. |
+CombinedMatcher::CombinedMatcher() |
+ : mResultCache(1024), mMatchReId(-1) |
+{ |
+ // The -1 set above is replaced by the id GenerateRegExp returns. |
+ mMatchReId = GenerateRegExp(matchRegExp, true, true); |
+} |
+ |
+// Adds the filter to the matcher that GetMatcher() selects for it and |
+// discards all cached results. |
+void CombinedMatcher::Add(Filter& filter) |
+{ |
+ GetMatcher(filter).Add(filter); |
+ ResetCache(); |
+} |
+ |
+// Removes the filter from the matcher that GetMatcher() selects for it and |
+// discards all cached results. |
+void CombinedMatcher::Remove(Filter& filter) |
+{ |
+ GetMatcher(filter).Remove(filter); |
+ ResetCache(); |
+} |
+ |
+// Clears both underlying matchers as well as the result cache. |
+void CombinedMatcher::Clear() |
+{ |
+ mBlacklist.Clear(); |
+ mWhitelist.Clear(); |
+ ResetCache(); |
+} |
+ |
+// Forwards to HasFilter() of the matcher that GetMatcher() selects for the |
+// filter. |
+bool CombinedMatcher::HasFilter(const Filter& filter) const |
+{ |
+ return GetMatcher(filter).HasFilter(filter); |
+} |
+ |
+// Forwards to GetKeywordForFilter() of the matcher that GetMatcher() selects |
+// for the filter. |
+const String& CombinedMatcher::GetKeywordForFilter(const Filter& filter) const |
+{ |
+ return GetMatcher(filter).GetKeywordForFilter(filter); |
+} |
+ |
+// Cached front end for MatchesAnyInternal(). All arguments are joined into a |
+// space-separated cache key. A cached result is returned directly; otherwise |
+// the result is computed and stored. The returned pointer is the one |
+// released from the resulting FilterPtr. |
+Filter* CombinedMatcher::MatchesAny(const String& location, |
+    int typeMask, DependentString& docDomain, bool thirdParty, |
+    const String& sitekey, bool specificOnly) |
+{ |
+  // Build the key from every argument, in declaration order. |
+  OwnedString cacheKey(location); |
+  cacheKey.append(u" "_str); |
+  cacheKey.append(typeMask); |
+  cacheKey.append(u" "_str); |
+  cacheKey.append(docDomain); |
+  cacheKey.append(u" "_str); |
+  cacheKey.append(thirdParty); |
+  cacheKey.append(u" "_str); |
+  cacheKey.append(sitekey); |
+  cacheKey.append(u" "_str); |
+  cacheKey.append(specificOnly); |
+ |
+  auto cached = mResultCache.find(cacheKey); |
+  if (cached) |
+  { |
+    FilterPtr hit; |
+    hit = cached->second.filter(); |
+    return hit.release(); |
+  } |
+ |
+  FilterPtr computed; |
+  computed = MatchesAnyInternal(location, typeMask, docDomain, |
+                                thirdParty, sitekey, specificOnly); |
+ |
+  // Once the limit is reached the whole cache is dropped before storing. |
+  if (mResultCache.size() >= MAX_CACHE_ENTRIES) |
+    ResetCache(); |
+ |
+  CacheEntry freshEntry(std::move(cacheKey), computed); |
+  mResultCache[freshEntry.key()] = freshEntry; |
+ |
+  return computed.release(); |
+} |
+ |
+// Forwards to FindKeyword() of the matcher that GetMatcher() selects for the |
+// filter. |
+OwnedString CombinedMatcher::FindKeyword(const Filter& filter) const |
+{ |
+ return GetMatcher(filter).FindKeyword(filter); |
+} |
+ |
+// Drops every cached match result. |
+void CombinedMatcher::ResetCache() |
+{ |
+ mResultCache.clear(); |
+} |
+ |
+// Uncached matching against both matchers. The lower-cased location is split |
+// into keyword candidates, with the empty keyword appended last. For each |
+// candidate the whitelist is consulted first, always with specificOnly set |
+// to false, and a whitelist match is returned at once. The first blacklist |
+// match is remembered and returned only if no candidate produced a whitelist |
+// match. Returns an empty FilterPtr if nothing matches. |
+FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location, |
+    int typeMask, DependentString& docDomain, bool thirdParty, |
+    const String& sitekey, bool specificOnly) const |
+{ |
+  OwnedString text(location); |
+  text.toLower(); |
+  intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); |
+  // The result of match() is not checked: the empty keyword appended below |
+  // is looked up in either case. |
+  text.match(mMatchReId, *reResult); |
+ |
+  auto& candidates = reResult->candidates; |
+  candidates.push_back(OwnedString()); |
+ |
+  FilterPtr blacklistHit; |
+  // Iterate by reference; copying each candidate string is not needed. |
+  for (const auto& substr : candidates) |
+  { |
+    auto result = mWhitelist.CheckEntryMatch( |
+        substr, location, typeMask, docDomain, thirdParty, sitekey, false); |
+    if (result) |
+      return result; |
+ |
+    // Keep only the first blacklist hit; later candidates are still needed |
+    // for the whitelist lookup. |
+    if (!blacklistHit) |
+      blacklistHit = mBlacklist.CheckEntryMatch( |
+          substr, location, typeMask, docDomain, thirdParty, sitekey, |
+          specificOnly); |
+  } |
+  return blacklistHit; |
+} |