Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: compiled/filter/Matcher.cpp

Issue 29556737: Issue 5141 - Convert filter match to C++ (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Fixed many issues. One test left out. Created Oct. 6, 2017, 1:45 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: compiled/filter/Matcher.cpp
===================================================================
new file mode 100644
--- /dev/null
+++ b/compiled/filter/Matcher.cpp
@@ -0,0 +1,295 @@
+/*
+ * This file is part of Adblock Plus <https://adblockplus.org/>,
+ * Copyright (C) 2006-present eyeo GmbH
+ *
+ * Adblock Plus is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 3 as
+ * published by the Free Software Foundation.
+ *
+ * Adblock Plus is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "Matcher.h"
+#include "RegExpFilter.h"
+#include "../library.h"
+
+namespace { // File-local regular expression sources; the constructors below hand them to GenerateRegExp().
+ const DependentString regexpRegExp = // Tested against the whole filter text in Matcher::FindKeyword(); on a hit the keyword stays empty.
+ u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_str;
+ const DependentString optionsRegExp = // Locates the trailing "$option,..." part that Matcher::FindKeyword() cuts off.
+ u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str;
+ const DependentString candidateRegExp = // Yields keyword candidates; each hit starts with one separator character that FindKeyword() skips.
+ u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str;
+ const DependentString matchRegExp = u"[a-z0-9%]{3,}"_str; // Applied to the lowercased location to obtain the keywords to look up.
+}
+
+Matcher::Matcher() // Creates both lookup maps and obtains an id for each of the four file-local patterns.
+ : mFilterByKeyword(1024), mKeywordByFilter(1024), // 1024 is presumably an initial capacity hint - TODO confirm against the map type.
+ mReId(-1), mOptionsReId(-1), mCandidatesReId(-1)
+{
+ mReId = GenerateRegExp(regexpRegExp, true, false);
+ mOptionsReId = GenerateRegExp(optionsRegExp, true, false);
+ mCandidatesReId = GenerateRegExp(candidateRegExp, true, true);
+ mMatchReId = GenerateRegExp(matchRegExp, true, true); // NOTE(review): unlike its siblings, mMatchReId has no -1 entry in the initializer list.
+}
+
+void Matcher::Add(Filter& filter) // Registers the filter under the keyword FindKeyword() picks for it.
+{
+ if (mKeywordByFilter.find(filter.GetText())) // Text already registered: nothing to do.
+ return;
+
+ auto keyword = FindKeyword(filter);
+
+ mFilterByKeyword[keyword].push_back(FilterPtr(&filter)); // Has to happen before keyword is moved from below.
sergei 2017/10/11 09:55:16 Although the review is already closed I think it's
+ mKeywordByFilter[filter.GetText()] = // Reverse mapping, used by Remove(), HasFilter() and GetKeywordForFilter().
+ FilterKeyword(std::move(keyword), filter);
+}
+
+// Unregisters a filter from this matcher.
+//
+// Does nothing if the filter's text is not known. Otherwise the filter is
+// taken out of the list stored under its keyword (the whole list entry is
+// dropped when it holds at most one filter) and the reverse mapping from
+// filter text to keyword is erased.
+void Matcher::Remove(Filter& filter)
+{
+  auto entry = mKeywordByFilter.find(filter.GetText());
+  if (!entry)
+    return;
+
+  auto& keyword = static_cast<const String&>(entry->second);
+  // Bind by reference: erasing from a by-value copy would leave the list
+  // stored in the map untouched and the filter would keep matching.
+  auto& list = mFilterByKeyword[keyword];
+  if (list.size() <= 1)
+    mFilterByKeyword.erase(keyword);
+  else
+  {
+    // Only erase when the filter is really in the list; erasing the end
+    // iterator is undefined behavior.
+    auto position = std::find(list.cbegin(), list.cend(), FilterPtr(&filter));
+    if (position != list.cend())
+      list.erase(position);
+  }
+
+  // keyword refers into this entry, so it has to be erased last.
+  mKeywordByFilter.erase(filter.GetText());
+}
+
+void Matcher::Clear() // Forgets every filter: empties both the keyword->filters and the filter->keyword map.
+{
+ mFilterByKeyword.clear();
+ mKeywordByFilter.clear();
+}
+
+// Reports whether the filter's text has an entry in the filter->keyword map,
+// i.e. whether the filter has been added to this matcher.
+bool Matcher::HasFilter(const Filter& filter) const
+{
+  auto known = mKeywordByFilter.find(filter.GetText());
+  return static_cast<bool>(known);
+}
+
+namespace
+{
+  // Shared empty keyword returned by Matcher::GetKeywordForFilter() for
+  // filters the matcher does not know. It is only ever handed out as a
+  // const reference, so it is declared const like the other file-local
+  // string constants.
+  const DependentString emptyString = u""_str;
+}
+
+// Returns the keyword the filter was registered under, or the shared empty
+// string when the filter's text is not known to this matcher.
+const String& Matcher::GetKeywordForFilter(const Filter& filter) const
+{
+  auto known = mKeywordByFilter.find(filter.GetText());
+  if (!known)
+    return emptyString;
+
+  return static_cast<const String&>(known->second);
+}
+
+// Looks for a filter of this matcher that matches the given request.
+//
+// The lowercased location is split into keywords with matchRegExp; the
+// filters registered under each keyword, and finally those registered under
+// the empty keyword, are tested via CheckEntryMatch(). Returns the first
+// matching filter (released from its smart pointer) or nullptr.
+Filter* Matcher::MatchesAny(const String& location,
+    int typeMask, DependentString& docDomain, bool thirdParty,
+    const String& sitekey, bool specificOnly) const
+{
+  OwnedString text(location);
+  text.toLower();
+  intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false);
+  // The result of match() must not gate the lookup: filters stored under the
+  // empty keyword have to be tested even if the location yields no keyword,
+  // exactly as CombinedMatcher::MatchesAnyInternal() does.
+  // NOTE(review): assumes candidates stays empty when nothing matches.
+  text.match(mMatchReId, *reResult);
+
+  auto& candidates = reResult->candidates;
+  candidates.push_back(OwnedString());
+  // Candidates are only read, so iterate by reference instead of copying
+  // every string.
+  for (const auto& candidate : candidates)
+  {
+    auto result = CheckEntryMatch(candidate, location, typeMask, docDomain,
+                                  thirdParty, sitekey, specificOnly);
+    if (result)
+      return result.release();
+  }
Wladimir Palant 2017/10/09 08:39:47 As mentioned in the issue description, we should n
sergei 2017/10/09 15:27:53 Although it merely converts the existing JS code a
Wladimir Palant 2017/10/10 07:39:05 I strongly disagree. Landing crappy code is always
+  return nullptr;
+}
+
+// Picks the keyword under which a filter should be stored.
+//
+// Returns an empty string when the filter text matches regexpRegExp or when
+// no keyword candidate is found. Otherwise the candidate that currently has
+// the fewest filters registered wins; ties go to the longer candidate.
+OwnedString Matcher::FindKeyword(const Filter& filter) const
+{
+  OwnedString result;
+  OwnedString text(filter.GetText());
+  if (TestRegExp(mReId, text))
+    return result;
+
+  // Remove options
+  // NOTE(review): here and below text is assigned from a view of itself;
+  // presumably OwnedString assignment copes with that - confirm.
+  auto index = ExecRegExp(mOptionsReId, text);
+  if (index != String::npos)
+    text = DependentString(text, 0, index);
+
+  // Remove whitelist marker
+  if (text.length() >= 2 && text[0] == '@' && text[1] == '@')
+    text = DependentString(text, 2);
+
+  text.toLower();
+  intrusive_ptr<ReMatchResults> keywords(new ReMatchResults, false);
+  auto match = text.match(mCandidatesReId, *keywords);
+  if (!match)
+    return result;
+
+  auto& candidates = keywords->candidates;
+
+  uint32_t resultCount = 0xffffff;
+  uint32_t resultLength = 0;
+  // Iterate by reference: the candidates are only read, and the winning one
+  // is copied into result anyway, so a per-iteration string copy is wasted.
+  for (auto& substr : candidates)
+  {
+    if (substr.empty())
+      continue;
+
+    // Skip the separator character that candidateRegExp matches in front of
+    // the keyword itself.
+    auto candidate = DependentString(substr, 1);
+    auto entry = mFilterByKeyword.find(candidate);
+    auto count = entry ? entry->second.size() : 0;
+    if (count < resultCount ||
+        (count == resultCount && candidate.length() > resultLength))
+    {
+      result = candidate;
+      resultCount = count;
+      resultLength = candidate.length();
+    }
+  }
+  return result;
+}
+
+// Tests the filters registered under one keyword against a request.
+//
+// Returns the first filter whose Matches() accepts the request, or an empty
+// FilterPtr when the keyword is unknown or none of its filters match. With
+// specificOnly set, generic filters are skipped unless their type is
+// WHITELIST.
+FilterPtr Matcher::CheckEntryMatch(const String& keyword,
+    const String& location,
+    int typeMask, DependentString& docDomain, bool thirdParty,
+    const String& sitekey, bool specificOnly) const
+{
+  auto entry = mFilterByKeyword.find(keyword);
+  if (!entry)
+    return FilterPtr();
+
+  // Borrow the list and its elements instead of copying them: this runs for
+  // every keyword of every request, and nothing here modifies the list.
+  const auto& filters = entry->second;
+  for (const auto& filter : filters)
+  {
+    auto activeFilter = static_cast<ActiveFilter*>(filter.get());
+    if (specificOnly && activeFilter->IsGeneric() &&
+        (activeFilter->mType != Filter::Type::WHITELIST))
+      continue;
+
+    // NOTE(review): assumes every filter stored in a matcher is a
+    // RegExpFilter - the cast is unchecked.
+    auto reFilter = static_cast<RegExpFilter*>(activeFilter);
+    if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey))
+      return filter;
+  }
+
+  return FilterPtr();
+}
+
+const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000; // Once the result cache holds this many entries, MatchesAny() clears it before adding another.
+
+CombinedMatcher::CombinedMatcher() // Creates the result cache and obtains an id for matchRegExp.
+ : mResultCache(1024), mMatchReId(-1) // 1024 is presumably an initial capacity hint - TODO confirm against the map type.
+{
+ mMatchReId = GenerateRegExp(matchRegExp, true, true); // Same pattern Matcher uses to split a location into keywords.
+}
+
+// Adds the filter to whichever matcher GetMatcher() selects for it, then
+// throws away all cached match results.
+void CombinedMatcher::Add(Filter& filter)
+{
+  auto&& target = GetMatcher(filter);
+  target.Add(filter);
+  ResetCache();
+}
+
+// Removes the filter from whichever matcher GetMatcher() selects for it,
+// then throws away all cached match results.
+void CombinedMatcher::Remove(Filter& filter)
+{
+  auto&& target = GetMatcher(filter);
+  target.Remove(filter);
+  ResetCache();
+}
+
+void CombinedMatcher::Clear() // Empties both underlying matchers and the result cache.
+{
+ mBlacklist.Clear();
+ mWhitelist.Clear();
+ ResetCache(); // Cached results refer to filters that are no longer registered.
+}
+
+// Asks the matcher GetMatcher() selects for the filter whether it knows it.
+bool CombinedMatcher::HasFilter(const Filter& filter) const
+{
+  auto&& target = GetMatcher(filter);
+  return target.HasFilter(filter);
+}
+
+// Returns the keyword stored for the filter by the matcher GetMatcher()
+// selects for it.
+const String& CombinedMatcher::GetKeywordForFilter(const Filter& filter) const
+{
+  auto&& target = GetMatcher(filter);
+  return target.GetKeywordForFilter(filter);
+}
+
+// Cached front end to MatchesAnyInternal().
+//
+// All arguments are joined with single spaces into a cache key. On a cache
+// hit the stored filter is returned; otherwise the result is computed and
+// stored, after wiping the cache if it already holds MAX_CACHE_ENTRIES
+// entries. Returns the filter released from its smart pointer; the stored
+// result may be empty.
+Filter* CombinedMatcher::MatchesAny(const String& location,
+    int typeMask, DependentString& docDomain, bool thirdParty,
+    const String& sitekey, bool specificOnly)
+{
+  OwnedString key(location);
+  key.append(u" "_str);
+  key.append(typeMask);
+  key.append(u" "_str);
+  key.append(docDomain);
+  key.append(u" "_str);
+  key.append(thirdParty);
+  key.append(u" "_str);
+  key.append(sitekey);
+  key.append(u" "_str);
+  key.append(specificOnly);
+
+  FilterPtr result;
+
+  auto cachedResult = mResultCache.find(key);
+  if (cachedResult)
+    result = cachedResult->second.filter();
+  else
+  {
+    result = MatchesAnyInternal(location, typeMask, docDomain,
+                                thirdParty, sitekey, specificOnly);
+
+    if (mResultCache.size() >= MAX_CACHE_ENTRIES)
+      ResetCache();
+
+    // The local entry is not needed afterwards, so hand it over to the
+    // cache instead of copying its key string and filter reference.
+    // NOTE(review): if the map key only refers to cache.key() rather than
+    // owning a copy, moving also keeps that key valid once the local is
+    // gone - confirm against the map and CacheEntry implementations.
+    CacheEntry cache(std::move(key), result);
+    mResultCache[cache.key()] = std::move(cache);
+  }
+
+  return result.release();
+}
+
+// Delegates keyword selection to the matcher GetMatcher() selects for the
+// filter.
+OwnedString CombinedMatcher::FindKeyword(const Filter& filter) const
+{
+  auto&& target = GetMatcher(filter);
+  return target.FindKeyword(filter);
+}
+
+void CombinedMatcher::ResetCache() // Discards every cached MatchesAny() result.
+{
+ mResultCache.clear();
+}
+
+// Uncached matching against both the whitelist and the blacklist.
+//
+// The lowercased location is split into keywords with matchRegExp, and the
+// empty keyword is appended. For each keyword the whitelist is consulted
+// first (always with specificOnly == false); a whitelist hit is returned
+// right away. The first blacklist hit is remembered and returned only if no
+// keyword produces a whitelist hit.
+FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location,
+    int typeMask, DependentString& docDomain, bool thirdParty,
+    const String& sitekey, bool specificOnly) const
+{
+  OwnedString text(location);
+  text.toLower();
+  intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false);
+  text.match(mMatchReId, *reResult);
+
+  auto& candidates = reResult->candidates;
+  candidates.push_back(OwnedString());
+
+  FilterPtr blacklistHit;
+  // Keywords are only read, so iterate by reference instead of copying
+  // every string.
+  for (const auto& substr : candidates)
+  {
+    auto result = mWhitelist.CheckEntryMatch(
+      substr, location, typeMask, docDomain, thirdParty, sitekey, false);
+    if (result)
+      return result;
+
+    // Keep scanning after a blacklist hit: a later keyword may still yield
+    // a whitelist hit, which takes precedence.
+    if (!blacklistHit)
+      blacklistHit = mBlacklist.CheckEntryMatch(
+        substr, location, typeMask, docDomain, thirdParty, sitekey,
+        specificOnly);
+  }
+  return blacklistHit;
+}

Powered by Google App Engine
This is Rietveld