| Index: compiled/matcher/Matcher.cpp |
| =================================================================== |
| new file mode 100644 |
| --- /dev/null |
| +++ b/compiled/matcher/Matcher.cpp |
| @@ -0,0 +1,175 @@ |
| +/* |
| + * This file is part of Adblock Plus <https://adblockplus.org/>, |
| + * Copyright (C) 2006-present eyeo GmbH |
| + * |
| + * Adblock Plus is free software: you can redistribute it and/or modify |
| + * it under the terms of the GNU General Public License version 3 as |
| + * published by the Free Software Foundation. |
| + * |
| + * Adblock Plus is distributed in the hope that it will be useful, |
| + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| + * GNU General Public License for more details. |
| + * |
| + * You should have received a copy of the GNU General Public License |
| + * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| + */ |
| + |
| +#include "Matcher.h" |
| +#include "../filter/WhitelistFilter.h" |
| + |
| +class RollingHash |
| +{ |
| +private: |
| + static const String::size_type SUBSTRING_LENGTH = 8; |
| + static const uint32_t VALUE_SIZE = 32; |
| + typedef uint32_t result_type; |
| + String::size_type mCount; |
| + result_type mResult; |
| + String::value_type mCurrentChars[SUBSTRING_LENGTH]; |
| + |
| +private: |
| + result_type Rotate(result_type value, uint32_t bits) |
|
sergei
2017/10/18 10:58:43
should it be static?
|
| + { |
| + return value << bits | value >> (VALUE_SIZE - bits); |
|
sergei
2017/10/18 10:58:42
Mind adding static_assert(std::is_unsigned<result_
sergei
2017/10/18 10:58:42
I wonder whether our compilers are smart enough to
|
| + } |
| + |
| +public: |
| + RollingHash() |
| + : mCount(0), mResult(0) |
| + { |
| + } |
| + |
| + void Advance(String::value_type newChar) |
| + { |
| + auto pos = mCount++ % SUBSTRING_LENGTH; |
| + mResult = Rotate(mResult, 1) ^ newChar; |
| + if (mCount > SUBSTRING_LENGTH) |
|
sergei
2017/10/18 10:58:43
Do we really need this condition?
Can we rather se
|
| + mResult ^= Rotate(mCurrentChars[pos], SUBSTRING_LENGTH); |
|
Wladimir Palant
2017/10/17 12:51:22
Normally, one wouldn't use the char value directly
|
| + mCurrentChars[pos] = newChar; |
| + } |
| + |
| + int32_t PrefixLength() |
|
hub
2017/11/20 17:38:33
IMHO this should be const method.
|
| + { |
| + return static_cast<int32_t>(mCount) - SUBSTRING_LENGTH; |
| + } |
|
Wladimir Palant
2017/10/17 12:51:22
This goal of this method is to avoid exposing SUBS
sergei
2017/10/18 10:58:43
IMO not only the caller should do the calculation
|
| + |
| + result_type GetResult() |
|
hub
2017/11/20 17:38:33
and const method here too.
|
| + { |
| + return mResult & 0x7FFFFFFF; |
|
Wladimir Palant
2017/10/17 12:51:22
We cut off the top bit because key values 0xFFFFFF
sergei
2017/10/18 10:58:42
Maybe put a special const value masking the range
|
| + } |
| +}; |
| + |
| +Matcher* Matcher::Create() |
| +{ |
| + return new Matcher(); |
| +} |
| + |
| +void Matcher::Add(Filter& filter) |
| +{ |
| + RegExpFilter* regExpFilter = filter.As<RegExpFilter>(); |
| + if (!regExpFilter) |
| + return; |
| + |
| + RollingHash hash; |
| + DependentString str(regExpFilter->GetRegExpSource()); |
| + int32_t minPrefixLength = 0; |
| + for (auto i = 0; i < str.length(); i++) |
|
sergei
2017/10/18 10:58:43
Here auto resolves into `int`, but should be Strin
|
| + { |
| + auto currentChar = str[i]; |
| + if (currentChar == u'*' || currentChar == u'^' || currentChar == u'|') |
| + minPrefixLength = i + 1; |
| + hash.Advance(currentChar); |
| + if (hash.PrefixLength() >= minPrefixLength) |
| + { |
| + auto candidate = hash.GetResult(); |
| + auto entry = mFilters.find(candidate); |
| + if (!entry) |
| + { |
| + entry.assign(candidate, RegExpFilterPtr(regExpFilter)); |
| + return; |
| + } |
| + } |
| + } |
| + |
| + mUnoptimizedFilters.push_back(RegExpFilterPtr(regExpFilter)); |
| +} |
| + |
| +void Matcher::Remove(const Filter& filter) |
| +{ |
| + const RegExpFilter* regExpFilter = filter.As<RegExpFilter>(); |
| + if (!regExpFilter) |
| + return; |
| + |
| + RollingHash hash; |
| + DependentString str(regExpFilter->GetRegExpSource()); |
| + String::size_type minPrefixLength = 0; |
| + for (auto i = 0; i < str.length(); i++) |
| + { |
| + auto currentChar = str[i]; |
| + if (currentChar == u'*' || currentChar == u'^' || currentChar == u'|') |
|
sergei
2017/10/18 10:58:43
What about putting this condition into an inline f
|
| + minPrefixLength = i + 1; |
| + hash.Advance(currentChar); |
| + if (hash.PrefixLength() >= minPrefixLength) |
| + { |
| + auto candidate = hash.GetResult(); |
| + auto entry = mFilters.find(candidate); |
| + if (entry && entry->second == &filter) |
|
sergei
2017/10/18 10:58:43
The type of second is RegExpFilterPtr, so finally
|
| + { |
| + mFilters.erase(candidate); |
| + return; |
| + } |
| + } |
| + } |
| + |
| + for (auto ptr : mUnoptimizedFilters) |
| + if (ptr == &filter) |
| + ptr.reset(); |
| +} |
| + |
| +RegExpFilter* Matcher::MatchesAny(const String& location, int typeMask, |
| + DependentString& docDomain, bool thirdParty, const String& siteKey, |
| + bool specificOnly) |
| +{ |
| + RegExpFilterPtr result; |
| + |
| + RollingHash hash; |
| + for (auto i = 0; i < location.length(); i++) |
| + { |
| + hash.Advance(location[i]); |
| + if (hash.PrefixLength() >= 0) |
| + { |
| + auto entry = mFilters.find(hash.GetResult()); |
| + if (!entry) |
| + continue; |
| + |
| + if (specificOnly && !entry->second->As<WhitelistFilter>() && entry->second->IsGeneric()) |
| + continue; |
| + |
| + if (!entry->second->Matches(location, typeMask, docDomain, thirdParty, siteKey)) |
| + continue; |
| + |
| + result.reset(entry->second.get()); |
| + if (result->As<WhitelistFilter>()) |
| + return result.release(); |
| + } |
| + } |
| + |
| + for (auto ptr : mUnoptimizedFilters) |
| + { |
| + if (!ptr) |
| + continue; |
| + |
| + if (specificOnly && !ptr->As<WhitelistFilter>() && ptr->IsGeneric()) |
| + continue; |
| + |
| + if (!ptr->Matches(location, typeMask, docDomain, thirdParty, siteKey)) |
| + continue; |
| + |
| + result.reset(ptr.get()); |
| + if (result->As<WhitelistFilter>()) |
| + return result.release(); |
| + } |
| + |
| + return result.release(); |
| +} |