| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| (Empty) | |
| 1 /* | |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | |
| 3 * Copyright (C) 2006-present eyeo GmbH | |
| 4 * | |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | |
| 6 * it under the terms of the GNU General Public License version 3 as | |
| 7 * published by the Free Software Foundation. | |
| 8 * | |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 12 * GNU General Public License for more details. | |
| 13 * | |
| 14 * You should have received a copy of the GNU General Public License | |
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
| 16 */ | |
| 17 | |
| 18 #include "Matcher.h" | |
| 19 #include "../filter/WhitelistFilter.h" | |
| 20 | |
| 21 class RollingHash | |
| 22 { | |
| 23 private: | |
| 24 static const String::size_type SUBSTRING_LENGTH = 8; | |
| 25 static const uint32_t VALUE_SIZE = 32; | |
| 26 typedef uint32_t result_type; | |
| 27 String::size_type mCount; | |
| 28 result_type mResult; | |
| 29 String::value_type mCurrentChars[SUBSTRING_LENGTH]; | |
| 30 | |
| 31 private: | |
| 32 result_type Rotate(result_type value, uint32_t bits) | |
|
sergei
2017/10/18 10:58:43
should it be static?
| |
| 33 { | |
| 34 return value << bits | value >> (VALUE_SIZE - bits); | |
|
sergei
2017/10/18 10:58:42
Mind adding static_assert(std::is_unsigned<result_
sergei
2017/10/18 10:58:42
I wonder whether our compilers are smart enough to
| |
| 35 } | |
| 36 | |
| 37 public: | |
| 38 RollingHash() | |
| 39 : mCount(0), mResult(0) | |
| 40 { | |
| 41 } | |
| 42 | |
| 43 void Advance(String::value_type newChar) | |
| 44 { | |
| 45 auto pos = mCount++ % SUBSTRING_LENGTH; | |
| 46 mResult = Rotate(mResult, 1) ^ newChar; | |
| 47 if (mCount > SUBSTRING_LENGTH) | |
|
sergei
2017/10/18 10:58:43
Do we really need this condition?
Can we rather se
| |
| 48 mResult ^= Rotate(mCurrentChars[pos], SUBSTRING_LENGTH); | |
|
Wladimir Palant
2017/10/17 12:51:22
Normally, one wouldn't use the char value directly
| |
| 49 mCurrentChars[pos] = newChar; | |
| 50 } | |
| 51 | |
| 52 int32_t PrefixLength() | |
|
hub
2017/11/20 17:38:33
IMHO this should be const method.
| |
| 53 { | |
| 54 return static_cast<int32_t>(mCount) - SUBSTRING_LENGTH; | |
| 55 } | |
|
Wladimir Palant
2017/10/17 12:51:22
This goal of this method is to avoid exposing SUBS
sergei
2017/10/18 10:58:43
IMO not only the caller should do the calculation
| |
| 56 | |
| 57 result_type GetResult() | |
|
hub
2017/11/20 17:38:33
and const method here too.
| |
| 58 { | |
| 59 return mResult & 0x7FFFFFFF; | |
|
Wladimir Palant
2017/10/17 12:51:22
We cut off the top bit because key values 0xFFFFFF
sergei
2017/10/18 10:58:42
Maybe put a special const value masking the range
| |
| 60 } | |
| 61 }; | |
| 62 | |
| 63 Matcher* Matcher::Create() | |
| 64 { | |
| 65 return new Matcher(); | |
| 66 } | |
| 67 | |
| 68 void Matcher::Add(Filter& filter) | |
| 69 { | |
| 70 RegExpFilter* regExpFilter = filter.As<RegExpFilter>(); | |
| 71 if (!regExpFilter) | |
| 72 return; | |
| 73 | |
| 74 RollingHash hash; | |
| 75 DependentString str(regExpFilter->GetRegExpSource()); | |
| 76 int32_t minPrefixLength = 0; | |
| 77 for (auto i = 0; i < str.length(); i++) | |
|
sergei
2017/10/18 10:58:43
Here auto resolves into `int`, but should be Strin
| |
| 78 { | |
| 79 auto currentChar = str[i]; | |
| 80 if (currentChar == u'*' || currentChar == u'^' || currentChar == u'|') | |
| 81 minPrefixLength = i + 1; | |
| 82 hash.Advance(currentChar); | |
| 83 if (hash.PrefixLength() >= minPrefixLength) | |
| 84 { | |
| 85 auto candidate = hash.GetResult(); | |
| 86 auto entry = mFilters.find(candidate); | |
| 87 if (!entry) | |
| 88 { | |
| 89 entry.assign(candidate, RegExpFilterPtr(regExpFilter)); | |
| 90 return; | |
| 91 } | |
| 92 } | |
| 93 } | |
| 94 | |
| 95 mUnoptimizedFilters.push_back(RegExpFilterPtr(regExpFilter)); | |
| 96 } | |
| 97 | |
| 98 void Matcher::Remove(const Filter& filter) | |
| 99 { | |
| 100 const RegExpFilter* regExpFilter = filter.As<RegExpFilter>(); | |
| 101 if (!regExpFilter) | |
| 102 return; | |
| 103 | |
| 104 RollingHash hash; | |
| 105 DependentString str(regExpFilter->GetRegExpSource()); | |
| 106 String::size_type minPrefixLength = 0; | |
| 107 for (auto i = 0; i < str.length(); i++) | |
| 108 { | |
| 109 auto currentChar = str[i]; | |
| 110 if (currentChar == u'*' || currentChar == u'^' || currentChar == u'|') | |
|
sergei
2017/10/18 10:58:43
What about putting this condition into an inline f
| |
| 111 minPrefixLength = i + 1; | |
| 112 hash.Advance(currentChar); | |
| 113 if (hash.PrefixLength() >= minPrefixLength) | |
| 114 { | |
| 115 auto candidate = hash.GetResult(); | |
| 116 auto entry = mFilters.find(candidate); | |
| 117 if (entry && entry->second == &filter) | |
|
sergei
2017/10/18 10:58:43
The type of second is RegExpFilterPtr, so finally
| |
| 118 { | |
| 119 mFilters.erase(candidate); | |
| 120 return; | |
| 121 } | |
| 122 } | |
| 123 } | |
| 124 | |
| 125 for (auto ptr : mUnoptimizedFilters) | |
| 126 if (ptr == &filter) | |
| 127 ptr.reset(); | |
| 128 } | |
| 129 | |
| 130 RegExpFilter* Matcher::MatchesAny(const String& location, int typeMask, | |
| 131 DependentString& docDomain, bool thirdParty, const String& siteKey, | |
| 132 bool specificOnly) | |
| 133 { | |
| 134 RegExpFilterPtr result; | |
| 135 | |
| 136 RollingHash hash; | |
| 137 for (auto i = 0; i < location.length(); i++) | |
| 138 { | |
| 139 hash.Advance(location[i]); | |
| 140 if (hash.PrefixLength() >= 0) | |
| 141 { | |
| 142 auto entry = mFilters.find(hash.GetResult()); | |
| 143 if (!entry) | |
| 144 continue; | |
| 145 | |
| 146 if (specificOnly && !entry->second->As<WhitelistFilter>() && entry->second ->IsGeneric()) | |
| 147 continue; | |
| 148 | |
| 149 if (!entry->second->Matches(location, typeMask, docDomain, thirdParty, sit eKey)) | |
| 150 continue; | |
| 151 | |
| 152 result.reset(entry->second.get()); | |
| 153 if (result->As<WhitelistFilter>()) | |
| 154 return result.release(); | |
| 155 } | |
| 156 } | |
| 157 | |
| 158 for (auto ptr : mUnoptimizedFilters) | |
| 159 { | |
| 160 if (!ptr) | |
| 161 continue; | |
| 162 | |
| 163 if (specificOnly && !ptr->As<WhitelistFilter>() && ptr->IsGeneric()) | |
| 164 continue; | |
| 165 | |
| 166 if (!ptr->Matches(location, typeMask, docDomain, thirdParty, siteKey)) | |
| 167 continue; | |
| 168 | |
| 169 result.reset(ptr.get()); | |
| 170 if (result->As<WhitelistFilter>()) | |
| 171 return result.release(); | |
| 172 } | |
| 173 | |
| 174 return result.release(); | |
| 175 } | |
| OLD | NEW |