Left: | ||
Right: |
OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | |
3 * Copyright (C) 2006-present eyeo GmbH | |
4 * | |
5 * Adblock Plus is free software: you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License version 3 as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * Adblock Plus is distributed in the hope that it will be useful, | |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 * GNU General Public License for more details. | |
13 * | |
14 * You should have received a copy of the GNU General Public License | |
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
16 */ | |
17 | |
18 #include "Matcher.h" | |
19 #include "../filter/WhitelistFilter.h" | |
20 | |
21 class RollingHash | |
22 { | |
23 private: | |
24 static const String::size_type SUBSTRING_LENGTH = 8; | |
25 static const uint32_t VALUE_SIZE = 32; | |
26 typedef uint32_t result_type; | |
27 String::size_type mCount; | |
28 result_type mResult; | |
29 String::value_type mCurrentChars[SUBSTRING_LENGTH]; | |
30 | |
31 private: | |
32 result_type Rotate(result_type value, uint32_t bits) | |
sergei
2017/10/18 10:58:43
should it be static?
| |
33 { | |
34 return value << bits | value >> (VALUE_SIZE - bits); | |
sergei
2017/10/18 10:58:42
Mind adding static_assert(std::is_unsigned<result_
sergei
2017/10/18 10:58:42
I wonder whether our compilers are smart enough to
| |
35 } | |
36 | |
37 public: | |
38 RollingHash() | |
39 : mCount(0), mResult(0) | |
40 { | |
41 } | |
42 | |
43 void Advance(String::value_type newChar) | |
44 { | |
45 auto pos = mCount++ % SUBSTRING_LENGTH; | |
46 mResult = Rotate(mResult, 1) ^ newChar; | |
47 if (mCount > SUBSTRING_LENGTH) | |
sergei
2017/10/18 10:58:43
Do we really need this condition?
Can we rather se
| |
48 mResult ^= Rotate(mCurrentChars[pos], SUBSTRING_LENGTH); | |
Wladimir Palant
2017/10/17 12:51:22
Normally, one wouldn't use the char value directly
| |
49 mCurrentChars[pos] = newChar; | |
50 } | |
51 | |
52 int32_t PrefixLength() | |
hub
2017/11/20 17:38:33
IMHO this should be const method.
| |
53 { | |
54 return static_cast<int32_t>(mCount) - SUBSTRING_LENGTH; | |
55 } | |
Wladimir Palant
2017/10/17 12:51:22
This goal of this method is to avoid exposing SUBS
sergei
2017/10/18 10:58:43
IMO not only the caller should do the calculation
| |
56 | |
57 result_type GetResult() | |
hub
2017/11/20 17:38:33
and const method here too.
| |
58 { | |
59 return mResult & 0x7FFFFFFF; | |
Wladimir Palant
2017/10/17 12:51:22
We cut off the top bit because key values 0xFFFFFF
sergei
2017/10/18 10:58:42
Maybe put a special const value masking the range
| |
60 } | |
61 }; | |
62 | |
63 Matcher* Matcher::Create() | |
64 { | |
65 return new Matcher(); | |
66 } | |
67 | |
68 void Matcher::Add(Filter& filter) | |
69 { | |
70 RegExpFilter* regExpFilter = filter.As<RegExpFilter>(); | |
71 if (!regExpFilter) | |
72 return; | |
73 | |
74 RollingHash hash; | |
75 DependentString str(regExpFilter->GetRegExpSource()); | |
76 int32_t minPrefixLength = 0; | |
77 for (auto i = 0; i < str.length(); i++) | |
sergei
2017/10/18 10:58:43
Here auto resolves into `int`, but should be Strin
| |
78 { | |
79 auto currentChar = str[i]; | |
80 if (currentChar == u'*' || currentChar == u'^' || currentChar == u'|') | |
81 minPrefixLength = i + 1; | |
82 hash.Advance(currentChar); | |
83 if (hash.PrefixLength() >= minPrefixLength) | |
84 { | |
85 auto candidate = hash.GetResult(); | |
86 auto entry = mFilters.find(candidate); | |
87 if (!entry) | |
88 { | |
89 entry.assign(candidate, RegExpFilterPtr(regExpFilter)); | |
90 return; | |
91 } | |
92 } | |
93 } | |
94 | |
95 mUnoptimizedFilters.push_back(RegExpFilterPtr(regExpFilter)); | |
96 } | |
97 | |
98 void Matcher::Remove(const Filter& filter) | |
99 { | |
100 const RegExpFilter* regExpFilter = filter.As<RegExpFilter>(); | |
101 if (!regExpFilter) | |
102 return; | |
103 | |
104 RollingHash hash; | |
105 DependentString str(regExpFilter->GetRegExpSource()); | |
106 String::size_type minPrefixLength = 0; | |
107 for (auto i = 0; i < str.length(); i++) | |
108 { | |
109 auto currentChar = str[i]; | |
110 if (currentChar == u'*' || currentChar == u'^' || currentChar == u'|') | |
sergei
2017/10/18 10:58:43
What about putting this condition into an inline f
| |
111 minPrefixLength = i + 1; | |
112 hash.Advance(currentChar); | |
113 if (hash.PrefixLength() >= minPrefixLength) | |
114 { | |
115 auto candidate = hash.GetResult(); | |
116 auto entry = mFilters.find(candidate); | |
117 if (entry && entry->second == &filter) | |
sergei
2017/10/18 10:58:43
The type of second is RegExpFilterPtr, so finally
| |
118 { | |
119 mFilters.erase(candidate); | |
120 return; | |
121 } | |
122 } | |
123 } | |
124 | |
125 for (auto ptr : mUnoptimizedFilters) | |
126 if (ptr == &filter) | |
127 ptr.reset(); | |
128 } | |
129 | |
130 RegExpFilter* Matcher::MatchesAny(const String& location, int typeMask, | |
131 DependentString& docDomain, bool thirdParty, const String& siteKey, | |
132 bool specificOnly) | |
133 { | |
134 RegExpFilterPtr result; | |
135 | |
136 RollingHash hash; | |
137 for (auto i = 0; i < location.length(); i++) | |
138 { | |
139 hash.Advance(location[i]); | |
140 if (hash.PrefixLength() >= 0) | |
141 { | |
142 auto entry = mFilters.find(hash.GetResult()); | |
143 if (!entry) | |
144 continue; | |
145 | |
146 if (specificOnly && !entry->second->As<WhitelistFilter>() && entry->second ->IsGeneric()) | |
147 continue; | |
148 | |
149 if (!entry->second->Matches(location, typeMask, docDomain, thirdParty, sit eKey)) | |
150 continue; | |
151 | |
152 result.reset(entry->second.get()); | |
153 if (result->As<WhitelistFilter>()) | |
154 return result.release(); | |
155 } | |
156 } | |
157 | |
158 for (auto ptr : mUnoptimizedFilters) | |
159 { | |
160 if (!ptr) | |
161 continue; | |
162 | |
163 if (specificOnly && !ptr->As<WhitelistFilter>() && ptr->IsGeneric()) | |
164 continue; | |
165 | |
166 if (!ptr->Matches(location, typeMask, docDomain, thirdParty, siteKey)) | |
167 continue; | |
168 | |
169 result.reset(ptr.get()); | |
170 if (result->As<WhitelistFilter>()) | |
171 return result.release(); | |
172 } | |
173 | |
174 return result.release(); | |
175 } | |
OLD | NEW |