Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/matcher/Matcher.cpp

Issue 29581602: Issue 5141 - [emscripten] Convert filter matcher to C++ Base URL: https://hg.adblockplus.org/adblockpluscore
Patch Set: Created Oct. 17, 2017, 12:31 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH
4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation.
8 *
9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "Matcher.h"
19 #include "../filter/WhitelistFilter.h"
20
21 class RollingHash
22 {
23 private:
24 static const String::size_type SUBSTRING_LENGTH = 8;
25 static const uint32_t VALUE_SIZE = 32;
26 typedef uint32_t result_type;
27 String::size_type mCount;
28 result_type mResult;
29 String::value_type mCurrentChars[SUBSTRING_LENGTH];
30
31 private:
32 result_type Rotate(result_type value, uint32_t bits)
sergei 2017/10/18 10:58:43 should it be static?
33 {
34 return value << bits | value >> (VALUE_SIZE - bits);
sergei 2017/10/18 10:58:42 Mind adding static_assert(std::is_unsigned<result_
sergei 2017/10/18 10:58:42 I wonder whether our compilers are smart enough to
35 }
36
37 public:
38 RollingHash()
39 : mCount(0), mResult(0)
40 {
41 }
42
43 void Advance(String::value_type newChar)
44 {
45 auto pos = mCount++ % SUBSTRING_LENGTH;
46 mResult = Rotate(mResult, 1) ^ newChar;
47 if (mCount > SUBSTRING_LENGTH)
sergei 2017/10/18 10:58:43 Do we really need this condition? Can we rather se
48 mResult ^= Rotate(mCurrentChars[pos], SUBSTRING_LENGTH);
Wladimir Palant 2017/10/17 12:51:22 Normally, one wouldn't use the char value directly
49 mCurrentChars[pos] = newChar;
50 }
51
52 int32_t PrefixLength()
hub 2017/11/20 17:38:33 IMHO this should be const method.
53 {
54 return static_cast<int32_t>(mCount) - SUBSTRING_LENGTH;
55 }
Wladimir Palant 2017/10/17 12:51:22 This goal of this method is to avoid exposing SUBS
sergei 2017/10/18 10:58:43 IMO not only the caller should do the calculation
56
57 result_type GetResult()
hub 2017/11/20 17:38:33 and const method here too.
58 {
59 return mResult & 0x7FFFFFFF;
Wladimir Palant 2017/10/17 12:51:22 We cut off the top bit because key values 0xFFFFFF
sergei 2017/10/18 10:58:42 Maybe put a special const value masking the range
60 }
61 };
62
63 Matcher* Matcher::Create()
64 {
65 return new Matcher();
66 }
67
68 void Matcher::Add(Filter& filter)
69 {
70 RegExpFilter* regExpFilter = filter.As<RegExpFilter>();
71 if (!regExpFilter)
72 return;
73
74 RollingHash hash;
75 DependentString str(regExpFilter->GetRegExpSource());
76 int32_t minPrefixLength = 0;
77 for (auto i = 0; i < str.length(); i++)
sergei 2017/10/18 10:58:43 Here auto resolves into `int`, but should be Strin
78 {
79 auto currentChar = str[i];
80 if (currentChar == u'*' || currentChar == u'^' || currentChar == u'|')
81 minPrefixLength = i + 1;
82 hash.Advance(currentChar);
83 if (hash.PrefixLength() >= minPrefixLength)
84 {
85 auto candidate = hash.GetResult();
86 auto entry = mFilters.find(candidate);
87 if (!entry)
88 {
89 entry.assign(candidate, RegExpFilterPtr(regExpFilter));
90 return;
91 }
92 }
93 }
94
95 mUnoptimizedFilters.push_back(RegExpFilterPtr(regExpFilter));
96 }
97
98 void Matcher::Remove(const Filter& filter)
99 {
100 const RegExpFilter* regExpFilter = filter.As<RegExpFilter>();
101 if (!regExpFilter)
102 return;
103
104 RollingHash hash;
105 DependentString str(regExpFilter->GetRegExpSource());
106 String::size_type minPrefixLength = 0;
107 for (auto i = 0; i < str.length(); i++)
108 {
109 auto currentChar = str[i];
110 if (currentChar == u'*' || currentChar == u'^' || currentChar == u'|')
sergei 2017/10/18 10:58:43 What about putting this condition into an inline f
111 minPrefixLength = i + 1;
112 hash.Advance(currentChar);
113 if (hash.PrefixLength() >= minPrefixLength)
114 {
115 auto candidate = hash.GetResult();
116 auto entry = mFilters.find(candidate);
117 if (entry && entry->second == &filter)
sergei 2017/10/18 10:58:43 The type of second is RegExpFilterPtr, so finally
118 {
119 mFilters.erase(candidate);
120 return;
121 }
122 }
123 }
124
125 for (auto ptr : mUnoptimizedFilters)
126 if (ptr == &filter)
127 ptr.reset();
128 }
129
130 RegExpFilter* Matcher::MatchesAny(const String& location, int typeMask,
131 DependentString& docDomain, bool thirdParty, const String& siteKey,
132 bool specificOnly)
133 {
134 RegExpFilterPtr result;
135
136 RollingHash hash;
137 for (auto i = 0; i < location.length(); i++)
138 {
139 hash.Advance(location[i]);
140 if (hash.PrefixLength() >= 0)
141 {
142 auto entry = mFilters.find(hash.GetResult());
143 if (!entry)
144 continue;
145
146 if (specificOnly && !entry->second->As<WhitelistFilter>() && entry->second ->IsGeneric())
147 continue;
148
149 if (!entry->second->Matches(location, typeMask, docDomain, thirdParty, sit eKey))
150 continue;
151
152 result.reset(entry->second.get());
153 if (result->As<WhitelistFilter>())
154 return result.release();
155 }
156 }
157
158 for (auto ptr : mUnoptimizedFilters)
159 {
160 if (!ptr)
161 continue;
162
163 if (specificOnly && !ptr->As<WhitelistFilter>() && ptr->IsGeneric())
164 continue;
165
166 if (!ptr->Matches(location, typeMask, docDomain, thirdParty, siteKey))
167 continue;
168
169 result.reset(ptr.get());
170 if (result->As<WhitelistFilter>())
171 return result.release();
172 }
173
174 return result.release();
175 }
OLDNEW

Powered by Google App Engine
This is Rietveld