Left: | ||
Right: |
OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | |
3 * Copyright (C) 2006-present eyeo GmbH | |
4 * | |
5 * Adblock Plus is free software: you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License version 3 as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * Adblock Plus is distributed in the hope that it will be useful, | |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 * GNU General Public License for more details. | |
13 * | |
14 * You should have received a copy of the GNU General Public License | |
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
16 */ | |
17 | |
18 #include "Matcher.h" | |
19 #include "RegExpFilter.h" | |
20 #include "../library.h" | |
21 | |
22 const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000; | |
23 | |
24 OwnedString CombinedMatcher::FindKeyword(const FilterPtr& filter) | |
sergei
2017/10/02 12:02:35
It's not important but still it would be better to
sergei
2017/10/02 12:02:36
the method should be const.
hub
2017/10/03 19:33:13
Done.
hub
2017/10/03 19:33:13
Done.
| |
25 { | |
26 if (filter->mType == Filter::Type::WHITELIST) | |
sergei
2017/10/02 12:02:33
What do you think about having an inline function
hub
2017/10/03 19:33:11
Done.
sergei
2017/10/04 08:54:32
I meant that the code of CombinedMatcher::SomeMeth
hub
2017/10/06 13:49:17
Done.
| |
27 return mWhitelist.FindKeyword(filter); | |
28 return mBlacklist.FindKeyword(filter); | |
29 } | |
30 | |
31 void CombinedMatcher::ResetCache() | |
32 { | |
33 mResultCache.clear(); | |
34 } | |
35 | |
36 void CombinedMatcher::Add(const FilterPtr& filter) | |
sergei
2017/10/02 12:02:36
Should the argument be `Filter&`?
hub
2017/10/03 19:33:11
Done.
| |
37 { | |
38 if (filter->mType == Filter::Type::WHITELIST) | |
39 mWhitelist.Add(filter); | |
40 else | |
41 mBlacklist.Add(filter); | |
42 | |
43 ResetCache(); | |
44 } | |
45 | |
46 void CombinedMatcher::Remove(const FilterPtr& filter) | |
sergei
2017/10/02 12:02:37
Should the argument be `const Filter&`?
hub
2017/10/03 19:33:09
Done.
| |
47 { | |
48 if (filter->mType == Filter::Type::WHITELIST) | |
49 mWhitelist.Remove(filter); | |
50 else | |
51 mBlacklist.Remove(filter); | |
52 | |
53 ResetCache(); | |
54 } | |
55 | |
56 void CombinedMatcher::Clear() | |
57 { | |
58 mBlacklist.Clear(); | |
59 mWhitelist.Clear(); | |
60 ResetCache(); | |
61 } | |
62 | |
63 bool CombinedMatcher::HasFilter(const FilterPtr& filter) const | |
sergei
2017/10/02 12:02:34
Should the argument be `const Filter&`?
hub
2017/10/03 19:33:11
Done.
| |
64 { | |
65 if (filter->mType == Filter::Type::WHITELIST) | |
66 return mWhitelist.HasFilter(filter); | |
67 return mBlacklist.HasFilter(filter); | |
68 } | |
69 | |
70 const String& CombinedMatcher::GetKeywordForFilter(const FilterPtr& filter) | |
sergei
2017/10/02 12:02:35
Should the argument be `const Filter&`?
sergei
2017/10/02 12:02:36
the method should be const.
hub
2017/10/03 19:33:11
Done.
hub
2017/10/03 19:33:12
Done.
| |
71 { | |
72 if (filter->mType == Filter::Type::WHITELIST) | |
73 return mWhitelist.GetKeywordForFilter(filter); | |
74 return mBlacklist.GetKeywordForFilter(filter); | |
75 } | |
76 | |
77 FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location, | |
sergei
2017/10/02 12:02:34
the method should be const if it's possible.
hub
2017/10/03 19:33:10
Done.
| |
78 int typeMask, DependentString& docDomain, bool thirdParty, | |
79 const String& sitekey, bool specificOnly) | |
80 { | |
81 ReMatchResults reResult; | |
82 OwnedString text(location); | |
83 text.toLower(); | |
84 auto match_re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true); | |
sergei
2017/10/02 12:02:36
It should be in anonymous namespace, otherwise a n
sergei
2017/10/04 08:54:31
This is not addressed.
hub
2017/10/06 13:49:17
Done.
| |
85 text.match(match_re_id, &reResult); | |
sergei
2017/10/02 12:02:35
Although it seems it does work here, I think for p
hub
2017/10/03 19:33:12
Done.
| |
86 | |
87 auto& candidates = reResult.candidates; | |
88 candidates.push_back(OwnedString()); | |
89 | |
90 FilterPtr blacklistHit; | |
91 for (auto substr : candidates) | |
92 { | |
93 if (mWhitelist.mFilterByKeyword.find(substr)) | |
sergei
2017/10/02 12:02:36
It's already changed in the master, do you mind to
hub
2017/10/03 19:33:13
Done.
| |
94 { | |
95 auto result = mWhitelist.CheckEntryMatch( | |
96 substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly); | |
97 if (result) | |
98 return result; | |
99 } | |
100 if (mBlacklist.mFilterByKeyword.find(substr) && !blacklistHit) | |
101 { | |
102 blacklistHit = mBlacklist.CheckEntryMatch( | |
103 substr, location, typeMask, docDomain, thirdParty, sitekey, | |
104 specificOnly); | |
105 } | |
106 } | |
107 return blacklistHit; | |
108 } | |
109 | |
110 Filter* CombinedMatcher::MatchesAny(const String& location, | |
111 int typeMask, DependentString& docDomain, bool thirdParty, | |
112 const String& sitekey, bool specificOnly) | |
sergei
2017/10/02 12:02:34
The method should be const if it's possible.
hub
2017/10/03 19:33:11
sadly the use of the cache makes it non-const. I c
| |
113 { | |
114 OwnedString key(location); | |
115 key.append(u" "_str); | |
116 key.append(typeMask); | |
117 key.append(u" "_str); | |
118 key.append(docDomain); | |
119 key.append(u" "_str); | |
120 key.append(thirdParty); | |
121 key.append(u" "_str); | |
122 key.append(sitekey); | |
123 key.append(u" "_str); | |
124 key.append(specificOnly); | |
125 | |
126 FilterPtr result; | |
127 | |
128 auto cachedResult = mResultCache.find(key); | |
129 if (cachedResult) | |
130 result = cachedResult->second; | |
131 else | |
132 { | |
133 result = MatchesAnyInternal(location, typeMask, docDomain, | |
134 thirdParty, sitekey, specificOnly); | |
135 | |
136 if (mResultCache.size() >= MAX_CACHE_ENTRIES) | |
137 ResetCache(); | |
138 | |
139 mResultCache[key] = result; | |
140 } | |
141 | |
142 result->AddRef(); | |
143 return result.get(); | |
sergei
2017/10/02 12:02:34
It would be better to `return result.release();`.
hub
2017/10/03 19:33:10
Done.
| |
144 } | |
145 | |
146 namespace { | |
147 const DependentString regexpRegExp = | |
148 u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_str; | |
149 const DependentString optionsRegExp = | |
150 u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str; | |
151 const DependentString candidateRegExp = | |
152 u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str; | |
153 } | |
154 | |
155 OwnedString Matcher::FindKeyword(const FilterPtr& filter) | |
sergei
2017/10/02 12:02:34
Should the argument be `const Filter&`?
sergei
2017/10/02 12:02:36
should it be a const method?
hub
2017/10/03 19:33:11
Done.
hub
2017/10/03 19:33:12
Done.
| |
156 { | |
157 OwnedString result(u""_str); | |
158 OwnedString text(filter->GetText()); | |
159 auto re_id = GenerateRegExp(DependentString(regexpRegExp), true, false); | |
sergei
2017/10/02 12:02:37
It and all other regexps below should be in the an
hub
2017/10/03 19:33:14
The mistake here is that I create a new DependentS
sergei
2017/10/04 08:54:32
Each call of GenerateRegExp increases global _rege
hub
2017/10/06 13:49:16
Done.
| |
160 if (TestRegExp(re_id, text)) | |
161 return result; | |
162 | |
163 // Remove options | |
164 auto options_re_id = GenerateRegExp(DependentString(optionsRegExp), true, false); | |
165 auto index = ExecRegExp(options_re_id, text); | |
166 if (index != -1) | |
sergei
2017/10/02 12:02:34
It would be better to use String::npos than -1.
hub
2017/10/03 19:33:13
Done.
| |
167 text = text.substr(0, index); | |
168 | |
169 // Remove whitelist marker | |
170 if (text[0] == '@' && text[1] == '@') | |
sergei
2017/10/02 12:02:37
Firstly we should check the length of the `text`.
hub
2017/10/03 19:33:11
Done.
| |
171 text = text.substr(2); | |
172 | |
173 text.toLower(); | |
174 ReMatchResults keywords; | |
175 auto candidates_re_id = GenerateRegExp(candidateRegExp, true, true); | |
176 auto match = text.match(candidates_re_id, &keywords); | |
177 if (!match) | |
178 return result; | |
179 | |
180 auto& candidates = keywords.candidates; | |
181 | |
182 auto& hash = mFilterByKeyword; | |
183 uint32_t resultCount = 0xffffffff; | |
184 uint32_t resultLength = 0; | |
185 for (auto substr : candidates) | |
186 { | |
187 auto candidate = DependentString(substr).substr(1); | |
188 auto count = (hash.find(candidate) ? hash[candidate].size() : 0); | |
sergei
2017/10/02 12:02:35
Basically braces are not needed here.
sergei
2017/10/02 12:02:37
It seems it could be optimized by
auto ii_hash = h
hub
2017/10/03 19:33:12
I have to do that to make the function `const`
hub
2017/10/03 19:33:13
Done.
sergei
2017/10/04 08:54:32
It's just a side effect of the present code, there
hub
2017/10/06 13:49:16
I addressed that. Just as I said making this const
| |
189 if (count < resultCount || | |
190 (count == resultCount && candidate.length() > resultLength)) | |
191 { | |
192 result = candidate; | |
193 resultCount = count; | |
194 resultLength = candidate.length(); | |
195 } | |
196 } | |
197 | |
198 return result; | |
199 } | |
200 | |
201 void Matcher::Add(const FilterPtr& filter) | |
sergei
2017/10/02 12:02:36
What about passing `Filter&`?
hub
2017/10/03 19:33:10
Done.
| |
202 { | |
203 if (mKeywordByFilter.find(filter->GetText())) | |
204 return; | |
205 | |
206 auto keyword = FindKeyword(filter); | |
207 auto oldEntry = mFilterByKeyword.find(keyword); | |
208 if (!oldEntry) | |
209 mFilterByKeyword[keyword] = std::vector<FilterPtr>{filter}; | |
210 else | |
211 mFilterByKeyword[keyword].push_back(filter); | |
sergei
2017/10/02 12:02:37
StringMap::operator[](const String& key) creates a
hub
2017/10/03 19:33:09
Done.
| |
212 mKeywordByFilter[filter->GetText()] = keyword; | |
sergei
2017/10/02 12:02:34
mKeywordByFilter stores DependentString, what if t
sergei
2017/10/04 08:54:32
What about having some
struct FilterKeyword
{
Fi
hub
2017/10/06 13:49:17
Sounds like a good idea. Done.
| |
213 } | |
214 | |
215 void Matcher::Remove(const FilterPtr& filter) | |
sergei
2017/10/02 12:02:37
It seems the argument can be a const reference.
hub
2017/10/03 19:33:09
Done.
| |
216 { | |
217 if (!mKeywordByFilter.find(filter->GetText())) | |
218 return; | |
219 | |
220 auto keyword = mKeywordByFilter[filter->GetText()]; | |
sergei
2017/10/02 12:02:37
There is also no need for double looking up.
hub
2017/10/03 19:33:12
Done.
| |
221 auto list = mFilterByKeyword[keyword]; | |
222 if (list.size() == 1) | |
223 mFilterByKeyword.erase(keyword); | |
224 else | |
225 { | |
226 auto iter = std::find(list.cbegin(), list.cend(), filter); | |
227 list.erase(iter); | |
sergei
2017/10/02 12:02:35
It can be one line but it does not matter.
hub
2017/10/03 19:33:10
Done.
| |
228 } | |
229 mKeywordByFilter.erase(filter->GetText()); | |
230 } | |
231 | |
232 void Matcher::Clear() | |
233 { | |
234 mFilterByKeyword.clear(); | |
235 mKeywordByFilter.clear(); | |
236 } | |
237 | |
238 bool Matcher::HasFilter(const FilterPtr& filter) const | |
sergei
2017/10/02 12:02:35
the argument should be a const reference.
hub
2017/10/03 19:33:09
Done.
| |
239 { | |
240 return mKeywordByFilter.find(filter->GetText()); | |
241 } | |
242 | |
243 static DependentString emptyString = u""_str; | |
sergei
2017/10/02 12:02:37
Although static in the compilation unit achieves t
hub
2017/10/03 19:33:10
Done.
| |
244 | |
245 const String& Matcher::GetKeywordForFilter(const FilterPtr& filter) | |
sergei
2017/10/02 12:02:36
the argument should be a const reference and the m
hub
2017/10/03 19:33:12
Done.
| |
246 { | |
247 if (mKeywordByFilter.find(filter->GetText())) | |
248 return mKeywordByFilter[filter->GetText()]; | |
249 return emptyString; | |
sergei
2017/10/02 12:02:37
There is also no need for double looking up.
hub
2017/10/03 19:33:13
Done (needed for making the method `const`)
| |
250 } | |
251 | |
252 FilterPtr Matcher::CheckEntryMatch(const String& keyword, | |
253 const String& location, | |
254 int typeMask, DependentString& docDomain, bool thirdParty, | |
255 const String& sitekey, bool specificOnly) | |
sergei
2017/10/02 12:02:34
basically this method and the one below do not mod
hub
2017/10/03 19:33:10
Done.
| |
256 { | |
257 auto list = mFilterByKeyword[keyword]; | |
258 for (auto filter : list) { | |
259 auto activeFilter = static_cast<ActiveFilter*>(filter.get()); | |
sergei
2017/10/02 12:02:35
opening brace { should be on the new line.
hub
2017/10/03 19:33:14
Done.
| |
260 if (specificOnly && activeFilter->IsGeneric() && | |
261 !(activeFilter->mType != Filter::Type::WHITELIST)) | |
262 continue; | |
263 | |
264 auto reFilter = static_cast<RegExpFilter*>(activeFilter); | |
265 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) | |
266 return filter; | |
267 } | |
268 return FilterPtr(); | |
269 } | |
270 | |
271 Filter* Matcher::MatchesAny(const String& location, | |
272 int typeMask, DependentString& docDomain, bool thirdParty, | |
273 const String& sitekey, bool specificOnly) | |
274 { | |
275 ReMatchResults reResult; | |
276 auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true); | |
277 OwnedString text(location); | |
278 text.toLower(); | |
279 MatchRegExp(re_id, text, &reResult); | |
280 auto& candidates = reResult.candidates; | |
281 candidates.push_back(OwnedString()); | |
282 for (auto substr : candidates) | |
283 if (mFilterByKeyword.find(substr)) | |
284 { | |
285 auto result = CheckEntryMatch(substr, location, typeMask, docDomain, | |
286 thirdParty, sitekey, specificOnly); | |
287 if (result) | |
288 { | |
289 result->AddRef(); | |
290 return result.get(); | |
sergei
2017/10/02 12:02:36
just return `result.release();`
hub
2017/10/03 19:33:12
Done.
| |
291 } | |
292 } | |
293 | |
294 return nullptr; | |
295 } | |
OLD | NEW |