Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/filter/Matcher.cpp

Issue 29556737: Issue 5141 - Convert filter match to C++ (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Some more cleanup Created Sept. 29, 2017, 4:12 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH
4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation.
8 *
9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "Matcher.h"
19 #include "RegExpFilter.h"
20 #include "../library.h"
21
22 const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000;
23
24 OwnedString CombinedMatcher::FindKeyword(const FilterPtr& filter)
sergei 2017/10/02 12:02:35 It's not important but still it would be better to
sergei 2017/10/02 12:02:36 the method should be const.
hub 2017/10/03 19:33:13 Done.
hub 2017/10/03 19:33:13 Done.
25 {
26 if (filter->mType == Filter::Type::WHITELIST)
sergei 2017/10/02 12:02:33 What do you think about having an inline function
hub 2017/10/03 19:33:11 Done.
sergei 2017/10/04 08:54:32 I meant that the code of CombinedMatcher::SomeMeth
hub 2017/10/06 13:49:17 Done.
27 return mWhitelist.FindKeyword(filter);
28 return mBlacklist.FindKeyword(filter);
29 }
30
31 void CombinedMatcher::ResetCache()
32 {
33 mResultCache.clear();
34 }
35
36 void CombinedMatcher::Add(const FilterPtr& filter)
sergei 2017/10/02 12:02:36 Should the argument be `Filter&`?
hub 2017/10/03 19:33:11 Done.
37 {
38 if (filter->mType == Filter::Type::WHITELIST)
39 mWhitelist.Add(filter);
40 else
41 mBlacklist.Add(filter);
42
43 ResetCache();
44 }
45
46 void CombinedMatcher::Remove(const FilterPtr& filter)
sergei 2017/10/02 12:02:37 Should the argument be `const Filter&`?
hub 2017/10/03 19:33:09 Done.
47 {
48 if (filter->mType == Filter::Type::WHITELIST)
49 mWhitelist.Remove(filter);
50 else
51 mBlacklist.Remove(filter);
52
53 ResetCache();
54 }
55
56 void CombinedMatcher::Clear()
57 {
58 mBlacklist.Clear();
59 mWhitelist.Clear();
60 ResetCache();
61 }
62
63 bool CombinedMatcher::HasFilter(const FilterPtr& filter) const
sergei 2017/10/02 12:02:34 Should the argument be `const Filter&`?
hub 2017/10/03 19:33:11 Done.
64 {
65 if (filter->mType == Filter::Type::WHITELIST)
66 return mWhitelist.HasFilter(filter);
67 return mBlacklist.HasFilter(filter);
68 }
69
70 const String& CombinedMatcher::GetKeywordForFilter(const FilterPtr& filter)
sergei 2017/10/02 12:02:35 Should the argument be `const Filter&`?
sergei 2017/10/02 12:02:36 the method should be const.
hub 2017/10/03 19:33:11 Done.
hub 2017/10/03 19:33:12 Done.
71 {
72 if (filter->mType == Filter::Type::WHITELIST)
73 return mWhitelist.GetKeywordForFilter(filter);
74 return mBlacklist.GetKeywordForFilter(filter);
75 }
76
77 FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location,
sergei 2017/10/02 12:02:34 the method should be const if it's possible.
hub 2017/10/03 19:33:10 Done.
78 int typeMask, DependentString& docDomain, bool thirdParty,
79 const String& sitekey, bool specificOnly)
80 {
81 ReMatchResults reResult;
82 OwnedString text(location);
83 text.toLower();
84 auto match_re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true);
sergei 2017/10/02 12:02:36 It should be in anonymous namespace, otherwise a n
sergei 2017/10/04 08:54:31 This is not addressed.
hub 2017/10/06 13:49:17 Done.
85 text.match(match_re_id, &reResult);
sergei 2017/10/02 12:02:35 Although it seems it does work here, I think for p
hub 2017/10/03 19:33:12 Done.
86
87 auto& candidates = reResult.candidates;
88 candidates.push_back(OwnedString());
89
90 FilterPtr blacklistHit;
91 for (auto substr : candidates)
92 {
93 if (mWhitelist.mFilterByKeyword.find(substr))
sergei 2017/10/02 12:02:36 It's already changed in the master, do you mind to
hub 2017/10/03 19:33:13 Done.
94 {
95 auto result = mWhitelist.CheckEntryMatch(
96 substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly);
97 if (result)
98 return result;
99 }
100 if (mBlacklist.mFilterByKeyword.find(substr) && !blacklistHit)
101 {
102 blacklistHit = mBlacklist.CheckEntryMatch(
103 substr, location, typeMask, docDomain, thirdParty, sitekey,
104 specificOnly);
105 }
106 }
107 return blacklistHit;
108 }
109
110 Filter* CombinedMatcher::MatchesAny(const String& location,
111 int typeMask, DependentString& docDomain, bool thirdParty,
112 const String& sitekey, bool specificOnly)
sergei 2017/10/02 12:02:34 The method should be const if it's possible.
hub 2017/10/03 19:33:11 sadly the use of the cache makes it non-const. I c
113 {
114 OwnedString key(location);
115 key.append(u" "_str);
116 key.append(typeMask);
117 key.append(u" "_str);
118 key.append(docDomain);
119 key.append(u" "_str);
120 key.append(thirdParty);
121 key.append(u" "_str);
122 key.append(sitekey);
123 key.append(u" "_str);
124 key.append(specificOnly);
125
126 FilterPtr result;
127
128 auto cachedResult = mResultCache.find(key);
129 if (cachedResult)
130 result = cachedResult->second;
131 else
132 {
133 result = MatchesAnyInternal(location, typeMask, docDomain,
134 thirdParty, sitekey, specificOnly);
135
136 if (mResultCache.size() >= MAX_CACHE_ENTRIES)
137 ResetCache();
138
139 mResultCache[key] = result;
140 }
141
142 result->AddRef();
143 return result.get();
sergei 2017/10/02 12:02:34 It would be better to `return result.release();`.
hub 2017/10/03 19:33:10 Done.
144 }
145
146 namespace {
147 const DependentString regexpRegExp =
148 u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_str;
149 const DependentString optionsRegExp =
150 u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str;
151 const DependentString candidateRegExp =
152 u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str;
153 }
154
155 OwnedString Matcher::FindKeyword(const FilterPtr& filter)
sergei 2017/10/02 12:02:34 Should the argument be `const Filter&`?
sergei 2017/10/02 12:02:36 should it be a const method?
hub 2017/10/03 19:33:11 Done.
hub 2017/10/03 19:33:12 Done.
156 {
157 OwnedString result(u""_str);
158 OwnedString text(filter->GetText());
159 auto re_id = GenerateRegExp(DependentString(regexpRegExp), true, false);
sergei 2017/10/02 12:02:37 It and all other regexps below should be in the an
hub 2017/10/03 19:33:14 The mistake here is that a create a new DependentS
sergei 2017/10/04 08:54:32 Each call of GenerateRegExp increases global _rege
hub 2017/10/06 13:49:16 Done.
160 if (TestRegExp(re_id, text))
161 return result;
162
163 // Remove options
164 auto options_re_id = GenerateRegExp(DependentString(optionsRegExp), true, false);
165 auto index = ExecRegExp(options_re_id, text);
166 if (index != -1)
sergei 2017/10/02 12:02:34 It would be better to use String::npos than -1.
hub 2017/10/03 19:33:13 Done.
167 text = text.substr(0, index);
168
169 // Remove whitelist marker
170 if (text[0] == '@' && text[1] == '@')
sergei 2017/10/02 12:02:37 Firstly we should check the length of the `text`.
hub 2017/10/03 19:33:11 Done.
171 text = text.substr(2);
172
173 text.toLower();
174 ReMatchResults keywords;
175 auto candidates_re_id = GenerateRegExp(candidateRegExp, true, true);
176 auto match = text.match(candidates_re_id, &keywords);
177 if (!match)
178 return result;
179
180 auto& candidates = keywords.candidates;
181
182 auto& hash = mFilterByKeyword;
183 uint32_t resultCount = 0xffffffff;
184 uint32_t resultLength = 0;
185 for (auto substr : candidates)
186 {
187 auto candidate = DependentString(substr).substr(1);
188 auto count = (hash.find(candidate) ? hash[candidate].size() : 0);
sergei 2017/10/02 12:02:35 Basically braces are not needed here.
sergei 2017/10/02 12:02:37 It seems it could be optimized by auto ii_hash = h
hub 2017/10/03 19:33:12 I have to do that for to make the function `const`
hub 2017/10/03 19:33:13 Done.
sergei 2017/10/04 08:54:32 It's just a side effect of the present code, there
hub 2017/10/06 13:49:16 I addressed that. Just as I said making this const
189 if (count < resultCount ||
190 (count == resultCount && candidate.length() > resultLength))
191 {
192 result = candidate;
193 resultCount = count;
194 resultLength = candidate.length();
195 }
196 }
197
198 return result;
199 }
200
201 void Matcher::Add(const FilterPtr& filter)
sergei 2017/10/02 12:02:36 What about passing `Filter&`?
hub 2017/10/03 19:33:10 Done.
202 {
203 if (mKeywordByFilter.find(filter->GetText()))
204 return;
205
206 auto keyword = FindKeyword(filter);
207 auto oldEntry = mFilterByKeyword.find(keyword);
208 if (!oldEntry)
209 mFilterByKeyword[keyword] = std::vector<FilterPtr>{filter};
210 else
211 mFilterByKeyword[keyword].push_back(filter);
sergei 2017/10/02 12:02:37 StringMap::operator[](const String& key) creates a
hub 2017/10/03 19:33:09 Done.
212 mKeywordByFilter[filter->GetText()] = keyword;
sergei 2017/10/02 12:02:34 mKeywordByFilter stores DependentString, what if t
sergei 2017/10/04 08:54:32 What about having some struct FilterKeyword { Fi
hub 2017/10/06 13:49:17 Sounds like a good idea. Done.
213 }
214
215 void Matcher::Remove(const FilterPtr& filter)
sergei 2017/10/02 12:02:37 It seems the argument can be a const reference.
hub 2017/10/03 19:33:09 Done.
216 {
217 if (!mKeywordByFilter.find(filter->GetText()))
218 return;
219
220 auto keyword = mKeywordByFilter[filter->GetText()];
sergei 2017/10/02 12:02:37 There is also no need for double looking up.
hub 2017/10/03 19:33:12 Done.
221 auto list = mFilterByKeyword[keyword];
222 if (list.size() == 1)
223 mFilterByKeyword.erase(keyword);
224 else
225 {
226 auto iter = std::find(list.cbegin(), list.cend(), filter);
227 list.erase(iter);
sergei 2017/10/02 12:02:35 It can be one line but it does not matter.
hub 2017/10/03 19:33:10 Done.
228 }
229 mKeywordByFilter.erase(filter->GetText());
230 }
231
232 void Matcher::Clear()
233 {
234 mFilterByKeyword.clear();
235 mKeywordByFilter.clear();
236 }
237
238 bool Matcher::HasFilter(const FilterPtr& filter) const
sergei 2017/10/02 12:02:35 the argument should be a const reference.
hub 2017/10/03 19:33:09 Done.
239 {
240 return mKeywordByFilter.find(filter->GetText());
241 }
242
243 static DependentString emptyString = u""_str;
sergei 2017/10/02 12:02:37 Although static in the compilation unit achieves t
hub 2017/10/03 19:33:10 Done.
244
245 const String& Matcher::GetKeywordForFilter(const FilterPtr& filter)
sergei 2017/10/02 12:02:36 the argument should be a const reference and the m
hub 2017/10/03 19:33:12 Done.
246 {
247 if (mKeywordByFilter.find(filter->GetText()))
248 return mKeywordByFilter[filter->GetText()];
249 return emptyString;
sergei 2017/10/02 12:02:37 There is also no need for double looking up.
hub 2017/10/03 19:33:13 Done (needed for making the method `const`)
250 }
251
252 FilterPtr Matcher::CheckEntryMatch(const String& keyword,
253 const String& location,
254 int typeMask, DependentString& docDomain, bool thirdParty,
255 const String& sitekey, bool specificOnly)
sergei 2017/10/02 12:02:34 basically this method and the one below do not mod
hub 2017/10/03 19:33:10 Done.
256 {
257 auto list = mFilterByKeyword[keyword];
258 for (auto filter : list) {
259 auto activeFilter = static_cast<ActiveFilter*>(filter.get());
sergei 2017/10/02 12:02:35 opening brace { should be on the new line.
hub 2017/10/03 19:33:14 Done.
260 if (specificOnly && activeFilter->IsGeneric() &&
261 !(activeFilter->mType != Filter::Type::WHITELIST))
262 continue;
263
264 auto reFilter = static_cast<RegExpFilter*>(activeFilter);
265 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey))
266 return filter;
267 }
268 return FilterPtr();
269 }
270
271 Filter* Matcher::MatchesAny(const String& location,
272 int typeMask, DependentString& docDomain, bool thirdParty,
273 const String& sitekey, bool specificOnly)
274 {
275 ReMatchResults reResult;
276 auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true);
277 OwnedString text(location);
278 text.toLower();
279 MatchRegExp(re_id, text, &reResult);
280 auto& candidates = reResult.candidates;
281 candidates.push_back(OwnedString());
282 for (auto substr : candidates)
283 if (mFilterByKeyword.find(substr))
284 {
285 auto result = CheckEntryMatch(substr, location, typeMask, docDomain,
286 thirdParty, sitekey, specificOnly);
287 if (result)
288 {
289 result->AddRef();
290 return result.get();
sergei 2017/10/02 12:02:36 just return `result.release();`
hub 2017/10/03 19:33:12 Done.
291 }
292 }
293
294 return nullptr;
295 }
OLDNEW

Powered by Google App Engine
This is Rietveld