Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/filter/Matcher.cpp

Issue 29556737: Issue 5141 - Convert filter match to C++ (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Fixed many issues. One test left out. Created Oct. 6, 2017, 1:45 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH
4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation.
8 *
9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "Matcher.h"
19 #include "RegExpFilter.h"
20 #include "../library.h"
21
22 namespace {
23 const DependentString regexpRegExp =
24 u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_s tr;
25 const DependentString optionsRegExp =
26 u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str;
27 const DependentString candidateRegExp =
28 u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str;
29 const DependentString matchRegExp = u"[a-z0-9%]{3,}"_str;
30 }
31
32 Matcher::Matcher()
33 : mFilterByKeyword(1024), mKeywordByFilter(1024),
34 mReId(-1), mOptionsReId(-1), mCandidatesReId(-1)
35 {
36 mReId = GenerateRegExp(regexpRegExp, true, false);
37 mOptionsReId = GenerateRegExp(optionsRegExp, true, false);
38 mCandidatesReId = GenerateRegExp(candidateRegExp, true, true);
39 mMatchReId = GenerateRegExp(matchRegExp, true, true);
40 }
41
42 void Matcher::Add(Filter& filter)
43 {
44 if (mKeywordByFilter.find(filter.GetText()))
45 return;
46
47 auto keyword = FindKeyword(filter);
48
49 mFilterByKeyword[keyword].push_back(FilterPtr(&filter));
sergei 2017/10/11 09:55:16 Although the review is already closed I think it's
50 mKeywordByFilter[filter.GetText()] =
51 FilterKeyword(std::move(keyword), filter);
52 }
53
54 void Matcher::Remove(Filter& filter)
55 {
56 auto entry = mKeywordByFilter.find(filter.GetText());
57 if (!entry)
58 return;
59
60 auto& keyword = static_cast<const String&>(entry->second);
61 auto list = mFilterByKeyword[keyword];
62 if (list.size() == 1)
63 mFilterByKeyword.erase(keyword);
64 else
65 list.erase(std::find(list.cbegin(), list.cend(), FilterPtr(&filter)));
66
67 mKeywordByFilter.erase(filter.GetText());
68 }
69
70 void Matcher::Clear()
71 {
72 mFilterByKeyword.clear();
73 mKeywordByFilter.clear();
74 }
75
76 bool Matcher::HasFilter(const Filter& filter) const
77 {
78 return mKeywordByFilter.find(filter.GetText());
79 }
80
81 namespace
82 {
83 DependentString emptyString = u""_str;
84 }
85
86 const String& Matcher::GetKeywordForFilter(const Filter& filter) const
87 {
88 auto entry = mKeywordByFilter.find(filter.GetText());
89 if (entry)
90 return static_cast<const String&>(entry->second);
91 return emptyString;
92 }
93
94 Filter* Matcher::MatchesAny(const String& location,
95 int typeMask, DependentString& docDomain, bool thirdParty,
96 const String& sitekey, bool specificOnly) const
97 {
98 OwnedString text(location);
99 text.toLower();
100 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false);
101 if (text.match(mMatchReId, *reResult))
102 {
103 auto& candidates = reResult->candidates;
104 candidates.push_back(OwnedString());
105 for (auto candidate : candidates)
106 {
107 auto result = CheckEntryMatch(candidate, location, typeMask, docDomain,
108 thirdParty, sitekey, specificOnly);
109 if (result)
110 return result.release();
111 }
112 }
Wladimir Palant 2017/10/09 08:39:47 As mentioned in the issue description, we should n
sergei 2017/10/09 15:27:53 Although it merely converts the existing JS code a
Wladimir Palant 2017/10/10 07:39:05 I strongly disagree. Landing crappy code is always
113 return nullptr;
114 }
115
116 OwnedString Matcher::FindKeyword(const Filter& filter) const
117 {
118 OwnedString result;
119 OwnedString text(filter.GetText());
120 if (TestRegExp(mReId, text))
121 return result;
122
123 // Remove options
124 auto index = ExecRegExp(mOptionsReId, text);
125 if (index != String::npos)
126 text = DependentString(text, 0, index);
127
128 // Remove whitelist marker
129 if (text.length() >= 2 && text[0] == '@' && text[1] == '@')
130 text = DependentString(text, 2);
131
132 text.toLower();
133 intrusive_ptr<ReMatchResults> keywords(new ReMatchResults, false);
134 auto match = text.match(mCandidatesReId, *keywords);
135 if (!match)
136 return result;
137
138 auto& candidates = keywords->candidates;
139
140 uint32_t resultCount = 0xffffff;
141 uint32_t resultLength = 0;
142 for (auto substr : candidates)
143 {
144 if (substr.empty())
145 continue;
146
147 auto candidate = DependentString(substr, 1);
148 auto entry = mFilterByKeyword.find(candidate);
149 auto count = entry ? entry->second.size() : 0;
150 if (count < resultCount ||
151 (count == resultCount && candidate.length() > resultLength))
152 {
153 result = candidate;
154 resultCount = count;
155 resultLength = candidate.length();
156 }
157 }
158 return result;
159 }
160
161 FilterPtr Matcher::CheckEntryMatch(const String& keyword,
162 const String& location,
163 int typeMask, DependentString& docDomain, bool thirdParty,
164 const String& sitekey, bool specificOnly) const
165 {
166 auto entry = mFilterByKeyword.find(keyword);
167 if (!entry)
168 return FilterPtr();
169
170 auto filters = entry->second;
171 for (auto filter : filters)
172 {
173 auto activeFilter = static_cast<ActiveFilter*>(filter.get());
174 if (specificOnly && activeFilter->IsGeneric() &&
175 (activeFilter->mType != Filter::Type::WHITELIST))
176 continue;
177
178 auto reFilter = static_cast<RegExpFilter*>(activeFilter);
179 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey))
180 return filter;
181 }
182
183 return FilterPtr();
184 }
185
186 const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000;
187
188 CombinedMatcher::CombinedMatcher()
189 : mResultCache(1024), mMatchReId(-1)
190 {
191 mMatchReId = GenerateRegExp(matchRegExp, true, true);
192 }
193
194 void CombinedMatcher::Add(Filter& filter)
195 {
196 GetMatcher(filter).Add(filter);
197 ResetCache();
198 }
199
200 void CombinedMatcher::Remove(Filter& filter)
201 {
202 GetMatcher(filter).Remove(filter);
203 ResetCache();
204 }
205
206 void CombinedMatcher::Clear()
207 {
208 mBlacklist.Clear();
209 mWhitelist.Clear();
210 ResetCache();
211 }
212
213 bool CombinedMatcher::HasFilter(const Filter& filter) const
214 {
215 return GetMatcher(filter).HasFilter(filter);
216 }
217
218 const String& CombinedMatcher::GetKeywordForFilter(const Filter& filter) const
219 {
220 return GetMatcher(filter).GetKeywordForFilter(filter);
221 }
222
223 Filter* CombinedMatcher::MatchesAny(const String& location,
224 int typeMask, DependentString& docDomain, bool thirdParty,
225 const String& sitekey, bool specificOnly)
226 {
227 OwnedString key(location);
228 key.append(u" "_str);
229 key.append(typeMask);
230 key.append(u" "_str);
231 key.append(docDomain);
232 key.append(u" "_str);
233 key.append(thirdParty);
234 key.append(u" "_str);
235 key.append(sitekey);
236 key.append(u" "_str);
237 key.append(specificOnly);
238
239 FilterPtr result;
240
241 auto cachedResult = mResultCache.find(key);
242 if (cachedResult)
243 result = cachedResult->second.filter();
244 else
245 {
246 result = MatchesAnyInternal(location, typeMask, docDomain,
247 thirdParty, sitekey, specificOnly);
248
249 if (mResultCache.size() >= MAX_CACHE_ENTRIES)
250 ResetCache();
251
252 CacheEntry cache(std::move(key), result);
253 mResultCache[cache.key()] = cache;
254 }
255
256 return result.release();
257 }
258
259 OwnedString CombinedMatcher::FindKeyword(const Filter& filter) const
260 {
261 return GetMatcher(filter).FindKeyword(filter);
262 }
263
264 void CombinedMatcher::ResetCache()
265 {
266 mResultCache.clear();
267 }
268
269 FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location,
270 int typeMask, DependentString& docDomain, bool thirdParty,
271 const String& sitekey, bool specificOnly) const
272 {
273 OwnedString text(location);
274 text.toLower();
275 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false);
276 text.match(mMatchReId, *reResult);
277
278 auto& candidates = reResult->candidates;
279 candidates.push_back(OwnedString());
280
281 FilterPtr blacklistHit;
282 for (auto substr : candidates)
283 {
284 auto result = mWhitelist.CheckEntryMatch(
285 substr, location, typeMask, docDomain, thirdParty, sitekey, false);
286 if (result)
287 return result;
288
289 if (!blacklistHit)
290 blacklistHit = mBlacklist.CheckEntryMatch(
291 substr, location, typeMask, docDomain, thirdParty, sitekey,
292 specificOnly);
293 }
294 return blacklistHit;
295 }
OLDNEW

Powered by Google App Engine
This is Rietveld