Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/filter/Matcher.cpp

Issue 29556737: Issue 5141 - Convert filter match to C++ (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Created Sept. 26, 2017, 9:34 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH
4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation.
8 *
9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "Matcher.h"
19 #include "RegExpFilter.h"
20 #include "../library.h"
21
22 class CombinedMatcher : public MatcherBase
23 {
24 private:
25 StringMap<Filter*> mResultCache;
hub 2017/09/26 21:49:00 I wanted to use FilterPtr in there, but it didn't
26 static const size_t MAX_CACHE_ENTRIES = 1000;
27 int mMatchReId;
28 public:
29 Matcher mBlacklist;
30 Matcher mWhitelist;
31
32 CombinedMatcher()
33 : mMatchReId(GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true))
34 {
35 }
36
37 ~CombinedMatcher()
38 {
39 DeleteRegExp(mMatchReId);
40 }
41
42 void ResetCache()
43 {
44 for (auto filter : mResultCache)
45 filter.second->ReleaseRef();
hub 2017/09/26 21:49:00 See above: if we could have the FilterPtr as the d
46 mResultCache.clear();
47 }
48
49 void Add(const FilterPtr& filter) override
50 {
51 if (filter->mType == Filter::Type::WHITELIST)
52 mWhitelist.Add(filter);
53 else
54 mBlacklist.Add(filter);
55
56 ResetCache();
57 }
58
59 void Remove(const FilterPtr& filter) override
60 {
61 if (filter->mType == Filter::Type::WHITELIST)
62 mWhitelist.Remove(filter);
63 else
64 mBlacklist.Remove(filter);
65
66 ResetCache();
67 }
68
69 void Clear() override
70 {
71 mBlacklist.Clear();
72 mWhitelist.Clear();
73 ResetCache();
74 }
75
76 OwnedString FindKeyword(const FilterPtr& filter) override
77 {
78 if (filter->mType == Filter::Type::WHITELIST)
79 return mWhitelist.FindKeyword(filter);
80 return mBlacklist.FindKeyword(filter);
81 }
82
83 bool HasFilter(const FilterPtr& filter) const override
84 {
85 if (filter->mType == Filter::Type::WHITELIST)
86 return mWhitelist.HasFilter(filter);
87 return mBlacklist.HasFilter(filter);
88 }
89
90 const String& GetKeywordForFilter(const FilterPtr& filter) override
91 {
92 if (filter->mType == Filter::Type::WHITELIST)
93 return mWhitelist.GetKeywordForFilter(filter);
94 return mBlacklist.GetKeywordForFilter(filter);
95 }
96
97 Filter* MatchesAnyInternal(const String& location,
98 int typeMask, DependentString& docDomain, bool thirdParty,
99 const String& sitekey, bool specificOnly)
100 {
101 ReMatchResults reResult;
102 OwnedString text(location);
103 text.toLower();
104 text.match(mMatchReId, &reResult);
105
106 auto& candidates = reResult.candidates;
107 candidates.push_back(OwnedString());
108
109 Filter* blacklistHit = nullptr;
110 for (size_t i = 0, l = candidates.size(); i < l; i++)
111 {
112 auto substr = candidates[i];
113 if (mWhitelist.mFilterByKeyword.find(substr))
114 {
115 auto result = mWhitelist._CheckEntryMatch(
116 substr, location, typeMask, docDomain, thirdParty, sitekey, specificOn ly);
117 if (result)
118 return result;
119 }
120 if (mBlacklist.mFilterByKeyword.find(substr) && !blacklistHit)
121 {
122 blacklistHit = mBlacklist._CheckEntryMatch(
123 substr, location, typeMask, docDomain, thirdParty, sitekey,
124 specificOnly);
125 }
126 }
127 return blacklistHit;
128 }
129
130 Filter* MatchesAny(const String& location,
131 int typeMask, DependentString& docDomain, bool thirdParty,
132 const String& sitekey, bool specificOnly) override
133 {
134 OwnedString key(location);
135 key.append(u" "_str);
136 key.append(typeMask);
137 key.append(u" "_str);
138 key.append(docDomain);
139 key.append(u" "_str);
140 key.append(thirdParty);
141 key.append(u" "_str);
142 key.append(sitekey);
143 key.append(u" "_str);
144 key.append(specificOnly);
145
146 auto cachedResult = mResultCache.find(key);
147 if (cachedResult)
148 {
149 cachedResult->second->AddRef();
150 return cachedResult->second;
151 }
152
153 Filter* result = MatchesAnyInternal(location, typeMask, docDomain,
154 thirdParty, sitekey, specificOnly);
155
156 if (mResultCache.size() >= MAX_CACHE_ENTRIES)
157 ResetCache();
158
159 result->AddRef();
160 mResultCache[key] = result;
161
162 result->AddRef();
163 return result;
164 }
165 };
166
167 MatcherBase* MatcherBase::mInstance = new CombinedMatcher;
168
169 Matcher::Matcher()
170 : mFilterReId(GenerateRegExp(DependentString(Filter::regexpRegExp), true, fals e))
171 , mOptionsReId(GenerateRegExp(DependentString(Filter::optionsRegExp), true, fa lse))
172 , mCandidatesReId(GenerateRegExp(u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_st r, true, true))
173 {
174 }
175
176 Matcher::~Matcher()
177 {
178 DeleteRegExp(mFilterReId);
179 DeleteRegExp(mOptionsReId);
180 DeleteRegExp(mCandidatesReId);
181 }
182
183 OwnedString Matcher::FindKeyword(const FilterPtr& filter)
184 {
185 OwnedString result(u""_str);
186 OwnedString text(filter->GetText());
187 if (TestRegExp(mFilterReId, text))
188 return result;
189
190 // Remove options
191 auto index = ExecRegExp(mOptionsReId, text);
192 if (index != -1)
193 text = text.substr(0, index);
194
195 // Remove whitelist marker
196 if (text[0] == '@' && text[1] == '@')
197 text = text.substr(2);
198
199 text.toLower();
200 ReMatchResults keywords;
201 auto match = text.match(mCandidatesReId, &keywords);
202 if (!match)
203 return result;
204
205 auto& candidates = keywords.candidates;
206
207 auto& hash = mFilterByKeyword;
208 uint32_t resultCount = 0xffffffff;
209 uint32_t resultLength = 0;
210 for (uint32_t i = 0, l = candidates.size(); i < l; i++)
211 {
212 auto candidate = DependentString(candidates[i]).substr(1);
213 auto count = (hash.find(candidate) ? hash[candidate].size() : 0);
214 if (count < resultCount ||
215 (count == resultCount && candidate.length() > resultLength))
216 {
217 result = candidate;
218 resultCount = count;
219 resultLength = candidate.length();
220 }
221 }
222
223 return result;
224 }
225
226 void Matcher::Add(const FilterPtr& filter)
227 {
228 if (mKeywordByFilter.find(filter->GetText()))
229 return;
230
231 auto keyword = FindKeyword(filter);
232 auto oldEntry = mFilterByKeyword.find(keyword);
233 if (!oldEntry)
234 mFilterByKeyword[keyword] = std::vector<FilterPtr>{filter};
235 else
236 mFilterByKeyword[keyword].push_back(filter);
237 mKeywordByFilter[filter->GetText()] = keyword;
238 }
239
240 void Matcher::Remove(const FilterPtr& filter)
241 {
242 if (!mKeywordByFilter.find(filter->GetText()))
243 return;
244
245 auto keyword = mKeywordByFilter[filter->GetText()];
246 auto list = mFilterByKeyword[keyword];
247 if (list.size() == 1)
248 mFilterByKeyword.erase(keyword);
249 else
250 {
251 auto iter = std::find(list.cbegin(), list.cend(), filter);
252 list.erase(iter);
253 }
254 mKeywordByFilter.erase(filter->GetText());
255 }
256
257 void Matcher::Clear()
258 {
259 mFilterByKeyword.clear();
260 mKeywordByFilter.clear();
261 }
262
263 bool Matcher::HasFilter(const FilterPtr& filter) const
264 {
265 return mKeywordByFilter.find(filter->GetText());
266 }
267
268 static DependentString emptyString = u""_str;
269
270 const String& Matcher::GetKeywordForFilter(const FilterPtr& filter)
271 {
272 if (mKeywordByFilter.find(filter->GetText()))
273 return mKeywordByFilter[filter->GetText()];
274 return emptyString;
275 }
276
277 Filter* Matcher::_CheckEntryMatch(const String& keyword,
278 const String& location,
279 int typeMask, DependentString& docDomain, bool thirdParty,
280 const String& sitekey, bool specificOnly)
281 {
282 auto list = mFilterByKeyword[keyword];
283 for (auto filter : list) {
284 auto activeFilter = static_cast<ActiveFilter*>(filter.get());
hub 2017/09/26 21:49:00 This is done without checking. And it is ugly. And
sergei 2017/10/02 12:02:33 Although we don't pass other filters here, what do
285 if (specificOnly && activeFilter->IsGeneric() &&
286 !(activeFilter->mType != Filter::Type::WHITELIST))
287 continue;
288 auto reFilter = static_cast<RegExpFilter*>(activeFilter);
hub 2017/09/26 21:49:00 SImilarly as above: this is unchecked.
289 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey))
290 {
291 return filter.get();
292 }
293 }
294 return nullptr;
295 }
296
297 Filter* Matcher::MatchesAny(const String& location,
298 int typeMask, DependentString& docDomain, bool thirdParty,
299 const String& sitekey, bool specificOnly)
300 {
301 ReMatchResults reResult;
302 auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true);
303 OwnedString text(location);
304 text.toLower();
305 MatchRegExp(re_id, text, &reResult);
306 auto& candidates = reResult.candidates;
307 candidates.push_back(OwnedString());
308 for (size_t i = 0, l = candidates.size(); i < l; i++)
309 {
310 auto substr = candidates[i];
311 if (mFilterByKeyword.find(substr))
312 {
313 auto result = _CheckEntryMatch(substr, location, typeMask, docDomain,
314 thirdParty, sitekey, specificOnly);
315 if (result)
316 {
317 result->AddRef();
318 return result;
319 }
320 }
321 }
322
323 return nullptr;
324 }
OLDNEW

Powered by Google App Engine
This is Rietveld