Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/filter/Matcher.cpp

Issue 29556737: Issue 5141 - Convert filter match to C++ (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Cleanup. Fixed the bindings to export what we actually need. Created Sept. 27, 2017, 3:27 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « compiled/filter/Matcher.h ('k') | compiled/filter/RegExpFilter.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH
4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation.
8 *
9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "Matcher.h"
19 #include "RegExpFilter.h"
20 #include "../library.h"
21
22 class CombinedMatcher : public MatcherBase
23 {
24 private:
25 StringMap<Filter*> mResultCache;
26 static const size_t MAX_CACHE_ENTRIES = 1000;
27 int mMatchReId;
28 Matcher mBlacklist;
29 Matcher mWhitelist;
30
31 protected:
32 OwnedString FindKeyword(const FilterPtr& filter) override
33 {
34 if (filter->mType == Filter::Type::WHITELIST)
35 return mWhitelist.FindKeyword(filter);
36 return mBlacklist.FindKeyword(filter);
37 }
38
39 public:
40 CombinedMatcher()
41 : mMatchReId(GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true))
42 {
43 }
44
45 ~CombinedMatcher()
46 {
47 DeleteRegExp(mMatchReId);
48 }
49
50 void ResetCache()
51 {
52 for (auto filter : mResultCache)
53 filter.second->ReleaseRef();
54 mResultCache.clear();
55 }
56
57 void Add(const FilterPtr& filter) override
58 {
59 if (filter->mType == Filter::Type::WHITELIST)
60 mWhitelist.Add(filter);
61 else
62 mBlacklist.Add(filter);
63
64 ResetCache();
65 }
66
67 void Remove(const FilterPtr& filter) override
68 {
69 if (filter->mType == Filter::Type::WHITELIST)
70 mWhitelist.Remove(filter);
71 else
72 mBlacklist.Remove(filter);
73
74 ResetCache();
75 }
76
77 void Clear() override
78 {
79 mBlacklist.Clear();
80 mWhitelist.Clear();
81 ResetCache();
82 }
83
84 bool HasFilter(const FilterPtr& filter) const override
85 {
86 if (filter->mType == Filter::Type::WHITELIST)
87 return mWhitelist.HasFilter(filter);
88 return mBlacklist.HasFilter(filter);
89 }
90
91 const String& GetKeywordForFilter(const FilterPtr& filter) override
92 {
93 if (filter->mType == Filter::Type::WHITELIST)
94 return mWhitelist.GetKeywordForFilter(filter);
95 return mBlacklist.GetKeywordForFilter(filter);
96 }
97
98 private:
99 Filter* MatchesAnyInternal(const String& location,
100 int typeMask, DependentString& docDomain, bool thirdParty,
101 const String& sitekey, bool specificOnly)
102 {
103 ReMatchResults reResult;
104 OwnedString text(location);
105 text.toLower();
106 text.match(mMatchReId, &reResult);
107
108 auto& candidates = reResult.candidates;
109 candidates.push_back(OwnedString());
110
111 Filter* blacklistHit = nullptr;
112 for (auto substr : candidates)
113 {
114 if (mWhitelist.mFilterByKeyword.find(substr))
115 {
116 auto result = mWhitelist.CheckEntryMatch(
117 substr, location, typeMask, docDomain, thirdParty, sitekey, specificOn ly);
118 if (result)
119 return result;
120 }
121 if (mBlacklist.mFilterByKeyword.find(substr) && !blacklistHit)
122 {
123 blacklistHit = mBlacklist.CheckEntryMatch(
124 substr, location, typeMask, docDomain, thirdParty, sitekey,
125 specificOnly);
126 }
127 }
128 return blacklistHit;
129 }
130
131 public:
132 Filter* MatchesAny(const String& location,
133 int typeMask, DependentString& docDomain, bool thirdParty,
134 const String& sitekey, bool specificOnly) override
135 {
136 OwnedString key(location);
137 key.append(u" "_str);
138 key.append(typeMask);
139 key.append(u" "_str);
140 key.append(docDomain);
141 key.append(u" "_str);
142 key.append(thirdParty);
143 key.append(u" "_str);
144 key.append(sitekey);
145 key.append(u" "_str);
146 key.append(specificOnly);
147
148 auto cachedResult = mResultCache.find(key);
149 if (cachedResult)
150 {
151 cachedResult->second->AddRef();
152 return cachedResult->second;
153 }
154
155 Filter* result = MatchesAnyInternal(location, typeMask, docDomain,
156 thirdParty, sitekey, specificOnly);
157
158 if (mResultCache.size() >= MAX_CACHE_ENTRIES)
159 ResetCache();
160
161 result->AddRef();
162 mResultCache[key] = result;
163
164 result->AddRef();
165 return result;
166 }
167 };
168
169 MatcherBase* MatcherBase::mInstance = new CombinedMatcher;
170
171 Matcher::Matcher()
172 : mFilterReId(GenerateRegExp(DependentString(Filter::regexpRegExp), true, fals e))
173 , mOptionsReId(GenerateRegExp(DependentString(Filter::optionsRegExp), true, fa lse))
174 , mCandidatesReId(GenerateRegExp(u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_st r, true, true))
175 {
176 }
177
178 Matcher::~Matcher()
179 {
180 DeleteRegExp(mFilterReId);
181 DeleteRegExp(mOptionsReId);
182 DeleteRegExp(mCandidatesReId);
183 }
184
185 OwnedString Matcher::FindKeyword(const FilterPtr& filter)
186 {
187 OwnedString result(u""_str);
188 OwnedString text(filter->GetText());
189 if (TestRegExp(mFilterReId, text))
190 return result;
191
192 // Remove options
193 auto index = ExecRegExp(mOptionsReId, text);
194 if (index != -1)
195 text = text.substr(0, index);
196
197 // Remove whitelist marker
198 if (text[0] == '@' && text[1] == '@')
199 text = text.substr(2);
200
201 text.toLower();
202 ReMatchResults keywords;
203 auto match = text.match(mCandidatesReId, &keywords);
204 if (!match)
205 return result;
206
207 auto& candidates = keywords.candidates;
208
209 auto& hash = mFilterByKeyword;
210 uint32_t resultCount = 0xffffffff;
211 uint32_t resultLength = 0;
212 for (auto substr : candidates)
213 {
214 auto candidate = DependentString(substr).substr(1);
215 auto count = (hash.find(candidate) ? hash[candidate].size() : 0);
216 if (count < resultCount ||
217 (count == resultCount && candidate.length() > resultLength))
218 {
219 result = candidate;
220 resultCount = count;
221 resultLength = candidate.length();
222 }
223 }
224
225 return result;
226 }
227
228 void Matcher::Add(const FilterPtr& filter)
229 {
230 if (mKeywordByFilter.find(filter->GetText()))
231 return;
232
233 auto keyword = FindKeyword(filter);
234 auto oldEntry = mFilterByKeyword.find(keyword);
235 if (!oldEntry)
236 mFilterByKeyword[keyword] = std::vector<FilterPtr>{filter};
237 else
238 mFilterByKeyword[keyword].push_back(filter);
239 mKeywordByFilter[filter->GetText()] = keyword;
240 }
241
242 void Matcher::Remove(const FilterPtr& filter)
243 {
244 if (!mKeywordByFilter.find(filter->GetText()))
245 return;
246
247 auto keyword = mKeywordByFilter[filter->GetText()];
248 auto list = mFilterByKeyword[keyword];
249 if (list.size() == 1)
250 mFilterByKeyword.erase(keyword);
251 else
252 {
253 auto iter = std::find(list.cbegin(), list.cend(), filter);
254 list.erase(iter);
255 }
256 mKeywordByFilter.erase(filter->GetText());
257 }
258
259 void Matcher::Clear()
260 {
261 mFilterByKeyword.clear();
262 mKeywordByFilter.clear();
263 }
264
265 bool Matcher::HasFilter(const FilterPtr& filter) const
266 {
267 return mKeywordByFilter.find(filter->GetText());
268 }
269
270 static DependentString emptyString = u""_str;
271
272 const String& Matcher::GetKeywordForFilter(const FilterPtr& filter)
273 {
274 if (mKeywordByFilter.find(filter->GetText()))
275 return mKeywordByFilter[filter->GetText()];
276 return emptyString;
277 }
278
279 Filter* Matcher::CheckEntryMatch(const String& keyword,
280 const String& location,
281 int typeMask, DependentString& docDomain, bool thirdParty,
282 const String& sitekey, bool specificOnly)
283 {
284 auto list = mFilterByKeyword[keyword];
285 for (auto filter : list) {
286 auto activeFilter = static_cast<ActiveFilter*>(filter.get());
287 if (specificOnly && activeFilter->IsGeneric() &&
288 !(activeFilter->mType != Filter::Type::WHITELIST))
289 continue;
290
291 auto reFilter = static_cast<RegExpFilter*>(activeFilter);
292 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey))
293 return filter.get();
294 }
295 return nullptr;
296 }
297
298 Filter* Matcher::MatchesAny(const String& location,
299 int typeMask, DependentString& docDomain, bool thirdParty,
300 const String& sitekey, bool specificOnly)
301 {
302 ReMatchResults reResult;
303 auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true);
304 OwnedString text(location);
305 text.toLower();
306 MatchRegExp(re_id, text, &reResult);
307 auto& candidates = reResult.candidates;
308 candidates.push_back(OwnedString());
309 for (auto substr : candidates)
310 if (mFilterByKeyword.find(substr))
311 {
312 auto result = CheckEntryMatch(substr, location, typeMask, docDomain,
313 thirdParty, sitekey, specificOnly);
314 if (result)
315 {
316 result->AddRef();
317 return result;
318 }
319 }
320
321 return nullptr;
322 }
OLDNEW
« no previous file with comments | « compiled/filter/Matcher.h ('k') | compiled/filter/RegExpFilter.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld