Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/filter/Matcher.cpp

Issue 29556737: Issue 5141 - Convert filter match to C++ (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Addressed most of the comment. Fixed some issues. Created Oct. 3, 2017, 7:31 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH
4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation.
8 *
9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "Matcher.h"
19 #include "RegExpFilter.h"
20 #include "../library.h"
21
22 namespace {
23 const DependentString regexpRegExp =
24 u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_s tr;
25 const DependentString optionsRegExp =
26 u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str;
27 const DependentString candidateRegExp =
28 u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str;
29 }
30
31 void Matcher::Add(Filter& filter)
32 {
33 if (mKeywordByFilter.find(filter.GetText()))
34 return;
35
36 auto keyword = FindKeyword(filter);
37
38 mFilterByKeyword[keyword].push_back(FilterPtr(&filter));
39 mKeywordByFilter[filter.GetText()] = keyword;
40 }
41
42 void Matcher::Remove(Filter& filter)
43 {
44 auto entry = mKeywordByFilter.find(filter.GetText());
45 if (!entry)
46 return;
47
48 auto keyword = entry->second;
49 auto list = mFilterByKeyword[keyword];
50 if (list.size() == 1)
51 mFilterByKeyword.erase(keyword);
52 else
53 list.erase(std::find(list.cbegin(), list.cend(), FilterPtr(&filter)));
54
55 mKeywordByFilter.erase(filter.GetText());
56 }
57
58 void Matcher::Clear()
59 {
60 mFilterByKeyword.clear();
61 mKeywordByFilter.clear();
62 }
63
64 bool Matcher::HasFilter(const Filter& filter) const
65 {
66 return mKeywordByFilter.find(filter.GetText());
67 }
68
69 namespace
70 {
71 DependentString emptyString = u""_str;
72 }
73
74 const String& Matcher::GetKeywordForFilter(const Filter& filter) const
75 {
76 auto entry = mKeywordByFilter.find(filter.GetText());
77 if (entry)
78 return entry->second;
79 return emptyString;
80 }
81
82 Filter* Matcher::MatchesAny(const String& location,
83 int typeMask, DependentString& docDomain, bool thirdParty,
84 const String& sitekey, bool specificOnly) const
85 {
86 auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true);
87 OwnedString text(location);
88 text.toLower();
89 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false);
90 MatchRegExp(re_id, text, reResult.get());
91 auto& candidates = reResult->candidates;
92 candidates.push_back(OwnedString());
93 for (auto substr : candidates)
94 {
95 if (mFilterByKeyword.find(substr))
96 {
97 auto result = CheckEntryMatch(substr, location, typeMask, docDomain,
98 thirdParty, sitekey, specificOnly);
99 if (result)
100 return result.release();
101 }
102 }
103 return nullptr;
104 }
105
106 OwnedString Matcher::FindKeyword(const Filter& filter) const
107 {
108 OwnedString result(u""_str);
109 OwnedString text(filter.GetText());
110 auto re_id = GenerateRegExp(regexpRegExp, true, false);
111 if (TestRegExp(re_id, text))
112 return result;
113
114 // Remove options
115 auto options_re_id = GenerateRegExp(optionsRegExp, true, false);
116 auto index = ExecRegExp(options_re_id, text);
117 if (index != String::npos)
118 text = text.substr(0, index);
119
120 // Remove whitelist marker
121 if (text.length() >= 2 && text[0] == '@' && text[1] == '@')
122 text = text.substr(2);
123
124 text.toLower();
125 intrusive_ptr<ReMatchResults> keywords(new ReMatchResults, false);
126 auto candidates_re_id = GenerateRegExp(candidateRegExp, true, true);
127 auto match = text.match(candidates_re_id, keywords.get());
128 if (!match)
129 return result;
130
131 auto& candidates = keywords->candidates;
132
133 auto& hash = mFilterByKeyword;
134 uint32_t resultCount = 0xffffffff;
135 uint32_t resultLength = 0;
136 for (auto substr : candidates)
137 {
138 if (substr.empty())
139 continue;
140
141 auto candidate = substr.substr(1);
142 auto entry = hash.find(candidate);
143 auto count = entry ? entry->second.size() : 0;
144 if (count < resultCount ||
145 (count == resultCount && candidate.length() > resultLength))
146 {
147 result = candidate;
148 resultCount = count;
149 resultLength = candidate.length();
150 }
151 }
152 return result;
153 }
154
155 FilterPtr Matcher::CheckEntryMatch(const String& keyword,
156 const String& location,
157 int typeMask, DependentString& docDomain, bool thirdParty,
158 const String& sitekey, bool specificOnly) const
159 {
160 auto entry = mFilterByKeyword.find(keyword);
161 if (entry)
162 {
sergei 2017/10/04 08:54:33 Earlier return would be better here, in my opinion
hub 2017/10/06 13:49:19 Done.
163 auto list = entry->second;
164 for (auto filter : list)
165 {
166 auto activeFilter = static_cast<ActiveFilter*>(filter.get());
167 if (specificOnly && activeFilter->IsGeneric() &&
168 !(activeFilter->mType != Filter::Type::WHITELIST))
169 continue;
170
171 auto reFilter = static_cast<RegExpFilter*>(activeFilter);
172 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey))
173 return filter;
174 }
175 }
176 return FilterPtr();
177 }
178
179 const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000;
180
181 void CombinedMatcher::Add(Filter& filter)
182 {
183 if (filter.mType == Filter::Type::WHITELIST)
184 mWhitelist.Add(filter);
185 else
186 mBlacklist.Add(filter);
187
188 ResetCache();
189 }
190
191 void CombinedMatcher::Remove(Filter& filter)
192 {
193 if (filter.mType == Filter::Type::WHITELIST)
194 mWhitelist.Remove(filter);
195 else
196 mBlacklist.Remove(filter);
197
198 ResetCache();
199 }
200
201 void CombinedMatcher::Clear()
202 {
203 mBlacklist.Clear();
204 mWhitelist.Clear();
205 ResetCache();
206 }
207
208 bool CombinedMatcher::HasFilter(const Filter& filter) const
209 {
210 return filter.mType == Filter::Type::WHITELIST ?
211 mWhitelist.HasFilter(filter) : mBlacklist.HasFilter(filter);
212 }
213
214 const String& CombinedMatcher::GetKeywordForFilter(const Filter& filter) const
215 {
216 return filter.mType == Filter::Type::WHITELIST ?
217 mWhitelist.GetKeywordForFilter(filter) : mBlacklist.GetKeywordForFilter(filt er);
218 }
219
220 Filter* CombinedMatcher::MatchesAny(const String& location,
221 int typeMask, DependentString& docDomain, bool thirdParty,
222 const String& sitekey, bool specificOnly)
223 {
224 OwnedString key(location);
225 key.append(u" "_str);
226 key.append(typeMask);
227 key.append(u" "_str);
228 key.append(docDomain);
229 key.append(u" "_str);
230 key.append(thirdParty);
231 key.append(u" "_str);
232 key.append(sitekey);
233 key.append(u" "_str);
234 key.append(specificOnly);
235
236 FilterPtr result;
237
238 auto cachedResult = mResultCache.find(key);
239 if (cachedResult)
240 result = cachedResult->second;
241 else
242 {
243 result = MatchesAnyInternal(location, typeMask, docDomain,
244 thirdParty, sitekey, specificOnly);
245
246 if (mResultCache.size() >= MAX_CACHE_ENTRIES)
247 ResetCache();
248
249 mResultCache[key] = result;
250 }
251
252 return result.release();
253 }
254
255 OwnedString CombinedMatcher::FindKeyword(const Filter& filter) const
256 {
257 return filter.mType == Filter::Type::WHITELIST ?
258 mWhitelist.FindKeyword(filter) : mBlacklist.FindKeyword(filter);
259 }
260
261 void CombinedMatcher::ResetCache()
262 {
263 mResultCache.clear();
264 }
265
266 FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location,
267 int typeMask, DependentString& docDomain, bool thirdParty,
268 const String& sitekey, bool specificOnly) const
269 {
270 OwnedString text(location);
271 text.toLower();
272 auto match_re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true);
273 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false);
274 text.match(match_re_id, reResult.get());
275
276 auto& candidates = reResult->candidates;
277 candidates.push_back(OwnedString());
278
279 FilterPtr blacklistHit;
280 for (auto substr : candidates)
281 {
282 auto result = mWhitelist.CheckEntryMatch(
283 substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly);
284 if (result)
285 return result;
286
287 if (!blacklistHit)
288 blacklistHit = mBlacklist.CheckEntryMatch(
289 substr, location, typeMask, docDomain, thirdParty, sitekey,
290 specificOnly);
291 }
292 return blacklistHit;
293 }
OLDNEW

Powered by Google App Engine
This is Rietveld