Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/filter/Matcher.cpp

Issue 29556737: Issue 5141 - Convert filter match to C++ (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Reworked the code, added test, they fail. Created Sept. 29, 2017, 4:01 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « compiled/filter/Matcher.h ('k') | compiled/filter/RegExpFilter.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH
4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation.
8 *
9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "Matcher.h"
19 #include "RegExpFilter.h"
20 #include "../library.h"
21
22 const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000;
23
24 OwnedString CombinedMatcher::FindKeyword(const FilterPtr& filter)
25 {
26 if (filter->mType == Filter::Type::WHITELIST)
27 return mWhitelist.FindKeyword(filter);
28 return mBlacklist.FindKeyword(filter);
29 }
30
31 CombinedMatcher::CombinedMatcher()
32 : mMatchReId(GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true))
33 {
34 }
35
36 CombinedMatcher::~CombinedMatcher()
37 {
38 DeleteRegExp(mMatchReId);
39 }
40
41 void CombinedMatcher::ResetCache()
42 {
43 mResultCache.clear();
44 }
45
46 void CombinedMatcher::Add(const FilterPtr& filter)
47 {
48 if (filter->mType == Filter::Type::WHITELIST)
49 mWhitelist.Add(filter);
50 else
51 mBlacklist.Add(filter);
52
53 ResetCache();
54 }
55
56 void CombinedMatcher::Remove(const FilterPtr& filter)
57 {
58 if (filter->mType == Filter::Type::WHITELIST)
59 mWhitelist.Remove(filter);
60 else
61 mBlacklist.Remove(filter);
62
63 ResetCache();
64 }
65
66 void CombinedMatcher::Clear()
67 {
68 mBlacklist.Clear();
69 mWhitelist.Clear();
70 ResetCache();
71 }
72
73 bool CombinedMatcher::HasFilter(const FilterPtr& filter) const
74 {
75 if (filter->mType == Filter::Type::WHITELIST)
76 return mWhitelist.HasFilter(filter);
77 return mBlacklist.HasFilter(filter);
78 }
79
80 const String& CombinedMatcher::GetKeywordForFilter(const FilterPtr& filter)
81 {
82 if (filter->mType == Filter::Type::WHITELIST)
83 return mWhitelist.GetKeywordForFilter(filter);
84 return mBlacklist.GetKeywordForFilter(filter);
85 }
86
87 FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location,
88 int typeMask, DependentString& docDomain, bool thirdParty,
89 const String& sitekey, bool specificOnly)
90 {
91 ReMatchResults reResult;
92 OwnedString text(location);
93 text.toLower();
94 text.match(mMatchReId, &reResult);
95
96 auto& candidates = reResult.candidates;
97 candidates.push_back(OwnedString());
98
99 FilterPtr blacklistHit;
100 for (auto substr : candidates)
101 {
102 if (mWhitelist.mFilterByKeyword.find(substr))
103 {
104 auto result = mWhitelist.CheckEntryMatch(
105 substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly );
106 if (result)
107 return result;
108 }
109 if (mBlacklist.mFilterByKeyword.find(substr) && !blacklistHit)
110 {
111 blacklistHit = mBlacklist.CheckEntryMatch(
112 substr, location, typeMask, docDomain, thirdParty, sitekey,
113 specificOnly);
114 }
115 }
116 return blacklistHit;
117 }
118
119 Filter* CombinedMatcher::MatchesAny(const String& location,
120 int typeMask, DependentString& docDomain, bool thirdParty,
121 const String& sitekey, bool specificOnly)
122 {
123 OwnedString key(location);
124 key.append(u" "_str);
125 key.append(typeMask);
126 key.append(u" "_str);
127 key.append(docDomain);
128 key.append(u" "_str);
129 key.append(thirdParty);
130 key.append(u" "_str);
131 key.append(sitekey);
132 key.append(u" "_str);
133 key.append(specificOnly);
134
135 FilterPtr result;
136
137 auto cachedResult = mResultCache.find(key);
138 if (cachedResult)
139 result = cachedResult->second;
140 else
141 {
142 result = MatchesAnyInternal(location, typeMask, docDomain,
143 thirdParty, sitekey, specificOnly);
144
145 if (mResultCache.size() >= MAX_CACHE_ENTRIES)
146 ResetCache();
147
148 mResultCache[key] = result;
149 }
150
151 result->AddRef();
152 return result.get();
153 }
154
155 namespace {
156 const DependentString regexpRegExp =
157 u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_s tr;
158 const DependentString optionsRegExp =
159 u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str;
160 const DependentString candidateRegExp =
161 u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str;
162 }
163
164 OwnedString Matcher::FindKeyword(const FilterPtr& filter)
165 {
166 OwnedString result(u""_str);
167 OwnedString text(filter->GetText());
168 auto re_id = GenerateRegExp(DependentString(regexpRegExp), true, false);
169 if (TestRegExp(re_id, text))
170 return result;
171
172 // Remove options
173 auto options_re_id = GenerateRegExp(DependentString(optionsRegExp), true, fals e);
174 auto index = ExecRegExp(options_re_id, text);
175 if (index != -1)
176 text = text.substr(0, index);
177
178 // Remove whitelist marker
179 if (text[0] == '@' && text[1] == '@')
180 text = text.substr(2);
181
182 text.toLower();
183 ReMatchResults keywords;
184 auto candidates_re_id = GenerateRegExp(candidateRegExp, true, true);
185 auto match = text.match(candidates_re_id, &keywords);
186 if (!match)
187 return result;
188
189 auto& candidates = keywords.candidates;
190
191 auto& hash = mFilterByKeyword;
192 uint32_t resultCount = 0xffffffff;
193 uint32_t resultLength = 0;
194 for (auto substr : candidates)
195 {
196 auto candidate = DependentString(substr).substr(1);
197 auto count = (hash.find(candidate) ? hash[candidate].size() : 0);
198 if (count < resultCount ||
199 (count == resultCount && candidate.length() > resultLength))
200 {
201 result = candidate;
202 resultCount = count;
203 resultLength = candidate.length();
204 }
205 }
206
207 return result;
208 }
209
210 void Matcher::Add(const FilterPtr& filter)
211 {
212 if (mKeywordByFilter.find(filter->GetText()))
213 return;
214
215 auto keyword = FindKeyword(filter);
216 auto oldEntry = mFilterByKeyword.find(keyword);
217 if (!oldEntry)
218 mFilterByKeyword[keyword] = std::vector<FilterPtr>{filter};
219 else
220 mFilterByKeyword[keyword].push_back(filter);
221 mKeywordByFilter[filter->GetText()] = keyword;
222 }
223
224 void Matcher::Remove(const FilterPtr& filter)
225 {
226 if (!mKeywordByFilter.find(filter->GetText()))
227 return;
228
229 auto keyword = mKeywordByFilter[filter->GetText()];
230 auto list = mFilterByKeyword[keyword];
231 if (list.size() == 1)
232 mFilterByKeyword.erase(keyword);
233 else
234 {
235 auto iter = std::find(list.cbegin(), list.cend(), filter);
236 list.erase(iter);
237 }
238 mKeywordByFilter.erase(filter->GetText());
239 }
240
241 void Matcher::Clear()
242 {
243 mFilterByKeyword.clear();
244 mKeywordByFilter.clear();
245 }
246
247 bool Matcher::HasFilter(const FilterPtr& filter) const
248 {
249 return mKeywordByFilter.find(filter->GetText());
250 }
251
252 static DependentString emptyString = u""_str;
253
254 const String& Matcher::GetKeywordForFilter(const FilterPtr& filter)
255 {
256 if (mKeywordByFilter.find(filter->GetText()))
257 return mKeywordByFilter[filter->GetText()];
258 return emptyString;
259 }
260
261 FilterPtr Matcher::CheckEntryMatch(const String& keyword,
262 const String& location,
263 int typeMask, DependentString& docDomain, bool thirdParty,
264 const String& sitekey, bool specificOnly)
265 {
266 auto list = mFilterByKeyword[keyword];
267 for (auto filter : list) {
268 auto activeFilter = static_cast<ActiveFilter*>(filter.get());
269 if (specificOnly && activeFilter->IsGeneric() &&
270 !(activeFilter->mType != Filter::Type::WHITELIST))
271 continue;
272
273 auto reFilter = static_cast<RegExpFilter*>(activeFilter);
274 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey))
275 return filter;
276 }
277 return FilterPtr();
278 }
279
280 Filter* Matcher::MatchesAny(const String& location,
281 int typeMask, DependentString& docDomain, bool thirdParty,
282 const String& sitekey, bool specificOnly)
283 {
284 ReMatchResults reResult;
285 auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true);
286 OwnedString text(location);
287 text.toLower();
288 MatchRegExp(re_id, text, &reResult);
289 auto& candidates = reResult.candidates;
290 candidates.push_back(OwnedString());
291 for (auto substr : candidates)
292 if (mFilterByKeyword.find(substr))
293 {
294 auto result = CheckEntryMatch(substr, location, typeMask, docDomain,
295 thirdParty, sitekey, specificOnly);
296 if (result)
297 {
298 result->AddRef();
299 return result.get();
300 }
301 }
302
303 return nullptr;
304 }
OLDNEW
« no previous file with comments | « compiled/filter/Matcher.h ('k') | compiled/filter/RegExpFilter.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld