Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-present eyeo GmbH | 3 * Copyright (C) 2006-present eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. |
13 * | 13 * |
14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License |
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
16 */ | 16 */ |
17 | 17 |
18 #include "Matcher.h" | 18 #include "Matcher.h" |
19 #include "RegExpFilter.h" | 19 #include "RegExpFilter.h" |
20 #include "../library.h" | 20 #include "../library.h" |
21 | 21 |
22 const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000; | |
23 | |
24 OwnedString CombinedMatcher::FindKeyword(const FilterPtr& filter) | |
25 { | |
26 if (filter->mType == Filter::Type::WHITELIST) | |
27 return mWhitelist.FindKeyword(filter); | |
28 return mBlacklist.FindKeyword(filter); | |
29 } | |
30 | |
31 CombinedMatcher::CombinedMatcher() | |
32 : mMatchReId(GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true)) | |
33 { | |
34 } | |
35 | |
36 CombinedMatcher::~CombinedMatcher() | |
37 { | |
38 DeleteRegExp(mMatchReId); | |
39 } | |
40 | |
41 void CombinedMatcher::ResetCache() | |
42 { | |
43 mResultCache.clear(); | |
44 } | |
45 | |
46 void CombinedMatcher::Add(const FilterPtr& filter) | |
47 { | |
48 if (filter->mType == Filter::Type::WHITELIST) | |
49 mWhitelist.Add(filter); | |
50 else | |
51 mBlacklist.Add(filter); | |
52 | |
53 ResetCache(); | |
54 } | |
55 | |
56 void CombinedMatcher::Remove(const FilterPtr& filter) | |
57 { | |
58 if (filter->mType == Filter::Type::WHITELIST) | |
59 mWhitelist.Remove(filter); | |
60 else | |
61 mBlacklist.Remove(filter); | |
62 | |
63 ResetCache(); | |
64 } | |
65 | |
66 void CombinedMatcher::Clear() | |
67 { | |
68 mBlacklist.Clear(); | |
69 mWhitelist.Clear(); | |
70 ResetCache(); | |
71 } | |
72 | |
73 bool CombinedMatcher::HasFilter(const FilterPtr& filter) const | |
74 { | |
75 if (filter->mType == Filter::Type::WHITELIST) | |
76 return mWhitelist.HasFilter(filter); | |
77 return mBlacklist.HasFilter(filter); | |
78 } | |
79 | |
80 const String& CombinedMatcher::GetKeywordForFilter(const FilterPtr& filter) | |
81 { | |
82 if (filter->mType == Filter::Type::WHITELIST) | |
83 return mWhitelist.GetKeywordForFilter(filter); | |
84 return mBlacklist.GetKeywordForFilter(filter); | |
85 } | |
86 | |
87 FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location, | |
88 int typeMask, DependentString& docDomain, bool thirdParty, | |
89 const String& sitekey, bool specificOnly) | |
90 { | |
91 ReMatchResults reResult; | |
92 OwnedString text(location); | |
93 text.toLower(); | |
94 text.match(mMatchReId, &reResult); | |
95 | |
96 auto& candidates = reResult.candidates; | |
97 candidates.push_back(OwnedString()); | |
98 | |
99 FilterPtr blacklistHit; | |
100 for (auto substr : candidates) | |
101 { | |
102 if (mWhitelist.mFilterByKeyword.find(substr)) | |
103 { | |
104 auto result = mWhitelist.CheckEntryMatch( | |
105 substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly ); | |
106 if (result) | |
107 return result; | |
108 } | |
109 if (mBlacklist.mFilterByKeyword.find(substr) && !blacklistHit) | |
110 { | |
111 blacklistHit = mBlacklist.CheckEntryMatch( | |
112 substr, location, typeMask, docDomain, thirdParty, sitekey, | |
113 specificOnly); | |
114 } | |
115 } | |
116 return blacklistHit; | |
117 } | |
118 | |
119 Filter* CombinedMatcher::MatchesAny(const String& location, | |
120 int typeMask, DependentString& docDomain, bool thirdParty, | |
121 const String& sitekey, bool specificOnly) | |
122 { | |
123 OwnedString key(location); | |
124 key.append(u" "_str); | |
125 key.append(typeMask); | |
126 key.append(u" "_str); | |
127 key.append(docDomain); | |
128 key.append(u" "_str); | |
129 key.append(thirdParty); | |
130 key.append(u" "_str); | |
131 key.append(sitekey); | |
132 key.append(u" "_str); | |
133 key.append(specificOnly); | |
134 | |
135 FilterPtr result; | |
136 | |
137 auto cachedResult = mResultCache.find(key); | |
138 if (cachedResult) | |
139 result = cachedResult->second; | |
140 else | |
141 { | |
142 result = MatchesAnyInternal(location, typeMask, docDomain, | |
143 thirdParty, sitekey, specificOnly); | |
144 | |
145 if (mResultCache.size() >= MAX_CACHE_ENTRIES) | |
146 ResetCache(); | |
147 | |
148 mResultCache[key] = result; | |
149 } | |
150 | |
151 result->AddRef(); | |
152 return result.get(); | |
153 } | |
154 | |
155 namespace { | 22 namespace { |
156 const DependentString regexpRegExp = | 23 const DependentString regexpRegExp = |
157 u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_str; | 24 u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_str; |
158 const DependentString optionsRegExp = | 25 const DependentString optionsRegExp = |
159 u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str; | 26 u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str; |
160 const DependentString candidateRegExp = | 27 const DependentString candidateRegExp = |
161 u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str; | 28 u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str; |
162 } | 29 const DependentString matchRegExp = u"[a-z0-9%]{3,}"_str; |
163 | 30 } |
164 OwnedString Matcher::FindKeyword(const FilterPtr& filter) | 31 |
165 { | 32 Matcher::Matcher() |
166 OwnedString result(u""_str); | 33 : mFilterByKeyword(1024), mKeywordByFilter(1024), |
167 OwnedString text(filter->GetText()); | 34 mReId(-1), mOptionsReId(-1), mCandidatesReId(-1) |
168 auto re_id = GenerateRegExp(DependentString(regexpRegExp), true, false); | 35 { |
169 if (TestRegExp(re_id, text)) | 36 mReId = GenerateRegExp(regexpRegExp, true, false); |
37 mOptionsReId = GenerateRegExp(optionsRegExp, true, false); | |
38 mCandidatesReId = GenerateRegExp(candidateRegExp, true, true); | |
39 mMatchReId = GenerateRegExp(matchRegExp, true, true); | |
40 } | |
41 | |
42 void Matcher::Add(Filter& filter) | |
43 { | |
44 if (mKeywordByFilter.find(filter.GetText())) | |
45 return; | |
46 | |
47 auto keyword = FindKeyword(filter); | |
48 | |
49 mFilterByKeyword[keyword].push_back(FilterPtr(&filter)); | |
sergei
2017/10/11 09:55:16
Although the review is already closed I think it's
| |
50 mKeywordByFilter[filter.GetText()] = | |
51 FilterKeyword(std::move(keyword), filter); | |
52 } | |
53 | |
54 void Matcher::Remove(Filter& filter) | |
55 { | |
56 auto entry = mKeywordByFilter.find(filter.GetText()); | |
57 if (!entry) | |
58 return; | |
59 | |
60 auto& keyword = static_cast<const String&>(entry->second); | |
61 auto list = mFilterByKeyword[keyword]; | |
62 if (list.size() == 1) | |
63 mFilterByKeyword.erase(keyword); | |
64 else | |
65 list.erase(std::find(list.cbegin(), list.cend(), FilterPtr(&filter))); | |
66 | |
67 mKeywordByFilter.erase(filter.GetText()); | |
68 } | |
69 | |
70 void Matcher::Clear() | |
71 { | |
72 mFilterByKeyword.clear(); | |
73 mKeywordByFilter.clear(); | |
74 } | |
75 | |
76 bool Matcher::HasFilter(const Filter& filter) const | |
77 { | |
78 return mKeywordByFilter.find(filter.GetText()); | |
79 } | |
80 | |
81 namespace | |
82 { | |
83 DependentString emptyString = u""_str; | |
84 } | |
85 | |
86 const String& Matcher::GetKeywordForFilter(const Filter& filter) const | |
87 { | |
88 auto entry = mKeywordByFilter.find(filter.GetText()); | |
89 if (entry) | |
90 return static_cast<const String&>(entry->second); | |
91 return emptyString; | |
92 } | |
93 | |
94 Filter* Matcher::MatchesAny(const String& location, | |
95 int typeMask, DependentString& docDomain, bool thirdParty, | |
96 const String& sitekey, bool specificOnly) const | |
97 { | |
98 OwnedString text(location); | |
99 text.toLower(); | |
100 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); | |
101 if (text.match(mMatchReId, *reResult)) | |
102 { | |
103 auto& candidates = reResult->candidates; | |
104 candidates.push_back(OwnedString()); | |
105 for (auto candidate : candidates) | |
106 { | |
107 auto result = CheckEntryMatch(candidate, location, typeMask, docDomain, | |
108 thirdParty, sitekey, specificOnly); | |
109 if (result) | |
110 return result.release(); | |
111 } | |
112 } | |
Wladimir Palant
2017/10/09 08:39:47
As mentioned in the issue description, we should n
sergei
2017/10/09 15:27:53
Although it merely converts the existing JS code a
Wladimir Palant
2017/10/10 07:39:05
I strongly disagree. Landing crappy code is always
| |
113 return nullptr; | |
114 } | |
115 | |
116 OwnedString Matcher::FindKeyword(const Filter& filter) const | |
117 { | |
118 OwnedString result; | |
119 OwnedString text(filter.GetText()); | |
120 if (TestRegExp(mReId, text)) | |
170 return result; | 121 return result; |
171 | 122 |
172 // Remove options | 123 // Remove options |
173 auto options_re_id = GenerateRegExp(DependentString(optionsRegExp), true, false); | 124 auto index = ExecRegExp(mOptionsReId, text); |
174 auto index = ExecRegExp(options_re_id, text); | 125 if (index != String::npos) |
175 if (index != -1) | 126 text = DependentString(text, 0, index); |
176 text = text.substr(0, index); | |
177 | 127 |
178 // Remove whitelist marker | 128 // Remove whitelist marker |
179 if (text[0] == '@' && text[1] == '@') | 129 if (text.length() >= 2 && text[0] == '@' && text[1] == '@') |
180 text = text.substr(2); | 130 text = DependentString(text, 2); |
181 | 131 |
182 text.toLower(); | 132 text.toLower(); |
183 ReMatchResults keywords; | 133 intrusive_ptr<ReMatchResults> keywords(new ReMatchResults, false); |
184 auto candidates_re_id = GenerateRegExp(candidateRegExp, true, true); | 134 auto match = text.match(mCandidatesReId, *keywords); |
185 auto match = text.match(candidates_re_id, &keywords); | |
186 if (!match) | 135 if (!match) |
187 return result; | 136 return result; |
188 | 137 |
189 auto& candidates = keywords.candidates; | 138 auto& candidates = keywords->candidates; |
190 | 139 |
191 auto& hash = mFilterByKeyword; | 140 uint32_t resultCount = 0xffffff; |
192 uint32_t resultCount = 0xffffffff; | |
193 uint32_t resultLength = 0; | 141 uint32_t resultLength = 0; |
194 for (auto substr : candidates) | 142 for (auto substr : candidates) |
195 { | 143 { |
196 auto candidate = DependentString(substr).substr(1); | 144 if (substr.empty()) |
197 auto count = (hash.find(candidate) ? hash[candidate].size() : 0); | 145 continue; |
146 | |
147 auto candidate = DependentString(substr, 1); | |
148 auto entry = mFilterByKeyword.find(candidate); | |
149 auto count = entry ? entry->second.size() : 0; | |
198 if (count < resultCount || | 150 if (count < resultCount || |
199 (count == resultCount && candidate.length() > resultLength)) | 151 (count == resultCount && candidate.length() > resultLength)) |
200 { | 152 { |
201 result = candidate; | 153 result = candidate; |
202 resultCount = count; | 154 resultCount = count; |
203 resultLength = candidate.length(); | 155 resultLength = candidate.length(); |
204 } | 156 } |
205 } | 157 } |
206 | |
207 return result; | 158 return result; |
208 } | |
209 | |
210 void Matcher::Add(const FilterPtr& filter) | |
211 { | |
212 if (mKeywordByFilter.find(filter->GetText())) | |
213 return; | |
214 | |
215 auto keyword = FindKeyword(filter); | |
216 auto oldEntry = mFilterByKeyword.find(keyword); | |
217 if (!oldEntry) | |
218 mFilterByKeyword[keyword] = std::vector<FilterPtr>{filter}; | |
219 else | |
220 mFilterByKeyword[keyword].push_back(filter); | |
221 mKeywordByFilter[filter->GetText()] = keyword; | |
222 } | |
223 | |
224 void Matcher::Remove(const FilterPtr& filter) | |
225 { | |
226 if (!mKeywordByFilter.find(filter->GetText())) | |
227 return; | |
228 | |
229 auto keyword = mKeywordByFilter[filter->GetText()]; | |
230 auto list = mFilterByKeyword[keyword]; | |
231 if (list.size() == 1) | |
232 mFilterByKeyword.erase(keyword); | |
233 else | |
234 { | |
235 auto iter = std::find(list.cbegin(), list.cend(), filter); | |
236 list.erase(iter); | |
237 } | |
238 mKeywordByFilter.erase(filter->GetText()); | |
239 } | |
240 | |
241 void Matcher::Clear() | |
242 { | |
243 mFilterByKeyword.clear(); | |
244 mKeywordByFilter.clear(); | |
245 } | |
246 | |
247 bool Matcher::HasFilter(const FilterPtr& filter) const | |
248 { | |
249 return mKeywordByFilter.find(filter->GetText()); | |
250 } | |
251 | |
252 static DependentString emptyString = u""_str; | |
253 | |
254 const String& Matcher::GetKeywordForFilter(const FilterPtr& filter) | |
255 { | |
256 if (mKeywordByFilter.find(filter->GetText())) | |
257 return mKeywordByFilter[filter->GetText()]; | |
258 return emptyString; | |
259 } | 159 } |
260 | 160 |
261 FilterPtr Matcher::CheckEntryMatch(const String& keyword, | 161 FilterPtr Matcher::CheckEntryMatch(const String& keyword, |
262 const String& location, | 162 const String& location, |
263 int typeMask, DependentString& docDomain, bool thirdParty, | 163 int typeMask, DependentString& docDomain, bool thirdParty, |
264 const String& sitekey, bool specificOnly) | 164 const String& sitekey, bool specificOnly) const |
265 { | 165 { |
266 auto list = mFilterByKeyword[keyword]; | 166 auto entry = mFilterByKeyword.find(keyword); |
267 for (auto filter : list) { | 167 if (!entry) |
168 return FilterPtr(); | |
169 | |
170 auto filters = entry->second; | |
171 for (auto filter : filters) | |
172 { | |
268 auto activeFilter = static_cast<ActiveFilter*>(filter.get()); | 173 auto activeFilter = static_cast<ActiveFilter*>(filter.get()); |
269 if (specificOnly && activeFilter->IsGeneric() && | 174 if (specificOnly && activeFilter->IsGeneric() && |
270 !(activeFilter->mType != Filter::Type::WHITELIST)) | 175 (activeFilter->mType != Filter::Type::WHITELIST)) |
271 continue; | 176 continue; |
272 | 177 |
273 auto reFilter = static_cast<RegExpFilter*>(activeFilter); | 178 auto reFilter = static_cast<RegExpFilter*>(activeFilter); |
274 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) | 179 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) |
275 return filter; | 180 return filter; |
276 } | 181 } |
182 | |
277 return FilterPtr(); | 183 return FilterPtr(); |
278 } | 184 } |
279 | 185 |
280 Filter* Matcher::MatchesAny(const String& location, | 186 const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000; |
187 | |
188 CombinedMatcher::CombinedMatcher() | |
189 : mResultCache(1024), mMatchReId(-1) | |
190 { | |
191 mMatchReId = GenerateRegExp(matchRegExp, true, true); | |
192 } | |
193 | |
194 void CombinedMatcher::Add(Filter& filter) | |
195 { | |
196 GetMatcher(filter).Add(filter); | |
197 ResetCache(); | |
198 } | |
199 | |
200 void CombinedMatcher::Remove(Filter& filter) | |
201 { | |
202 GetMatcher(filter).Remove(filter); | |
203 ResetCache(); | |
204 } | |
205 | |
206 void CombinedMatcher::Clear() | |
207 { | |
208 mBlacklist.Clear(); | |
209 mWhitelist.Clear(); | |
210 ResetCache(); | |
211 } | |
212 | |
213 bool CombinedMatcher::HasFilter(const Filter& filter) const | |
214 { | |
215 return GetMatcher(filter).HasFilter(filter); | |
216 } | |
217 | |
218 const String& CombinedMatcher::GetKeywordForFilter(const Filter& filter) const | |
219 { | |
220 return GetMatcher(filter).GetKeywordForFilter(filter); | |
221 } | |
222 | |
223 Filter* CombinedMatcher::MatchesAny(const String& location, | |
281 int typeMask, DependentString& docDomain, bool thirdParty, | 224 int typeMask, DependentString& docDomain, bool thirdParty, |
282 const String& sitekey, bool specificOnly) | 225 const String& sitekey, bool specificOnly) |
283 { | 226 { |
284 ReMatchResults reResult; | 227 OwnedString key(location); |
285 auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true); | 228 key.append(u" "_str); |
229 key.append(typeMask); | |
230 key.append(u" "_str); | |
231 key.append(docDomain); | |
232 key.append(u" "_str); | |
233 key.append(thirdParty); | |
234 key.append(u" "_str); | |
235 key.append(sitekey); | |
236 key.append(u" "_str); | |
237 key.append(specificOnly); | |
238 | |
239 FilterPtr result; | |
240 | |
241 auto cachedResult = mResultCache.find(key); | |
242 if (cachedResult) | |
243 result = cachedResult->second.filter(); | |
244 else | |
245 { | |
246 result = MatchesAnyInternal(location, typeMask, docDomain, | |
247 thirdParty, sitekey, specificOnly); | |
248 | |
249 if (mResultCache.size() >= MAX_CACHE_ENTRIES) | |
250 ResetCache(); | |
251 | |
252 CacheEntry cache(std::move(key), result); | |
253 mResultCache[cache.key()] = cache; | |
254 } | |
255 | |
256 return result.release(); | |
257 } | |
258 | |
259 OwnedString CombinedMatcher::FindKeyword(const Filter& filter) const | |
260 { | |
261 return GetMatcher(filter).FindKeyword(filter); | |
262 } | |
263 | |
264 void CombinedMatcher::ResetCache() | |
265 { | |
266 mResultCache.clear(); | |
267 } | |
268 | |
269 FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location, | |
270 int typeMask, DependentString& docDomain, bool thirdParty, | |
271 const String& sitekey, bool specificOnly) const | |
272 { | |
286 OwnedString text(location); | 273 OwnedString text(location); |
287 text.toLower(); | 274 text.toLower(); |
288 MatchRegExp(re_id, text, &reResult); | 275 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); |
289 auto& candidates = reResult.candidates; | 276 text.match(mMatchReId, *reResult); |
277 | |
278 auto& candidates = reResult->candidates; | |
290 candidates.push_back(OwnedString()); | 279 candidates.push_back(OwnedString()); |
280 | |
281 FilterPtr blacklistHit; | |
291 for (auto substr : candidates) | 282 for (auto substr : candidates) |
292 if (mFilterByKeyword.find(substr)) | 283 { |
293 { | 284 auto result = mWhitelist.CheckEntryMatch( |
294 auto result = CheckEntryMatch(substr, location, typeMask, docDomain, | 285 substr, location, typeMask, docDomain, thirdParty, sitekey, false); |
295 thirdParty, sitekey, specificOnly); | 286 if (result) |
296 if (result) | 287 return result; |
297 { | 288 |
298 result->AddRef(); | 289 if (!blacklistHit) |
299 return result.get(); | 290 blacklistHit = mBlacklist.CheckEntryMatch( |
300 } | 291 substr, location, typeMask, docDomain, thirdParty, sitekey, |
301 } | 292 specificOnly); |
302 | 293 } |
303 return nullptr; | 294 return blacklistHit; |
304 } | 295 } |
LEFT | RIGHT |