Left: | ||
Right: |
OLD | NEW |
---|---|
(Empty) | |
/*
 * This file is part of Adblock Plus <https://adblockplus.org/>,
 * Copyright (C) 2006-present eyeo GmbH
 *
 * Adblock Plus is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 3 as
 * published by the Free Software Foundation.
 *
 * Adblock Plus is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
 */
17 | |
18 #include "Matcher.h" | |
19 #include "RegExpFilter.h" | |
20 #include "../library.h" | |
21 | |
22 namespace { | |
23 const DependentString regexpRegExp = | |
24 u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_s tr; | |
25 const DependentString optionsRegExp = | |
26 u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str; | |
27 const DependentString candidateRegExp = | |
28 u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str; | |
29 const DependentString matchRegExp = u"[a-z0-9%]{3,}"_str; | |
30 } | |
31 | |
32 Matcher::Matcher() | |
33 : mFilterByKeyword(1024), mKeywordByFilter(1024), | |
34 mReId(-1), mOptionsReId(-1), mCandidatesReId(-1) | |
35 { | |
36 mReId = GenerateRegExp(regexpRegExp, true, false); | |
37 mOptionsReId = GenerateRegExp(optionsRegExp, true, false); | |
38 mCandidatesReId = GenerateRegExp(candidateRegExp, true, true); | |
39 mMatchReId = GenerateRegExp(matchRegExp, true, true); | |
40 } | |
41 | |
42 void Matcher::Add(Filter& filter) | |
43 { | |
44 if (mKeywordByFilter.find(filter.GetText())) | |
45 return; | |
46 | |
47 auto keyword = FindKeyword(filter); | |
48 | |
49 mFilterByKeyword[keyword].push_back(FilterPtr(&filter)); | |
sergei
2017/10/11 09:55:16
Although the review is already closed I think it's
| |
50 mKeywordByFilter[filter.GetText()] = | |
51 FilterKeyword(std::move(keyword), filter); | |
52 } | |
53 | |
54 void Matcher::Remove(Filter& filter) | |
55 { | |
56 auto entry = mKeywordByFilter.find(filter.GetText()); | |
57 if (!entry) | |
58 return; | |
59 | |
60 auto& keyword = static_cast<const String&>(entry->second); | |
61 auto list = mFilterByKeyword[keyword]; | |
62 if (list.size() == 1) | |
63 mFilterByKeyword.erase(keyword); | |
64 else | |
65 list.erase(std::find(list.cbegin(), list.cend(), FilterPtr(&filter))); | |
66 | |
67 mKeywordByFilter.erase(filter.GetText()); | |
68 } | |
69 | |
70 void Matcher::Clear() | |
71 { | |
72 mFilterByKeyword.clear(); | |
73 mKeywordByFilter.clear(); | |
74 } | |
75 | |
76 bool Matcher::HasFilter(const Filter& filter) const | |
77 { | |
78 return mKeywordByFilter.find(filter.GetText()); | |
79 } | |
80 | |
81 namespace | |
82 { | |
83 DependentString emptyString = u""_str; | |
84 } | |
85 | |
86 const String& Matcher::GetKeywordForFilter(const Filter& filter) const | |
87 { | |
88 auto entry = mKeywordByFilter.find(filter.GetText()); | |
89 if (entry) | |
90 return static_cast<const String&>(entry->second); | |
91 return emptyString; | |
92 } | |
93 | |
94 Filter* Matcher::MatchesAny(const String& location, | |
95 int typeMask, DependentString& docDomain, bool thirdParty, | |
96 const String& sitekey, bool specificOnly) const | |
97 { | |
98 OwnedString text(location); | |
99 text.toLower(); | |
100 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); | |
101 if (text.match(mMatchReId, *reResult)) | |
102 { | |
103 auto& candidates = reResult->candidates; | |
104 candidates.push_back(OwnedString()); | |
105 for (auto candidate : candidates) | |
106 { | |
107 auto result = CheckEntryMatch(candidate, location, typeMask, docDomain, | |
108 thirdParty, sitekey, specificOnly); | |
109 if (result) | |
110 return result.release(); | |
111 } | |
112 } | |
Wladimir Palant
2017/10/09 08:39:47
As mentioned in the issue description, we should n
sergei
2017/10/09 15:27:53
Although it merely converts the existing JS code a
Wladimir Palant
2017/10/10 07:39:05
I strongly disagree. Landing crappy code is always
| |
113 return nullptr; | |
114 } | |
115 | |
116 OwnedString Matcher::FindKeyword(const Filter& filter) const | |
117 { | |
118 OwnedString result; | |
119 OwnedString text(filter.GetText()); | |
120 if (TestRegExp(mReId, text)) | |
121 return result; | |
122 | |
123 // Remove options | |
124 auto index = ExecRegExp(mOptionsReId, text); | |
125 if (index != String::npos) | |
126 text = DependentString(text, 0, index); | |
127 | |
128 // Remove whitelist marker | |
129 if (text.length() >= 2 && text[0] == '@' && text[1] == '@') | |
130 text = DependentString(text, 2); | |
131 | |
132 text.toLower(); | |
133 intrusive_ptr<ReMatchResults> keywords(new ReMatchResults, false); | |
134 auto match = text.match(mCandidatesReId, *keywords); | |
135 if (!match) | |
136 return result; | |
137 | |
138 auto& candidates = keywords->candidates; | |
139 | |
140 uint32_t resultCount = 0xffffff; | |
141 uint32_t resultLength = 0; | |
142 for (auto substr : candidates) | |
143 { | |
144 if (substr.empty()) | |
145 continue; | |
146 | |
147 auto candidate = DependentString(substr, 1); | |
148 auto entry = mFilterByKeyword.find(candidate); | |
149 auto count = entry ? entry->second.size() : 0; | |
150 if (count < resultCount || | |
151 (count == resultCount && candidate.length() > resultLength)) | |
152 { | |
153 result = candidate; | |
154 resultCount = count; | |
155 resultLength = candidate.length(); | |
156 } | |
157 } | |
158 return result; | |
159 } | |
160 | |
161 FilterPtr Matcher::CheckEntryMatch(const String& keyword, | |
162 const String& location, | |
163 int typeMask, DependentString& docDomain, bool thirdParty, | |
164 const String& sitekey, bool specificOnly) const | |
165 { | |
166 auto entry = mFilterByKeyword.find(keyword); | |
167 if (!entry) | |
168 return FilterPtr(); | |
169 | |
170 auto filters = entry->second; | |
171 for (auto filter : filters) | |
172 { | |
173 auto activeFilter = static_cast<ActiveFilter*>(filter.get()); | |
174 if (specificOnly && activeFilter->IsGeneric() && | |
175 (activeFilter->mType != Filter::Type::WHITELIST)) | |
176 continue; | |
177 | |
178 auto reFilter = static_cast<RegExpFilter*>(activeFilter); | |
179 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) | |
180 return filter; | |
181 } | |
182 | |
183 return FilterPtr(); | |
184 } | |
185 | |
186 const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000; | |
187 | |
188 CombinedMatcher::CombinedMatcher() | |
189 : mResultCache(1024), mMatchReId(-1) | |
190 { | |
191 mMatchReId = GenerateRegExp(matchRegExp, true, true); | |
192 } | |
193 | |
194 void CombinedMatcher::Add(Filter& filter) | |
195 { | |
196 GetMatcher(filter).Add(filter); | |
197 ResetCache(); | |
198 } | |
199 | |
200 void CombinedMatcher::Remove(Filter& filter) | |
201 { | |
202 GetMatcher(filter).Remove(filter); | |
203 ResetCache(); | |
204 } | |
205 | |
206 void CombinedMatcher::Clear() | |
207 { | |
208 mBlacklist.Clear(); | |
209 mWhitelist.Clear(); | |
210 ResetCache(); | |
211 } | |
212 | |
213 bool CombinedMatcher::HasFilter(const Filter& filter) const | |
214 { | |
215 return GetMatcher(filter).HasFilter(filter); | |
216 } | |
217 | |
218 const String& CombinedMatcher::GetKeywordForFilter(const Filter& filter) const | |
219 { | |
220 return GetMatcher(filter).GetKeywordForFilter(filter); | |
221 } | |
222 | |
223 Filter* CombinedMatcher::MatchesAny(const String& location, | |
224 int typeMask, DependentString& docDomain, bool thirdParty, | |
225 const String& sitekey, bool specificOnly) | |
226 { | |
227 OwnedString key(location); | |
228 key.append(u" "_str); | |
229 key.append(typeMask); | |
230 key.append(u" "_str); | |
231 key.append(docDomain); | |
232 key.append(u" "_str); | |
233 key.append(thirdParty); | |
234 key.append(u" "_str); | |
235 key.append(sitekey); | |
236 key.append(u" "_str); | |
237 key.append(specificOnly); | |
238 | |
239 FilterPtr result; | |
240 | |
241 auto cachedResult = mResultCache.find(key); | |
242 if (cachedResult) | |
243 result = cachedResult->second.filter(); | |
244 else | |
245 { | |
246 result = MatchesAnyInternal(location, typeMask, docDomain, | |
247 thirdParty, sitekey, specificOnly); | |
248 | |
249 if (mResultCache.size() >= MAX_CACHE_ENTRIES) | |
250 ResetCache(); | |
251 | |
252 CacheEntry cache(std::move(key), result); | |
253 mResultCache[cache.key()] = cache; | |
254 } | |
255 | |
256 return result.release(); | |
257 } | |
258 | |
259 OwnedString CombinedMatcher::FindKeyword(const Filter& filter) const | |
260 { | |
261 return GetMatcher(filter).FindKeyword(filter); | |
262 } | |
263 | |
264 void CombinedMatcher::ResetCache() | |
265 { | |
266 mResultCache.clear(); | |
267 } | |
268 | |
269 FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location, | |
270 int typeMask, DependentString& docDomain, bool thirdParty, | |
271 const String& sitekey, bool specificOnly) const | |
272 { | |
273 OwnedString text(location); | |
274 text.toLower(); | |
275 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); | |
276 text.match(mMatchReId, *reResult); | |
277 | |
278 auto& candidates = reResult->candidates; | |
279 candidates.push_back(OwnedString()); | |
280 | |
281 FilterPtr blacklistHit; | |
282 for (auto substr : candidates) | |
283 { | |
284 auto result = mWhitelist.CheckEntryMatch( | |
285 substr, location, typeMask, docDomain, thirdParty, sitekey, false); | |
286 if (result) | |
287 return result; | |
288 | |
289 if (!blacklistHit) | |
290 blacklistHit = mBlacklist.CheckEntryMatch( | |
291 substr, location, typeMask, docDomain, thirdParty, sitekey, | |
292 specificOnly); | |
293 } | |
294 return blacklistHit; | |
295 } | |
OLD | NEW |