Left: | ||
Right: |
OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | |
3 * Copyright (C) 2006-present eyeo GmbH | |
4 * | |
5 * Adblock Plus is free software: you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License version 3 as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * Adblock Plus is distributed in the hope that it will be useful, | |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 * GNU General Public License for more details. | |
13 * | |
14 * You should have received a copy of the GNU General Public License | |
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
16 */ | |
17 | |
18 #include "Matcher.h" | |
19 #include "RegExpFilter.h" | |
20 #include "../library.h" | |
21 | |
22 class CombinedMatcher : public MatcherBase | |
23 { | |
24 private: | |
25 StringMap<Filter*> mResultCache; | |
hub
2017/09/26 21:49:00
I wanted to use FilterPtr in there, but it didn't
| |
26 static const size_t MAX_CACHE_ENTRIES = 1000; | |
27 int mMatchReId; | |
28 public: | |
29 Matcher mBlacklist; | |
30 Matcher mWhitelist; | |
31 | |
32 CombinedMatcher() | |
33 : mMatchReId(GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true)) | |
34 { | |
35 } | |
36 | |
37 ~CombinedMatcher() | |
38 { | |
39 DeleteRegExp(mMatchReId); | |
40 } | |
41 | |
42 void ResetCache() | |
43 { | |
44 for (auto filter : mResultCache) | |
45 filter.second->ReleaseRef(); | |
hub
2017/09/26 21:49:00
See above: if we could have the FilterPtr as the d
| |
46 mResultCache.clear(); | |
47 } | |
48 | |
49 void Add(const FilterPtr& filter) override | |
50 { | |
51 if (filter->mType == Filter::Type::WHITELIST) | |
52 mWhitelist.Add(filter); | |
53 else | |
54 mBlacklist.Add(filter); | |
55 | |
56 ResetCache(); | |
57 } | |
58 | |
59 void Remove(const FilterPtr& filter) override | |
60 { | |
61 if (filter->mType == Filter::Type::WHITELIST) | |
62 mWhitelist.Remove(filter); | |
63 else | |
64 mBlacklist.Remove(filter); | |
65 | |
66 ResetCache(); | |
67 } | |
68 | |
69 void Clear() override | |
70 { | |
71 mBlacklist.Clear(); | |
72 mWhitelist.Clear(); | |
73 ResetCache(); | |
74 } | |
75 | |
76 OwnedString FindKeyword(const FilterPtr& filter) override | |
77 { | |
78 if (filter->mType == Filter::Type::WHITELIST) | |
79 return mWhitelist.FindKeyword(filter); | |
80 return mBlacklist.FindKeyword(filter); | |
81 } | |
82 | |
83 bool HasFilter(const FilterPtr& filter) const override | |
84 { | |
85 if (filter->mType == Filter::Type::WHITELIST) | |
86 return mWhitelist.HasFilter(filter); | |
87 return mBlacklist.HasFilter(filter); | |
88 } | |
89 | |
90 const String& GetKeywordForFilter(const FilterPtr& filter) override | |
91 { | |
92 if (filter->mType == Filter::Type::WHITELIST) | |
93 return mWhitelist.GetKeywordForFilter(filter); | |
94 return mBlacklist.GetKeywordForFilter(filter); | |
95 } | |
96 | |
97 Filter* MatchesAnyInternal(const String& location, | |
98 int typeMask, DependentString& docDomain, bool thirdParty, | |
99 const String& sitekey, bool specificOnly) | |
100 { | |
101 ReMatchResults reResult; | |
102 OwnedString text(location); | |
103 text.toLower(); | |
104 text.match(mMatchReId, &reResult); | |
105 | |
106 auto& candidates = reResult.candidates; | |
107 candidates.push_back(OwnedString()); | |
108 | |
109 Filter* blacklistHit = nullptr; | |
110 for (size_t i = 0, l = candidates.size(); i < l; i++) | |
111 { | |
112 auto substr = candidates[i]; | |
113 if (mWhitelist.mFilterByKeyword.find(substr)) | |
114 { | |
115 auto result = mWhitelist._CheckEntryMatch( | |
116 substr, location, typeMask, docDomain, thirdParty, sitekey, specificOn ly); | |
117 if (result) | |
118 return result; | |
119 } | |
120 if (mBlacklist.mFilterByKeyword.find(substr) && !blacklistHit) | |
121 { | |
122 blacklistHit = mBlacklist._CheckEntryMatch( | |
123 substr, location, typeMask, docDomain, thirdParty, sitekey, | |
124 specificOnly); | |
125 } | |
126 } | |
127 return blacklistHit; | |
128 } | |
129 | |
130 Filter* MatchesAny(const String& location, | |
131 int typeMask, DependentString& docDomain, bool thirdParty, | |
132 const String& sitekey, bool specificOnly) override | |
133 { | |
134 OwnedString key(location); | |
135 key.append(u" "_str); | |
136 key.append(typeMask); | |
137 key.append(u" "_str); | |
138 key.append(docDomain); | |
139 key.append(u" "_str); | |
140 key.append(thirdParty); | |
141 key.append(u" "_str); | |
142 key.append(sitekey); | |
143 key.append(u" "_str); | |
144 key.append(specificOnly); | |
145 | |
146 auto cachedResult = mResultCache.find(key); | |
147 if (cachedResult) | |
148 { | |
149 cachedResult->second->AddRef(); | |
150 return cachedResult->second; | |
151 } | |
152 | |
153 Filter* result = MatchesAnyInternal(location, typeMask, docDomain, | |
154 thirdParty, sitekey, specificOnly); | |
155 | |
156 if (mResultCache.size() >= MAX_CACHE_ENTRIES) | |
157 ResetCache(); | |
158 | |
159 result->AddRef(); | |
160 mResultCache[key] = result; | |
161 | |
162 result->AddRef(); | |
163 return result; | |
164 } | |
165 }; | |
166 | |
167 MatcherBase* MatcherBase::mInstance = new CombinedMatcher; | |
168 | |
169 Matcher::Matcher() | |
170 : mFilterReId(GenerateRegExp(DependentString(Filter::regexpRegExp), true, fals e)) | |
171 , mOptionsReId(GenerateRegExp(DependentString(Filter::optionsRegExp), true, fa lse)) | |
172 , mCandidatesReId(GenerateRegExp(u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_st r, true, true)) | |
173 { | |
174 } | |
175 | |
176 Matcher::~Matcher() | |
177 { | |
178 DeleteRegExp(mFilterReId); | |
179 DeleteRegExp(mOptionsReId); | |
180 DeleteRegExp(mCandidatesReId); | |
181 } | |
182 | |
183 OwnedString Matcher::FindKeyword(const FilterPtr& filter) | |
184 { | |
185 OwnedString result(u""_str); | |
186 OwnedString text(filter->GetText()); | |
187 if (TestRegExp(mFilterReId, text)) | |
188 return result; | |
189 | |
190 // Remove options | |
191 auto index = ExecRegExp(mOptionsReId, text); | |
192 if (index != -1) | |
193 text = text.substr(0, index); | |
194 | |
195 // Remove whitelist marker | |
196 if (text[0] == '@' && text[1] == '@') | |
197 text = text.substr(2); | |
198 | |
199 text.toLower(); | |
200 ReMatchResults keywords; | |
201 auto match = text.match(mCandidatesReId, &keywords); | |
202 if (!match) | |
203 return result; | |
204 | |
205 auto& candidates = keywords.candidates; | |
206 | |
207 auto& hash = mFilterByKeyword; | |
208 uint32_t resultCount = 0xffffffff; | |
209 uint32_t resultLength = 0; | |
210 for (uint32_t i = 0, l = candidates.size(); i < l; i++) | |
211 { | |
212 auto candidate = DependentString(candidates[i]).substr(1); | |
213 auto count = (hash.find(candidate) ? hash[candidate].size() : 0); | |
214 if (count < resultCount || | |
215 (count == resultCount && candidate.length() > resultLength)) | |
216 { | |
217 result = candidate; | |
218 resultCount = count; | |
219 resultLength = candidate.length(); | |
220 } | |
221 } | |
222 | |
223 return result; | |
224 } | |
225 | |
226 void Matcher::Add(const FilterPtr& filter) | |
227 { | |
228 if (mKeywordByFilter.find(filter->GetText())) | |
229 return; | |
230 | |
231 auto keyword = FindKeyword(filter); | |
232 auto oldEntry = mFilterByKeyword.find(keyword); | |
233 if (!oldEntry) | |
234 mFilterByKeyword[keyword] = std::vector<FilterPtr>{filter}; | |
235 else | |
236 mFilterByKeyword[keyword].push_back(filter); | |
237 mKeywordByFilter[filter->GetText()] = keyword; | |
238 } | |
239 | |
240 void Matcher::Remove(const FilterPtr& filter) | |
241 { | |
242 if (!mKeywordByFilter.find(filter->GetText())) | |
243 return; | |
244 | |
245 auto keyword = mKeywordByFilter[filter->GetText()]; | |
246 auto list = mFilterByKeyword[keyword]; | |
247 if (list.size() == 1) | |
248 mFilterByKeyword.erase(keyword); | |
249 else | |
250 { | |
251 auto iter = std::find(list.cbegin(), list.cend(), filter); | |
252 list.erase(iter); | |
253 } | |
254 mKeywordByFilter.erase(filter->GetText()); | |
255 } | |
256 | |
257 void Matcher::Clear() | |
258 { | |
259 mFilterByKeyword.clear(); | |
260 mKeywordByFilter.clear(); | |
261 } | |
262 | |
263 bool Matcher::HasFilter(const FilterPtr& filter) const | |
264 { | |
265 return mKeywordByFilter.find(filter->GetText()); | |
266 } | |
267 | |
268 static DependentString emptyString = u""_str; | |
269 | |
270 const String& Matcher::GetKeywordForFilter(const FilterPtr& filter) | |
271 { | |
272 if (mKeywordByFilter.find(filter->GetText())) | |
273 return mKeywordByFilter[filter->GetText()]; | |
274 return emptyString; | |
275 } | |
276 | |
277 Filter* Matcher::_CheckEntryMatch(const String& keyword, | |
278 const String& location, | |
279 int typeMask, DependentString& docDomain, bool thirdParty, | |
280 const String& sitekey, bool specificOnly) | |
281 { | |
282 auto list = mFilterByKeyword[keyword]; | |
283 for (auto filter : list) { | |
284 auto activeFilter = static_cast<ActiveFilter*>(filter.get()); | |
hub
2017/09/26 21:49:00
This is done without checking. And it is ugly. And
sergei
2017/10/02 12:02:33
Although we don't pass other filters here, what do
| |
285 if (specificOnly && activeFilter->IsGeneric() && | |
286 !(activeFilter->mType != Filter::Type::WHITELIST)) | |
287 continue; | |
288 auto reFilter = static_cast<RegExpFilter*>(activeFilter); | |
hub
2017/09/26 21:49:00
SImilarly as above: this is unchecked.
| |
289 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) | |
290 { | |
291 return filter.get(); | |
292 } | |
293 } | |
294 return nullptr; | |
295 } | |
296 | |
297 Filter* Matcher::MatchesAny(const String& location, | |
298 int typeMask, DependentString& docDomain, bool thirdParty, | |
299 const String& sitekey, bool specificOnly) | |
300 { | |
301 ReMatchResults reResult; | |
302 auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true); | |
303 OwnedString text(location); | |
304 text.toLower(); | |
305 MatchRegExp(re_id, text, &reResult); | |
306 auto& candidates = reResult.candidates; | |
307 candidates.push_back(OwnedString()); | |
308 for (size_t i = 0, l = candidates.size(); i < l; i++) | |
309 { | |
310 auto substr = candidates[i]; | |
311 if (mFilterByKeyword.find(substr)) | |
312 { | |
313 auto result = _CheckEntryMatch(substr, location, typeMask, docDomain, | |
314 thirdParty, sitekey, specificOnly); | |
315 if (result) | |
316 { | |
317 result->AddRef(); | |
318 return result; | |
319 } | |
320 } | |
321 } | |
322 | |
323 return nullptr; | |
324 } | |
OLD | NEW |