Left: | ||
Right: |
OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | |
3 * Copyright (C) 2006-present eyeo GmbH | |
4 * | |
5 * Adblock Plus is free software: you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License version 3 as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * Adblock Plus is distributed in the hope that it will be useful, | |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 * GNU General Public License for more details. | |
13 * | |
14 * You should have received a copy of the GNU General Public License | |
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
16 */ | |
17 | |
18 #include "Matcher.h" | |
19 #include "RegExpFilter.h" | |
20 #include "../library.h" | |
21 | |
22 namespace { | |
23 const DependentString regexpRegExp = | |
24 u"^(@@)?/.*/(?:\\$~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)?$"_s tr; | |
25 const DependentString optionsRegExp = | |
26 u"\\$(~?[\\w-]+(?:=[^,\\s]+)?(?:,~?[\\w-]+(?:=[^,\\s]+)?)*)$"_str; | |
27 const DependentString candidateRegExp = | |
28 u"[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])"_str; | |
29 } | |
30 | |
31 void Matcher::Add(Filter& filter) | |
32 { | |
33 if (mKeywordByFilter.find(filter.GetText())) | |
34 return; | |
35 | |
36 auto keyword = FindKeyword(filter); | |
37 | |
38 mFilterByKeyword[keyword].push_back(FilterPtr(&filter)); | |
39 mKeywordByFilter[filter.GetText()] = keyword; | |
40 } | |
41 | |
42 void Matcher::Remove(Filter& filter) | |
43 { | |
44 auto entry = mKeywordByFilter.find(filter.GetText()); | |
45 if (!entry) | |
46 return; | |
47 | |
48 auto keyword = entry->second; | |
49 auto list = mFilterByKeyword[keyword]; | |
50 if (list.size() == 1) | |
51 mFilterByKeyword.erase(keyword); | |
52 else | |
53 list.erase(std::find(list.cbegin(), list.cend(), FilterPtr(&filter))); | |
54 | |
55 mKeywordByFilter.erase(filter.GetText()); | |
56 } | |
57 | |
58 void Matcher::Clear() | |
59 { | |
60 mFilterByKeyword.clear(); | |
61 mKeywordByFilter.clear(); | |
62 } | |
63 | |
64 bool Matcher::HasFilter(const Filter& filter) const | |
65 { | |
66 return mKeywordByFilter.find(filter.GetText()); | |
67 } | |
68 | |
69 namespace | |
70 { | |
71 DependentString emptyString = u""_str; | |
72 } | |
73 | |
74 const String& Matcher::GetKeywordForFilter(const Filter& filter) const | |
75 { | |
76 auto entry = mKeywordByFilter.find(filter.GetText()); | |
77 if (entry) | |
78 return entry->second; | |
79 return emptyString; | |
80 } | |
81 | |
82 Filter* Matcher::MatchesAny(const String& location, | |
83 int typeMask, DependentString& docDomain, bool thirdParty, | |
84 const String& sitekey, bool specificOnly) const | |
85 { | |
86 auto re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true); | |
87 OwnedString text(location); | |
88 text.toLower(); | |
89 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); | |
90 MatchRegExp(re_id, text, reResult.get()); | |
91 auto& candidates = reResult->candidates; | |
92 candidates.push_back(OwnedString()); | |
93 for (auto substr : candidates) | |
94 { | |
95 if (mFilterByKeyword.find(substr)) | |
96 { | |
97 auto result = CheckEntryMatch(substr, location, typeMask, docDomain, | |
98 thirdParty, sitekey, specificOnly); | |
99 if (result) | |
100 return result.release(); | |
101 } | |
102 } | |
103 return nullptr; | |
104 } | |
105 | |
106 OwnedString Matcher::FindKeyword(const Filter& filter) const | |
107 { | |
108 OwnedString result(u""_str); | |
109 OwnedString text(filter.GetText()); | |
110 auto re_id = GenerateRegExp(regexpRegExp, true, false); | |
111 if (TestRegExp(re_id, text)) | |
112 return result; | |
113 | |
114 // Remove options | |
115 auto options_re_id = GenerateRegExp(optionsRegExp, true, false); | |
116 auto index = ExecRegExp(options_re_id, text); | |
117 if (index != String::npos) | |
118 text = text.substr(0, index); | |
119 | |
120 // Remove whitelist marker | |
121 if (text.length() >= 2 && text[0] == '@' && text[1] == '@') | |
122 text = text.substr(2); | |
123 | |
124 text.toLower(); | |
125 intrusive_ptr<ReMatchResults> keywords(new ReMatchResults, false); | |
126 auto candidates_re_id = GenerateRegExp(candidateRegExp, true, true); | |
127 auto match = text.match(candidates_re_id, keywords.get()); | |
128 if (!match) | |
129 return result; | |
130 | |
131 auto& candidates = keywords->candidates; | |
132 | |
133 auto& hash = mFilterByKeyword; | |
134 uint32_t resultCount = 0xffffffff; | |
135 uint32_t resultLength = 0; | |
136 for (auto substr : candidates) | |
137 { | |
138 if (substr.empty()) | |
139 continue; | |
140 | |
141 auto candidate = substr.substr(1); | |
142 auto entry = hash.find(candidate); | |
143 auto count = entry ? entry->second.size() : 0; | |
144 if (count < resultCount || | |
145 (count == resultCount && candidate.length() > resultLength)) | |
146 { | |
147 result = candidate; | |
148 resultCount = count; | |
149 resultLength = candidate.length(); | |
150 } | |
151 } | |
152 return result; | |
153 } | |
154 | |
155 FilterPtr Matcher::CheckEntryMatch(const String& keyword, | |
156 const String& location, | |
157 int typeMask, DependentString& docDomain, bool thirdParty, | |
158 const String& sitekey, bool specificOnly) const | |
159 { | |
160 auto entry = mFilterByKeyword.find(keyword); | |
161 if (entry) | |
162 { | |
sergei
2017/10/04 08:54:33
Earlier return would be better here, in my opinion
hub
2017/10/06 13:49:19
Done.
| |
163 auto list = entry->second; | |
164 for (auto filter : list) | |
165 { | |
166 auto activeFilter = static_cast<ActiveFilter*>(filter.get()); | |
167 if (specificOnly && activeFilter->IsGeneric() && | |
168 !(activeFilter->mType != Filter::Type::WHITELIST)) | |
169 continue; | |
170 | |
171 auto reFilter = static_cast<RegExpFilter*>(activeFilter); | |
172 if (reFilter->Matches(location, typeMask, docDomain, thirdParty, sitekey)) | |
173 return filter; | |
174 } | |
175 } | |
176 return FilterPtr(); | |
177 } | |
178 | |
179 const size_t CombinedMatcher::MAX_CACHE_ENTRIES = 1000; | |
180 | |
181 void CombinedMatcher::Add(Filter& filter) | |
182 { | |
183 if (filter.mType == Filter::Type::WHITELIST) | |
184 mWhitelist.Add(filter); | |
185 else | |
186 mBlacklist.Add(filter); | |
187 | |
188 ResetCache(); | |
189 } | |
190 | |
191 void CombinedMatcher::Remove(Filter& filter) | |
192 { | |
193 if (filter.mType == Filter::Type::WHITELIST) | |
194 mWhitelist.Remove(filter); | |
195 else | |
196 mBlacklist.Remove(filter); | |
197 | |
198 ResetCache(); | |
199 } | |
200 | |
201 void CombinedMatcher::Clear() | |
202 { | |
203 mBlacklist.Clear(); | |
204 mWhitelist.Clear(); | |
205 ResetCache(); | |
206 } | |
207 | |
208 bool CombinedMatcher::HasFilter(const Filter& filter) const | |
209 { | |
210 return filter.mType == Filter::Type::WHITELIST ? | |
211 mWhitelist.HasFilter(filter) : mBlacklist.HasFilter(filter); | |
212 } | |
213 | |
214 const String& CombinedMatcher::GetKeywordForFilter(const Filter& filter) const | |
215 { | |
216 return filter.mType == Filter::Type::WHITELIST ? | |
217 mWhitelist.GetKeywordForFilter(filter) : mBlacklist.GetKeywordForFilter(filt er); | |
218 } | |
219 | |
220 Filter* CombinedMatcher::MatchesAny(const String& location, | |
221 int typeMask, DependentString& docDomain, bool thirdParty, | |
222 const String& sitekey, bool specificOnly) | |
223 { | |
224 OwnedString key(location); | |
225 key.append(u" "_str); | |
226 key.append(typeMask); | |
227 key.append(u" "_str); | |
228 key.append(docDomain); | |
229 key.append(u" "_str); | |
230 key.append(thirdParty); | |
231 key.append(u" "_str); | |
232 key.append(sitekey); | |
233 key.append(u" "_str); | |
234 key.append(specificOnly); | |
235 | |
236 FilterPtr result; | |
237 | |
238 auto cachedResult = mResultCache.find(key); | |
239 if (cachedResult) | |
240 result = cachedResult->second; | |
241 else | |
242 { | |
243 result = MatchesAnyInternal(location, typeMask, docDomain, | |
244 thirdParty, sitekey, specificOnly); | |
245 | |
246 if (mResultCache.size() >= MAX_CACHE_ENTRIES) | |
247 ResetCache(); | |
248 | |
249 mResultCache[key] = result; | |
250 } | |
251 | |
252 return result.release(); | |
253 } | |
254 | |
255 OwnedString CombinedMatcher::FindKeyword(const Filter& filter) const | |
256 { | |
257 return filter.mType == Filter::Type::WHITELIST ? | |
258 mWhitelist.FindKeyword(filter) : mBlacklist.FindKeyword(filter); | |
259 } | |
260 | |
261 void CombinedMatcher::ResetCache() | |
262 { | |
263 mResultCache.clear(); | |
264 } | |
265 | |
266 FilterPtr CombinedMatcher::MatchesAnyInternal(const String& location, | |
267 int typeMask, DependentString& docDomain, bool thirdParty, | |
268 const String& sitekey, bool specificOnly) const | |
269 { | |
270 OwnedString text(location); | |
271 text.toLower(); | |
272 auto match_re_id = GenerateRegExp(u"[a-z0-9%]{3,}"_str, true, true); | |
273 intrusive_ptr<ReMatchResults> reResult(new ReMatchResults, false); | |
274 text.match(match_re_id, reResult.get()); | |
275 | |
276 auto& candidates = reResult->candidates; | |
277 candidates.push_back(OwnedString()); | |
278 | |
279 FilterPtr blacklistHit; | |
280 for (auto substr : candidates) | |
281 { | |
282 auto result = mWhitelist.CheckEntryMatch( | |
283 substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly); | |
284 if (result) | |
285 return result; | |
286 | |
287 if (!blacklistHit) | |
288 blacklistHit = mBlacklist.CheckEntryMatch( | |
289 substr, location, typeMask, docDomain, thirdParty, sitekey, | |
290 specificOnly); | |
291 } | |
292 return blacklistHit; | |
293 } | |
OLD | NEW |