Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/requestBlocker.js

Issue 29760707: Issue 6622 - Implement $rewrite filter option (Closed) Base URL: https://hg.adblockplus.org/adblockpluschrome/
Left Patch Set: Created April 24, 2018, 8:33 p.m.
Right Patch Set: Updated proper revision for adblockpluscore. No dependency change for ui Created May 18, 2018, 3:27 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « dependencies ('k') | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT RIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 /** @module requestBlocker */ 18 /** @module requestBlocker */
19 19
20 "use strict"; 20 "use strict";
21 21
22 const {Filter, RegExpFilter, BlockingFilter} = 22 const {Filter, RegExpFilter, BlockingFilter} =
23 require("../adblockpluscore/lib/filterClasses"); 23 require("../adblockpluscore/lib/filterClasses");
24 const {Subscription} = require("../adblockpluscore/lib/subscriptionClasses"); 24 const {Subscription} = require("../adblockpluscore/lib/subscriptionClasses");
25 const {defaultMatcher} = require("../adblockpluscore/lib/matcher"); 25 const {defaultMatcher} = require("../adblockpluscore/lib/matcher");
26 const {FilterNotifier} = require("../adblockpluscore/lib/filterNotifier"); 26 const {FilterNotifier} = require("../adblockpluscore/lib/filterNotifier");
27 const {Prefs} = require("./prefs"); 27 const {Prefs} = require("./prefs");
28 const {checkWhitelisted, getKey} = require("./whitelisting"); 28 const {checkWhitelisted, getKey} = require("./whitelisting");
29 const {stringifyURL, extractHostFromFrame, isThirdParty} = require("./url"); 29 const {extractHostFromFrame, isThirdParty} = require("./url");
30 const {port} = require("./messaging"); 30 const {port} = require("./messaging");
31 const devtools = require("./devtools"); 31 const {logRequest: hitLoggerLogRequest} = require("./hitLogger");
32 32
33 const extensionProtocol = new URL(browser.extension.getURL("")).protocol; 33 const extensionProtocol = new URL(browser.extension.getURL("")).protocol;
34 34
35 // Chrome can't distinguish between OBJECT_SUBREQUEST and OBJECT requests. 35 // Chrome can't distinguish between OBJECT_SUBREQUEST and OBJECT requests.
36 if (!browser.webRequest.ResourceType || 36 if (!browser.webRequest.ResourceType ||
37 !("OBJECT_SUBREQUEST" in browser.webRequest.ResourceType)) 37 !("OBJECT_SUBREQUEST" in browser.webRequest.ResourceType))
38 { 38 {
39 RegExpFilter.typeMap.OBJECT_SUBREQUEST = RegExpFilter.typeMap.OBJECT; 39 RegExpFilter.typeMap.OBJECT_SUBREQUEST = RegExpFilter.typeMap.OBJECT;
40 } 40 }
41 41
(...skipping 21 matching lines...) Expand all
63 if (!(browser.webRequest.ResourceType)) 63 if (!(browser.webRequest.ResourceType))
64 return; 64 return;
65 65
66 for (let type in browser.webRequest.ResourceType) 66 for (let type in browser.webRequest.ResourceType)
67 yield resourceTypes.get(browser.webRequest.ResourceType[type]) || "OTHER"; 67 yield resourceTypes.get(browser.webRequest.ResourceType[type]) || "OTHER";
68 68
69 // WEBRTC gets addressed through a workaround, even if the webRequest API is 69 // WEBRTC gets addressed through a workaround, even if the webRequest API is
70 // lacking support to block this kind of a request. 70 // lacking support to block this kind of a request.
71 yield "WEBRTC"; 71 yield "WEBRTC";
72 72
73 // POPUP, CSP, REWRITE and ELEMHIDE filters aren't mapped to resource types. 73 // POPUP, CSP and ELEMHIDE filters aren't mapped to resource types.
74 yield "POPUP"; 74 yield "POPUP";
75 yield "ELEMHIDE"; 75 yield "ELEMHIDE";
76 yield "CSP"; 76 yield "CSP";
77 yield "REWRITE";
78 }()); 77 }());
79 78
80 function getDocumentInfo(page, frame, originUrl) 79 function getDocumentInfo(page, frame, originUrl)
81 { 80 {
82 return [ 81 return [
83 extractHostFromFrame(frame, originUrl), 82 extractHostFromFrame(frame, originUrl),
84 getKey(page, frame, originUrl), 83 getKey(page, frame, originUrl),
85 !!checkWhitelisted(page, frame, originUrl, 84 !!checkWhitelisted(page, frame, originUrl,
86 RegExpFilter.typeMap.GENERICBLOCK) 85 RegExpFilter.typeMap.GENERICBLOCK)
87 ]; 86 ];
88 } 87 }
89 88
90 function matchRequest(url, type, docDomain, sitekey, specificOnly) 89 function matchRequest(url, type, docDomain, sitekey, specificOnly)
91 { 90 {
92 let urlString = stringifyURL(url);
93 let thirdParty = isThirdParty(url, docDomain); 91 let thirdParty = isThirdParty(url, docDomain);
94 92 let filter = defaultMatcher.matchesAny(url.href, RegExpFilter.typeMap[type],
95 return [ 93 docDomain, thirdParty,
96 defaultMatcher.matchesAny(urlString, RegExpFilter.typeMap[type], 94 sitekey, specificOnly);
97 docDomain, thirdParty, sitekey, specificOnly), 95 return [filter, thirdParty];
98 urlString,
99 thirdParty
100 ];
101 } 96 }
102 97
103 function getRelatedTabIds(details) 98 function getRelatedTabIds(details)
104 { 99 {
105 // This is the common case, the request is associated with a single tab. 100 // This is the common case, the request is associated with a single tab.
106 // If tabId is -1, it's not (e.g. the request was sent by 101 // If tabId is -1, it's not (e.g. the request was sent by
107 // a Service/Shared Worker) and we have to identify the related tabs. 102 // a Service/Shared Worker) and we have to identify the related tabs.
108 if (details.tabId != -1) 103 if (details.tabId != -1)
109 return Promise.resolve([details.tabId]); 104 return Promise.resolve([details.tabId]);
110 105
111 let url; // Firefox provides "originUrl" indicating the 106 let url; // Firefox provides "originUrl" indicating the
112 if (details.originUrl) // URL of the tab that caused this request. 107 if (details.originUrl) // URL of the tab that caused this request.
113 url = details.originUrl; // In case of Service/Shared Worker, this is the 108 url = details.originUrl; // In case of Service/Shared Worker, this is the
114 // URL of the tab that caused the worker to spawn. 109 // URL of the tab that caused the worker to spawn.
115 110
116 else if (details.initiator) // Chromium >=63 provides "initiator" which 111 else if (details.initiator && details.initiator != "null")
117 url = details.initiator + "/*"; // is equivalent to "originUrl" on Firefox 112 url = details.initiator + "/*"; // Chromium >=63 provides "initiator" which
113 // is equivalent to "originUrl" on Firefox
118 // except that it's not a full URL but just 114 // except that it's not a full URL but just
119 // an origin (proto + host). 115 // an origin (proto + host).
120 else 116 else
121 return Promise.resolve([]); 117 return Promise.resolve([]);
122 118
123 return browser.tabs.query({url}).then(tabs => tabs.map(tab => tab.id)); 119 return browser.tabs.query({url}).then(tabs => tabs.map(tab => tab.id));
124 } 120 }
125 121
126 function logRequest(tabIds, url, type, docDomain, thirdParty, 122 function logRequest(tabIds, request, filter)
127 sitekey, specificOnly, filter)
128 { 123 {
129 if (filter) 124 if (filter)
130 FilterNotifier.emit("filter.hitCount", filter, 0, 0, tabIds); 125 FilterNotifier.emit("filter.hitCount", filter, 0, 0, tabIds);
131 126
132 devtools.logRequest( 127 hitLoggerLogRequest(tabIds, request, filter);
133 tabIds, url, type, docDomain,
134 thirdParty, sitekey,
135 specificOnly, filter
136 );
137 } 128 }
138 129
139 browser.webRequest.onBeforeRequest.addListener(details => 130 browser.webRequest.onBeforeRequest.addListener(details =>
140 { 131 {
141 // Never block top-level documents. 132 // Never block top-level documents.
142 if (details.type == "main_frame") 133 if (details.type == "main_frame")
143 return; 134 return;
144 135
145 // Filter out requests from non web protocols. Ideally, we'd explicitly 136 // Filter out requests from non web protocols. Ideally, we'd explicitly
146 // specify the protocols we are interested in (i.e. http://, https://, 137 // specify the protocols we are interested in (i.e. http://, https://,
147 // ws:// and wss://) with the url patterns, given below, when adding this 138 // ws:// and wss://) with the url patterns, given below, when adding this
148 // listener. But unfortunately, Chrome <=57 doesn't support the WebSocket 139 // listener. But unfortunately, Chrome <=57 doesn't support the WebSocket
149 // protocol and is causing an error if it is given. 140 // protocol and is causing an error if it is given.
150 let url = new URL(details.url); 141 let url = new URL(details.url);
151 if (url.protocol != "http:" && url.protocol != "https:" && 142 if (url.protocol != "http:" && url.protocol != "https:" &&
152 url.protocol != "ws:" && url.protocol != "wss:") 143 url.protocol != "ws:" && url.protocol != "wss:")
153 return; 144 return;
154 145
155 // Firefox provides us with the full origin URL, while Chromium (>=63) 146 // Firefox provides us with the full origin URL, while Chromium (>=63)
156 // provides only the protocol + host of the (top-level) document which 147 // provides only the protocol + host of the (top-level) document which
157 // the request originates from through the "initiator" property. 148 // the request originates from through the "initiator" property.
158 let originUrl = details.originUrl ? new URL(details.originUrl) : 149 let originUrl = null;
159 details.initiator ? new URL(details.initiator) : null; 150 if (details.originUrl)
151 originUrl = new URL(details.originUrl);
152 else if (details.initiator && details.initiator != "null")
153 originUrl = new URL(details.initiator);
160 154
161 // Ignore requests sent by extensions or by Firefox itself: 155 // Ignore requests sent by extensions or by Firefox itself:
162 // * Firefox intercepts requests sent by any extensions, indicated with 156 // * Firefox intercepts requests sent by any extensions, indicated with
163 // an "originURL" starting with "moz-extension:". 157 // an "originURL" starting with "moz-extension:".
164 // * Chromium intercepts requests sent by this extension only, indicated 158 // * Chromium intercepts requests sent by this extension only, indicated
165 // on Chromium >=63 with an "initiator" starting with "chrome-extension:". 159 // on Chromium >=63 with an "initiator" starting with "chrome-extension:".
166 // * On Firefox, requests that don't relate to any document or extension are 160 // * On Firefox, requests that don't relate to any document or extension are
167 // indicated with an "originUrl" starting with "chrome:". 161 // indicated with an "originUrl" starting with "chrome:".
168 if (originUrl && (originUrl.protocol == extensionProtocol || 162 if (originUrl && (originUrl.protocol == extensionProtocol ||
169 originUrl.protocol == "chrome:")) 163 originUrl.protocol == "chrome:"))
170 return; 164 return;
171 165
172 let page = new ext.Page({id: details.tabId}); 166 let page = new ext.Page({id: details.tabId});
173 let frame = ext.getFrame( 167 let frame = ext.getFrame(
174 details.tabId, 168 details.tabId,
175 // We are looking for the frame that contains the element which 169 // We are looking for the frame that contains the element which
176 // has triggered this request. For most requests (e.g. images) we 170 // has triggered this request. For most requests (e.g. images) we
177 // can just use the request's frame ID, but for subdocument requests 171 // can just use the request's frame ID, but for subdocument requests
178 // (e.g. iframes) we must instead use the request's parent frame ID. 172 // (e.g. iframes) we must instead use the request's parent frame ID.
179 details.type == "sub_frame" ? details.parentFrameId : details.frameId 173 details.type == "sub_frame" ? details.parentFrameId : details.frameId
180 ); 174 );
181 175
182 // On Chromium >= 63, if both the frame is unknown and we haven't got 176 // On Chromium >= 63, if both the frame is unknown and we haven't got
183 // an "initator", this implies a request sent by the browser itself 177 // an "initiator", this implies a request sent by the browser itself
184 // (on older versions of Chromium, due to the lack of "initator", 178 // (on older versions of Chromium, due to the lack of "initiator",
185 // this can also indicate a request sent by a Shared/Service Worker). 179 // this can also indicate a request sent by a Shared/Service Worker).
186 if (!frame && !originUrl) 180 if (!frame && !originUrl)
187 return; 181 return;
188 182
189 if (checkWhitelisted(page, frame, originUrl)) 183 if (checkWhitelisted(page, frame, originUrl))
190 return; 184 return;
191 185
192 let type = resourceTypes.get(details.type) || "OTHER"; 186 let type = resourceTypes.get(details.type) || "OTHER";
193 let [docDomain, sitekey, specificOnly] = getDocumentInfo(page, frame, 187 let [docDomain, sitekey, specificOnly] = getDocumentInfo(page, frame,
194 originUrl); 188 originUrl);
195 let [filter, urlString, thirdParty] = matchRequest(url, type, docDomain, 189 let [filter, thirdParty] = matchRequest(url, type, docDomain,
196 sitekey, specificOnly); 190 sitekey, specificOnly);
197 191
198 if (filter instanceof RegExpFilter && filter.rewrite) 192 let result;
199 { 193 let rewrittenUrl;
200 let matches = filter.regexp.exec(urlString); 194
201 if (matches) 195 if (filter instanceof BlockingFilter)
196 {
197 if (filter.rewrite)
202 { 198 {
203 let rewritten = filter.rewrite.replace("%1", matches[0]); 199 rewrittenUrl = filter.rewriteUrl(details.url);
hub 2018/04/24 20:39:05 I actually wonder if we should move the rewrite pa
hub 2018/04/25 03:39:08 moved this to core in the end.
204 if (rewritten != urlString) 200 // If no rewrite happened (error, different origin), we'll
201 // return undefined in order to avoid an "infinite" loop.
202 if (rewrittenUrl != details.url)
203 result = {redirectUrl: rewrittenUrl};
204 }
205 else
206 result = {cancel: true};
207 }
208
209 getRelatedTabIds(details).then(tabIds =>
210 {
211 logRequest(
212 tabIds,
205 { 213 {
206 getRelatedTabIds(details).then(tabIds => 214 url: details.url, type, docDomain, thirdParty,
207 { 215 sitekey, specificOnly, rewrittenUrl
208 logRequest(tabIds, urlString, "REWRITE", docDomain, 216 },
209 thirdParty, sitekey, specificOnly, filter); 217 filter
210 }); 218 );
211
212 return {redirectUrl: rewritten};
213 }
214 }
215 // we couldn't do the rewrite, so just let it through.
216 return;
217 }
218
219 getRelatedTabIds(details).then(tabIds =>
220 {
221 logRequest(tabIds, urlString, type, docDomain,
222 thirdParty, sitekey, specificOnly, filter);
223 }); 219 });
224 220
225 if (filter instanceof BlockingFilter) 221 return result;
226 return {cancel: true};
227 }, {urls: ["<all_urls>"]}, ["blocking"]); 222 }, {urls: ["<all_urls>"]}, ["blocking"]);
228 223
229 port.on("filters.collapse", (message, sender) => 224 port.on("filters.collapse", (message, sender) =>
230 { 225 {
231 let {page, frame} = sender; 226 let {page, frame} = sender;
232 227
233 if (checkWhitelisted(page, frame)) 228 if (checkWhitelisted(page, frame))
234 return false; 229 return false;
235 230
236 let blocked = false; 231 let blocked = false;
(...skipping 16 matching lines...) Expand all
253 return blocked && Prefs.hidePlaceholders; 248 return blocked && Prefs.hidePlaceholders;
254 }); 249 });
255 250
256 port.on("request.blockedByRTCWrapper", (msg, sender) => 251 port.on("request.blockedByRTCWrapper", (msg, sender) =>
257 { 252 {
258 let {page, frame} = sender; 253 let {page, frame} = sender;
259 254
260 if (checkWhitelisted(page, frame)) 255 if (checkWhitelisted(page, frame))
261 return false; 256 return false;
262 257
258 let {url} = msg;
263 let [docDomain, sitekey, specificOnly] = getDocumentInfo(page, frame); 259 let [docDomain, sitekey, specificOnly] = getDocumentInfo(page, frame);
264 let [filter, url, thirdParty] = matchRequest(new URL(msg.url), 260 let [filter, thirdParty] = matchRequest(new URL(url), "WEBRTC", docDomain,
265 "WEBRTC", docDomain, 261 sitekey, specificOnly);
266 sitekey, specificOnly); 262 logRequest(
267 263 [sender.page.id],
268 logRequest([sender.page.id], url, "WEBRTC", docDomain, 264 {url, type: "WEBRTC", docDomain, thirdParty, sitekey, specificOnly},
269 thirdParty, sitekey, specificOnly, filter); 265 filter
266 );
270 267
271 return filter instanceof BlockingFilter; 268 return filter instanceof BlockingFilter;
272 }); 269 });
273 270
274 let ignoreFilterNotifications = false; 271 let ignoreFilterNotifications = false;
275 let handlerBehaviorChangedQuota = 272 let handlerBehaviorChangedQuota =
276 browser.webRequest.MAX_HANDLER_BEHAVIOR_CHANGED_CALLS_PER_10_MINUTES; 273 browser.webRequest.MAX_HANDLER_BEHAVIOR_CHANGED_CALLS_PER_10_MINUTES;
277 274
278 function propagateHandlerBehaviorChange() 275 function propagateHandlerBehaviorChange()
279 { 276 {
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
331 } 328 }
332 329
333 FilterNotifier.on("subscription.added", onFilterChange); 330 FilterNotifier.on("subscription.added", onFilterChange);
334 FilterNotifier.on("subscription.removed", onFilterChange); 331 FilterNotifier.on("subscription.removed", onFilterChange);
335 FilterNotifier.on("subscription.updated", onFilterChange); 332 FilterNotifier.on("subscription.updated", onFilterChange);
336 FilterNotifier.on("subscription.disabled", arg => onFilterChange(arg, true)); 333 FilterNotifier.on("subscription.disabled", arg => onFilterChange(arg, true));
337 FilterNotifier.on("filter.added", onFilterChange); 334 FilterNotifier.on("filter.added", onFilterChange);
338 FilterNotifier.on("filter.removed", onFilterChange); 335 FilterNotifier.on("filter.removed", onFilterChange);
339 FilterNotifier.on("filter.disabled", arg => onFilterChange(arg, true)); 336 FilterNotifier.on("filter.disabled", arg => onFilterChange(arg, true));
340 FilterNotifier.on("load", onFilterChange); 337 FilterNotifier.on("load", onFilterChange);
LEFT RIGHT
« dependencies ('k') | no next file » | Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Toggle Comments ('s')

Powered by Google App Engine
This is Rietveld