Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/crawler.js

Issue 29338242: Issue 3792 - Fix to support multiprocess firefox (Closed)
Patch Set: add missed space Created April 7, 2016, 8:35 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « lib/child/frameScript.js ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This Source Code is subject to the terms of the Mozilla Public License 2 * This Source Code is subject to the terms of the Mozilla Public License
3 * version 2.0 (the "License"). You can obtain a copy of the License at 3 * version 2.0 (the "License"). You can obtain a copy of the License at
4 * http://mozilla.org/MPL/2.0/. 4 * http://mozilla.org/MPL/2.0/.
5 */ 5 */
6 6
7 /** 7 /**
8 * @module crawler 8 * @module crawler
9 */ 9 */
10 10
11 Cu.import("resource://gre/modules/Services.jsm"); 11 Cu.import("resource://gre/modules/Services.jsm");
12 Cu.import("resource://gre/modules/Task.jsm"); 12 Cu.import("resource://gre/modules/Task.jsm");
13 Cu.import("resource://gre/modules/Promise.jsm"); 13 Cu.import("resource://gre/modules/Promise.jsm");
14 Cu.import("resource://gre/modules/Timer.jsm");
14 15
15 function abprequire(module) 16 function abprequire(module)
16 { 17 {
17 let result = {}; 18 let result = {};
18 result.wrappedJSObject = result; 19 result.wrappedJSObject = result;
19 Services.obs.notifyObservers(result, "adblockplus-require", module); 20 Services.obs.notifyObservers(result, "adblockplus-require", module);
20 return result.exports; 21 return result.exports;
21 } 22 }
22 23
23 let {RequestNotifier} = abprequire("requestNotifier"); 24 let {RequestNotifier} = abprequire("requestNotifier");
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
103 browser.removeTab(tab); 104 browser.removeTab(tab);
104 105
105 if (this._resolvers.length) 106 if (this._resolvers.length)
106 this._resolvers.shift()(createTab(this._browser)); 107 this._resolvers.shift()(createTab(this._browser));
107 else 108 else
108 this._tabs--; 109 this._tabs--;
109 } 110 }
110 }; 111 };
111 112
112 /** 113 /**
113 * Observes page loads in a particular tabbed browser.
114 *
115 * @param {tabbrowser} browser
116 * The tabbed browser to be observed
117 * @param {int} timeout
118 * Load timeout in milliseconds
119 * @constructor
120 */
121 function LoadListener(browser, timeout)
122 {
123 this._browser = browser;
124 this._deferred = new Map();
125 this._timeout = timeout;
126 browser.addTabsProgressListener(this);
127 }
128 LoadListener.prototype = {
129 /**
130 * Returns a promise that will be resolved when the page in the specified tab
131 * finishes loading. Loading will be stopped if the timeout is reached.
132 *
133 * @param {tab} tab
134 * @result {Promise}
135 */
136 waitForLoad: function(tab)
137 {
138 let deferred = Promise.defer();
139 this._deferred.set(tab.linkedBrowser, deferred);
140
141 tab.ownerDocument.defaultView.setTimeout(function()
142 {
143 tab.linkedBrowser.stop();
144 }, this._timeout);
145
146 return deferred.promise;
147 },
148
149 /**
150 * Deactivates this object.
151 */
152 stop: function()
153 {
154 this._browser.removeTabsProgressListener(this);
155 },
156
157 onStateChange: function(browser, progress, request, flags, status)
158 {
159 if ((flags & Ci.nsIWebProgressListener.STATE_STOP) && (flags & Ci.nsIWebProgressListener.STATE_IS_WINDOW))
160 {
161 let deferred = this._deferred.get(browser);
162 if (deferred)
163 {
164 this._deferred.delete(browser);
165
166 let headers = [];
167 if (request instanceof Ci.nsIHttpChannel)
168 {
169 try
170 {
171 headers.push("HTTP/x.x " + request.responseStatus + " " + request.responseStatusText);
172 request.visitResponseHeaders((header, value) => headers.push(header + ": " + value));
173 }
174 catch (e)
175 {
176 // Exceptions are expected here
177 }
178 }
179 deferred.resolve([status, headers]);
180 }
181 }
182 }
183 };
184
185 /**
186 * Once created, this object will make sure all new windows are dismissed 114 * Once created, this object will make sure all new windows are dismissed
187 * immediately. 115 * immediately.
188 * 116 *
189 * @constructor 117 * @constructor
190 */ 118 */
191 function WindowCloser() 119 function WindowCloser()
192 { 120 {
193 Services.obs.addObserver(this, "xul-window-registered", true) 121 Services.obs.addObserver(this, "xul-window-registered", true)
194 } 122 }
195 WindowCloser.prototype = { 123 WindowCloser.prototype = {
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
269 * @param {int} maxtabs 197 * @param {int} maxtabs
270 * Maximum number of tabs to be opened 198 * Maximum number of tabs to be opened
271 * @param {String} targetURL 199 * @param {String} targetURL
272 * URL that should receive the results 200 * URL that should receive the results
273 * @param {Function} onDone 201 * @param {Function} onDone
274 * The callback which is called after finishing of all tasks. 202 * The callback which is called after finishing of all tasks.
275 */ 203 */
276 function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone) 204 function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)
277 { 205 {
278 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); 206 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs);
279 let loadListener = new LoadListener(window.getBrowser(), timeout); 207
280 let running = 0; 208 let running = 0;
281 let windowCloser = new WindowCloser(); 209 let windowCloser = new WindowCloser();
282 let taskDone = function() 210 let taskDone = function()
283 { 211 {
284 running--; 212 running--;
285 if (running <= 0) 213 if (running <= 0)
286 { 214 {
287 loadListener.stop();
288 windowCloser.stop(); 215 windowCloser.stop();
289 onDone(); 216 onDone();
290 } 217 }
291 }; 218 };
292 219
293 for (let url of urls) 220 for (let url of urls)
294 { 221 {
295 running++; 222 running++;
296 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(function(result) 223 Task.spawn(crawl_url.bind(null, url, tabAllocator, timeout)).then(function(result)
297 { 224 {
298 let request = new XMLHttpRequest(); 225 let request = new XMLHttpRequest();
299 request.open("POST", targetURL); 226 request.open("POST", targetURL);
300 request.addEventListener("load", taskDone, false); 227 request.addEventListener("load", taskDone, false);
301 request.addEventListener("error", taskDone, false); 228 request.addEventListener("error", taskDone, false);
302 request.send(JSON.stringify(result)); 229 request.send(JSON.stringify(result));
303 }, function(url, exception) 230 }, function(url, exception)
304 { 231 {
305 reportException(exception); 232 reportException(exception);
306 233
307 let request = new XMLHttpRequest(); 234 let request = new XMLHttpRequest();
308 request.open("POST", targetURL); 235 request.open("POST", targetURL);
309 request.addEventListener("load", taskDone, false); 236 request.addEventListener("load", taskDone, false);
310 request.addEventListener("error", taskDone, false); 237 request.addEventListener("error", taskDone, false);
311 request.send(JSON.stringify({ 238 request.send(JSON.stringify({
312 url: url, 239 url: url,
313 startTime: Date.now(), 240 startTime: Date.now(),
314 error: String(exception) 241 error: String(exception)
315 })); 242 }));
316 }.bind(null, url)); 243 }.bind(null, url));
317 } 244 }
318 } 245 }
319 246
320 /** 247 /**
248 * Expects to receive page info gathered in a content process for the specified
249 * `tab`. If there is no relevant message within specified `timeout` then
250 * the result promise is resolve with error object.
251 * @param tab
252 * Tab in which we are interested in
253 * @param {int} timeout
254 * Timeout in milliseconds
255 * @return {Promise} promise which will be resolved with the received page info
256 */
257 function getPageInfo(tab, timeout)
258 {
259 return new Promise((resolve, result) =>
260 {
261 let mm = tab.linkedBrowser.messageManager;
262 let timerID;
263 let onDone = (pageInfo) =>
264 {
265 mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone);
266 clearTimeout(timerID);
267 resolve(pageInfo);
268 }
269 mm.addMessageListener("abpcrawler:pageInfoGathered", (msg) => onDone(msg.data));;
270 timerID = setTimeout(onDone.bind(this, {error: "timeout"}), timeout);
271 });
272 }
273
274 /**
321 * Crawls a URL. This is a generator meant to be used via a Task object. 275 * Crawls a URL. This is a generator meant to be used via a Task object.
322 * 276 *
323 * @param {String} url 277 * @param {String} url
324 * @param {TabAllocator} tabAllocator 278 * @param {TabAllocator} tabAllocator
325 * @param {loadListener} loadListener 279 * @param {int} timeout
280 * Load timeout in milliseconds
326 * @result {Object} 281 * @result {Object}
327 * Crawling result 282 * Crawling result
328 */ 283 */
329 function* crawl_url(url, tabAllocator, loadListener) 284 function* crawl_url(url, tabAllocator, timeout)
330 { 285 {
331 let tab = yield tabAllocator.getTab(); 286 let tab = yield tabAllocator.getTab();
332 let result = {url, requests: []}; 287 let result = {url, requests: []};
333 let requestNotifier; 288 let requestNotifier;
334 try 289 try
335 { 290 {
336 result.startTime = Date.now(); 291 result.startTime = Date.now();
337 requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, 292 requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID,
338 function(entry, scanComplete) 293 function(entry, scanComplete)
339 { 294 {
340 if (!entry) 295 if (!entry)
341 return; 296 return;
342 let {type: contentType, location, filter} = entry; 297 let {type: contentType, location, filter} = entry;
343 result.requests.push({location, contentType, filter}); 298 result.requests.push({location, contentType, filter});
344 }); 299 });
345 300
346 tab.linkedBrowser.loadURI(url, null, null); 301 tab.linkedBrowser.loadURI(url, null, null);
347 [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab); 302
303 result.finalUrl = tab.linkedBrowser.currentURI.spec;
304 Object.assign(result, yield getPageInfo(tab, timeout));
348 result.endTime = Date.now(); 305 result.endTime = Date.now();
349 result.finalUrl = tab.linkedBrowser.currentURI.spec;
350
351 let document = tab.linkedBrowser.contentDocument;
352 if (document.documentElement)
353 {
354 try
355 {
356 let canvas = document.createElementNS("http://www.w3.org/1999/xhtml", "canvas");
357 canvas.width = document.documentElement.scrollWidth;
358 canvas.height = document.documentElement.scrollHeight;
359
360 let context = canvas.getContext("2d");
361 context.drawWindow(document.defaultView, 0, 0, canvas.width, canvas.height, "rgb(255, 255, 255)");
362 result.screenshot = canvas.toDataURL("image/jpeg", 0.8);
363 }
364 catch (e)
365 {
366 reportException(e);
367 result.error = "Capturing screenshot failed: " + e;
368 }
369
370 // TODO: Capture frames as well?
371 let serializer = new tab.ownerDocument.defaultView.XMLSerializer();
372 result.source = serializer.serializeToString(document.documentElement);
373 }
374 } 306 }
375 finally 307 finally
376 { 308 {
377 if (requestNotifier) 309 if (requestNotifier)
378 requestNotifier.shutdown(); 310 requestNotifier.shutdown();
379 tabAllocator.releaseTab(tab); 311 tabAllocator.releaseTab(tab);
380 } 312 }
381 return result; 313 return result;
382 } 314 }
383 315
384 function reportException(e) 316 function reportException(e)
385 { 317 {
386 let stack = ""; 318 let stack = "";
387 if (e && typeof e == "object" && "stack" in e) 319 if (e && typeof e == "object" && "stack" in e)
388 stack = e.stack + "\n"; 320 stack = e.stack + "\n";
389 321
390 Cu.reportError(e); 322 Cu.reportError(e);
391 dump(e + "\n" + stack + "\n"); 323 dump(e + "\n" + stack + "\n");
392 } 324 }
325
326 let {addonRoot} = require("info");
327 let frameScriptPath = addonRoot + "/lib/child/frameScript.js";
328 let globalMessageManager = Services.mm;
329 globalMessageManager.loadFrameScript(frameScriptPath, true);
330
331 onShutdown.add(() =>
332 {
333 globalMessageManager.removeDelayedFrameScript(frameScriptPath);
334 });
OLDNEW
« no previous file with comments | « lib/child/frameScript.js ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld