Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/crawler.js

Issue 29338242: Issue 3792 - Fix to support multiprocess firefox (Closed)
Patch Set: rebase on #3815 and address some trivial comments Created March 16, 2016, 2:41 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « lib/child/frameScript.js ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This Source Code is subject to the terms of the Mozilla Public License 2 * This Source Code is subject to the terms of the Mozilla Public License
3 * version 2.0 (the "License"). You can obtain a copy of the License at 3 * version 2.0 (the "License"). You can obtain a copy of the License at
4 * http://mozilla.org/MPL/2.0/. 4 * http://mozilla.org/MPL/2.0/.
5 */ 5 */
6 6
7 /** 7 /**
8 * @module crawler 8 * @module crawler
9 */ 9 */
10 10
11 Cu.import("resource://gre/modules/Services.jsm"); 11 Cu.import("resource://gre/modules/Services.jsm");
12 Cu.import("resource://gre/modules/Task.jsm"); 12 Cu.import("resource://gre/modules/Task.jsm");
13 Cu.import("resource://gre/modules/Promise.jsm"); 13 Cu.import("resource://gre/modules/Promise.jsm");
14 Cu.import("resource://gre/modules/Timer.jsm");
14 15
15 function abprequire(module) 16 function abprequire(module)
16 { 17 {
17 let result = {}; 18 let result = {};
18 result.wrappedJSObject = result; 19 result.wrappedJSObject = result;
19 Services.obs.notifyObservers(result, "adblockplus-require", module); 20 Services.obs.notifyObservers(result, "adblockplus-require", module);
20 return result.exports; 21 return result.exports;
21 } 22 }
22 23
23 let {RequestNotifier} = abprequire("requestNotifier"); 24 let {RequestNotifier} = abprequire("requestNotifier");
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
106 browser.removeTab(tab); 107 browser.removeTab(tab);
107 108
108 if (this._resolvers.length) 109 if (this._resolvers.length)
109 this._resolvers.shift()(createTab(this._browser)); 110 this._resolvers.shift()(createTab(this._browser));
110 else 111 else
111 this._tabs--; 112 this._tabs--;
112 } 113 }
113 }; 114 };
114 115
115 /** 116 /**
116 * Observes page loads in a particular tabbed browser.
117 *
118 * @param {tabbrowser} browser
119 * The tabbed browser to be observed
120 * @param {int} timeout
121 * Load timeout in milliseconds
122 * @constructor
123 */
124 function LoadListener(browser, timeout)
125 {
126 this._browser = browser;
127 this._deferred = new Map();
128 this._timeout = timeout;
129 browser.addTabsProgressListener(this);
130 }
131 LoadListener.prototype = {
132 /**
133 * Returns a promise that will be resolved when the page in the specified tab
134 * finishes loading. Loading will be stopped if the timeout is reached.
135 *
136 * @param {tab} tab
137 * @result {Promise}
138 */
139 waitForLoad: function(tab)
140 {
141 let deferred = Promise.defer();
142 this._deferred.set(tab.linkedBrowser, deferred);
143
144 tab.ownerDocument.defaultView.setTimeout(function()
145 {
146 tab.linkedBrowser.stop();
147 }, this._timeout);
148
149 return deferred.promise;
150 },
151
152 /**
153 * Deactivates this object.
154 */
155 stop: function()
156 {
157 this._browser.removeTabsProgressListener(this);
158 },
159
160 onStateChange: function(browser, progress, request, flags, status)
161 {
162 if ((flags & Ci.nsIWebProgressListener.STATE_STOP) && (flags & Ci.nsIWebProgressListener.STATE_IS_WINDOW))
163 {
164 let deferred = this._deferred.get(browser);
165 if (deferred)
166 {
167 this._deferred.delete(browser);
168
169 let headers = [];
170 if (request instanceof Ci.nsIHttpChannel)
171 {
172 try
173 {
174 headers.push("HTTP/x.x " + request.responseStatus + " " + request.responseStatusText);
175 request.visitResponseHeaders((header, value) => headers.push(header + ": " + value));
176 }
177 catch (e)
178 {
179 // Exceptions are expected here
180 }
181 }
182 deferred.resolve([status, headers]);
183 }
184 }
185 }
186 };
187
188 /**
189 * Once created, this object will make sure all new windows are dismissed 117 * Once created, this object will make sure all new windows are dismissed
190 * immediately. 118 * immediately.
191 * 119 *
192 * @constructor 120 * @constructor
193 */ 121 */
194 function WindowCloser() 122 function WindowCloser()
195 { 123 {
196 Services.obs.addObserver(this, "xul-window-registered", true) 124 Services.obs.addObserver(this, "xul-window-registered", true)
197 } 125 }
198 WindowCloser.prototype = { 126 WindowCloser.prototype = {
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
272 * @param {int} maxtabs 200 * @param {int} maxtabs
273 * Maximum number of tabs to be opened 201 * Maximum number of tabs to be opened
274 * @param {String} targetURL 202 * @param {String} targetURL
275 * URL that should receive the results 203 * URL that should receive the results
276 * @param {Function} onDone 204 * @param {Function} onDone
277 * The callback which is called after finishing of all tasks. 205 * The callback which is called after finishing of all tasks.
278 */ 206 */
279 function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone) 207 function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)
280 { 208 {
281 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); 209 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs);
282 let loadListener = new LoadListener(window.getBrowser(), timeout); 210
283 let running = 0; 211 let running = 0;
284 let windowCloser = new WindowCloser(); 212 let windowCloser = new WindowCloser();
285 let taskDone = function() 213 let taskDone = function()
286 { 214 {
287 running--; 215 running--;
288 if (running <= 0) 216 if (running <= 0)
289 { 217 {
290 loadListener.stop();
291 windowCloser.stop(); 218 windowCloser.stop();
292 onDone(); 219 onDone();
293 } 220 }
294 }; 221 };
295 222
296 for (let url of urls) 223 for (let url of urls)
297 { 224 {
298 running++; 225 running++;
299 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(function(result) 226 Task.spawn(crawl_url.bind(null, url, tabAllocator, timeout)).then(function(result)
300 { 227 {
301 let request = new XMLHttpRequest(); 228 let request = new XMLHttpRequest();
302 request.open("POST", targetURL); 229 request.open("POST", targetURL);
303 request.addEventListener("load", taskDone, false); 230 request.addEventListener("load", taskDone, false);
304 request.addEventListener("error", taskDone, false); 231 request.addEventListener("error", taskDone, false);
305 request.send(JSON.stringify(result)); 232 request.send(JSON.stringify(result));
306 }, function(url, exception) 233 }, function(url, exception)
307 { 234 {
308 reportException(exception); 235 reportException(exception);
309 236
310 let request = new XMLHttpRequest(); 237 let request = new XMLHttpRequest();
311 request.open("POST", targetURL); 238 request.open("POST", targetURL);
312 request.addEventListener("load", taskDone, false); 239 request.addEventListener("load", taskDone, false);
313 request.addEventListener("error", taskDone, false); 240 request.addEventListener("error", taskDone, false);
314 request.send(JSON.stringify({ 241 request.send(JSON.stringify({
315 url: url, 242 url: url,
316 startTime: Date.now(), 243 startTime: Date.now(),
317 error: String(exception) 244 error: String(exception)
318 })); 245 }));
319 }.bind(null, url)); 246 }.bind(null, url));
320 } 247 }
321 } 248 }
322 249
323 /** 250 /**
251 * Expects to receive page info gathered in a content process for the specified
252 * `tab`. If there is no relevant message within the specified `timeout`, then
253 * the returned promise is resolved with an error object.
254 * @param tab
255 * Tab we are interested in
256 * @param {int} timeout
257 * Timeout in milliseconds
258 * @return {Promise} promise which will be resolved with the received page info
259 */
260 function getPageInfo(tab, timeout)
261 {
262 return new Promise((resolve, result) =>
263 {
264 let mm = tab.linkedBrowser.messageManager;
265 let timerID;
266 let onDone = (pageInfo) =>
267 {
268 mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone);
269 clearTimeout(timerID);
270 resolve(pageInfo);
271 }
272 mm.addMessageListener("abpcrawler:pageInfoGathered", (msg) => onDone(msg.data));;
273 timerID = setTimeout(onDone.bind(this, {error: "timeout"}), timeout);
274 });
275 }
276
277 /**
324 * Crawls a URL. This is a generator meant to be used via a Task object. 278 * Crawls a URL. This is a generator meant to be used via a Task object.
325 * 279 *
326 * @param {String} url 280 * @param {String} url
327 * @param {TabAllocator} tabAllocator 281 * @param {TabAllocator} tabAllocator
328 * @param {loadListener} loadListener 282 * @param {int} timeout
283 * Load timeout in milliseconds
329 * @result {Object} 284 * @result {Object}
330 * Crawling result 285 * Crawling result
331 */ 286 */
332 function* crawl_url(url, tabAllocator, loadListener) 287 function* crawl_url(url, tabAllocator, timeout)
333 { 288 {
334 let tab = yield tabAllocator.getTab(); 289 let tab = yield tabAllocator.getTab();
335 let result = {url, requests: []}; 290 let result = {url, requests: []};
336 let requestNotifier; 291 let requestNotifier;
337 try 292 try
338 { 293 {
339 result.startTime = Date.now(); 294 result.startTime = Date.now();
340 requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, 295 requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID,
341 function(entry, scanComplete) 296 function(entry, scanComplete)
342 { 297 {
343 if (!entry) 298 if (!entry)
344 return; 299 return;
345 let {type: contentType, location, filter} = entry; 300 let {type: contentType, location, filter} = entry;
346 result.requests.push({location, contentType, filter}); 301 result.requests.push({location, contentType, filter});
347 }); 302 });
348 303
349 tab.linkedBrowser.loadURI(url, null, null); 304 tab.linkedBrowser.loadURI(url, null, null);
350 [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab); 305
306 result.finalUrl = tab.linkedBrowser.currentURI.spec;
307 Object.assign(result, yield getPageInfo(tab, timeout));
351 result.endTime = Date.now(); 308 result.endTime = Date.now();
352 result.finalUrl = tab.linkedBrowser.currentURI.spec;
353
354 let document = tab.linkedBrowser.contentDocument;
355 if (document.documentElement)
356 {
357 try
358 {
359 let canvas = document.createElementNS("http://www.w3.org/1999/xhtml", "canvas");
360 canvas.width = document.documentElement.scrollWidth;
361 canvas.height = document.documentElement.scrollHeight;
362
363 let context = canvas.getContext("2d");
364 context.drawWindow(document.defaultView, 0, 0, canvas.width, canvas.height, "rgb(255, 255, 255)");
365 result.screenshot = canvas.toDataURL("image/jpeg", 0.8);
366 }
367 catch (e)
368 {
369 reportException(e);
370 result.error = "Capturing screenshot failed: " + e;
371 }
372
373 // TODO: Capture frames as well?
374 let serializer = new tab.ownerDocument.defaultView.XMLSerializer();
375 result.source = serializer.serializeToString(document.documentElement);
376 }
377 } 309 }
378 finally 310 finally
379 { 311 {
380 if (requestNotifier) 312 if (requestNotifier)
381 requestNotifier.shutdown(); 313 requestNotifier.shutdown();
382 tabAllocator.releaseTab(tab); 314 tabAllocator.releaseTab(tab);
383 } 315 }
384 return result; 316 return result;
385 } 317 }
386 318
387 function reportException(e) 319 function reportException(e)
388 { 320 {
389 let stack = ""; 321 let stack = "";
390 if (e && typeof e == "object" && "stack" in e) 322 if (e && typeof e == "object" && "stack" in e)
391 stack = e.stack + "\n"; 323 stack = e.stack + "\n";
392 324
393 Cu.reportError(e); 325 Cu.reportError(e);
394 dump(e + "\n" + stack + "\n"); 326 dump(e + "\n" + stack + "\n");
395 } 327 }
328
329 let {addonRoot} = require("info");
330 let frameScriptPath = addonRoot + "/lib/child/frameScript.js";
331 let globalMessageManager = Services.mm;
332 globalMessageManager.loadFrameScript(frameScriptPath, true);
333
334 onShutdown.add(() =>
335 {
336 globalMessageManager.removeDelayedFrameScript(frameScriptPath);
337 });
OLDNEW
« no previous file with comments | « lib/child/frameScript.js ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld