Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/browser.js

Issue 10233013: Crawler, second version (Closed)
Patch Set: Created April 12, 2013, 1:38 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « lib/bootstrap_xpcom.js ('k') | lib/client.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 let {Logger} = require( "logger" );
2 let {Action} = require( "action" );
3
4 //-------------------------------------------------------
5 // Tabbed_Browser
6 //-------------------------------------------------------
/**
 * Wrapper around a single OS-level window of a multiple-tab Firefox browser. The wrapped object is the one referred
 * to by the window's global 'gBrowser'.
 *
 * @param {Window} window
 *    Browser window; its 'gBrowser' member must be present.
 * @param {Number} max_requests
 *    The maximum number of simultaneous requests this object may have.
 * @throws {Error}
 *    If 'window.gBrowser' is missing.
 * @constructor
 */
var Tabbed_Browser = function( window, max_requests )
{
  /**
   * Browser window through which we access the global browser object.
   * @type {Window}
   */
  this.window = window;

  /**
   * A browser object that can hold multiple individual tabbed browser panes.
   */
  var tab_container = window.gBrowser;
  this.tabbed_browser = tab_container;
  if ( !tab_container )
  {
    throw new Error( "Tabbed_Browser: argument 'window' has null member 'gBrowser'" );
  }

  /**
   * The current number of pending requests in child tabs of this object.
   * @type {Number}
   */
  this.n_requests = 0;

  /**
   * The maximum number of simultaneous requests this object may have.
   * @type {Number}
   */
  this.max_requests = max_requests;

  /**
   * Map from browser objects to records of the form { child: Browser_Tab }. Both the dispatch of progress events
   * and the tracking of active children go through this map.
   * @type {Map}
   */
  this.map_browser_to_child = new Map();

  /**
   * A transient set for children that have been granted a request slot but have not yet started their load cycle.
   * @type {Set}
   */
  this.allocated_not_loaded = new Set();

  /*
   * A single progress listener serves every tab of the tabbed browser. We keep a reference to it so that close()
   * can unregister exactly this object.
   */
  var on_state_change = this._progress.bind( this );
  this.listener = { onStateChange: on_state_change };
  tab_container.addTabsProgressListener( this.listener );

  this.logger = new Logger( "Tabbed_Browser" );
};
63
/**
 * Release the resources held by this object: unregister the progress listener and close every child tab still
 * present in the map. The children are closed because they cannot work correctly once our progress event handler is
 * no longer registered.
 */
Tabbed_Browser.prototype.close = function()
{
  var log = this.logger.make_log( "close" );
  log( "Tabbed_Browser.close", false );

  var registered = this.listener;
  if ( registered )
  {
    this.tabbed_browser.removeTabsProgressListener( registered );
    this.listener = null;
  }

  for ( let [ browser, entry ] of this.map_browser_to_child )
  {
    entry.child.close();
    // The child normally removes its own entry through notify_close(); this covers any that did not.
    this.map_browser_to_child.delete( browser );
  }
};
86
/**
 * Predicate: "is there an open request slot?"
 *
 * @return {boolean}
 *    True when fewer than 'max_requests' requests are currently allocated.
 */
Tabbed_Browser.prototype.available = function()
{
  var in_use = this.n_requests;
  return in_use < this.max_requests;
};
94
/**
 * Predicate: "Are there no allocated requests?"
 * <p/>
 * This tests the request counter, not the set of open tabs. A tab may stay open (and stay in the child map) after
 * its load has ended and its request slot has been released.
 *
 * @return {boolean}
 *    True when no request slot is currently allocated.
 */
Tabbed_Browser.prototype.quiescent = function()
{
  return this.n_requests === 0;
};
103
/**
 * Factory for a Browser_Tab that is a child of this tabbed browser. The tab is only constructed here; nothing is
 * loaded by this call.
 *
 * @param {string} target
 *    The URL the new tab will browse to.
 * @param {Boolean} leave_open
 *    Leave the tab open in the browser after closing the present object
 * @param {function} finisher
 * @param {function} catcher
 * @return {Browser_Tab}
 */
Tabbed_Browser.prototype.make_tab = function( target, leave_open, finisher, catcher )
{
  var tab_action = new Browser_Tab( this, target, leave_open, finisher, catcher );
  return tab_action;
};
115
/**
 * Ask for one of the available HTTP request slots. If one is free it is allocated to the child, which is recorded as
 * "allocated but not yet loading".
 * <p/>
 * HAZARD: This request is made when the asynchronous action is created, which is strictly before it is launched. If
 * the caller does not either launch the action or close it, there will be an internal resource leak here.
 *
 * @param child
 *    The child tab asking for the slot.
 * @return {Boolean}
 *    True if a slot was allocated, false if none was available.
 */
Tabbed_Browser.prototype.request_load = function( child )
{
  if ( this.available() )
  {
    ++this.n_requests;
    this.allocated_not_loaded.add( child );
    return true;
  }
  return false;
};
135
/**
 * Notification that a child tab has begun loading a page. The child moves from the transient "allocated but not
 * loaded" set into the map of active children, keyed by its browser object, so that progress events can be routed
 * to it.
 *
 * @param {Browser_Tab} child
 *    A child that previously obtained a slot through request_load() and whose 'browser' member is already set.
 * @throws {Error}
 *    If the child was not registered by request_load().
 */
Tabbed_Browser.prototype.notify_load_begin = function( child )
{
  // Set.delete() reports whether the element was present, so a single call both checks and removes.
  if ( !this.allocated_not_loaded.delete( child ) )
  {
    var message = "notify_load_begin: child not found";
    Cu.reportError( message );
    throw new Error( message );
  }
  this.map_browser_to_child.set( child.browser, { child: child } );
};
155
/**
 * Notification that a child tab has finished its load, successfully or not. This frees one request slot.
 * <p/>
 * The child must only call this function once, since it acts as a resource deallocator.
 *
 * @throws {Error}
 *    If no request is currently allocated, which indicates an unbalanced call.
 */
Tabbed_Browser.prototype.notify_load_end = function()
{
  if ( this.n_requests <= 0 )
  {
    throw new Error( "Tabbed_Browser.notify_load_end: n_requests <= 0" );
  }
  --this.n_requests;
};
169
/**
 * Notification that a child tab is closing; its entry is removed from the map of active children.
 * <p/>
 * The entry is deliberately kept until this point rather than dropped when the load ends. That lets us handle events
 * that occur after the document has loaded, which typically arise from scripts on the page.
 *
 * @param child
 *    The closing child; its 'browser' member is the map key.
 */
Tabbed_Browser.prototype.notify_close = function( child )
{
  var was_present = this.map_browser_to_child.delete( child.browser );
  if ( !was_present )
  {
    // If we're getting this notice, it really should be in our map
    Cu.reportError( "Child browser not found in map during 'notice_close()'" );
  }
};
189
//noinspection JSUnusedLocalSymbols
/**
 * Progress event handler, registered as 'onStateChange' for all tabs of the tabbed browser. It reacts only to STOP
 * states on tabs that belong to us, and forwards the stop status to the owning Browser_Tab.
 *
 * @param {*} browser
 *    The browser object the event belongs to; used as the key into 'map_browser_to_child'.
 * @param {nsIWebProgress} controller
 *    The control object for progress monitoring that dispatches the event.
 * @param {nsIRequest} browse_request
 *    The request object generated by the call to addTab(), which loads a page.
 * @param state
 *    The progress state, represented as flags.
 * @param stop_status
 *    Status code for success or failure if the argument state is a STOP state.
 */
Tabbed_Browser.prototype._progress = function( browser, controller, browse_request, state, stop_status )
{
  /*
   * Only STOP states matter here. Redirects, which are among the possible progress states, are not tracked. We may
   * want to in the future, in case redirect behavior is involved with ad delivery in some way.
   *
   * A warning: traces of these messages show that the START message is delivered to this function _before_
   * 'notify_load_begin' is called. That event therefore arrives before 'map_browser_to_child' can possibly have the
   * 'browser' element of a new tab as a key. Trap any other progress state here only after thoroughly tracing the
   * event sequence to determine the actual behavior.
   *
   * This handler also receives events for every tab present in the tabbrowser element, even ones that we didn't add
   * ourselves. It's not an error to receive such events; they are ignored.
   */
  //noinspection JSBitwiseOperatorUsage
  var stop_bits = state & Ci.nsIWebProgressListener.STATE_STOP;
  if ( stop_bits === 0 || !this.map_browser_to_child.has( browser ) )
  {
    return;
  }

  var entry = this.map_browser_to_child.get( browser );
  entry.child.progress_stopped( stop_status );

  var log = this.logger.make_log( "_progress" );
  log( "request name = " + browse_request.name, false );
};
237
238 //-------------------------------------------------------
239 // Browser_Tab
240 //-------------------------------------------------------
/**
 * A single browser tab that can asynchronously load a web page.
 *
 * There's a small but significant conflation of two concerns in this class. The first is the browser tab as an
 * entity, a member of some tab set. The second is the browser tab as the action of loading a target site into the
 * tab. The present implementation simply combines these two. The combination isn't quite complicated enough to make
 * them worth separating at the present time. If, however, at some point there are multiple actions (different
 * environments for loading, for example) on a single tab, it may be worth the effort to split them out.
 *
 * As an asynchronous action, tab load always completes ordinarily unless there is an internal exception. During
 * development, exceptions occurred when there was a mismatch between this code and browser behavior. While that's
 * (mostly?) over, browser behavior may yet change. We reserve exceptional completion to indicate this kind of
 * problem.
 *
 * Ordinary completion of the tab load action, thus, incorporates both successful page loads as well as unsuccessful.
 * Once the action has completed, the value of the action indicates the state of the page load.
 *  - Successful load. The page completed loading, as indicated by a progress listener signal.
 *  - Unsuccessful load. The progress listener stopped but with some error code rather than the success code.
 *  - User closed tab. The user presses the close button on the tab.
 *  - External cancelled load. The stop() method was called. This is how the crawler handles time-out.
 *
 * @constructor
 * @extends {Action.Asynchronous_Action}
 * @param {Tabbed_Browser} parent
 * @param {string} target
 * @param {boolean} [leave_open=false]
 *    Leave the tab open in the browser after closing the present object
 * @param {function} finisher
 * @param {function} catcher
 */
var Browser_Tab = function( parent, target, leave_open, finisher, catcher )
{
  Action.Asynchronous_Action.init.call( this, finisher, catcher );

  /**
   * The parent tabbed browser in whose tab set this tab is a member.
   * @type {Tabbed_Browser}
   */
  this.parent = parent;

  /**
   * The target URL to browse to.
   * @type {string}
   */
  this.target = target;

  /**
   * Leave the tab open in the browser after the crawler exits. The reason to do this is to allow manual inspection
   * of the window as the crawler loaded it.
   * <p/>
   * It's necessary to call 'close()' on any instance of this object in order to ensure event handlers are released.
   * This is true whether or not the tab remains open afterwards.
   * <p/>
   * 'leave_open' is the third parameter, so the default is applied whenever it is omitted or passed as undefined.
   * The earlier test on 'arguments.length >= 2' checked the wrong arity and stored undefined in that case.
   *
   * @type {Boolean}
   */
  this.leave_open = ( leave_open === undefined ) ? false : leave_open;

  /**
   * Guard flag for closing the object.
   * @type {boolean}
   */
  this.closed = false;

  /**
   * A browser object that can hold multiple individual tabbed browser panes. Shared with the parent.
   */
  this.tabbed_browser = this.parent.tabbed_browser;

  /**
   * Our tab within the tabbed browser. This is the "external" view of browser pane, the one that allows us to
   * control loading. The tab must have a URL associated with it, so it's not displayed at the outset
   * <p/>
   * FUTURE: Might it be useful to load the tab with an empty page but descriptive title at construction time?
   */
  this.tab = null;

  /**
   * The browser object belonging to our tab. It is obtained from the tabbed browser when the tab is created, and the
   * parent uses it as the key under which this object is registered.
   * @type {*}
   */
  this.browser = null;

  /**
   * Initialize the action value to a not-completed state, in case the action is aborted prematurely somehow.
   */
  this._argv = [ Browser_Tab.Completion_State.Not_Completed ];
};
Browser_Tab.prototype = new Action.Asynchronous_Action();
328
/**
 * Codes describing how a tab load action ended. One of these is the leading element of the action's value array.
 */
Browser_Tab.Completion_State = {
  // Initial value, in place until the action ends one way or another.
  Not_Completed: 0,
  // An unexpected exception occurred while launching the load.
  Exception: 1,
  // The progress listener reported a stop with status zero.
  Success: 2,
  // The progress listener reported a stop with a non-zero status.
  No_Success: 3,
  // The tab received a "TabClose" event.
  User_Close: 4,
  // The stop() method was called.
  External_Cancel: 5
};
337
/**
 * Destroy the host resources allocated by this object: the "TabClose" listener, the tab itself (unless 'leave_open'
 * is set) and the registration with the parent. Calling it again after it has completed is a no-op.
 */
Browser_Tab.prototype.close = function()
{
  if ( this.closed )
  {
    return;
  }

  var tab = this.tab;
  if ( tab )
  {
    // Unhook our listener first, so that removing the tab below is not reported back to us as a user close.
    tab.removeEventListener( "TabClose", this.tab_close_listener );
    this.tab_close_listener = null;
    if ( !this.leave_open )
    {
      this.tabbed_browser.removeTab( tab );
    }
    this.tab = null;
    /*
     * Kill the map from our associated browser to this object. This is the point at which we can no longer
     * locate this object with a 'browser' or 'window' object. The parent still needs 'this.browser' as the key,
     * so it is cleared only afterwards.
     */
    this.parent.notify_close( this );
    this.browser = null;
  }
  /*
   * FUTURE: Cancel any pending page load here.
   */
  this.closed = true;
};
367
/**
 * Launch the action: claim a request slot from the parent, then show the tab by loading the target URL into it.
 *
 * @throws {Error}
 *    If the parent has no request slot available.
 */
Browser_Tab.prototype._go = function()
{
  var slot_granted = this.parent.request_load( this );
  if ( !slot_granted )
  {
    // Should not reach. The caller should be calling available() on the Tabbed_Browser first.
    throw new Error( "Browser_Tab: may not launch when no Tabbed_Browser is available." );
  }
  try
  {
    var tab_set = this.tabbed_browser;
    this.tab = tab_set.addTab( this.target );
    this.browser = tab_set.getBrowserForTab( this.tab );
    // The parent keys its child map on 'this.browser', so it must be set before this notice.
    this.parent.notify_load_begin( this );
    this.tab_close_listener = this._user_close_command.bind( this );
    this.tab.addEventListener( "TabClose", this.tab_close_listener );
  }
  catch ( error )
  {
    /*
     * NOTE(review): the slot claimed by request_load() above does not appear to be released on this path; confirm
     * whether end_badly() or the caller takes care of that.
     */
    this._argv = [ Browser_Tab.Completion_State.Exception, error ];
    Cu.reportError( "Unexpected exception in Browser_Tab._go(): " + error.toString() );
    this.end_badly( error );
  }
};
393
/**
 * Common completion path for every ordinary way a load can end. Releases the request slot held in the parent,
 * records the action value and completes the action.
 *
 * @param {Array} argv
 *    The completion state followed by any extra values. It is modified in place: this object is prepended.
 */
Browser_Tab.prototype._end = function( argv )
{
  /*
   * Everything below runs at most once. The browser can send multiple STOP events, for example when the user focuses
   * on a tab window by clicking on its tab, and they wouldn't all be for the original document. Checking for the
   * final state makes this function idempotent. It also forestalls a race condition where a request completes and
   * schedules a progress event while we are closing the object.
   */
  if ( !this.completed )
  {
    /*
     * The notice back to the parent sits inside the guard on purpose: it frees a request slot, so it must be sent
     * just once.
     */
    this.parent.notify_load_end();
    /*
     * The value of the load action includes the action itself. Load actions are processed in bulk, so the crawler
     * needs a way of identifying this action when it lands. To provide that, we prepend our own 'this' object to
     * the value array.
     */
    this._argv = argv;
    this._argv.unshift( this );
    this.end_well();
  }
};
421
/**
 * Stop event handler. The parent calls it only for STOP events on the present tab. It translates the stop status
 * into a completion state and ends the action.
 *
 * @param stop_status
 *    Status code for success or failure of the stopped request. Zero means success.
 */
Browser_Tab.prototype.progress_stopped = function( stop_status )
{
  /*
   * A non-zero status is an XPCOM 'nsresult' value and is passed along in the action value. It could be examined
   * if the cause of the failure to load needs to be diagnosed. For example, NS_ERROR_OFFLINE would be useful for
   * suspending operation of the crawler while internet connectivity comes back. NS_ERROR_MALFORMED_URI would be
   * useful for notifying the user of a typo.
   */
  var outcome = ( stop_status == 0 )
    ? [ Browser_Tab.Completion_State.Success ]
    : [ Browser_Tab.Completion_State.No_Success, stop_status ];
  this._end( outcome );
};
448
/**
 * Handler for the "TabClose" event, i.e. the tab being closed by user gesture. This might or might not interrupt a
 * pending transfer; either way the action ends with the User_Close state.
 *
 * @private
 */
Browser_Tab.prototype._user_close_command = function()
{
  var outcome = [ Browser_Tab.Completion_State.User_Close ];
  this._end( outcome );
};
458
/**
 * External command to stop loading; the action ends with the External_Cancel state. Used to implement time-out.
 */
Browser_Tab.prototype.stop = function()
{
  var outcome = [ Browser_Tab.Completion_State.External_Cancel ];
  this._end( outcome );
};
466
// Public interface of this module: the per-tab load action and the window-level wrapper that creates it.
exports.Browser_Tab = Browser_Tab;
exports.Tabbed_Browser = Tabbed_Browser;
OLDNEW
« no previous file with comments | « lib/bootstrap_xpcom.js ('k') | lib/client.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld