| Index: lib/browser.js | 
| =================================================================== | 
| new file mode 100644 | 
| --- /dev/null | 
| +++ b/lib/browser.js | 
| @@ -0,0 +1,468 @@ | 
| +let {Logger} = require( "logger" ); | 
| +let {Action} = require( "action" ); | 
| + | 
| +//------------------------------------------------------- | 
| +// Tabbed_Browser | 
| +//------------------------------------------------------- | 
| +/** | 
| + * A single OS-level window of a multiple-tab Firefox browser. This is the object referred to by the global 'gBrowser'. | 
| + * | 
| + * @param {Window} window | 
| + * @param {Number} max_requests | 
| + *      The maximum number of simultaneous requests this object may have. | 
| + * @constructor | 
| + */ | 
| +var Tabbed_Browser = function( window, max_requests ) | 
| +{ | 
| +  /** | 
| +   * Browser window through which we access the global browser object. | 
| +   * @type {Window} | 
| +   */ | 
| +  this.window = window; | 
| + | 
| +  /** | 
| +   * A browser object that can hold multiple individual tabbed browser panes. | 
| +   */ | 
| +  this.tabbed_browser = this.window.gBrowser; | 
| +  if ( !this.tabbed_browser ) | 
| +  { | 
| +    throw new Error( "Tabbed_Browser: argument 'window' has null member 'gBrowser'" ); | 
| +  } | 
| + | 
| +  /** | 
| +   * The current number of pending requests in child tabs of this object. | 
| +   * @type {Number} | 
| +   */ | 
| +  this.n_requests = 0; | 
| + | 
| +  /** | 
| +   * The maximum number of simultaneous requests this object may have. | 
| +   * @type {Number} | 
| +   */ | 
| +  this.max_requests = max_requests; | 
| + | 
| +  /** | 
| +   * The heart of the dispatcher for both handling progress events and tracking block activity is this map from | 
| +   * browser objects to Browser_Tab ones. | 
| +   * @type {Map} | 
| +   */ | 
| +  this.map_browser_to_child = new Map(); | 
| + | 
| + | 
| +  /** | 
| +   * A transient set for allocated requests that have not started their load cycle. | 
| +   * @type {Set} | 
| +   */ | 
| +  this.allocated_not_loaded = new Set(); | 
| + | 
| +  this.listener = { onStateChange: this._progress.bind( this ) }; | 
| +  this.tabbed_browser.addTabsProgressListener( this.listener ); | 
| + | 
| +  this.logger = new Logger( "Tabbed_Browser" ); | 
| +}; | 
| + | 
| +/** | 
| + * Release resources held by this object. This includes event handlers. We also close all the child tabs, since they | 
| + * won't work right after our progress event handler is no longer registered. | 
| + */ | 
| +Tabbed_Browser.prototype.close = function() | 
| +{ | 
| +  var log = this.logger.make_log( "close" ); | 
| +  log( "Tabbed_Browser.close", false ); | 
| +  if ( this.listener ) | 
| +  { | 
| +    this.tabbed_browser.removeTabsProgressListener( this.listener ); | 
| +    this.listener = null; | 
| +  } | 
| + | 
| +  let pair = null; | 
| +  for ( pair of this.map_browser_to_child ) | 
| +  { | 
| +    let [ key, value ] = pair; | 
| +    value.child.close(); | 
| +    this.map_browser_to_child.delete( key ); | 
| +  } | 
| +}; | 
| + | 
| +/** | 
| + * Predicate "is there an open request slot?" -- true while 'n_requests' is below 'max_requests'. | 
| + */ | 
| +Tabbed_Browser.prototype.available = function() | 
| +{ | 
| +  return this.n_requests < this.max_requests; | 
| +}; | 
| + | 
| +/** | 
| + * Predicate: "Are there no open tabs?" | 
| + * @return {boolean} | 
| + */ | 
| +Tabbed_Browser.prototype.quiescent = function() | 
| +{ | 
| +  return this.n_requests == 0; | 
| +}; | 
| + | 
| +/** | 
| + * Factory: returns a new Browser_Tab child of this object; the arguments go unchanged to its constructor. | 
| + * @param {string} target | 
| + * @param {Boolean} leave_open  Leave the tab open in the browser after closing the returned object | 
| + * @param {function} finisher | 
| + * @param {function} catcher | 
| + */ | 
| +Tabbed_Browser.prototype.make_tab = function( target, leave_open, finisher, catcher ) | 
| +{ | 
| +  return new Browser_Tab( this, target, leave_open, finisher, catcher ); | 
| +}; | 
| + | 
| +/** | 
| + * Request an allocation of available HTTP requests. Allocates one if available. | 
| + * <p/> | 
| + * HAZARD: This request is made when the asynchronous action is created, which is strictly before it is launched. If | 
| + * the caller does not either launch the action or close it, there will be an internal resource leak here. | 
| + * | 
| + * @param child | 
| + * @return {Boolean} | 
| + */ | 
| +Tabbed_Browser.prototype.request_load = function( child ) | 
| +{ | 
| +  if ( !this.available() ) | 
| +  { | 
| +    return false; | 
| +  } | 
| +  ++this.n_requests; | 
| +  this.allocated_not_loaded.add( child ); | 
| +  return true; | 
| +}; | 
| + | 
| +/** | 
| + * Notification that a child tab is loading a page. This constitutes a change in the number of unallocated requests. | 
| + * | 
| + * @param {Browser_Tab} child | 
| + */ | 
| +Tabbed_Browser.prototype.notify_load_begin = function( child ) | 
| +{ | 
| +  if ( this.allocated_not_loaded.has( child ) ) | 
| +  { | 
| +    this.allocated_not_loaded.delete( child ); | 
| +  } | 
| +  else | 
| +  { | 
| +    Cu.reportError( "notice_load_begin: child not found" ); | 
| +    throw "notice_load_begin: child not found"; | 
| +  } | 
| +  let value = { child: child }; | 
| +  this.map_browser_to_child.set( child.browser, value ); | 
| +}; | 
| + | 
| +/** | 
| + * Notification that a child tab is loading a page. This constitutes a change in the number of unallocated requests. | 
| + * <p/> | 
| + * The child must only call this function once, since it acts as a resource deallocator, freeing up a request slot. | 
| + */ | 
| +Tabbed_Browser.prototype.notify_load_end = function() | 
| +{ | 
| +  if ( this.n_requests <= 0 ) | 
| +  { | 
| +    throw "Tabbed_Browser.notify_load_end: n_requests <= 0"; | 
| +  } | 
| +  --this.n_requests; | 
| +}; | 
| + | 
| +/** | 
| + * Notification that a child tab is closing. We leave the tab present in our map of active children until the tab is | 
| + * closed. This allows us to handle events that occur after the document has loaded, which typically arise from | 
| + * scripts on the page. | 
| + * | 
| + * @param child | 
| + */ | 
| +Tabbed_Browser.prototype.notify_close = function( child ) | 
| +{ | 
| +  if ( this.map_browser_to_child.has( child.browser ) ) | 
| +  { | 
| +    this.map_browser_to_child.delete( child.browser ); | 
| +  } | 
| +  else | 
| +  { | 
| +    // If we're getting this notice, it really should be in our map | 
| +    Cu.reportError( "Child browser not found in map during 'notice_close()'" ); | 
| +  } | 
| +}; | 
| + | 
| +//noinspection JSUnusedLocalSymbols | 
| +/** | 
| + * Progress event handler. It looks only for STOP states on the present tab. When that happens, it determines the | 
| + * success status and calls the landing function. | 
| + * | 
| + * @param {*} browser | 
| + * @param {nsIWebProgress} controller | 
| + *      The control object for progress monitoring that dispatches the event. | 
| + * @param {nsIRequest} browse_request | 
| + *      The request object generated by the called to addTab(), which loads a page. | 
| + * @param state | 
| + *      The progress state, represented as flags. | 
| + * @param stop_status | 
| + *      Status code for success or failure if the argument state is a STOP state. | 
| + */ | 
| +Tabbed_Browser.prototype._progress = function( browser, controller, browse_request, state, stop_status ) | 
| +{ | 
| +  /* | 
| +   * We only care about STOP states. We're not tracking redirects, which is one of the progress states possible. | 
| +   * We may want to in the future, though, in case redirect behavior is involved with ad delivery in some way. | 
| +   * | 
| +   * As a point of warning, traces on these messages shows that the START message is delivered to the present | 
| +   * function _before_ 'notify_load_begin' is called, which seems to mean that the JS interpreter is doing something | 
| +   * fishy, either using a second thread or dispatching during a function invocation or return. Regardless, this | 
| +   * event come in before it's possible that 'map_browser_to_child' has the 'browser' element of a new tab as a key. | 
| +   * Thus, a warning that trapping any other progress state here should happen only after thoroughly tracing the | 
| +   * event sequence to determine the actual behavior. | 
| +   */ | 
| +  //noinspection JSBitwiseOperatorUsage | 
| +  if ( !(state & Ci.nsIWebProgressListener.STATE_STOP) ) | 
| +    return; | 
| + | 
| +  /* | 
| +   * This handler receives events for all the tabs present in a tabbrowser element, even ones that we didn't | 
| +   * add ourselves. It's not an error to receive such events. | 
| +   */ | 
| +  if ( !this.map_browser_to_child.has( browser ) ) | 
| +  { | 
| +    return; | 
| +  } | 
| + | 
| +  var {child} = this.map_browser_to_child.get( browser ); | 
| +  child.progress_stopped( stop_status ); | 
| + | 
| +  var log = this.logger.make_log( "_progress" ); | 
| +  log( "request name = " + browse_request.name, false ); | 
| +}; | 
| + | 
| +//------------------------------------------------------- | 
| +// Browser_Tab | 
| +//------------------------------------------------------- | 
| +/** | 
| + * A single browser tab that can asynchronously load a web page. | 
| + * | 
| + * There's a small but significant conflation of two concerns in this class. The first is the browser tab as an entity, | 
| + * a member of some tab set. The second is the browser tab as the action of loading a target site into the tab. The | 
| + * present implementation simply combines these two. The combination isn't quite complicated enough to make them worth | 
| + * separating at the present time. If, however, at some point there are multiple actions (different environments for | 
| + * loading, for example) on a single tab, it may be worth the effort to split them out. | 
| + * | 
| + * As a asynchronous action, tab load always completes ordinarily unless there is an internal exception. During | 
| + * development, exceptions occurred when there was a mismatch between this code and browser behavior. While that's | 
| + * (mostly?) over, browser behavior may yet change. We reserve exceptional completion to indicate this kind of problem. | 
| + * | 
| + * Ordinary completion of the tab load action, thus, incorporates both successful page loads as well as unsuccessful. | 
| + * Once the action has completed, the value of the action indicates the state of the page load. | 
| + *  - Successful load. The page complete loading, as indicated by a progress listener signal. | 
| + *  - Unsuccessful load. The progress listener stopped but with some error code rather than the success code. | 
| + *  - User closed tab. The user presses the close button on the tab. | 
| + *  - External cancelled load. The cancel() method was called. This is how the crawler handles time-out. | 
| + * | 
| + * @constructor | 
| + * @extends {Action.Asynchronous_Action} | 
| + * @param {Tabbed_Browser} parent | 
| + * @param {string} target | 
| + * @param {boolean} [leave_open=false] | 
| + *      Leave the tab open in the browser after closing the present object | 
| + * @param {function} finisher | 
| + * @param {function} catcher | 
| + */ | 
| +var Browser_Tab = function( parent, target, leave_open, finisher, catcher ) | 
| +{ | 
| +  Action.Asynchronous_Action.init.call( this, finisher, catcher ); | 
| + | 
| +  /** | 
| +   * The parent tabbed browser in whose tab set this tab is a member. | 
| +   * @type {Tabbed_Browser} | 
| +   */ | 
| +  this.parent = parent; | 
| + | 
| +  /** | 
| +   * The target URL to browse to. | 
| +   * @type {string} | 
| +   */ | 
| +  this.target = target; | 
| + | 
| +  /** | 
| +   * Leave the tab open in the browser after the crawler exits. The reason to do this is to allow manual inspection | 
| +   * of the window as the crawler loaded it. | 
| +   * <p/> | 
| +   * It's necessary to call 'close()' on any instance of this object in order to ensure event handlers are released. | 
| +   * This is true whether or not the tab remains open afterwards. | 
| +   * | 
| +   * @type {Boolean} | 
| +   */ | 
| +  this.leave_open = (arguments.length >= 2) ? leave_open : false; | 
| + | 
| +  /** | 
| +   * Guard flag for closing the object. | 
| +   * @type {boolean} | 
| +   */ | 
| +  this.closed = false; | 
| + | 
| +  /** | 
| +   * A browser object that can hold multiple individual tabbed browser panes. | 
| +   */ | 
| +  this.tabbed_browser = this.parent.tabbed_browser; | 
| + | 
| +  /** | 
| +   * Our tab within the tabbed browser. This is the "external" view of browser pane, the one that allows us to | 
| +   * control loading. The tab must have a URL associated with it, so it's not displayed at the outset | 
| +   * <p/> | 
| +   * FUTURE: Might it be useful to load the tab with a empty page but descriptive title at construction time? | 
| +   */ | 
| +  this.tab = null; | 
| + | 
| +  /** | 
| +   * | 
| +   * @type {*} | 
| +   */ | 
| +  this.browser = null; | 
| + | 
| +  /** | 
| +   * Initialize the action value to a not-completed state, in case the action is aborted prematurely somehow. | 
| +   */ | 
| +  this._argv = [ Browser_Tab.Completion_State.Not_Completed ]; | 
| +}; | 
| +Browser_Tab.prototype = new Action.Asynchronous_Action(); | 
| + | 
| +Browser_Tab.Completion_State = { | 
| +  Not_Completed: 0, | 
| +  Exception: 1, | 
| +  Success: 2, | 
| +  No_Success: 3, | 
| +  User_Close: 4, | 
| +  External_Cancel: 5 | 
| +}; | 
| + | 
| +/** | 
| + * Close function destroys our allocated host resources, such as tabs, listeners, requests, etc. | 
| + */ | 
| +Browser_Tab.prototype.close = function() | 
| +{ | 
| +  if ( this.closed ) | 
| +    return; | 
| + | 
| +  if ( this.tab ) | 
| +  { | 
| +    this.tab.removeEventListener( "TabClose", this.tab_close_listener ); | 
| +    this.tab_close_listener = null; | 
| +    if ( !this.leave_open ) | 
| +    { | 
| +      this.tabbed_browser.removeTab( this.tab ); | 
| +    } | 
| +    this.tab = null; | 
| +    /* | 
| +     * Kill the map from our associated browser to this object. This is the point at which we can no longer | 
| +     * locate this object with a 'browser' or 'window' object. | 
| +     */ | 
| +    this.parent.notify_close( this ); | 
| +    this.browser = null; | 
| +  } | 
| +  /* | 
| +   * FUTURE: Cancel any pending page load here. | 
| +   */ | 
| +  this.closed = true; | 
| +}; | 
| + | 
| +/** | 
| + * Show the tab by loading a URL target into it. | 
| + */ | 
| +Browser_Tab.prototype._go = function() | 
| +{ | 
| +  if ( !this.parent.request_load( this ) ) | 
| +  { | 
| +    // Should not reach. The caller should be calling available() on the Tabbed_Browser first. | 
| +    throw new Error( "Browser_Tab: may not launch when no Tabbed_Browser is available." ); | 
| +  } | 
| +  try | 
| +  { | 
| +    this.tab = this.tabbed_browser.addTab( this.target ); | 
| +    this.browser = this.tabbed_browser.getBrowserForTab( this.tab ); | 
| +    this.parent.notify_load_begin( this ); | 
| +    this.tab_close_listener = this._user_close_command.bind( this ); | 
| +    this.tab.addEventListener( "TabClose", this.tab_close_listener ); | 
| +  } | 
| +  catch ( e ) | 
| +  { | 
| +    this._argv = [ Browser_Tab.Completion_State.Exception, e ]; | 
| +    Cu.reportError( "Unexpected exception in Browser_Tab._go(): " + e.toString() ); | 
| +    this.end_badly( e ); | 
| +  } | 
| +}; | 
| + | 
| +Browser_Tab.prototype._end = function( argv ) | 
| +{ | 
| +  /* | 
| +   * This check ensures that we only call the finisher once. The browser can send multiple STOP events, for example, | 
| +   * when the user focuses on a tab window by clicking on its tab. Since we set a final state below, checking for a | 
| +   * final state ensures that we act idempotently. | 
| +   * | 
| +   * This check also forestalls a race condition where a request completes and schedules a progress event while we are | 
| +   * closing the object. | 
| +   */ | 
| +  if ( this.completed ) | 
| +    return; | 
| +  /* | 
| +   * This notice back to the parent must happen after the check for being in a final state. Since multiple STOP | 
| +   * events may arrive on a tab (they wouldn't be all for the original document), we send this notice just once, which | 
| +   * means that we need to examine the state in this Browser_Tab instance first. | 
| +   */ | 
| +  this.parent.notify_load_end(); | 
| +  /* | 
| +   * The value of the load action includes the action itself. Because load actions are processed in bulk, so the crawler | 
| +   * needs a way of identifying this action when it lands. In order to do this, we prepend our own 'this' object to | 
| +   * the value array. | 
| +   */ | 
| +  this._argv = argv; | 
| +  this._argv.unshift( this ); | 
| +  this.end_well(); | 
| +}; | 
| + | 
| +/** | 
| + * Stop event handler. It receives only STOP events on the present tab. When that happens, it determines the | 
| + * success status and calls the landing function. | 
| + * | 
| + * Note: This function is also called when the user closes a tab manually. | 
| + * | 
| + * @param stop_status | 
| + *      Status code for success or failure if the argument state is a STOP state. | 
| + */ | 
| +Browser_Tab.prototype.progress_stopped = function( stop_status ) | 
| +{ | 
| +  if ( stop_status == 0 ) | 
| +  { | 
| +    var argv = [ Browser_Tab.Completion_State.Success ]; | 
| +  } | 
| +  else | 
| +  { | 
| +    /** | 
| +     * This argument is an XPCOM 'nsresult' value. It could be examined if the cause of the failure to load needs | 
| +     * to be diagnosed. For example, NS_ERROR_OFFLINE would be useful for suspending operation of the crawler while | 
| +     * internet connectivity comes back. NS_ERROR_MALFORMED_URI would be useful for notifing the user of a typo. | 
| +     */ | 
| +    argv = [ Browser_Tab.Completion_State.No_Success, stop_status ]; | 
| +  } | 
| +  this._end( argv ); | 
| +}; | 
| + | 
| +/** | 
| + * "TabClose" event handler: ends the load action with the User_Close state. The close might or might not have | 
| + * interrupted a pending transfer. The handler is registered on the tab in _go() and removed again in close(). | 
| + * @private | 
| + */ | 
| +Browser_Tab.prototype._user_close_command = function() | 
| +{ | 
| +  this._end( [ Browser_Tab.Completion_State.User_Close ] ); | 
| +}; | 
| + | 
| +/** | 
| + * External command to stop loading, used to implement time-out. Ends the action with the External_Cancel state. | 
| + */ | 
| +Browser_Tab.prototype.stop = function() | 
| +{ | 
| +  this._end( [ Browser_Tab.Completion_State.External_Cancel ] ); | 
| +}; | 
| + | 
| +exports.Tabbed_Browser = Tabbed_Browser; | 
| +exports.Browser_Tab = Browser_Tab; | 
|  |