Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/browser.js

Issue 10233013: Crawler, second version (Closed)
Patch Set: Created April 12, 2013, 1:38 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « lib/bootstrap_xpcom.js ('k') | lib/client.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/browser.js
===================================================================
new file mode 100644
--- /dev/null
+++ b/lib/browser.js
@@ -0,0 +1,468 @@
+let {Logger} = require( "logger" );
+let {Action} = require( "action" );
+
+//-------------------------------------------------------
+// Tabbed_Browser
+//-------------------------------------------------------
+/**
+ * Wrapper around the tabbed browser of a single browser window, i.e. the object that the window exposes as 'gBrowser'.
+ * It caps the number of page-load requests its child tabs may have pending at one time and forwards STOP progress
+ * events to the Browser_Tab that owns the browser on which the event arrived.
+ *
+ * @param {Window} window
+ *    Browser window whose 'gBrowser' member is the tabbed browser to wrap.
+ * @param {Number} max_requests
+ *    The maximum number of simultaneous requests this object may have.
+ * @throws {Error}
+ *    If 'window.gBrowser' is falsy.
+ * @constructor
+ */
+var Tabbed_Browser = function( window, max_requests )
+{
+  var g_browser = window.gBrowser;
+
+  /**
+   * Browser window from which the tabbed browser was obtained.
+   * @type {Window}
+   */
+  this.window = window;
+
+  /**
+   * The tabbed browser object; it holds the individual tabbed browser panes.
+   */
+  this.tabbed_browser = g_browser;
+  if ( !g_browser )
+  {
+    throw new Error( "Tabbed_Browser: argument 'window' has null member 'gBrowser'" );
+  }
+
+  /**
+   * Number of request slots currently handed out. Incremented by request_load() and decremented by
+   * notify_load_end().
+   * @type {Number}
+   */
+  this.n_requests = 0;
+
+  /**
+   * Upper bound for 'n_requests'.
+   * @type {Number}
+   */
+  this.max_requests = max_requests;
+
+  /**
+   * Map from a tab's browser object to a record of the form { child: Browser_Tab }. Entries are added by
+   * notify_load_begin() and removed by notify_close() and close(). The progress handler uses this map to find the
+   * child that an event belongs to.
+   * @type {Map}
+   */
+  this.map_browser_to_child = new Map();
+
+  /**
+   * Children that were granted a slot by request_load() but have not yet reported notify_load_begin().
+   * @type {Set}
+   */
+  this.allocated_not_loaded = new Set();
+
+  /*
+   * The listener object is kept so that close() can unregister exactly the object registered here.
+   */
+  var on_state_change = this._progress.bind( this );
+  this.listener = { onStateChange: on_state_change };
+  this.tabbed_browser.addTabsProgressListener( this.listener );
+
+  this.logger = new Logger( "Tabbed_Browser" );
+};
+
+/**
+ * Release the resources held by this object: unregister the progress listener, then close every child tab that is
+ * still present in the browser-to-child map and drop it from the map. The children are closed as well because they
+ * cannot receive progress events once the listener is gone.
+ */
+Tabbed_Browser.prototype.close = function()
+{
+  var log = this.logger.make_log( "close" );
+  log( "Tabbed_Browser.close", false );
+  if ( this.listener )
+  {
+    this.tabbed_browser.removeTabsProgressListener( this.listener );
+    this.listener = null;
+  }
+
+  /*
+   * Collect the entries first and work from the copy. Closing a child calls back into notify_close(), which removes
+   * the child's entry from the map, so the map itself is changing while the children are being closed.
+   */
+  let entries = [];
+  let pair = null;
+  for ( pair of this.map_browser_to_child )
+  {
+    entries.push( pair );
+  }
+  entries.forEach( function( entry )
+  {
+    let [ browser, record ] = entry;
+    record.child.close();
+    // Normally a no-op because notify_close() already removed the entry; covers children that were closed earlier.
+    this.map_browser_to_child.delete( browser );
+  }, this );
+};
+
+/**
+ * Predicate: "Is there an open request slot?"
+ *
+ * @return {boolean}
+ *    True when the number of allocated requests is below 'max_requests'.
+ */
+Tabbed_Browser.prototype.available = function()
+{
+  var allocated = this.n_requests;
+  var limit = this.max_requests;
+  return allocated < limit;
+};
+
+/**
+ * Predicate: "Are there no allocated requests?"
+ * <p/>
+ * This tests the request counter, not the set of tabs. A tab stays in the browser-to-child map after its load has
+ * ended and until it is closed, so tabs may still be open while this returns true.
+ *
+ * @return {boolean}
+ *    True when no request slot is currently allocated.
+ */
+Tabbed_Browser.prototype.quiescent = function()
+{
+  return this.n_requests === 0;
+};
+
+/**
+ * Create a Browser_Tab that belongs to this tabbed browser. All arguments are handed to the Browser_Tab constructor
+ * unchanged, with this object as the parent.
+ *
+ * @param {string} target
+ *    The URL the tab will browse to.
+ * @param {Boolean} leave_open
+ *    Leave the tab open in the browser after closing the Browser_Tab object.
+ * @param {function} finisher
+ * @param {function} catcher
+ * @return {Browser_Tab}
+ */
+Tabbed_Browser.prototype.make_tab = function( target, leave_open, finisher, catcher )
+{
+  var child = new Browser_Tab( this, target, leave_open, finisher, catcher );
+  return child;
+};
+
+/**
+ * Request one of the available request slots. If a slot is free, it is allocated to the child and the child is
+ * recorded as "allocated but not yet loading" until it calls notify_load_begin().
+ * <p/>
+ * HAZARD: The slot stays allocated until notify_load_end() is called. A child that obtains a slot and never reaches
+ * that call leaks the slot.
+ *
+ * @param child
+ *    The child tab asking for the slot.
+ * @return {Boolean}
+ *    True if a slot was allocated, false if none was available.
+ */
+Tabbed_Browser.prototype.request_load = function( child )
+{
+  if ( this.available() )
+  {
+    this.n_requests += 1;
+    this.allocated_not_loaded.add( child );
+    return true;
+  }
+  return false;
+};
+
+/**
+ * Notification that a child tab has started loading a page. The child is moved out of the set of allocated-but-not-
+ * loaded requests and registered in the browser-to-child map under 'child.browser', so that progress events for
+ * that browser can be routed to it.
+ *
+ * @param {Browser_Tab} child
+ *    A child that previously obtained a slot through request_load() and whose 'browser' member is already set.
+ * @throws {Error}
+ *    If the child is not in the set of allocated-but-not-loaded requests.
+ */
+Tabbed_Browser.prototype.notify_load_begin = function( child )
+{
+  // Set.delete() reports whether the element was present, so a separate has() test is not needed.
+  if ( !this.allocated_not_loaded.delete( child ) )
+  {
+    var message = "notify_load_begin: child not found";
+    Cu.reportError( message );
+    throw new Error( message );
+  }
+  this.map_browser_to_child.set( child.browser, { child: child } );
+};
+
+/**
+ * Notification that a child tab has finished with its request. This frees one request slot.
+ * <p/>
+ * The child must only call this function once per allocated slot, since it acts as a resource deallocator.
+ *
+ * @throws {Error}
+ *    If no request slot is currently allocated.
+ */
+Tabbed_Browser.prototype.notify_load_end = function()
+{
+  if ( this.n_requests <= 0 )
+  {
+    throw new Error( "Tabbed_Browser.notify_load_end: n_requests <= 0" );
+  }
+  --this.n_requests;
+};
+
+/**
+ * Notification that a child tab is closing. The child's entry is removed from the browser-to-child map here and not
+ * when its load ends. Keeping the entry until the tab is closed allows us to handle events that occur after the
+ * document has loaded, which typically arise from scripts on the page.
+ *
+ * @param child
+ *    The closing child; its 'browser' member must still be set, because it is the key into the map.
+ */
+Tabbed_Browser.prototype.notify_close = function( child )
+{
+  // Map.delete() returns false when the key was absent.
+  if ( !this.map_browser_to_child.delete( child.browser ) )
+  {
+    // If we're getting this notice, it really should be in our map
+    Cu.reportError( "Child browser not found in map during 'notify_close()'" );
+  }
+};
+
+//noinspection JSUnusedLocalSymbols
+/**
+ * Progress event handler, registered as 'onStateChange' on the tabbed browser. It reacts only to STOP states on
+ * browsers that belong to one of our child tabs; for those it passes the stop status on to the child.
+ *
+ * @param {*} browser
+ *    The browser object on which the event occurred; used as the key into the browser-to-child map.
+ * @param {nsIWebProgress} controller
+ *    The control object for progress monitoring that dispatches the event. Unused.
+ * @param {nsIRequest} browse_request
+ *    The request object generated by the call to addTab(), which loads a page. Only its name is logged.
+ * @param state
+ *    The progress state, represented as flags.
+ * @param stop_status
+ *    Status code for success or failure if the argument state is a STOP state.
+ */
+Tabbed_Browser.prototype._progress = function( browser, controller, browse_request, state, stop_status )
+{
+  /*
+   * Two kinds of events are ignored.
+   *
+   * First, everything that is not a STOP state. We're not tracking redirects, which is one of the possible progress
+   * states. We may want to in the future, in case redirect behavior is involved with ad delivery in some way.
+   * A warning for that case: traces show that the START message is delivered to this function _before_
+   * 'notify_load_begin' is called, so the event comes in before 'map_browser_to_child' can possibly have the
+   * 'browser' element of a new tab as a key. Trap any other progress state here only after thoroughly tracing the
+   * event sequence to determine the actual behavior.
+   *
+   * Second, events for browsers that are not in our map. This handler receives events for all the tabs present in
+   * the tabbrowser element, even ones that we didn't add ourselves. It's not an error to receive such events.
+   */
+  //noinspection JSBitwiseOperatorUsage
+  var is_stop = state & Ci.nsIWebProgressListener.STATE_STOP;
+  if ( !is_stop || !this.map_browser_to_child.has( browser ) )
+  {
+    return;
+  }
+
+  var record = this.map_browser_to_child.get( browser );
+  record.child.progress_stopped( stop_status );
+
+  var log = this.logger.make_log( "_progress" );
+  log( "request name = " + browse_request.name, false );
+};
+
+//-------------------------------------------------------
+// Browser_Tab
+//-------------------------------------------------------
+/**
+ * A single browser tab that can asynchronously load a web page.
+ *
+ * There's a small but significant conflation of two concerns in this class. The first is the browser tab as an entity,
+ * a member of some tab set. The second is the browser tab as the action of loading a target site into the tab. The
+ * present implementation simply combines these two. The combination isn't quite complicated enough to make them worth
+ * separating at the present time. If, however, at some point there are multiple actions (different environments for
+ * loading, for example) on a single tab, it may be worth the effort to split them out.
+ *
+ * As an asynchronous action, tab load always completes ordinarily unless there is an internal exception. During
+ * development, exceptions occurred when there was a mismatch between this code and browser behavior. While that's
+ * (mostly?) over, browser behavior may yet change. We reserve exceptional completion to indicate this kind of problem.
+ *
+ * Ordinary completion of the tab load action, thus, incorporates both successful page loads as well as unsuccessful.
+ * Once the action has completed, the value of the action indicates the state of the page load.
+ * - Successful load. The page completed loading, as indicated by a progress listener signal.
+ * - Unsuccessful load. The progress listener stopped but with some error code rather than the success code.
+ * - User closed tab. The user presses the close button on the tab.
+ * - External cancelled load. The stop() method was called. This is how the crawler handles time-out.
+ *
+ * @constructor
+ * @extends {Action.Asynchronous_Action}
+ * @param {Tabbed_Browser} parent
+ * @param {string} target
+ * @param {boolean} [leave_open=false]
+ *    Leave the tab open in the browser after closing the present object
+ * @param {function} finisher
+ * @param {function} catcher
+ */
+var Browser_Tab = function( parent, target, leave_open, finisher, catcher )
+{
+  Action.Asynchronous_Action.init.call( this, finisher, catcher );
+
+  /**
+   * The parent tabbed browser in whose tab set this tab is a member.
+   * @type {Tabbed_Browser}
+   */
+  this.parent = parent;
+
+  /**
+   * The target URL to browse to.
+   * @type {string}
+   */
+  this.target = target;
+
+  /**
+   * Leave the tab open in the browser after the crawler exits. The reason to do this is to allow manual inspection
+   * of the window as the crawler loaded it.
+   * <p/>
+   * It's necessary to call 'close()' on any instance of this object in order to ensure event handlers are released.
+   * This is true whether or not the tab remains open afterwards.
+   * <p/>
+   * 'leave_open' is the third parameter, so the default has to apply whenever it was omitted or passed as
+   * 'undefined'. Any other value is stored as given.
+   *
+   * @type {Boolean}
+   */
+  this.leave_open = (leave_open === undefined) ? false : leave_open;
+
+  /**
+   * Guard flag for closing the object.
+   * @type {boolean}
+   */
+  this.closed = false;
+
+  /**
+   * The parent's tabbed browser object, which can hold multiple individual tabbed browser panes.
+   */
+  this.tabbed_browser = this.parent.tabbed_browser;
+
+  /**
+   * Our tab within the tabbed browser. This is the "external" view of the browser pane, the one that allows us to
+   * control loading. The tab must have a URL associated with it, so it is not created at the outset but only when
+   * the action is launched.
+   * <p/>
+   * FUTURE: Might it be useful to load the tab with an empty page but descriptive title at construction time?
+   */
+  this.tab = null;
+
+  /**
+   * The browser object that the tabbed browser associates with our tab. It is set together with 'tab' when the
+   * action is launched, and it is the key under which the parent registers this object for progress events.
+   * @type {*}
+   */
+  this.browser = null;
+
+  /**
+   * The bound "TabClose" event handler. It is set when the tab is created and kept so that close() can remove the
+   * same function object again.
+   * @type {?function}
+   */
+  this.tab_close_listener = null;
+
+  /**
+   * Initialize the action value to a not-completed state, in case the action is aborted prematurely somehow.
+   */
+  this._argv = [ Browser_Tab.Completion_State.Not_Completed ];
+};
+Browser_Tab.prototype = new Action.Asynchronous_Action();
+
+/**
+ * Completion states of a tab load action. One of these values is the first element of the value array that a
+ * Browser_Tab builds for its action.
+ */
+Browser_Tab.Completion_State = {
+ Not_Completed: 0,     // Initial value set by the constructor.
+ Exception: 1,         // Launching the tab threw; the exception is stored after this value.
+ Success: 2,           // A STOP event arrived with status 0.
+ No_Success: 3,        // A STOP event arrived with a nonzero status; the status is stored after this value.
+ User_Close: 4,        // The "TabClose" event handler fired.
+ External_Cancel: 5    // stop() was called.
+};
+
+/**
+ * Destroy the host resources allocated by this object: the "TabClose" listener, the tab itself (unless 'leave_open'
+ * is set) and the registration with the parent. Calling close() again on a closed object does nothing.
+ */
+Browser_Tab.prototype.close = function()
+{
+  if ( !this.closed )
+  {
+    var tab = this.tab;
+    if ( tab )
+    {
+      // The listener goes first so that removing the tab below is not reported back to us as a user close.
+      tab.removeEventListener( "TabClose", this.tab_close_listener );
+      this.tab_close_listener = null;
+      if ( !this.leave_open )
+      {
+        this.tabbed_browser.removeTab( tab );
+      }
+      this.tab = null;
+      /*
+       * Kill the map from our associated browser to this object. This is the point at which we can no longer
+       * locate this object with a 'browser' or 'window' object. The parent needs 'this.browser' as the key, so it is
+       * cleared only afterwards.
+       */
+      this.parent.notify_close( this );
+      this.browser = null;
+    }
+    /*
+     * FUTURE: Cancel any pending page load here.
+     */
+    this.closed = true;
+  }
+};
+
+/**
+ * Launch the action: obtain a request slot from the parent, then show the tab by loading the target URL into it and
+ * register for progress and "TabClose" events.
+ * <p/>
+ * If creating or registering the tab throws, the exception is reported, the request slot is given back to the parent
+ * and the action ends badly with the exception.
+ *
+ * @throws {Error}
+ *    If the parent has no request slot available.
+ */
+Browser_Tab.prototype._go = function()
+{
+  if ( !this.parent.request_load( this ) )
+  {
+    // Should not reach. The caller should be calling available() on the Tabbed_Browser first.
+    throw new Error( "Browser_Tab: may not launch when no Tabbed_Browser is available." );
+  }
+  try
+  {
+    this.tab = this.tabbed_browser.addTab( this.target );
+    this.browser = this.tabbed_browser.getBrowserForTab( this.tab );
+    this.parent.notify_load_begin( this );
+    this.tab_close_listener = this._user_close_command.bind( this );
+    this.tab.addEventListener( "TabClose", this.tab_close_listener );
+  }
+  catch ( e )
+  {
+    this._argv = [ Browser_Tab.Completion_State.Exception, e ];
+    // String() also copes with a thrown 'null' or 'undefined', where e.toString() would itself throw.
+    Cu.reportError( "Unexpected exception in Browser_Tab._go(): " + String( e ) );
+    /*
+     * Give back the request slot obtained above; otherwise every failed launch permanently reduces the number of
+     * requests the parent can hand out. The delete() is a no-op if the failure happened after
+     * notify_load_begin() had already taken us out of the set. This happens before end_badly(), so the slot is
+     * already free by the time end_badly() runs.
+     *
+     * NOTE(review): this assumes end_badly() marks the action as completed, so that a later _end() returns early
+     * instead of releasing the slot a second time - confirm against Action.Asynchronous_Action.
+     */
+    this.parent.allocated_not_loaded.delete( this );
+    this.parent.notify_load_end();
+    this.end_badly( e );
+  }
+};
+
+/**
+ * Common completion path for the load action. On the first call it releases the request slot held at the parent,
+ * stores the action value and ends the action well. Calls made after the action has completed do nothing.
+ *
+ * @param {Array} argv
+ *    The value of the action; the callers in this file pass a Completion_State first, optionally followed by
+ *    details. The array is modified in place: this object is inserted as its first element.
+ */
+Browser_Tab.prototype._end = function( argv )
+{
+  /*
+   * Only the first call may have any effect, so that the finisher is called just once. The browser can send
+   * multiple STOP events, for example, when the user focuses on a tab window by clicking on its tab. Testing for
+   * the completed state makes this function idempotent.
+   *
+   * The same test also forestalls a race condition where a request completes and schedules a progress event while
+   * we are closing the object.
+   */
+  if ( !this.completed )
+  {
+    /*
+     * The notice back to the parent must come after the test above. Multiple STOP events may arrive on a tab
+     * (they wouldn't all be for the original document), but the request slot may be released just once.
+     */
+    this.parent.notify_load_end();
+    /*
+     * The value of the load action includes the action itself. Load actions are processed in bulk, so the crawler
+     * needs a way of identifying this action when it lands. For that purpose our own 'this' object is prepended to
+     * the value array.
+     */
+    argv.unshift( this );
+    this._argv = argv;
+    this.end_well();
+  }
+};
+
+/**
+ * Stop event handler. The parent calls it with the status of a STOP event that arrived for our browser. It
+ * translates the status into a completion state and ends the action with it.
+ *
+ * Note: According to the original author, this function is also called when the user closes a tab manually.
+ * That is not verifiable from this file.
+ *
+ * @param stop_status
+ *    Status code for success or failure; zero is taken as success.
+ */
+Browser_Tab.prototype.progress_stopped = function( stop_status )
+{
+  var State = Browser_Tab.Completion_State;
+  /*
+   * A nonzero status is an XPCOM 'nsresult' value and is passed along in the action value. It could be examined if
+   * the cause of the failure to load needs to be diagnosed. For example, NS_ERROR_OFFLINE would be useful for
+   * suspending operation of the crawler while internet connectivity comes back. NS_ERROR_MALFORMED_URI would be
+   * useful for notifying the user of a typo.
+   */
+  var argv = ( stop_status == 0 )
+    ? [ State.Success ]
+    : [ State.No_Success, stop_status ];
+  this._end( argv );
+};
+
+/**
+ * Handler for the "TabClose" event, i.e. the tab was closed by user gesture. This might or might not interrupt a
+ * pending transfer. The action ends with the User_Close state.
+ *
+ * @private
+ */
+Browser_Tab.prototype._user_close_command = function()
+{
+  var argv = [ Browser_Tab.Completion_State.User_Close ];
+  this._end( argv );
+};
+
+/**
+ * External command to stop loading. Used to implement time-out. The action ends with the External_Cancel state.
+ */
+Browser_Tab.prototype.stop = function()
+{
+  var argv = [ Browser_Tab.Completion_State.External_Cancel ];
+  this._end( argv );
+};
+
+exports.Tabbed_Browser = Tabbed_Browser;
+exports.Browser_Tab = Browser_Tab;
« no previous file with comments | « lib/bootstrap_xpcom.js ('k') | lib/client.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld