Index: lib/crawler.js |
=================================================================== |
--- a/lib/crawler.js |
+++ b/lib/crawler.js |
@@ -1,132 +1,664 @@ |
-Cu.import("resource://gre/modules/Services.jsm"); |
+Cu.import( "resource://gre/modules/Services.jsm" ); |
-function abprequire(module) |
+function abprequire( module ) |
{ |
let result = {}; |
result.wrappedJSObject = result; |
- Services.obs.notifyObservers(result, "adblockplus-require", module); |
+ Services.obs.notifyObservers( result, "adblockplus-require", module ); |
return result.exports; |
} |
-let {Storage} = require("storage"); |
-let {Client} = require("client"); |
+let { Action } = require( "action" ); |
+let { Browser_Tab, Tabbed_Browser } = require( "browser" ); |
+let { Observation } = require( "instruction" ); |
+let { Encoding } = require( "encoding" ); |
+let { Logger } = require( "logger" ); |
-let {Policy} = abprequire("contentPolicy"); |
-let {Filter} = abprequire("filterClasses"); |
-let {Utils} = abprequire("utils"); |
+let { Policy } = abprequire( "contentPolicy" ); |
+let { RequestNotifier } = abprequire( "requestNotifier" ); |
+let { Filter } = abprequire( "filterClasses" ); |
+let { Utils } = abprequire( "utils" ); |
-let origProcessNode = Policy.processNode; |
+//------------------------------------------------------- |
+// Shim |
+//------------------------------------------------------- |
+/** |
+ * Manager for shim replacement of an external function. |
+ * <p/> |
+ * Since there's no lvalue reference type in JavaScript (non-primitives are all reference types, but they are rvalue |
+ * references), the arguments here provide a substitute. The reference is the expression 'object[ property ]'. |
+ * |
+ * @param {Object} original_object |
+ * The object that holds the function to be shimmed as one of its properties. |
+ * @param {string} original_property |
+ * The name of the property of 'original_object' whose value is the function to be surrounded by the shim. |
+ * @constructor |
+ */ |
+var Shim = function( original_object, original_property ) |
+{ |
+ /** |
+ * @type {Object} |
+ */ |
+ this.original_object = original_object; |
+ /** |
+ * @type {String} |
+ */ |
+ this.original_property = original_property; |
-let siteTabs; |
-let currentTabs; |
+ /** |
+ * The original function as it exists at the time of instantiation. This means that generally the Shim instance |
+ * should be created as soon as possible, such as in module initialization. |
+ */ |
+ this.original_function = original_object[ original_property ]; |
+}; |
-function processNode(wnd, node, contentType, location, collapse) |
+/** |
+ * @return {boolean} |
+ */ |
+Shim.prototype.is_original = function() |
{ |
- let result = origProcessNode.apply(this, arguments); |
- let url = (contentType === Policy.type.ELEMHIDE) ? location.text : |
- location.spec; |
+ return (this.original_object[ this.original_property ] === this.original_function); |
+}; |
- let topWindow = wnd.top; |
- if (!topWindow.document) |
+/** |
+ * Install a replacement for the original function. |
+ * |
+ * @param {Function} replacer |
+ * The replacement function transformer. Takes the original function as an argument and returns its replacement. |
+ * @return {Function} |
+ * The original function, as captured when this Shim instance was constructed. |
+ * @throws {string} |
+ * If no replacer is supplied, or if the property no longer holds the original function (already replaced). |
+ */ |
+Shim.prototype.replace = function( replacer ) |
+{ |
+ if ( !replacer ) |
+ throw "Must supply a function transformer to supply a replacement function."; |
+ if ( !this.is_original() ) |
+ throw "This version of Shim does not support multiple replacement."; |
+ this.original_object[ this.original_property ] = replacer( this.original_function ); |
+ return this.original_function; |
+}; |
+ |
+/** |
+ * Reset the original function to a non-replaced state. |
+ * <p/> |
+ * May be called correctly even if the original has never been replaced. |
+ */ |
+Shim.prototype.reset = function() |
+{ |
+ this.original_object[ this.original_property ] = this.original_function; |
+}; |
+ |
+/** |
+ * Close out the shim and release resources. |
+ */ |
+Shim.prototype.close = function() |
+{ |
+ this.reset(); |
+ /* |
+ * At present, this class does not use external resources that aren't dealt with by 'reset()'. That could change, |
+ * however, and so we use close() as the substitute-destructor and reset() for ordinary use. |
+ */ |
+}; |
+ |
+/** |
+ * Shim instance for 'processNode'. As of this writing it's the only function in ABP we're shimming. |
+ */ |
+var process_node_shim = new Shim( Policy, "processNode" ); |
+ |
+//------------------------------------------------------- |
+// Crawler |
+//------------------------------------------------------- |
+/** |
+ * Constructor for a single crawl session. The crawler iterates through each instruction, loading its URL in a tab, |
+ * running the hooks present in the processor, and storing results accordingly. |
+ * |
+ * @param {Instruction_Set} instructions |
+ * Instruction generator yields a sequence of tuples: URL to crawl, a processor, and storage. |
+ * @param {*} outputs |
+ * @param {Window} window |
+ * The top window we're operating in. Must be present as an argument because the module context this class is |
+ * defined in does not have a window. (Or at least should not be relied upon.) |
+ * @param {number} time_limit |
+ * The maximum duration that we will allow a page to try to load. |
+ * @param {boolean} leave_open |
+ * @param {number} number_of_tabs |
+ */ |
+var Crawler = function( instructions, outputs, window, time_limit, leave_open, number_of_tabs ) |
+{ |
+ /** |
+ * @type {Instruction_Set} |
+ */ |
+ this.instructions = instructions; |
+ |
+ this.outputs = outputs; |
+ |
+ /** |
+ * Browser window in which to open tabs. Required because, as a module, we don't have a 'Window' object available. |
+ * @type {Window} |
+ */ |
+ this.window = window; |
+ |
+ this.time_limit = time_limit; |
+ |
+ this.leave_open = leave_open; |
+ |
+ if ( number_of_tabs <= 0 ) |
{ |
- Cu.reportError("No document associated with the node's top window"); |
- return result; |
+ /* |
+ * Defensive. The caller should have already validated this argument. |
+ */ |
+ number_of_tabs = 1; |
} |
- let tabbrowser = Utils.getChromeWindow(topWindow).gBrowser; |
- if (!tabbrowser) |
+ /** |
+ * Progress object. It's simple enough not to need its own class. Just override the notice() function to receive |
+ * progress notices. |
+ */ |
+ this.progress = { |
+ active: 0, |
+ completed: 0, |
+ total: instructions.size, |
+ notice: function() |
+ { |
+ }, |
+ status: function() |
+ { |
+ } |
+ }; |
+ |
+ if ( !process_node_shim.is_original() ) |
+ throw "Function 'processNode' is already shimmed. We may not insert a second one."; |
+ process_node_shim.replace( |
+ function( original ) |
+ { |
+ return this.node_action.bind( this, original ); |
+ }.bind( this ) |
+ ); |
+ |
+ /** |
+ * Logging service. |
+ * @type {Logger} |
+ */ |
+ this.logger = new Logger( "Crawler" ); |
+ |
+ this.tabbed_browser = new Tabbed_Browser( this.window, number_of_tabs ); |
+ |
+ /** |
+ * Closed flag. Needed to terminate the generator if this object is closed before the generator stops. |
+ * @type {Boolean} |
+ */ |
+ this.closed = false; |
+ |
+ /** |
+ * The object responsible for gaining access to the call stream for individual entries within each node. This is |
+ * one of two hooks into ABP itself, the other being the shim for 'processNode'. |
+ * |
+ * @type {RequestNotifier} |
+ */ |
+ this.requestNotifier = new RequestNotifier( null, this.node_entry_action.bind( this ) ); |
+ |
+ /** |
+ * The current nodes that are active in a call to 'node_action'. In ordinary cases, this map has at most the |
+ * maximum number of concurrent loads. |
+ * @type {WeakMap} |
+ */ |
+ this.current_nodes = new WeakMap(); |
+}; |
+exports.Crawler = Crawler; |
+ |
+/** |
+ * Representation of this crawler used for JSON serialization. |
+ * <p/> |
+ * NOTE(review): the constructor visible in this file assigns 'this.outputs' but never 'this.storage', so the 'storage' |
+ * member looks like it is always undefined here. Confirm whether 'this.outputs' was intended. |
+ */ |
+Crawler.prototype.toJSON = function() |
+{ |
+ return { |
+ instructions: this.instructions, |
+ storage: this.storage |
+ }; |
+}; |
+ |
+/** |
+ * Close the present instance. This object holds browser resources because of the browser tabs it holds open. |
+ */ |
+Crawler.prototype.close = function() |
+{ |
+ for ( let j = 0 ; j < this.outputs.length ; ++j ) |
{ |
- Cu.reportError("Unable to get a tabbrowser reference"); |
- return result; |
+ this.outputs[j].storage.close(); |
} |
+ if ( this.tabbed_browser ) this.tabbed_browser.close(); |
+ if ( this.requestNotifier ) this.requestNotifier.shutdown(); |
+ process_node_shim.reset(); |
+ this.closed = true; |
+}; |
- let browser = tabbrowser.getBrowserForDocument(topWindow.document); |
- if (!browser) |
+/** |
+ * The output encoding for the session as a whole. |
+ * @type {*} |
+ */ |
+Crawler.prototype.__encoding__ = Encoding.as_object( [ |
+ // prelude |
+ Encoding.immediate_fields( ["time_start", "instructions"] ), |
+ // observation |
+ Encoding.field( "trials", Encoding.array_stream() ), |
+ // postlude |
+ Encoding.immediate_fields( ["time_finish", "termination"] ) |
+] ); |
+ |
+/** |
+ * Task generator for the crawler. Launches one tab per instruction while tabs are available, then waits for the |
+ * remaining tabs to finish. Each 'yield' hands control back to the enclosing task; the value sent back into the |
+ * generator is treated as a cancellation flag. |
+ * |
+ * @param {Function} pause |
+ * Called, followed by a yield, whenever no tab is available or tabs are still open after the last instruction. |
+ * @param {Function} resume |
+ * Stored as 'this.resume' and handed to the join action of every tab that is launched. |
+ */ |
+Crawler.prototype.generator = function( pause, resume ) |
+{ |
+ /* |
+ * A crawler object represents a single run of the crawler. Thus the pause and resume function act like object-scope |
+ * variables. |
+ */ |
+ this.pause = pause; |
+ this.resume = resume; |
+ var log = this.logger.make_log( "task" ); |
+ var tab = null; |
+ |
+ var runaway_counter = 0; |
+ |
+ try |
{ |
- Cu.reportError("Unable to get browser for the tab"); |
- return result; |
+ /* |
+ * Preparation code. Ensure that every initialization here can be reversed in the 'finally' clause whether |
+ * or not it executed, in case some initialization throws an exception. |
+ */ |
+ this.time_start = Logger.timestamp(); |
+ |
+ var multiple = new Encoding.Multiple_Format(); |
+ for ( let j = 0 ; j < this.outputs.length ; ++j ) |
+ { |
+ let output = this.outputs[j]; |
+ let formatter = new Encoding[ output.encode ]( output.storage.writer() ); |
+ multiple.add( formatter ); |
+ } |
+ this.encoder = new Encoding.Format_stream( multiple ); |
+ |
+ this.encoder.write( this ); |
+ this.encoder.sequence_start(); |
+ |
+ let gen = this.instructions.generator(); |
+ let instruction = null; // Avoid spurious IDEA warning |
+ for ( instruction of gen ) |
+ { |
+ if ( this.closed ) |
+ { |
+ /* |
+ * Defensive. We only arrive here if some outside code has called our close() method and did not also |
+ * order our cancellation. Regardless, we're done making new tabs. |
+ */ |
+ Cu.reportError( "Crawler closed but its enclosing task not cancelled." ); |
+ break; |
+ } |
+ |
+ if ( this.tabbed_browser.available() ) |
+ { |
+ /* |
+ * Since we'll need a variety of browser-tab behaviors, we'll need to change this factory call |
+ * to something dependent upon the instruction. |
+ */ |
+ tab = this.tabbed_browser.make_tab( instruction.target, this.leave_open, this._deferred_load_finisher.bind( this ), null ); |
+ tab.instruction = instruction; |
+ instruction.begin(); |
+ let join = new Action.Join_Timeout( tab, this.time_limit, this._join_finisher.bind( this ) ); |
+ join.go( tab, resume ); |
+ /* |
+ * The return value of load is an asynchronous action that could be combined with others, if the |
+ * instruction dictates. There's no hook for this yet, although that's the reason we do not immediately |
+ * execute on calling load. |
+ */ |
+ tab.go(); |
+ ++this.progress.active; |
+ this.progress.notice(); |
+ } |
+ if ( !this.tabbed_browser.available() ) |
+ { |
+ pause(); |
+ } |
+ |
+ var cancelled = yield false; |
+ if ( cancelled ) |
+ { |
+ break; |
+ } |
+ } |
+ /* |
+ * At this point in the code, we have launched all the instructions. If we're using more than one tab, |
+ * we'll generally have open tabs still. We need to pause until we have no more tabs left open. |
+ */ |
+ if ( !cancelled ) |
+ { |
+ while ( !this.tabbed_browser.quiescent() ) |
+ { |
+ // Must yield after pause() for it to take effect |
+ pause(); |
+ cancelled = yield false; |
+ if ( cancelled ) |
+ { |
+ break; |
+ } |
+ ++runaway_counter; |
+ if ( runaway_counter > 100 ) |
+ { |
+ Cu.reportError( "Runaway pause loop. counter = " + runaway_counter ); |
+ break; |
+ } |
+ } |
+ } |
+ |
+ /* |
+ * OK. Finally done. |
+ */ |
+ this.termination = cancelled ? "Cancelled" : "Success"; |
+ this.progress.status( cancelled ? "Cancelled" : "Done" ); |
} |
+ catch ( e if e instanceof Error ) |
+ { |
+ log( e.toString() + "\n\n" + e.stack ); |
+ this.termination = "Error"; |
+ } |
+ catch ( e ) |
+ { |
+ log( e.toString() + " - type: " + Object.prototype.toString.call( e ) ); |
+ this.termination = "Unknown exception"; |
+ } |
+ finally |
+ { |
+ /* |
+ * Finish writing the output before closing ourselves down. |
+ */ |
+ this.time_finish = Logger.timestamp(); |
+ try |
+ { |
+ /* |
+ * The encoder does not exist if an exception was thrown before it was constructed. In that case there is no |
+ * sequence to stop, and calling through 'undefined' would throw from inside this 'finally' clause. |
+ */ |
+ if ( this.encoder ) this.encoder.sequence_stop(); |
+ } |
- let site = siteTabs.get(browser); |
- let filtered = !result; |
- Storage.write([url, site, filtered]); |
+ finally |
+ { |
+ /* |
+ * If everything goes right, this cleanup should not be necessary, as tab instances are closed as they are used. |
+ * Nonetheless, if there's an error and a landing function is not called, this line ensures that all the tabs |
+ * are properly destroyed. |
+ * |
+ * This runs in a nested 'finally' so that a failure while stopping the output sequence cannot skip it. |
+ */ |
+ if ( tab ) tab.close(); |
+ // Removes the ABP shim, amongst other things. |
+ this.close(); |
+ } |
+ } |
+}; |
+ |
+/** |
+ * Landing function for the asynchronous action of loading a tab. For some reason, Firefox is delivering the |
+ * STATE_STOP progress message before the last ABP filter is being run. It seems that it's firing events immediately, |
+ * once it knows the request has finished its HTTP transfer, but before it has fully finished loading the page as a |
+ * whole (the DOM, layout, etc.). Hence we let the browser finish its work in the current thread and run the actual |
+ * load-end action afterwards. |
+ * <p/> |
+ * The implementation of this function allows it to be defined without arguments. That's not what actually happens. |
+ * Since this function is just a scheduling pass-through, it uses 'arguments' to pass all arguments, no matter what they |
+ * are. (And no matter how they change over time.) |
+ */ |
+Crawler.prototype._deferred_load_finisher = function() |
+{ |
+ /* |
+ * The first argument is the 'this' object when 'apply' runs. The second argument is the 'this' object when |
+ * 'this._load_finisher' runs. The third is the argument list that 'apply' passes on to it. |
+ */ |
+ Action.dispatch( Function.prototype.apply.bind( this._load_finisher, this, arguments ) ); |
+}; |
+ |
+/** |
+ * Since we're done loading (the cause doesn't matter), we order the instruction to write out its results, be they |
+ * successful or any of the varieties of unsuccessful. |
+ * <p/> |
+ * Afterwards the instruction is sent to the encoder, the tab is closed, the progress counters are updated, and the |
+ * crawl task is resumed. |
+ * |
+ * @param tab |
+ * The tab whose load has finished. Its 'instruction' member is the instruction that gets reported. |
+ * @param completion_state |
+ * Compared against the members of Browser_Tab.Completion_State to select how the instruction is ended. |
+ * @param error_code |
+ * Reported as the nsresult in the abort message for an unsuccessful load. |
+ */ |
+Crawler.prototype._load_finisher = function( tab, completion_state, error_code ) |
+{ |
+ var instruction = tab.instruction; |
+ if ( tab.completed ) |
+ { |
+ if ( tab.completed_well ) |
+ { |
+ switch ( completion_state ) |
+ { |
+ case Browser_Tab.Completion_State.Success: |
+ instruction.end(); |
+ break; |
+ case Browser_Tab.Completion_State.No_Success: |
+ instruction.abort( "unsuccessful load. nsresult = " + error_code ); |
+ break; |
+ case Browser_Tab.Completion_State.User_Close: |
+ instruction.abort( "user closed tab" ); |
+ break; |
+ case Browser_Tab.Completion_State.External_Cancel: |
+ instruction.abort( "timed out" ); |
+ break; |
+ default: |
+ instruction.abort( "WTF?" ); |
+ break; |
+ } |
+ } |
+ else |
+ { |
+ // The tab action completed by throwing; report the exception it recorded. |
+ instruction.abort( "exception. message = " + tab.exception.message ); |
+ } |
+ } |
+ else |
+ { |
+ // Defensive. Should not reach. |
+ instruction.abort( "tab load not completed. Huh?" ); |
+ } |
+ // Every path above has ended the instruction one way or another, so it can be written out now. |
+ this.encoder.sequence_send( instruction ); |
+ |
+ tab.close(); |
+ --this.progress.active; |
+ ++this.progress.completed; |
+ this.progress.notice(); |
+ this.resume(); |
+}; |
+ |
+/** |
+ * The join finisher merely ensures that the tab load action actually completes, stopping it if it hasn't completed yet. |
+ * |
+ * @param tab |
+ * The tab that was joined. It is stopped if its 'completed' flag is not yet set. |
+ */ |
+Crawler.prototype._join_finisher = function( tab ) |
+{ |
+ /* |
+ * If the join timeout caused completion, we must assume that the tab is still loading. If the timeout did not fire, |
+ * then the tab action completed. In all cases, the tab will be complete afterwards. Thus, because Join_Timeout is |
+ * reliable, we have made tab-load reliable also. |
+ * |
+ * The flag tested here is 'completed', the same member that '_load_finisher' tests. Testing a member that the tab |
+ * does not define would always read as "not complete" and stop every tab, including those that already finished. |
+ */ |
+ if ( !tab.completed ) |
+ { |
+ tab.stop(); |
+ } |
+}; |
+ |
+//---------------------------------- |
+// Data gathering functions |
+//---------------------------------- |
+/** |
+ * Shim for 'processNode' in ABP. Executes once for each node that ABP processes, whether or not it acts on that node. |
+ * <p/> |
+ * The return value is always that of the original function. Failures while locating the instruction or recording the |
+ * observation are reported to the console and otherwise ignored. An exception from the original function propagates. |
+ * |
+ * @param {Function} original_f |
+ * The original processNode function. |
+ * @param {nsIDOMWindow} wnd |
+ * @param {nsIDOMElement} node |
+ * @param {Number} contentType |
+ * @param {nsIURI} location |
+ * @param {Boolean} collapse |
+ * true to force hiding of the node |
+ * @return {Boolean} false if the node should be blocked |
+ */ |
+Crawler.prototype.node_action = function( original_f, wnd, node, contentType, location, collapse ) |
+{ |
+ //var log = this.logger.make_log( "node_action" ); |
+ |
+ /* |
+ * Set up collecting for node_entry_action(). It should be the case that a node matches either 0 or 1 filters. |
+ * The collection array 'entries' allows more than 1 to be recorded, and for such activity to be detected and |
+ * reported rather than inducing an observation error. |
+ */ |
+ var entries = []; |
+ var entry_hook = function( node, windows, entry ) |
+ { |
+ entries.push( { node: node, windows: windows, entry: entry } ); |
+ }; |
+ this.current_nodes.set( node, entry_hook ); |
+ |
+ try |
+ { |
+ /* |
+ * Call the original processNode. If the original throws, then we will too; the enclosing 'try' has no 'catch' |
+ * clause for it. The call is inside the 'try' only so that the 'finally' clause unregisters 'entry_hook' even |
+ * when the original throws. |
+ */ |
+ var result = original_f( wnd, node, contentType, location, collapse ); |
+ |
+ let instruction = null; // Initialize here in case locate_instruction() throws. |
+ try |
+ { |
+ instruction = this.locate_instruction( wnd ); |
+ } |
+ catch ( e ) |
+ { |
+ Cu.reportError( "Crawler/node_action: error locating instruction: " + e.toString() |
+ + ( ( "stack" in e ) ? ( "\n\tstack = " + e.stack) : "" ) |
+ ); |
+ return result; |
+ } |
+ if ( !instruction ) |
+ { |
+ /* |
+ * If we don't have an instruction, we don't report this node. This is by design, because reporting is |
+ * the responsibility of the instruction object. |
+ */ |
+ return result; |
+ } |
+ if ( entries.length == 0 && !instruction.observing_all_nodes() ) |
+ { |
+ // Assert we didn't touch this node and the instruction doesn't want to see it |
+ return result; |
+ } |
+ try |
+ { |
+ var observation = new Observation( |
+ !result, contentType, |
+ (contentType == Policy.type.ELEMHIDE) ? location.text : location.spec, |
+ entries |
+ ); |
+ instruction.observe_node( observation ); |
+ } |
+ catch ( e ) |
+ { |
+ Cu.reportError( "Crawler/node_action: error recording observation: " + e.toString() ); |
+ return result; |
+ } |
+ } |
+ finally |
+ { |
+ /* |
+ * This 'finally' clause ensures that we remove the node from 'this.current_nodes'. Even though it's a weak map, |
+ * we need to remove the key so that 'entry_hook' is not called inadvertently. |
+ */ |
+ this.current_nodes.delete( node ); |
+ } |
return result; |
+}; |
+ |
+/** |
+ * Locate our instruction associated with the window that caused a node to load. First we find the browser associated |
+ * with the window. There should always be one of these, otherwise we have an error. From the browser, we locate our |
+ * tab associated with it, which need not be present. Finally, we locate the instruction as a tab member, which should |
+ * always exist. |
+ * <p/> |
+ * This is called only in node_action(). It's separate to simplify the control flow. |
+ * |
+ * @param window |
+ * @return {Instruction_class} |
+ * The instruction of the tab showing the window, or null if the browser is not one of our tabs. |
+ * @throws {Error} |
+ * If the top window has no document, no tabbrowser or browser can be found for it, or the tab has no instruction. |
+ */ |
+Crawler.prototype.locate_instruction = function( window ) |
+{ |
+ let topWindow = window.top; |
+ if ( !topWindow.document ) |
+ throw new Error( "No document associated with the node's top window" ); |
+ let tabbrowser = Utils.getChromeWindow( topWindow ).gBrowser; |
+ if ( !tabbrowser ) |
+ throw new Error( "Unable to get a tabbrowser reference from the window" ); |
+ let browser = tabbrowser.getBrowserForDocument( topWindow.document ); |
+ if ( !browser ) |
+ throw new Error( "Unable to get browser for the tab" ); |
+ if ( !this.tabbed_browser.map_browser_to_child.has( browser ) ) |
+ { |
+ /* |
+ * It's not an error for the browser not to appear in this map. If the tab remains open past the time |
+ * we are monitoring (either on purpose or as the result of a quirk of timing), we simply return a null |
+ * instruction. Nevertheless, the code to report this to the console remains in place, commented out, because |
+ * it's likely to be relevant still during development. |
+ */ |
+ // Cu.reportError( |
+ // "Crawler.node_action: Browser not found in internal map. " + Logger.timestamp() |
+ // + "\nlocation=" + url_location |
+ // ); |
+ // this.logger.stack_trace(); |
+ return null; |
+ } |
+ var tab = this.tabbed_browser.map_browser_to_child.get( browser ).child; |
+ if ( !("instruction" in tab) ) |
+ throw new Error( "'instruction' not found as member of tab object" ); |
+ return tab.instruction; |
+}; |
+ |
+/** |
+ * This function executes solely underneath (in the call stack) 'node_action'. It receives at least one call per node, |
+ * more if there are matches on rules of any kind. |
+ * <p/> |
+ * Entries that carry a filter are forwarded to the hook registered for the node in 'current_nodes', together with the |
+ * chain of windows from the given window up through its parents. |
+ * |
+ * @param window |
+ * The window at which the chain of parent windows starts. |
+ * @param node |
+ * The node the entry belongs to. Used as the key into 'current_nodes'. |
+ * @param {RequestEntry} entry |
+ */ |
+Crawler.prototype.node_entry_action = function( window, node, entry ) |
+{ |
+ if ( !this.current_nodes.has( node ) ) |
+ { |
+ Cu.reportError( "node_entry_action: node not seen in 'current_nodes'" ); |
+ return; |
+ } |
+ if ( !entry.filter ) |
+ { |
+ /* |
+ * If there's no filter in the entry, then nothing happened to it. We are presently ignoring such entries. In |
+ * the future, however, we will likely want a hook here to process entries that are not associated with any |
+ * filter, for example, to ensure that necessary content is not blocked inadvertently. |
+ */ |
+ return; |
+ } |
+ var windows = []; |
+ var n = 0; |
+ while ( window != null ) |
+ { |
+ if ( ++n > 100 ) |
+ { |
+ // Houston, we have a problem. The hook below receives null in place of the window chain. |
+ windows = null; |
+ Cu.reportError( "Crawler/node_entry_action: runaway window chain" ); |
+ break; |
+ } |
+ windows.push( window ); |
+ if ( window === window.parent ) |
+ { |
+ // This is the ordinary statement to exit the loop. |
+ break; |
+ } |
+ window = window.parent; |
+ } |
+ this.current_nodes.get( node )( node, windows, entry ); |
+}; |
+ |
+ |
+function shutdown_crawler() |
+{ |
+ process_node_shim.close(); |
} |
-function loadSite(site, window, callback) |
+try |
{ |
- if (!site) |
- return; |
- |
- let tabbrowser = window.gBrowser; |
- let tab = tabbrowser.addTab(site); |
- let browser = tabbrowser.getBrowserForTab(tab); |
- |
- siteTabs.set(browser, site); |
- |
- let progressListener = { |
- onStateChange: function(aBrowser, aWebProgress, aRequest, aStateFlags, aStatus) |
- { |
- if (browser !== aBrowser) |
- return; |
- |
- if (!(aStateFlags & Ci.nsIWebProgressListener.STATE_STOP)) |
- return; |
- |
- tabbrowser.removeTabsProgressListener(progressListener); |
- tabbrowser.removeTab(tab); |
- callback(); |
- } |
- }; |
- tabbrowser.addTabsProgressListener(progressListener); |
+ onShutdown.add( shutdown_crawler ); |
} |
- |
-function loadSites(backendUrl, parallelTabs, window, sites, callback) |
+catch ( e ) |
{ |
- while (currentTabs < parallelTabs && sites.length) |
- { |
- currentTabs++; |
- let site = sites.shift(); |
- loadSite(site, window, function() |
- { |
- currentTabs--; |
- if (!sites.length && !currentTabs) |
- { |
- Storage.finish(); |
- let dataFilePath = Storage.dataFile.path; |
- Client.sendCrawlerDataFile(backendUrl, dataFilePath, function() |
- { |
- Storage.destroy(); |
- callback(); |
- }); |
- } |
- else |
- loadSites(backendUrl, parallelTabs, window, sites, callback); |
- }); |
- } |
+ Cu.reportError( "Failure adding shutdown function. error = \"" + e.message + "\"" ); |
} |
- |
-let Crawler = exports.Crawler = {}; |
- |
-Crawler.crawl = function(backendUrl, parallelTabs, window, callback) |
-{ |
- if (Policy.processNode != origProcessNode) |
- return; |
- |
- Policy.processNode = processNode; |
- |
- siteTabs = new WeakMap(); |
- currentTabs = 0; |
- |
- Storage.init(); |
- |
- Client.fetchCrawlableSites(backendUrl, function(sites) |
- { |
- loadSites(backendUrl, parallelTabs, window, sites, function() |
- { |
- Policy.processNode = origProcessNode; |
- siteTabs = null; |
- callback(); |
- }); |
- }); |
-}; |