OLD | NEW |
1 Cu.import("resource://gre/modules/Services.jsm"); | 1 Cu.import( "resource://gre/modules/Services.jsm" ); |
2 | 2 |
3 function abprequire(module) | 3 function abprequire( module ) |
4 { | 4 { |
5 let result = {}; | 5 let result = {}; |
6 result.wrappedJSObject = result; | 6 result.wrappedJSObject = result; |
7 Services.obs.notifyObservers(result, "adblockplus-require", module); | 7 Services.obs.notifyObservers( result, "adblockplus-require", module ); |
8 return result.exports; | 8 return result.exports; |
9 } | 9 } |
10 | 10 |
11 let {Storage} = require("storage"); | 11 let {Client} = require( "client" ); |
12 let {Client} = require("client"); | 12 let {Browser_Tab,Tabbed_Browser} = require( "browser" ); |
13 | 13 let {Encoding} = require( "encoding" ); |
14 let {Policy} = abprequire("contentPolicy"); | 14 let {Logger} = require( "logger" ); |
15 let {Filter} = abprequire("filterClasses"); | 15 |
16 let {Utils} = abprequire("utils"); | 16 let {Policy} = abprequire( "contentPolicy" ); |
17 | 17 let {RequestNotifier} = abprequire( "requestNotifier" ); |
18 let origProcessNode = Policy.processNode; | 18 let {Filter} = abprequire( "filterClasses" ); |
19 | 19 let {Utils} = abprequire( "utils" ); |
20 let siteTabs; | 20 let {Observation} = require( "instruction" ); |
21 let currentTabs; | 21 |
22 | 22 //------------------------------------------------------- |
23 function processNode(wnd, node, contentType, location, collapse) | 23 // Shim |
24 { | 24 //------------------------------------------------------- |
25 let result = origProcessNode.apply(this, arguments); | 25 /** |
26 let url = (contentType === Policy.type.ELEMHIDE) ? location.text : | 26 * Manager for shim replacement of an external function. |
27 location.spec; | 27 * <p/> |
28 | 28 * Since there's no lvalue reference type in JavaScript (non-primitives are all
reference types, but they are rvalue |
29 let topWindow = wnd.top; | 29 * references), the arguments here provide a substitute. The reference is the ex
pression 'object[ property ]'. |
30 if (!topWindow.document) | 30 * |
31 { | 31 * @param {Object} original_object |
32 Cu.reportError("No document associated with the node's top window"); | 32 * The original function whose call and return are to be surrounded by the
shim. |
| 33 * @param {String} original_property |
| 34 * The name of the property of 'original_object' that holds the function to be shimmed. |
| 35 * @constructor |
| 36 */ |
var Shim = function( original_object, original_property )
{
    /**
     * The object that owns the function being shimmed.
     * @type {Object}
     */
    this.original_object = original_object;

    /**
     * The name of the property of 'original_object' that holds the function being shimmed.
     * @type {String}
     */
    this.original_property = original_property;

    /**
     * The original function as it exists at the time of instantiation. This means that generally the Shim instance
     * should be created as soon as possible, such as in module initialization.
     * @type {Function}
     */
    this.original_function = original_object[ original_property ];
};

/**
 * Whether the shimmed property currently holds the function captured at construction.
 *
 * @return {boolean}
 *      true if no replacement is installed at present.
 */
Shim.prototype.is_original = function()
{
    return ( this.original_object[ this.original_property ] === this.original_function );
};

/**
 * Install a replacement for the original function.
 *
 * @param {Function} replacer
 *      The replacement function transformer. Takes the original function as an argument and returns its replacement.
 * @return {Function}
 *      The original function, as captured at construction.
 * @throws {Error}
 *      If 'replacer' is not a function, or if a replacement is already installed.
 */
Shim.prototype.replace = function( replacer )
{
    /*
     * Error instances rather than bare strings, so that a stack trace is available and handlers that test for
     * 'instanceof Error' see these failures.
     */
    if ( typeof replacer !== "function" )
        throw new Error( "Must supply a function transformer to supply a replacement function." );
    if ( !this.is_original() )
        throw new Error( "This version of Shim does not support multiple replacement." );
    this.original_object[ this.original_property ] = replacer( this.original_function );
    return this.original_function;
};

/**
 * Reset the original function to a non-replaced state.
 * <p/>
 * May be called correctly even if the original has never been replaced.
 */
Shim.prototype.reset = function()
{
    this.original_object[ this.original_property ] = this.original_function;
};

/**
 * Close out the shim and release resources.
 */
Shim.prototype.close = function()
{
    this.reset();
    /*
     * At present, this class does not use external resources that aren't dealt with by 'reset()'. That could change,
     * however, and so we use close() as the substitute-destructor and reset() for ordinary use.
     */
};
| 99 |
| 100 /** |
| 101 * Shim instance for 'processNode'. As of this writing it's the only function in
ABP we're shimming. |
| 102 */ |
| 103 var process_node_shim = new Shim( Policy, "processNode" ); |
| 104 |
| 105 //------------------------------------------------------- |
| 106 // Crawler |
| 107 //------------------------------------------------------- |
/**
 * Constructor for a single crawl session. The crawler iterates through each instruction, loading its URL in a tab,
 * running the hooks present in the processor, and storing results accordingly.
 * <p/>
 * The 'processNode' shim is installed as the last step of construction, so that a constructor that throws part-way
 * through does not leave the shim installed.
 *
 * @param {Instruction_Set} instructions
 *      Instruction generator yields a sequence of tuples: URL to crawl, a processor, and storage.
 * @param {*} outputs
 *      Array-like list of output descriptors. Other methods of this class read 'storage' and 'encode' from each one.
 * @param {*} display
 *      Display object for showing progress messages. Must not be null.
 * @param {Window} window
 *      The top window we're operating in. Must be present as an argument because the module context this class is
 *      defined in does not have a window. (Or at least should not be relied upon.)
 * @param {boolean} leave_open
 *      Passed to 'make_tab()' of the tabbed browser for each tab the crawler opens.
 * @param {number} number_of_tabs
 *      Number of tabs handed to 'Tabbed_Browser'. Anything that is not a number of at least 1 is treated as 1.
 * @param {*} progress
 *      Progress sink. Its 'notice()' method is called with the 'progress_stats' object of this instance.
 * @throws {Error}
 *      If 'display' is missing, or if 'processNode' is already shimmed.
 * @constructor
 */
var Crawler = function( instructions, outputs, display, window, leave_open, number_of_tabs, progress )
{
    if ( !display )
    {
        throw new Error( "No ability to provide a null display object" );
    }
    /*
     * Checked before anything is acquired, so that a refused construction holds no browser resources.
     */
    if ( !process_node_shim.is_original() )
        throw new Error( "Function 'processNode' is already shimmed. We may not insert a second one." );

    /*
     * Defensive. The caller should have already validated this argument. The test is written as a negated '>='
     * so that 'undefined' and NaN are also replaced by the default.
     */
    var tab_count = number_of_tabs;
    if ( !( tab_count >= 1 ) )
    {
        tab_count = 1;
    }

    /**
     * @type {Instruction_Set}
     */
    this.instructions = instructions;

    this.outputs = outputs;

    /**
     * Display object for showing progress messages.
     * @type {*}
     */
    this.display = display;

    /**
     * Browser window in which to open tabs. Required because, as a module, we don't have a 'Window' object available.
     * @type {Window}
     */
    this.window = window;

    this.leave_open = leave_open;

    this.progress = progress;

    /**
     * Closed flag. Needed to terminate the generator if this object is closed before the generator stops.
     * @type {Boolean}
     */
    this.closed = false;

    /**
     * The current nodes that are active in a call to 'node_action'. In ordinary cases, this map has at most the
     * maximum number of concurrent loads.
     * @type {WeakMap}
     */
    this.current_nodes = new WeakMap();

    this.progress_stats = {
        active: 0,
        completed: 0
    };

    /**
     * Logging service.
     * @type {Logger}
     */
    this.logger = new Logger( "Crawler" );

    this.tabbed_browser = new Tabbed_Browser( this.window, tab_count );

    /**
     * @type {RequestNotifier}
     */
    this.requestNotifier = new RequestNotifier( null, this.node_entry_action.bind( this ) );

    /*
     * Install the shim only once everything it relies upon exists. The replacement is 'node_action' bound to this
     * instance, with the original 'processNode' as its first argument.
     */
    process_node_shim.replace(
        function( original )
        {
            return this.node_action.bind( this, original );
        }.bind( this )
    );
};
| 199 exports.Crawler = Crawler; |
| 200 |
/**
 * Representation of this instance used for JSON serialization.
 *
 * @return {Object}
 *      An object with members 'instructions' and 'storage', in that order.
 */
Crawler.prototype.toJSON = function()
{
    /*
     * NOTE(review): nothing visible in this file assigns 'this.storage' (the constructor stores 'outputs'), so this
     * member is presumably undefined and dropped by JSON.stringify. Confirm whether that is intended.
     */
    var summary = {};
    summary.instructions = this.instructions;
    summary.storage = this.storage;
    return summary;
};
| 208 |
/**
 * Close the present instance. This object holds browser resources because of the browser tabs it holds open.
 * <p/>
 * The steps run in a fixed order: close the storage of each output, close the tabbed browser, shut down the request
 * notifier, remove the 'processNode' shim, set the closed flag. A failure in one step does not prevent the later
 * steps from running. The exception propagates once the remaining steps have finished; if more than one step throws,
 * the last exception is the one that propagates.
 */
Crawler.prototype.close = function()
{
    try
    {
        for ( let j = 0 ; j < this.outputs.length ; ++j )
        {
            this.outputs[j].storage.close();
        }
    }
    finally
    {
        try
        {
            if ( this.tabbed_browser ) this.tabbed_browser.close();
        }
        finally
        {
            try
            {
                if ( this.requestNotifier ) this.requestNotifier.shutdown();
            }
            finally
            {
                /*
                 * The shim must come out no matter what happened above. Otherwise 'processNode' stays replaced
                 * and no later Crawler can be constructed.
                 */
                process_node_shim.reset();
                this.closed = true;
            }
        }
    }
};
| 223 |
/**
 * The output encoding for the session as a whole: two immediate fields first, then the streamed 'trials' array,
 * then two further immediate fields.
 * @type {*}
 */
Crawler.prototype.__encoding__ = ( function()
{
    var prelude = Encoding.immediate_fields( ["time_start", "instructions"] );
    var observation = Encoding.field( "trials", Encoding.array_stream() );
    var postlude = Encoding.immediate_fields( ["time_finish", "termination"] );
    return Encoding.as_object( [ prelude, observation, postlude ] );
} )();
| 236 |
| 237 /** |
| 238 * Task generator for the crawler |
| 239 * |
| 240 * @param {Function} pause |
| 241 * @param {Function} resume |
| 242 */ |
| 243 Crawler.prototype.generator = function( pause, resume ) |
| 244 { |
| 245 var log = this.logger.make_log( "task" ); |
| 246 var tab = null; |
| 247 |
| 248 var runaway_counter = 0; |
| 249 |
| 250 try |
| 251 { |
| 252 /* |
| 253 * Preparation code. Ensure that every initialization here can be revers
ed in the 'finally' clause whether |
| 254 * or not it executed, in case some initialization throws an exception. |
| 255 */ |
| 256 this.time_start = Logger.timestamp(); |
| 257 |
| 258 var multiple = new Encoding.Multiple_Format(); |
| 259 for ( let j = 0 ; j < this.outputs.length ; ++j ) |
| 260 { |
| 261 let output = this.outputs[j]; |
| 262 let formatter = new Encoding[ output.encode ]( output.storage.writer
() ); |
| 263 multiple.add( formatter ); |
| 264 } |
| 265 this.encoder = new Encoding.Format_stream( multiple ); |
| 266 |
| 267 this.encoder.write( this ); |
| 268 this.encoder.sequence_start(); |
| 269 |
| 270 let gen = this.instructions.generator(); |
| 271 let instruction = null; // Avoid spurious IDEA warning |
| 272 for ( instruction of gen ) |
| 273 { |
| 274 if ( this.closed ) |
| 275 //noinspection ExceptionCaughtLocallyJS |
| 276 throw StopIteration; |
| 277 |
| 278 if ( this.tabbed_browser.available() ) |
| 279 { |
| 280 /* |
| 281 * Since we'll need a variety of browser-tab behaviors, we'll ne
ed to change this factory call |
| 282 * to something dependent upon the instruction. |
| 283 */ |
| 284 tab = this.tabbed_browser.make_tab( this.leave_open ); |
| 285 tab.instruction = instruction; |
| 286 instruction.begin(); |
| 287 /* |
| 288 * The return value of load is an asynchronous action that could
be combined with others, if the |
| 289 * instruction dictates. There's no hook for this yet, although
that's the reason we do not immediately |
| 290 * execute on calling load. |
| 291 */ |
| 292 tab.load( instruction.target ).go( this._land.bind( this, tab, r
esume ), null ); |
| 293 ++this.progress_stats.active; |
| 294 this.progress.notice( this.progress_stats ); |
| 295 } |
| 296 if ( !this.tabbed_browser.available() ) |
| 297 { |
| 298 pause(); |
| 299 } |
| 300 |
| 301 var cancelled = yield false; |
| 302 if ( cancelled ) |
| 303 { |
| 304 this.display.log( "Crawler cancelled." ); |
| 305 break; |
| 306 } |
| 307 } |
| 308 //this.alert( "Just finished main instruction loop." ); |
| 309 /* |
| 310 * At this point in the code, we have launched all the instructions. If
we're using more than one tab, |
| 311 * we'll generally have open tabs still. We need to pause until we have
no more tabs left open. |
| 312 */ |
| 313 while ( !this.tabbed_browser.quiescent() ) |
| 314 { |
| 315 pause(); |
| 316 // Must yield after pause() for it to take effect |
| 317 cancelled = yield false; |
| 318 if ( cancelled ) |
| 319 { |
| 320 this.display.log( "Crawler cancelled." ); |
| 321 break; |
| 322 } |
| 323 ++runaway_counter; |
| 324 if ( runaway_counter > 100 ) |
| 325 { |
| 326 Cu.reportError( "Runaway pause loop." ); |
| 327 break; |
| 328 } |
| 329 } |
| 330 |
| 331 /* |
| 332 * OK. Finally done. |
| 333 */ |
| 334 this.termination = "ordinary"; |
| 335 } |
| 336 catch ( e if e instanceof Error ) |
| 337 { |
| 338 log( e.toString() + "\n\n" + e.stack ); |
| 339 this.termination = "Error"; |
| 340 } |
| 341 catch ( e ) |
| 342 { |
| 343 log( e.toString() + " - type: " + Object.prototype.toString.call( e ) )
; |
| 344 this.termination = "Unknown exception"; |
| 345 } |
| 346 finally |
| 347 { |
| 348 /* |
| 349 * Finish writing the output before closing ourselves down. |
| 350 */ |
| 351 if ( !( "termination" in this) ) |
| 352 { |
| 353 this.termination = "Success"; |
| 354 } |
| 355 this.time_finish = Logger.timestamp(); |
| 356 this.encoder.sequence_stop(); |
| 357 |
| 358 /* |
| 359 * If everything goes right, this cleanup should not be necessary, as ta
b instances are closed as they are used. |
| 360 * Nonetheless, if there's an error and a landing function is not called
, this line ensures that all the tabs |
| 361 * are properly destroyed. |
| 362 */ |
| 363 if ( tab ) tab.close(); |
| 364 // Removes the ABP shim, amongst other things. |
| 365 this.close(); |
| 366 } |
| 367 }; |
| 368 |
/**
 * Landing function for the asynchronous action of loading a tab. For some reason, Firefox is delivering the
 * STATE_STOP progress message before the last ABP filter is being run. It seems that it's firing events immediately,
 * once it knows the request has finished its HTTP transfer, but before it has fully finished loading the page as a
 * whole (the DOM, layout, etc.). Hence we let the browser finish its work in the current thread and run the actual
 * load-end action afterwards.
 * <p/>
 * This function is declared without parameters, but that's not how it is called. It is only a scheduling
 * pass-through, so it hands whatever arguments it receives on to '_load_end_action', no matter what they are. (And
 * no matter how they change over time.)
 */
Crawler.prototype._land = function()
{
    /*
     * Everything the deferred call needs is captured now: the action itself, the object it runs against and the
     * arguments to forward.
     */
    var crawler = this;
    var load_end_action = this._load_end_action;
    var forwarded = arguments;
    var runnable = {
        run: function()
        {
            return load_end_action.apply( crawler, forwarded );
        }
    };
    Utils.threadManager.currentThread.dispatch( runnable, Ci.nsIEventTarget.DISPATCH_NORMAL );
};
| 390 |
/**
 * Action at the end of loading a tab. Ends the instruction of the tab, closes the tab, sends the instruction to the
 * encoder, updates and publishes the progress counts, and finally calls 'resume'.
 *
 * @param tab
 *      The tab whose load has ended. Its 'instruction' member is the instruction that was loaded in it.
 * @param {Function} resume
 *      Called with no arguments as the last step.
 */
Crawler.prototype._load_end_action = function( tab, resume )
{
    var finished = tab.instruction;
    finished.end();
    tab.close();
    this.encoder.sequence_send( finished );

    var stats = this.progress_stats;
    stats.active -= 1;
    stats.completed += 1;
    this.progress.notice( stats );

    resume();
};
| 408 |
| 409 /** |
| 410 * Shim for 'processNode' in ABP. Executes once for each node that ABP processes
, whether or not it acts on that node. |
| 411 * |
| 412 * @param {Function} original_f |
| 413 * The original processNode function. |
| 414 * @param {nsIDOMWindow} wnd |
| 415 * @param {nsIDOMElement} node |
| 416 * @param {Number} contentType |
| 417 * @param {nsIURI} location |
| 418 * @param {Boolean} collapse |
| 419 * true to force hiding of the node |
| 420 * @return {Boolean} false if the node should be blocked |
| 421 */ |
| 422 Crawler.prototype.node_action = function( original_f, wnd, node, contentType, lo
cation, collapse ) |
| 423 { |
| 424 //var log = this.logger.make_log( "node_action" ); |
| 425 |
| 426 /* |
| 427 * Set up collecting for node_entry_action(). It should be the case that a n
ode matches either 0 or 1 filters. |
| 428 * The collection array 'entries' allows more than 1 to be recorded, and for
such activity to be detected and |
| 429 * reported rather than inducing an observation error. |
| 430 */ |
| 431 var entries = []; |
| 432 var entry_hook = function( node, windows, entry ) |
| 433 { |
| 434 entries.push( { node: node, windows: windows, entry: entry } ); |
| 435 }; |
| 436 this.current_nodes.set( node, entry_hook ); |
| 437 |
| 438 /* |
| 439 * Call the original processNode. If the original throws, then we will too,
so this is outside a try clause. |
| 440 */ |
| 441 var result = original_f( wnd, node, contentType, location, collapse ); |
| 442 |
| 443 try |
| 444 { |
| 445 let instruction = null; // Initialize here in case locate_instructio
n() throws. |
| 446 try |
| 447 { |
| 448 instruction = this.locate_instruction( wnd ); |
| 449 } |
| 450 catch ( e ) |
| 451 { |
| 452 Cu.reportError( "Crawler/node_action: error locating instruction: "
+ e.toString() |
| 453 + ( ( "stack" in e ) ? ( "\n\tstack = " + e.stack) : "" ) |
| 454 ); |
| 455 return result; |
| 456 } |
| 457 if ( !instruction ) |
| 458 { |
| 459 /* |
| 460 * If we don't have an instruction, we don't report this node. This
is by design, because reporting is |
| 461 * the responsibility of the instruction object. |
| 462 */ |
| 463 return result; |
| 464 } |
| 465 if ( entries.length == 0 && !instruction.observing_all_nodes() ) |
| 466 { |
| 467 // Assert we didn't touch this node and the instruction doesn't want
to see it |
| 468 return result; |
| 469 } |
| 470 try |
| 471 { |
| 472 var observation = new Observation( |
| 473 !result, contentType, |
| 474 (contentType == Policy.type.ELEMHIDE) ? location.text : location
.spec, |
| 475 entries |
| 476 ); |
| 477 instruction.observe_node( observation ); |
| 478 } |
| 479 catch ( e ) |
| 480 { |
| 481 Cu.reportError( "Crawler/node_action: error recording observation: "
+ e.toString() ); |
| 482 return result; |
| 483 } |
| 484 } |
| 485 finally |
| 486 { |
| 487 /* |
| 488 * This 'finally' clause ensures that we remove the node from 'this.curr
ent_nodes'. Even though it's a weak map, |
| 489 * we need to remove the key so that 'entry_hook' is not called inadvert
ently. |
| 490 */ |
| 491 this.current_nodes.delete( node ); |
| 492 } |
33 return result; | 493 return result; |
34 } | 494 }; |
35 | 495 |
36 let tabbrowser = Utils.getChromeWindow(topWindow).gBrowser; | 496 /** |
37 if (!tabbrowser) | 497 * Locate our instruction associated with a window that caused to load. First we
find the browser associated with the |
38 { | 498 * window. There should always be one of these, otherwise we have an error. From
the browser, we locate our tab |
39 Cu.reportError("Unable to get a tabbrowser reference"); | 499 * associated with it, which need not be present. Finally, we locate the instruc
tion as a tab member, which should |
40 return result; | 500 * always exist. |
41 } | 501 * <p/> |
42 | 502 * This is called only in node_action(). It's separate to simplify the control f
low. |
43 let browser = tabbrowser.getBrowserForDocument(topWindow.document); | 503 * |
44 if (!browser) | 504 * @param window |
45 { | 505 * @return {Instruction_class} |
46 Cu.reportError("Unable to get browser for the tab"); | 506 */ |
47 return result; | 507 Crawler.prototype.locate_instruction = function( window ) |
48 } | 508 { |
49 | 509 let topWindow = window.top; |
50 let site = siteTabs.get(browser); | 510 if ( !topWindow.document ) |
51 let filtered = !result; | 511 throw new Error( "No document associated with the node's top window" ); |
52 Storage.write([url, site, filtered]); | 512 let tabbrowser = Utils.getChromeWindow( topWindow ).gBrowser; |
53 return result; | 513 if ( !tabbrowser ) |
| 514 throw new Error( "Unable to get a tabbrowser reference from the window"
); |
| 515 let browser = tabbrowser.getBrowserForDocument( topWindow.document ); |
| 516 if ( !browser ) |
| 517 throw new Error( "Unable to get browser for the tab" ); |
| 518 if ( !this.tabbed_browser.map_browser_to_child.has( browser ) ) |
| 519 { |
| 520 /* |
| 521 * It's not an error for the browser not to appear in this map. If the t
ab is remains open past the time |
| 522 * we are monitoring (either on purpose or as the result of a quirk of t
iming), we simply return a null |
| 523 * instruction. Nevertheless, the code to report this to the console rem
ains in place, commented out, because |
| 524 * it's likely to be relevant still during development. |
| 525 */ |
| 526 // Cu.reportError( |
| 527 // "Crawler.node_action: Browser not found in internal map. " + Log
ger.timestamp() |
| 528 // + "\nlocation=" + url_location |
| 529 // ); |
| 530 // this.logger.stack_trace(); |
| 531 return null; |
| 532 } |
| 533 var tab = this.tabbed_browser.map_browser_to_child.get( browser ).child; |
| 534 if ( !("instruction" in tab) ) |
| 535 throw new Error( "'instruction' not found as member of tab object" ); |
| 536 return tab.instruction; |
| 537 }; |
| 538 |
/**
 * This function executes solely underneath (in the call stack) 'node_action'. It receives at least one call per node,
 * more if there are matches on rules of any kind.
 * <p/>
 * For an entry that has a filter, it collects the chain of windows from 'window' upwards through 'parent' and hands
 * the node, the chain and the entry to the hook registered for the node in 'current_nodes'.
 *
 * @param window
 *      The window to start the chain from.
 * @param node
 *      The node; must be present as a key in 'current_nodes'.
 * @param {RequestEntry} entry
 */
Crawler.prototype.node_entry_action = function( window, node, entry )
{
    if ( !this.current_nodes.has( node ) )
    {
        Cu.reportError( "node_entry_action: node not seen in 'current_nodes'" );
        return;
    }
    if ( !entry.filter )
    {
        /*
         * If there's no filter in the entry, then nothing happened to it. We are presently ignoring such entries.
         * In the future, however, we will likely want a hook here to process entries that are not associated with
         * any filter, for example, to ensure that necessary content is not blocked inadvertently.
         */
        return;
    }

    var chain = [];
    var current = window;
    for ( var depth = 1 ; current != null ; ++depth )
    {
        if ( depth > 100 )
        {
            // Houston, we have a problem. The hook receives null instead of a partial chain.
            chain = null;
            Cu.reportError( "Crawler/node_entry_action: runaway window chain" );
            break;
        }
        chain.push( current );
        var parent = current.parent;
        if ( parent === current )
        {
            // A window that is its own parent is the top. This is the ordinary exit from the loop.
            break;
        }
        current = parent;
    }

    var hook = this.current_nodes.get( node );
    hook( node, chain, entry );
};
| 584 |
| 585 |
| 586 function shutdown_crawler() |
| 587 { |
| 588 process_node_shim.close(); |
54 } | 589 } |
55 | 590 |
56 function loadSite(site, window, callback) | 591 try |
57 { | 592 { |
58 if (!site) | 593 onShutdown.add( shutdown_crawler ); |
59 return; | |
60 | |
61 let tabbrowser = window.gBrowser; | |
62 let tab = tabbrowser.addTab(site); | |
63 let browser = tabbrowser.getBrowserForTab(tab); | |
64 | |
65 siteTabs.set(browser, site); | |
66 | |
67 let progressListener = { | |
68 onStateChange: function(aBrowser, aWebProgress, aRequest, aStateFlags, aStat
us) | |
69 { | |
70 if (browser !== aBrowser) | |
71 return; | |
72 | |
73 if (!(aStateFlags & Ci.nsIWebProgressListener.STATE_STOP)) | |
74 return; | |
75 | |
76 tabbrowser.removeTabsProgressListener(progressListener); | |
77 tabbrowser.removeTab(tab); | |
78 callback(); | |
79 } | |
80 }; | |
81 tabbrowser.addTabsProgressListener(progressListener); | |
82 } | 594 } |
83 | 595 catch ( e ) |
84 function loadSites(backendUrl, parallelTabs, window, sites, callback) | 596 { |
85 { | 597 Cu.reportError( "Failure adding shutdown function. error = \"" + e.message +
"\"" ); |
86 while (currentTabs < parallelTabs && sites.length) | |
87 { | |
88 currentTabs++; | |
89 let site = sites.shift(); | |
90 loadSite(site, window, function() | |
91 { | |
92 currentTabs--; | |
93 if (!sites.length && !currentTabs) | |
94 { | |
95 Storage.finish(); | |
96 let dataFilePath = Storage.dataFile.path; | |
97 Client.sendCrawlerDataFile(backendUrl, dataFilePath, function() | |
98 { | |
99 Storage.destroy(); | |
100 callback(); | |
101 }); | |
102 } | |
103 else | |
104 loadSites(backendUrl, parallelTabs, window, sites, callback); | |
105 }); | |
106 } | |
107 } | 598 } |
108 | |
109 let Crawler = exports.Crawler = {}; | |
110 | |
111 Crawler.crawl = function(backendUrl, parallelTabs, window, callback) | |
112 { | |
113 if (Policy.processNode != origProcessNode) | |
114 return; | |
115 | |
116 Policy.processNode = processNode; | |
117 | |
118 siteTabs = new WeakMap(); | |
119 currentTabs = 0; | |
120 | |
121 Storage.init(); | |
122 | |
123 Client.fetchCrawlableSites(backendUrl, function(sites) | |
124 { | |
125 loadSites(backendUrl, parallelTabs, window, sites, function() | |
126 { | |
127 Policy.processNode = origProcessNode; | |
128 siteTabs = null; | |
129 callback(); | |
130 }); | |
131 }); | |
132 }; | |
OLD | NEW |