OLD | NEW |
1 Cu.import("resource://gre/modules/Services.jsm"); | 1 Cu.import( "resource://gre/modules/Services.jsm" ); |
2 | 2 |
/**
 * Ask Adblock Plus for one of its modules by way of the observer service.
 * <p/>
 * A carrier object that points at itself through 'wrappedJSObject' is broadcast on the "adblockplus-require" topic,
 * with the module name as the notification data. Whatever is found on the carrier's 'exports' property once the
 * notification returns is handed back to the caller.
 *
 * @param {string} module
 *    Name of the module being requested.
 * @return {*}
 *    The 'exports' property of the carrier after notification; undefined if no observer set it.
 */
function abprequire( module )
{
  let carrier = {};
  carrier.wrappedJSObject = carrier;
  Services.obs.notifyObservers( carrier, "adblockplus-require", module );
  return carrier.exports;
}
10 | 10 |
11 let {Storage} = require("storage"); | 11 let { Action } = require( "action" ); |
12 let {Client} = require("client"); | 12 let { Browser_Tab, Tabbed_Browser } = require( "browser" ); |
13 | 13 let { Observation } = require( "instruction" ); |
14 let {Policy} = abprequire("contentPolicy"); | 14 let { Encoding } = require( "encoding" ); |
15 let {Filter} = abprequire("filterClasses"); | 15 let { Logger } = require( "logger" ); |
16 let {Utils} = abprequire("utils"); | 16 |
17 | 17 let { Policy } = abprequire( "contentPolicy" ); |
18 let origProcessNode = Policy.processNode; | 18 let { RequestNotifier } = abprequire( "requestNotifier" ); |
19 | 19 let { Filter } = abprequire( "filterClasses" ); |
20 let siteTabs; | 20 let { Utils } = abprequire( "utils" ); |
21 let currentTabs; | 21 |
22 | 22 //------------------------------------------------------- |
23 function processNode(wnd, node, contentType, location, collapse) | 23 // Shim |
24 { | 24 //------------------------------------------------------- |
25 let result = origProcessNode.apply(this, arguments); | 25 /** |
26 let url = (contentType === Policy.type.ELEMHIDE) ? location.text : | 26 * Manager for shim replacement of an external function. |
27 location.spec; | 27 * <p/> |
28 | 28 * Since there's no lvalue reference type in JavaScript (non-primitives are all
reference types, but they are rvalue |
29 let topWindow = wnd.top; | 29 * references), the arguments here provide a substitute. The reference is the ex
pression 'object[ property ]'. |
30 if (!topWindow.document) | 30 * |
31 { | 31 * @param {Object} original_object |
32 Cu.reportError("No document associated with the node's top window"); | 32 * The original function whose call and return are to be surrounded by the
shim. |
33 return result; | 33 * @param {string} original_property |
34 } | 34 * The original function whose call and return are to be surrounded by the
shim. |
35 | 35 * @constructor |
36 let tabbrowser = Utils.getChromeWindow(topWindow).gBrowser; | 36 */ |
37 if (!tabbrowser) | 37 var Shim = function( original_object, original_property ) |
38 { | 38 { |
39 Cu.reportError("Unable to get a tabbrowser reference"); | 39 /** |
40 return result; | 40 * @type {Object} |
41 } | 41 */ |
42 | 42 this.original_object = original_object; |
43 let browser = tabbrowser.getBrowserForDocument(topWindow.document); | 43 /** |
44 if (!browser) | 44 * @type {String} |
45 { | 45 */ |
46 Cu.reportError("Unable to get browser for the tab"); | 46 this.original_property = original_property; |
47 return result; | 47 |
48 } | 48 /** |
49 | 49 * The original function as it exists at the time of instantiation. This means
that generally the Shim instance |
50 let site = siteTabs.get(browser); | 50 * should be created as soon as possible, such as in module initialization. |
51 let filtered = !result; | 51 */ |
52 Storage.write([url, site, filtered]); | 52 this.original_function = original_object[ original_property ]; |
| 53 }; |
| 54 |
| 55 /** |
| 56 * @return {boolean} |
| 57 */ |
| 58 Shim.prototype.is_original = function() |
| 59 { |
| 60 return (this.original_object[ this.original_property ] === this.original_funct
ion); |
| 61 }; |
| 62 |
| 63 /** |
| 64 * |
| 65 * @param {Function} replacer |
| 66 * The replacement function transformer. Takes the original function as an
argument and returns its replacement. |
| 67 */ |
| 68 Shim.prototype.replace = function( replacer ) |
| 69 { |
| 70 if ( !replacer ) |
| 71 throw "Must supply a function transformer to supply a replacement function."
; |
| 72 if ( !this.is_original() ) |
| 73 throw "This version of Shim does not support multiple replacement."; |
| 74 this.original_object[ this.original_property ] = replacer( this.original_funct
ion ); |
| 75 return this.original_function; |
| 76 }; |
| 77 |
| 78 /** |
| 79 * Reset the original function to a non-replaced state. |
| 80 * <p/> |
| 81 * May be called correctly even if the original has never been replaced. |
| 82 */ |
| 83 Shim.prototype.reset = function() |
| 84 { |
| 85 this.original_object[ this.original_property ] = this.original_function; |
| 86 }; |
| 87 |
| 88 /** |
| 89 * Close out the shim and release resources. |
| 90 */ |
| 91 Shim.prototype.close = function() |
| 92 { |
| 93 this.reset(); |
| 94 /* |
| 95 * At present, this class does not use external resources that aren't dealt wi
th by 'reset()'. That could change, |
| 96 * however, and so we use close() as the substitute-destructor and reset() for
ordinary use. |
| 97 */ |
| 98 }; |
| 99 |
/**
 * Shim instance for 'processNode'. As of this writing it's the only function in ABP we're shimming.
 * <p/>
 * It is created here, as a top-level statement, because the Shim constructor captures whatever function is stored
 * in 'Policy.processNode' at the moment of instantiation and treats that as the original.
 */
var process_node_shim = new Shim( Policy, "processNode" );
| 104 |
| 105 //------------------------------------------------------- |
| 106 // Crawler |
| 107 //------------------------------------------------------- |
/**
 * Constructor for a single crawl session. The crawler iterates through each instruction, loading its URL in a tab,
 * running the hooks present in the processor, and storing results accordingly.
 * <p/>
 * Constructing a crawler replaces ABP's 'processNode' with a shim that routes through 'node_action'. The shim is
 * removed again by close().
 *
 * @param {Instruction_Set} instructions
 *    Instruction generator yields a sequence of tuples: URL to crawl, a processor, and storage.
 * @param {*} outputs
 *    Array-like list of output descriptors. This class reads an 'encode' property (the name of a formatter
 *    constructor in 'Encoding') and a 'storage' property from each element.
 * @param {Window} window
 *    The top window we're operating in. Must be present as an argument because the module context this class is
 *    defined in does not have a window. (Or at least should not be relied upon.)
 * @param {number} time_limit
 *    The maximum duration that we will allow a page to try to load.
 * @param {boolean} leave_open
 *    Passed through to each tab when it is made.
 * @param {number} number_of_tabs
 *    Passed to the tabbed browser. Anything that is not a positive number is replaced by 1.
 * @throws {Error}
 *    If 'processNode' is already shimmed.
 */
var Crawler = function( instructions, outputs, window, time_limit, leave_open, number_of_tabs )
{
  /*
   * Check for an existing shim before acquiring anything, so that a refused construction holds no browser
   * resources. The shim itself is installed last (see the end of this function).
   */
  if ( !process_node_shim.is_original() )
    throw new Error( "Function 'processNode' is already shimmed. We may not insert a second one." );

  /**
   * @type {Instruction_Set}
   */
  this.instructions = instructions;

  this.outputs = outputs;

  /**
   * Browser window in which to open tabs. Required because, as a module, we don't have a 'Window' object available.
   * @type {Window}
   */
  this.window = window;

  this.time_limit = time_limit;

  this.leave_open = leave_open;

  if ( !( number_of_tabs > 0 ) )
  {
    /*
     * Defensive. The caller should have already validated this argument. Written as a negated comparison so that
     * NaN and undefined are caught along with zero and negative numbers.
     */
    number_of_tabs = 1;
  }

  /**
   * Progress object. It's simple enough not to need its own class. Just override the notice() function to receive
   * progress notices.
   */
  this.progress = {
    active: 0,
    completed: 0,
    total: instructions.size,
    notice: function()
    {
    },
    status: function()
    {
    }
  };

  /**
   * Logging service.
   * @type {Logger}
   */
  this.logger = new Logger( "Crawler" );

  this.tabbed_browser = new Tabbed_Browser( this.window, number_of_tabs );

  /**
   * Closed flag. Needed to terminate the generator if this object is closed before the generator stops.
   * @type {Boolean}
   */
  this.closed = false;

  /**
   * The object responsible for gaining access to the call stream for individual entries within each node. This is
   * one of two hooks into ABP itself, the other being the shim for 'processNode'.
   *
   * @type {RequestNotifier}
   */
  this.requestNotifier = new RequestNotifier( null, this.node_entry_action.bind( this ) );

  /**
   * The current nodes that are active in a call to 'node_action'. In ordinary cases, this map has at most the
   * maximum number of concurrent loads.
   * @type {WeakMap}
   */
  this.current_nodes = new WeakMap();

  /*
   * Install the shim only after everything above has succeeded. If one of the constructors above throws, no
   * Crawler instance exists to call close(), so a shim installed earlier would never be removed.
   */
  process_node_shim.replace(
    function( original )
    {
      return this.node_action.bind( this, original );
    }.bind( this )
  );
};
exports.Crawler = Crawler;
| 205 |
/**
 * Plain-object view of the crawler for JSON serialization.
 * <p/>
 * NOTE(review): nothing visible in this file assigns 'this.storage'; the constructor stores 'this.outputs'.
 * Confirm whether the 'storage' member below is stale.
 *
 * @return {{instructions: *, storage: *}}
 */
Crawler.prototype.toJSON = function()
{
  var snapshot = {};
  snapshot.instructions = this.instructions;
  snapshot.storage = this.storage;
  return snapshot;
};
| 213 |
/**
 * Close the present instance. This object holds browser resources because of the browser tabs it holds open.
 * <p/>
 * Closing is idempotent: a second call does nothing. That matters because the task generator closes the crawler in
 * its own 'finally' clause, possibly after outside code has already closed it.
 * <p/>
 * Every cleanup step is attempted even if an earlier one throws, and the 'processNode' shim is always reset. If any
 * step threw, the first exception is rethrown once all steps have run.
 */
Crawler.prototype.close = function()
{
  if ( this.closed )
    return;

  var crawler = this;
  var failed = false;
  var first_error;
  /*
   * Run one cleanup step, remembering (rather than propagating) the first failure so the remaining steps still run.
   */
  var attempt = function( step )
  {
    try
    {
      step();
    }
    catch ( e )
    {
      if ( !failed )
      {
        failed = true;
        first_error = e;
      }
    }
  };

  try
  {
    for ( let j = 0 ; j < this.outputs.length ; ++j )
    {
      let output = this.outputs[j];
      attempt( function()
      {
        output.storage.close();
      } );
    }
    if ( this.tabbed_browser )
    {
      attempt( function()
      {
        crawler.tabbed_browser.close();
      } );
    }
    if ( this.requestNotifier )
    {
      attempt( function()
      {
        crawler.requestNotifier.shutdown();
      } );
    }
  }
  finally
  {
    // Whatever happened above, ABP gets its original 'processNode' back.
    process_node_shim.reset();
    this.closed = true;
  }

  if ( failed )
    throw first_error;
};
| 228 |
/**
 * The output encoding for the session as a whole.
 * <p/>
 * The field names listed here are members of the Crawler instance: 'instructions' is assigned in the constructor,
 * while 'time_start', 'time_finish' and 'termination' are assigned by the task generator.
 * NOTE(review): 'trials' presumably receives the items passed to 'encoder.sequence_send()' - confirm against the
 * Encoding module.
 * @type {*}
 */
Crawler.prototype.__encoding__ = Encoding.as_object( [
  // prelude
  Encoding.immediate_fields( ["time_start", "instructions"] ),
  // observation
  Encoding.field( "trials", Encoding.array_stream() ),
  // postlude
  Encoding.immediate_fields( ["time_finish", "termination"] )
] );
| 241 |
| 242 /** |
| 243 * Task generator for the crawler |
| 244 * |
| 245 * @param {Function} pause |
| 246 * @param {Function} resume |
| 247 */ |
| 248 Crawler.prototype.generator = function( pause, resume ) |
| 249 { |
| 250 /* |
| 251 * A crawler object represent a single run of the crawler. Thus the pause and
resume function act like object-scope |
| 252 * variables. |
| 253 */ |
| 254 this.pause = pause; |
| 255 this.resume = resume; |
| 256 var log = this.logger.make_log( "task" ); |
| 257 var tab = null; |
| 258 |
| 259 var runaway_counter = 0; |
| 260 |
| 261 try |
| 262 { |
| 263 /* |
| 264 * Preparation code. Ensure that every initialization here can be reversed i
n the 'finally' clause whether |
| 265 * or not it executed, in case some initialization throws an exception. |
| 266 */ |
| 267 this.time_start = Logger.timestamp(); |
| 268 |
| 269 var multiple = new Encoding.Multiple_Format(); |
| 270 for ( let j = 0 ; j < this.outputs.length ; ++j ) |
| 271 { |
| 272 let output = this.outputs[j]; |
| 273 let formatter = new Encoding[ output.encode ]( output.storage.writer() ); |
| 274 multiple.add( formatter ); |
| 275 } |
| 276 this.encoder = new Encoding.Format_stream( multiple ); |
| 277 |
| 278 this.encoder.write( this ); |
| 279 this.encoder.sequence_start(); |
| 280 |
| 281 let gen = this.instructions.generator(); |
| 282 let instruction = null; // Avoid spurious IDEA warning |
| 283 for ( instruction of gen ) |
| 284 { |
| 285 if ( this.closed ) |
| 286 { |
| 287 /* |
| 288 * Defensive. We only arrive here if some outside code has called our cl
ose() method and did not also |
| 289 * order our cancellation. Regardless, we're done making new tabs. |
| 290 */ |
| 291 Cu.reportError( "Crawler closed but its enclosing task not cancelled." )
; |
| 292 break; |
| 293 } |
| 294 |
| 295 if ( this.tabbed_browser.available() ) |
| 296 { |
| 297 /* |
| 298 * Since we'll need a variety of browser-tab behaviors, we'll need to ch
ange this factory call |
| 299 * to something dependent upon the instruction. |
| 300 */ |
| 301 tab = this.tabbed_browser.make_tab( instruction.target, this.leave_open,
this._deferred_load_finisher.bind( this ), null ); |
| 302 tab.instruction = instruction; |
| 303 instruction.begin(); |
| 304 let join = new Action.Join_Timeout( tab, this.time_limit, this._join_fin
isher.bind( this ) ); |
| 305 join.go( tab, resume ); |
| 306 /* |
| 307 * The return value of load is an asynchronous action that could be comb
ined with others, if the |
| 308 * instruction dictates. There's no hook for this yet, although that's t
he reason we do not immediately |
| 309 * execute on calling load. |
| 310 */ |
| 311 tab.go(); |
| 312 ++this.progress.active; |
| 313 this.progress.notice(); |
| 314 } |
| 315 if ( !this.tabbed_browser.available() ) |
| 316 { |
| 317 pause(); |
| 318 } |
| 319 |
| 320 var cancelled = yield false; |
| 321 if ( cancelled ) |
| 322 { |
| 323 break; |
| 324 } |
| 325 } |
| 326 /* |
| 327 * At this point in the code, we have launched all the instructions. If we'r
e using more than one tab, |
| 328 * we'll generally have open tabs still. We need to pause until we have no m
ore tabs left open. |
| 329 */ |
| 330 if ( !cancelled ) |
| 331 { |
| 332 while ( !this.tabbed_browser.quiescent() ) |
| 333 { |
| 334 // Must yield after pause() for it to take effect |
| 335 pause(); |
| 336 cancelled = yield false; |
| 337 if ( cancelled ) |
| 338 { |
| 339 break; |
| 340 } |
| 341 ++runaway_counter; |
| 342 if ( runaway_counter > 100 ) |
| 343 { |
| 344 Cu.reportError( "Runaway pause loop. counter = " + runaway_counter ); |
| 345 break; |
| 346 } |
| 347 } |
| 348 } |
| 349 |
| 350 /* |
| 351 * OK. Finally done. |
| 352 */ |
| 353 this.termination = cancelled ? "Cancelled" : "Success"; |
| 354 this.progress.status( cancelled ? "Cancelled" : "Done" ); |
| 355 } |
| 356 catch ( e if e instanceof Error ) |
| 357 { |
| 358 log( e.toString() + "\n\n" + e.stack ); |
| 359 this.termination = "Error"; |
| 360 } |
| 361 catch ( e ) |
| 362 { |
| 363 log( e.toString() + " - type: " + Object.prototype.toString.call( e ) ); |
| 364 this.termination = "Unknown exception"; |
| 365 } |
| 366 finally |
| 367 { |
| 368 /* |
| 369 * Finish writing the output before closing ourselves down. |
| 370 */ |
| 371 this.time_finish = Logger.timestamp(); |
| 372 this.encoder.sequence_stop(); |
| 373 |
| 374 /* |
| 375 * If everything goes right, this cleanup should not be necessary, as tab in
stances are closed as they are used. |
| 376 * Nonetheless, if there's an error and a landing function is not called, th
is line ensures that all the tabs |
| 377 * are properly destroyed. |
| 378 */ |
| 379 if ( tab ) tab.close(); |
| 380 // Removes the ABP shim, amongst other things. |
| 381 this.close(); |
| 382 } |
| 383 }; |
| 384 |
/**
 * Landing function for the asynchronous action of loading a tab. For some reason, Firefox is delivering the
 * STATE_STOP progress message before the last ABP filter is being run. It seems that it's firing events immediately,
 * once it knows the request has finished its HTTP transfer, but before it has fully finished loading the page as a
 * whole (the DOM, layout, etc.). Hence we let the browser finish its work in the current thread and run the actual
 * load-end action afterwards.
 * <p/>
 * This function is declared without parameters, but it does receive arguments. Since it is just a scheduling
 * pass-through, it hands its whole 'arguments' object on to '_load_finisher', no matter what the arguments are (and
 * no matter how they change over time).
 */
Crawler.prototype._deferred_load_finisher = function()
{
  var crawler = this;
  var finisher = this._load_finisher;
  var forwarded = arguments;
  /*
   * The dispatched function runs the finisher with the crawler as its 'this' object and with the arguments this
   * function was called with.
   */
  Action.dispatch( function()
  {
    return finisher.apply( crawler, forwarded );
  } );
};
| 404 |
/**
 * Since we're done loading (the cause doesn't matter), we order the instruction to write out its results, be they
 * successful or any of the varieties of unsuccessful. Afterwards the instruction is sent to the encoder, the tab is
 * closed, the progress counters are updated and the task is resumed.
 *
 * @param tab
 *    The tab whose load has finished. Its 'instruction' member is the instruction being completed.
 * @param completion_state
 *    One of the values of 'Browser_Tab.Completion_State'.
 * @param error_code
 *    Reported as the nsresult when the load was unsuccessful.
 */
Crawler.prototype._load_finisher = function( tab, completion_state, error_code )
{
  var instruction = tab.instruction;

  if ( !tab.completed )
  {
    // Defensive. Should not reach.
    instruction.abort( "tab load not completed. Huh?" );
  }
  else if ( !tab.completed_well )
  {
    instruction.abort( "exception. message = " + tab.exception.message );
  }
  else if ( completion_state === Browser_Tab.Completion_State.Success )
  {
    instruction.end();
  }
  else if ( completion_state === Browser_Tab.Completion_State.No_Success )
  {
    instruction.abort( "unsuccessful load. nsresult = " + error_code );
  }
  else if ( completion_state === Browser_Tab.Completion_State.User_Close )
  {
    instruction.abort( "user closed tab" );
  }
  else if ( completion_state === Browser_Tab.Completion_State.External_Cancel )
  {
    instruction.abort( "timed out" );
  }
  else
  {
    // A completion state this function does not know about.
    instruction.abort( "WTF?" );
  }

  this.encoder.sequence_send( instruction );

  tab.close();
  this.progress.active -= 1;
  this.progress.completed += 1;
  this.progress.notice();
  this.resume();
};
| 453 |
/**
 * The join finisher merely ensures that the tab load action actually completes, stopping it if it hasn't completed yet.
 *
 * @param tab
 *    The tab whose load was joined with a timeout.
 */
Crawler.prototype._join_finisher = function( tab )
{
  /*
   * If the join timeout caused completion, we must assume that the tab is still loading. If the timeout did not fire,
   * then the tab action completed. In all cases, the tab will be complete afterwards. Thus because Join_Timeout is
   * reliable, we have made tab-load reliable also.
   *
   * The flag read here is 'completed', the same one that _load_finisher() reads.
   */
  if ( !tab.completed )
  {
    tab.stop();
  }
};
| 471 |
| 472 //---------------------------------- |
| 473 // Data gathering functions |
| 474 //---------------------------------- |
/**
 * Shim for 'processNode' in ABP. Executes once for each node that ABP processes, whether or not it acts on that node.
 * <p/>
 * The return value is always whatever the original 'processNode' returned. Failures in locating the instruction or
 * in recording the observation are reported to the console and otherwise ignored. An exception thrown by the
 * original 'processNode' propagates to the caller.
 *
 * @param {Function} original_f
 *    The original processNode function. It is called with 'Policy' as its 'this' object.
 * @param {nsIDOMWindow} wnd
 * @param {nsIDOMElement} node
 * @param {Number} contentType
 * @param {nsIURI} location
 * @param {Boolean} collapse
 *    true to force hiding of the node
 * @return {Boolean} false if the node should be blocked
 */
Crawler.prototype.node_action = function( original_f, wnd, node, contentType, location, collapse )
{
  /*
   * Set up collecting for node_entry_action(). It should be the case that a node matches either 0 or 1 filters.
   * The collection array 'entries' allows more than 1 to be recorded, and for such activity to be detected and
   * reported rather than inducing an observation error.
   */
  var entries = [];
  var entry_hook = function( hook_node, hook_windows, hook_entry )
  {
    entries.push( { node: hook_node, windows: hook_windows, entry: hook_entry } );
  };
  this.current_nodes.set( node, entry_hook );

  try
  {
    /*
     * Call the original processNode. If the original throws, then we will too; there is no 'catch' for it. It is
     * inside the 'try' only so that the 'finally' clause below still unregisters the node.
     */
    var result = original_f.call( Policy, wnd, node, contentType, location, collapse );

    var instruction = null;
    try
    {
      instruction = this.locate_instruction( wnd );
    }
    catch ( e )
    {
      Cu.reportError( "Crawler/node_action: error locating instruction: " + String( e )
        + ( ( e && e.stack ) ? ( "\n\tstack = " + e.stack ) : "" )
      );
      return result;
    }
    if ( !instruction )
    {
      /*
       * If we don't have an instruction, we don't report this node. This is by design, because reporting is
       * the responsibility of the instruction object.
       */
      return result;
    }
    if ( entries.length === 0 && !instruction.observing_all_nodes() )
    {
      // Assert we didn't touch this node and the instruction doesn't want to see it
      return result;
    }
    try
    {
      var observation = new Observation(
        !result, contentType,
        (contentType === Policy.type.ELEMHIDE) ? location.text : location.spec,
        entries
      );
      instruction.observe_node( observation );
    }
    catch ( e )
    {
      Cu.reportError( "Crawler/node_action: error recording observation: " + String( e ) );
    }
    return result;
  }
  finally
  {
    /*
     * This 'finally' clause ensures that we remove the node from 'this.current_nodes'. Even though it's a weak map,
     * we need to remove the key so that 'entry_hook' is not called inadvertently.
     */
    this.current_nodes.delete( node );
  }
};
| 561 |
/**
 * Locate our instruction associated with the window that caused a node to load. First we find the browser
 * associated with the window. There should always be one of these, otherwise we have an error. From the browser, we
 * locate our tab associated with it, which need not be present. Finally, we locate the instruction as a tab member,
 * which should always exist.
 * <p/>
 * This is called only in node_action(). It's separate to simplify the control flow.
 *
 * @param window
 *    The window passed to the shimmed 'processNode'.
 * @return {Instruction_class}
 *    The instruction attached to the tab, or null when the browser is not one of the tabs we are tracking.
 * @throws {Error}
 *    If the top window has no document, if no tabbrowser or browser can be found, or if the tab has no
 *    'instruction' member.
 */
Crawler.prototype.locate_instruction = function( window )
{
  var top_window = window.top;
  if ( !top_window.document )
  {
    throw new Error( "No document associated with the node's top window" );
  }

  var tabbrowser = Utils.getChromeWindow( top_window ).gBrowser;
  if ( !tabbrowser )
  {
    throw new Error( "Unable to get a tabbrowser reference from the window" );
  }

  var browser = tabbrowser.getBrowserForDocument( top_window.document );
  if ( !browser )
  {
    throw new Error( "Unable to get browser for the tab" );
  }

  var browser_map = this.tabbed_browser.map_browser_to_child;
  if ( !browser_map.has( browser ) )
  {
    /*
     * It's not an error for the browser not to appear in this map. If the tab remains open past the time we are
     * monitoring (either on purpose or as the result of a quirk of timing), we simply return a null instruction.
     * This case is deliberately not reported to the console.
     */
    return null;
  }

  var tab = browser_map.get( browser ).child;
  if ( "instruction" in tab )
  {
    return tab.instruction;
  }
  throw new Error( "'instruction' not found as member of tab object" );
};
| 604 |
/**
 * This function executes solely underneath (in the call stack) 'node_action'. It receives at least one call per node,
 * more if there are matches on rules of any kind.
 * <p/>
 * For an entry that carries a filter, the chain of windows from 'window' up through its parents is collected and
 * handed, along with the node and the entry, to the hook that 'node_action' registered for the node.
 *
 * @param window
 *    The window the node belongs to; the start of the window chain.
 * @param node
 *    The node being processed. Must currently be registered in 'current_nodes'.
 * @param {RequestEntry} entry
 */
Crawler.prototype.node_entry_action = function( window, node, entry )
{
  if ( !this.current_nodes.has( node ) )
  {
    Cu.reportError( "node_entry_action: node not seen in 'current_nodes'" );
    return;
  }
  if ( !entry.filter )
  {
    /*
     * If there's no filter in the entry, then nothing happened to it. We are presently ignoring such entries. In
     * the future, however, we will likely want a hook here to process entries that are not associated with any
     * filter, for example, to ensure that necessary content is not blocked inadvertently.
     */
    return;
  }

  var chain = [];
  var cursor = window;
  for ( var depth = 1 ; cursor != null ; ++depth )
  {
    if ( depth > 100 )
    {
      // Far deeper than any real window nesting. Give up and hand the hook a null chain.
      chain = null;
      Cu.reportError( "Crawler/node_entry_action: runaway window chain" );
      break;
    }
    chain.push( cursor );
    if ( cursor === cursor.parent )
    {
      // The ordinary way out of the loop: a window that is its own parent ends the chain.
      break;
    }
    cursor = cursor.parent;
  }

  var hook = this.current_nodes.get( node );
  hook( node, chain, entry );
};
| 650 |
| 651 |
/**
 * Shutdown handler for this module. Closes the 'processNode' shim, which puts the original function back in place.
 * It is registered with 'onShutdown' immediately after this definition.
 */
function shutdown_crawler()
{
  process_node_shim.close();
}
55 | 656 |
56 function loadSite(site, window, callback) | 657 try |
57 { | 658 { |
58 if (!site) | 659 onShutdown.add( shutdown_crawler ); |
59 return; | |
60 | |
61 let tabbrowser = window.gBrowser; | |
62 let tab = tabbrowser.addTab(site); | |
63 let browser = tabbrowser.getBrowserForTab(tab); | |
64 | |
65 siteTabs.set(browser, site); | |
66 | |
67 let progressListener = { | |
68 onStateChange: function(aBrowser, aWebProgress, aRequest, aStateFlags, aStat
us) | |
69 { | |
70 if (browser !== aBrowser) | |
71 return; | |
72 | |
73 if (!(aStateFlags & Ci.nsIWebProgressListener.STATE_STOP)) | |
74 return; | |
75 | |
76 tabbrowser.removeTabsProgressListener(progressListener); | |
77 tabbrowser.removeTab(tab); | |
78 callback(); | |
79 } | |
80 }; | |
81 tabbrowser.addTabsProgressListener(progressListener); | |
82 } | 660 } |
83 | 661 catch ( e ) |
84 function loadSites(backendUrl, parallelTabs, window, sites, callback) | 662 { |
85 { | 663 Cu.reportError( "Failure adding shutdown function. error = \"" + e.message + "
\"" ); |
86 while (currentTabs < parallelTabs && sites.length) | |
87 { | |
88 currentTabs++; | |
89 let site = sites.shift(); | |
90 loadSite(site, window, function() | |
91 { | |
92 currentTabs--; | |
93 if (!sites.length && !currentTabs) | |
94 { | |
95 Storage.finish(); | |
96 let dataFilePath = Storage.dataFile.path; | |
97 Client.sendCrawlerDataFile(backendUrl, dataFilePath, function() | |
98 { | |
99 Storage.destroy(); | |
100 callback(); | |
101 }); | |
102 } | |
103 else | |
104 loadSites(backendUrl, parallelTabs, window, sites, callback); | |
105 }); | |
106 } | |
107 } | 664 } |
108 | |
109 let Crawler = exports.Crawler = {}; | |
110 | |
111 Crawler.crawl = function(backendUrl, parallelTabs, window, callback) | |
112 { | |
113 if (Policy.processNode != origProcessNode) | |
114 return; | |
115 | |
116 Policy.processNode = processNode; | |
117 | |
118 siteTabs = new WeakMap(); | |
119 currentTabs = 0; | |
120 | |
121 Storage.init(); | |
122 | |
123 Client.fetchCrawlableSites(backendUrl, function(sites) | |
124 { | |
125 loadSites(backendUrl, parallelTabs, window, sites, function() | |
126 { | |
127 Policy.processNode = origProcessNode; | |
128 siteTabs = null; | |
129 callback(); | |
130 }); | |
131 }); | |
132 }; | |
OLD | NEW |