Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/task.js

Issue 10233013: Crawler, second version (Closed)
Patch Set: Created April 12, 2013, 1:38 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « lib/storage.js ('k') | lib/yaml.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 let {Logger} = require( "logger" );
/**
 * Iteration cap that the Long_Task constructor falls back on when the caller does not supply a runaway_limit.
 * @type {Number}
 */
const default_runaway_limit = 1000;
7
/**
 * Class for long-running tasks. Such tasks divide work into discrete units of effort, which allows them to be
 * interrupted and to post progress.
 * <p/>
 * Note that there are no callbacks or events in this class. Any state feedback should be implemented as needed within
 * the task generator. The task generator can distinguish cancellation by examining the rvalue of its yield
 * statements. The generator cannot detect being paused, but the control object that started the task can.
 * <p/>
 * PLANNED: The current version will handle AJAX calls by polling the task generator every millisecond to see if there
 * have been pending calls that have completed since the last poll. It would make for better performance for the task
 * to signal that it should pause execution and then continue only after a pending call completes. This would
 * eliminate run-time polling overhead.
 * <p/>
 * NOTE(review): an earlier design note here described the generator as being instantiated before this constructor
 * and proposed handing it a pause/resume object instead. The code now does the latter: 'run()' instantiates the
 * generator itself and passes it bound 'pause' and 'resume' functions.
 *
 * @constructor
 * @param {*} task_instance
 *    The object that supplies the task. It is stored as-is; 'run()' later calls
 *    'task_instance.generator( pause, resume )' to obtain the task generator. Each call to the generator performs an
 *    increment of computation, whose size is determined by the task. This is part of a cooperative multitasking
 *    system.
 *    <p/>
 *    Note that the generator's 'next()' is always called at least once after 'run()', because the runner calls that
 *    method before it tests for cancellation. This is required by the interface to a generator, since it's valid to
 *    call 'send()', which is how the runner signals cancellation to the task, only after the first call to 'next'.
 *    If, for whatever reason, it's necessary to detect cancellation before the work begins, the generator should
 *    have an extra do-nothing 'yield' statement at the beginning.
 *
 * @param {Number} [runaway_limit]
 *    The maximum number of iterations before the run is aborted with a runaway error. Disable runaway detection by
 *    setting this value to zero. When omitted (or passed as undefined), 'default_runaway_limit' is used.
 */
var Long_Task = exports.Long_Task = function( task_instance, runaway_limit )
{
  /**
   * The core of a long task is a generator that runs one unit of computation with each call to next(). This object
   * will give us such a generator by calling its generator() member.
   * @type {*}
   */
  this.task_instance = task_instance;

  /**
   * The task generator for the task. It's initialized to null here, and set to the actual generator at the beginning
   * of the run() method.
   * <p/>
   * The generator yields 'false' when it is not yet completed and 'true' once it has. Calling the generator with
   * 'send( true )' notifies the generator that it has been cancelled; thereafter it must return 'true' always.
   * @type {Generator}
   */
  this.task_generator = null;

  /**
   * Cancellation flag. Set in the cancel() method. Tested each iteration in _run_once().
   * @type {Boolean}
   */
  this.cancelled = false;

  /**
   * Pause state flag.
   * @type {Boolean}
   */
  this.paused = false;

  /**
   * Runnable flag. This is essentially the state variable of a two-state machine, which starts at "runnable" and
   * goes to "completed".
   * @type {Boolean}
   */
  this.runnable = true;

  /**
   * The maximum number of iterations before the run is aborted. Disable runaway detection by setting this value to
   * zero.
   * <p/>
   * The default is selected by testing for 'undefined' rather than counting arguments, so that an explicitly passed
   * 'undefined' does not silently switch runaway detection off.
   * @type {Number}
   */
  this.runaway_limit = ( runaway_limit === undefined ) ? default_runaway_limit : runaway_limit;

  /**
   * Iteration counter. This is only incremented when a runaway limit is in effect.
   * @type {Number}
   */
  this.count = 0;

  /**
   * XPCOM thread manager. Used to implement _dispatch().
   * <p/>
   * NOTE(review): the thread manager is usually obtained with getService() rather than createInstance(); confirm
   * that createInstance() is intended here.
   * @type {nsIThreadManager}
   */
  this.thread_manager = Cc["@mozilla.org/thread-manager;1"].createInstance( Ci.nsIThreadManager );

  /**
   * Logging service.
   * @type {Logger}
   */
  this.logger = new Logger( "Long_Task" );
};
110
/**
 * Shut the task down. At present this only requests cancellation.
 * <p/>
 * DEFECT: The iterator ought to be closed here as well. Doing that correctly is not trivial. A paused task has an
 * operation pending that cannot be prevented from executing and that will call resume() at some point. There may
 * also be another iteration of the main loop that has already been dispatched.
 */
Long_Task.prototype.close = function()
{
  // Delegate to cancel(); the generator itself is left open (see DEFECT above).
  this.cancel();
};
125
/**
 * Request cancellation of the pending task. The request is only recorded here as a flag; it is acted upon the next
 * time the runner tests the flag, which is nowhere near immediate with JavaScript.
 * <p/>
 * WARNING: With cancellation implemented this way, the task generator is called one more time before the
 * cancellation is noticed. If that's a problem, the algorithm has to change: make an initial call to 'next()' before
 * setting up the standing loop, and swap the order of iterating and checking for cancellation.
 */
Long_Task.prototype.cancel = function()
{
  // Only raises the flag; nothing is stopped synchronously.
  this.cancelled = true;
};
139
/**
 * Start the task. May be called only once per Long_Task; a second call throws.
 * <p/>
 * The generator is instantiated here, not in the constructor, so that bound 'pause' and 'resume' functions can be
 * handed to it. If instantiating the generator throws, the exception propagates to the caller and the task remains
 * non-runnable.
 *
 * @param {Function} finisher
 *    Function to transfer control to when the run is completed.
 * @param {Function} [catcher]
 *    Function to call if there's an exception thrown during the run.
 * @throws {Error}
 *    If the task has already been started.
 */
Long_Task.prototype.run = function( finisher, catcher )
{
  var log = this.logger.make_log( "run" );
  log( "Begin, runaway_limit = " + this.runaway_limit );

  /*
   * Single-shot guard. Starting a second loop after the first has begun would give no useful behavior, so the
   * flag is dropped as soon as the first call gets past this point.
   */
  if ( !this.runnable )
  {
    throw new Error( "Long_Task no longer runnable" );
  }
  this.runnable = false;

  /**
   * Completion callback, dispatched by _run_finally().
   * @type {Function}
   */
  this.finisher = finisher;

  /**
   * Exception callback, dispatched by _run_catch().
   * @type {Function}
   */
  this.catcher = catcher;

  /**
   * The generator that acts as the body of the Long_Task.
   * @type {Generator}
   */
  var pause_function = this.pause.bind( this );
  var resume_function = this.resume.bind( this );
  this.task_generator = this.task_instance.generator( pause_function, resume_function );

  // First iteration runs synchronously; later ones are dispatched by _run_once() itself.
  this._run_once();
};
190
/**
 * The main body of the runner. Performs one iteration of the task generator and then decides what happens next.
 * <p/>
 * The return points of this function fall into two categories. The internal return points keep the Long_Task active
 * as a control structure. The external return points pass control back to the calling code, and each of them
 * arranges for the finisher to be dispatched exactly once.
 */
Long_Task.prototype._run_once = function()
{
  var log = this.logger.make_log( "_run_once" );

  /*
   * If we must pause, we simply don't do anything else now. The landing code of the asynchronous object must
   * call resume(), which will dispatch the present function again and start the iteration cycle up again. This
   * can be a source of failure if pause-resume coordination isn't done correctly in the task generator.
   *
   * FUTURE: start a watchdog timer here that will cancel the object if the task times out. It might be the case
   * that this is better done by the task generator only if necessary. A generator-transformer that sets up such
   * a timer could be the replacement of setting up a timer here.
   */
  if ( this.paused )
  {
    /*
     * Internal return. Nothing scheduled.
     */
    return;
  }

  /*
   * Main iteration call. The call to next() goes into a try-block to ensure we stop gracefully if the generator
   * throws, since that doesn't always signal an error.
   */
  try
  {
    if ( this.task_generator.next() )
    {
      /*
       * The task generator returned true, which means that it's finished.
       *
       * External return. Ordinary.
       */
      this._run_finally();
      return;
    }
  }
  catch ( ex )
  {
    if ( ex === StopIteration )
    {
      log( "End. Task iterator stopped" );
      /*
       * StopIteration is not an error but just signals the end of data for an ordinary iterator. Since the
       * generator has signalled us, we don't send any signal by calling 'send()'.
       *
       * External return. Ordinary.
       */
      this._run_finally();
    }
    else
    {
      // String() rather than ex.toString(), so that a thrown null/undefined cannot fault inside this handler.
      log( "Iteration exception " + String( ex ) );
      /*
       * External return. Exceptional.
       */
      this._run_catch( ex );
    }
    return;
  }
  // Assert 'this.task_generator.next()' returned false, which means that it isn't finished.

  /*
   * Runaway detection.
   */
  if ( this.runaway_limit > 0 )
  {
    ++this.count;
    log( "Iteration " + this.count, false );
    if ( this.count >= this.runaway_limit )
    {
      this.cancelled = true;
      /*
       * External return. Exceptional.
       */
      this._run_catch( new Error( "Long_Task: runaway iteration. count=" + this.count ) );
      return;
    }
  }

  /*
   * Cancellation detection.
   */
  if ( this.cancelled )
  {
    log( "Cancellation begin" );
    try
    {
      /*
       * We've received a command to cancel from elsewhere. Notify the generator that we're shutting down and
       * exit the loop. We're doing this within a try-block because the generator will typically throw
       * StopIteration at this point, which isn't an error.
       */
      this.task_generator.send( true );
    }
    catch ( ex )
    {
      /*
       * StopIteration is not an error as a result of cancellation, but any other exception is.
       */
      if ( ex !== StopIteration )
      {
        log( "Cancellation exception: " + String( ex ) );
        /*
         * External return. Exceptional.
         *
         * _run_catch() takes care of the finisher, so we must return here. Falling through to the ordinary
         * return below would dispatch the finisher a second time.
         */
        this._run_catch( ex );
        return;
      }
    }
    log( "Cancellation end" );
    /*
     * External return. Ordinary.
     */
    this._run_finally();
    return;
  }

  /*
   * Infinite loop behavior happens here, where we schedule ourselves for another run as soon as possible
   * after we complete. This uses the container's thread manager, so it executes more-or-less immediately.
   * If there are long-duration asynchronous actions in the task, such as loading web pages or AJAX calls,
   * this routine runs too fast to be effective as a poll. Such tasks should pause when such operations are
   * pending.
   */
  this._dispatch( this._run_once.bind( this ) );
  /*
   * Internal return. Next iteration is scheduled.
   */
};

/**
 * Execute the finally-function, if one was given to run().
 */
Long_Task.prototype._run_finally = function()
{
  /*
   * We dispatch the actual finally-function so that it's not running in this context. This alleviates some odd
   * timing behavior for the user if there are pending events.
   */
  if ( this.finisher )
  {
    this._dispatch( this.finisher );
  }
};

/**
 * Execute the catch-function and then the finally-function, mimicking the behavior of a 'try' statement.
 * <p/>
 * The catcher is optional. Without one, the exception value is dropped, but the finally-function still runs, just
 * as a 'finally' clause runs when there is no 'catch' clause.
 *
 * @param {*} e
 *    The value thrown as an exception. It's treated as an opaque type.
 */
Long_Task.prototype._run_catch = function( e )
{
  if ( !this.catcher )
  {
    this._run_finally();
    return;
  }

  this._dispatch(
    function()
    {
      try
      {
        this.catcher( e );
      }
      finally
      {
        // Runs even if the catcher itself throws; that exception then continues to propagate.
        this._run_finally();
      }
    }.bind( this )
  );
};
363
/**
 * Pause instruction. This call does not block, since JavaScript is not multi-threaded. It only raises the pause
 * flag, and the pause takes effect at the next 'yield' statement, when the runner finds the flag set and declines
 * to iterate further.
 */
Long_Task.prototype.pause = function()
{
  // Flag only; _run_once() is what honours it.
  this.paused = true;
};
372
/**
 * Resume instruction. Clears the pause flag and schedules a fresh iteration of the runner, restarting the iteration
 * cycle that _run_once() abandoned while the task was paused.
 */
Long_Task.prototype.resume = function()
{
  this.paused = false;

  // Scheduled through _dispatch() rather than called directly, so the iteration does not run in this call stack.
  var next_iteration = this._run_once.bind( this );
  this._dispatch( next_iteration );
};
381
/**
 * Queue a function on the current thread through the XPCOM thread manager. This is used ordinarily at the end of
 * _run_once() to schedule another iteration, by resume() to restart the iteration, and to deliver the finisher and
 * catcher callbacks.
 *
 * @param {Function} f
 *    The function to run. It is wrapped in an object whose 'run' member is 'f'.
 */
Long_Task.prototype._dispatch = function( f )
{
  var current_thread = this.thread_manager.currentThread;
  var runnable = {run: f};
  current_thread.dispatch( runnable, Ci.nsIEventTarget.DISPATCH_NORMAL );
};
393
OLDNEW
« no previous file with comments | « lib/storage.js ('k') | lib/yaml.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld