I'm using ONNX Runtime in Node.js to execute ONNX-converted models on the CPU backend. I run model inference in parallel using Promise.allSettled:
// Kick off one inference per sequence and let them run concurrently;
// allSettled collects every outcome, then only fulfilled results are kept.
const settled = await Promise.allSettled(
  sequences.map((seq) => self.inference(self.session, self.tokenizer, seq))
);
results = settled
  .filter((outcome) => outcome.status === "fulfilled")
  .map((outcome) => outcome.value);
running this class instance method `inference`,
which measures latency via the helper `Util.performance.now`:
// Runs one tokenized text sequence through the ONNX session.
// Returns [latency_ms, [[label, percent], ...]] — the top 6 label/score pairs.
//
// NOTE(timing): `start`/`duration` are local to each call, so the scoping is
// fine. But the measured value is *wall-clock* time; when several inference()
// calls run concurrently (e.g. via Promise.allSettled on a single-threaded
// event loop / shared CPU backend), each interval also covers time spent on
// the other in-flight calls — which is why the latencies look "summed up".
ONNX.prototype.inference = async function (session, tokenizer, text) {
    const default_labels = this._options.model.default_labels;
    const labels = this._options.model.labels;
    const debug = this._options.debug;
    try {
        // An empty encoding means there is nothing to classify.
        const encoded_ids = await tokenizer.tokenize(text);
        if (encoded_ids.length === 0) {
            return [0.0, default_labels];
        }
        const model_input = ONNX.create_model_input(encoded_ids);
        // now() with no argument returns a process.hrtime() tuple;
        // now(start) returns the elapsed milliseconds since that tuple.
        const start = Util.performance.now();
        const output = await session.run(model_input, ['output_0']);
        const duration = Util.performance.now(start).toFixed(1);
        const sequence_length = model_input['input_ids'].size;
        if (debug) console.log("latency = " + duration + "ms, sequence_length=" + sequence_length);
        // Sigmoid per logit, floored to an integer percentage.
        const probs = output['output_0'].data.map(ONNX.sigmoid).map(t => Math.floor(t * 100));
        // Pair each label with its score, then rank them.
        const result = labels.map((label, i) => [label, probs[i]]);
        result.sort(ONNX.sortResult);
        // slice() caps at 6 without padding; the original fixed 6-iteration
        // loop produced `undefined` entries when fewer than 6 labels exist.
        const result_list = result.slice(0, 6);
        return [parseFloat(duration), result_list];
    } catch (e) {
        // Don't swallow failures silently: surface them in debug mode, then
        // fall back to the defaults so one bad sequence doesn't reject the
        // whole Promise.allSettled batch.
        if (debug) console.error("inference failed:", e);
        return [0.0, default_labels];
    }
}//inference
The reported timings are wrong: each call appears to report the accumulated ("summed up") time of all concurrent inferences rather than its own.
The `performance` helper
object looks like
// Timing helper with a dual-mode API:
//   now()        -> a process.hrtime() tuple [seconds, nanoseconds] to use as a start marker
//   now(start)   -> whole milliseconds elapsed since `start`
// Declared with `const` — the original `Util = {...}` was an implicit global,
// which throws a ReferenceError in strict mode / ES modules.
const Util = {
  performance: {
    now(start) {
      if (!start) {
        return process.hrtime();
      }
      // process.hrtime(start) yields the [sec, nsec] delta since `start`.
      const [sec, nsec] = process.hrtime(start);
      return Math.round(sec * 1000 + nsec / 1e6);
    }
  }
};
and it is used in the usual way:
// this runs in parallel
const start = Util.performance.now();
// computation
// BUG FIX: the original `Util.performance.now() - start` subtracted two
// process.hrtime() tuples (arrays), which coerces to NaN. The elapsed time
// must be obtained by passing the start marker: now(start) -> milliseconds.
const duration = Util.performance.now(start).toFixed(1);
Now, within the `performance.now` function the `start` and `end` variables are locally scoped, so what happens when using Promise.allSettled?
I would expect the timing to be correct because of that local scope — yet each concurrent call seems to measure far more than its own work.