Skip to content

Commit

Permalink
Merge pull request #1390 from lindapaiste/cleanup/video-load
Browse files Browse the repository at this point in the history
Cleanup: Common utility for awaiting video load
  • Loading branch information
lindapaiste authored May 22, 2022
2 parents e7725de + 0c3a854 commit c3123ca
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 70 deletions.
17 changes: 3 additions & 14 deletions src/BodyPix/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import generatedImageResult from '../utils/generatedImageResult';
import handleArguments from '../utils/handleArguments';
import p5Utils from '../utils/p5Utils';
import BODYPIX_PALETTE from './BODYPIX_PALETTE';
import { mediaReady } from '../utils/imageUtilities';

/**
* @typedef {Record<string, {color: [number, number, number], id: number}>} BodyPixPalette
Expand Down Expand Up @@ -135,13 +136,7 @@ class BodyPix {
async segmentWithPartsInternal(imgToSegment, segmentationOptions) {
// estimatePartSegmentation
await this.ready;
await tf.nextFrame();

if (this.video && this.video.readyState === 0) {
await new Promise(resolve => {
this.video.onloadeddata = () => resolve();
});
}
await mediaReady(imgToSegment, true);

this.config.palette = segmentationOptions.palette || this.config.palette;
this.config.outputStride = segmentationOptions.outputStride || this.config.outputStride;
Expand Down Expand Up @@ -253,13 +248,7 @@ class BodyPix {
async segmentInternal(imgToSegment, segmentationOptions) {

await this.ready;
await tf.nextFrame();

if (this.video && this.video.readyState === 0) {
await new Promise(resolve => {
this.video.onloadeddata = () => resolve();
});
}
await mediaReady(imgToSegment, true);

this.config.outputStride = segmentationOptions.outputStride || this.config.outputStride;
this.config.segmentationThreshold = segmentationOptions.segmentationThreshold || this.config.segmentationThreshold;
Expand Down
18 changes: 3 additions & 15 deletions src/FaceApi/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
* Ported and integrated from all the hard work by: https://github.com/justadudewhohacks/face-api.js?files=1
*/

import * as tf from "@tensorflow/tfjs";
import * as faceapi from "face-api.js";
import callCallback from "../utils/callcallback";
import handleArguments from "../utils/handleArguments";
import { mediaReady } from "../utils/imageUtilities";
import { getModelPath } from "../utils/modelLoader";

const DEFAULTS = {
Expand Down Expand Up @@ -158,13 +158,7 @@ class FaceApiBase {
*/
async detectInternal(imgToClassify, faceApiOptions) {
await this.ready;
await tf.nextFrame();

if (this.video && this.video.readyState === 0) {
await new Promise(resolve => {
this.video.onloadeddata = () => resolve();
});
}
await mediaReady(imgToClassify, true);

// sets the return options if any are passed in during .detect() or .detectSingle()
this.config = this.setReturnOptions(faceApiOptions);
Expand Down Expand Up @@ -223,13 +217,7 @@ class FaceApiBase {
*/
async detectSingleInternal(imgToClassify, faceApiOptions) {
await this.ready;
await tf.nextFrame();

if (this.video && this.video.readyState === 0) {
await new Promise(resolve => {
this.video.onloadeddata = () => resolve();
});
}
await mediaReady(imgToClassify, true);

// sets the return options if any are passed in during .detect() or .detectSingle()
this.config = this.setReturnOptions(faceApiOptions);
Expand Down
9 changes: 2 additions & 7 deletions src/Facemesh/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import * as facemeshCore from "@tensorflow-models/facemesh";
import { EventEmitter } from "events";
import callCallback from "../utils/callcallback";
import handleArguments from "../utils/handleArguments";
import { mediaReady } from '../utils/imageUtilities';

class Facemesh extends EventEmitter {
/**
Expand Down Expand Up @@ -43,13 +44,6 @@ class Facemesh extends EventEmitter {
this.model = await facemeshCore.load(this.config);
this.modelReady = true;

if (this.video && this.video.readyState === 0) {
await new Promise(resolve => {
this.video.onloadeddata = () => {
resolve();
};
});
}
if (this.video) {
this.predict();
}
Expand All @@ -65,6 +59,7 @@ class Facemesh extends EventEmitter {
if (!image) {
throw new Error("No input image found.");
}
await mediaReady(image, false);
const { flipHorizontal } = this.config;
const predictions = await this.model.estimateFaces(image, flipHorizontal);
const result = predictions;
Expand Down
10 changes: 2 additions & 8 deletions src/Handpose/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import * as handposeCore from "@tensorflow-models/handpose";
import { EventEmitter } from "events";
import callCallback from "../utils/callcallback";
import handleArguments from "../utils/handleArguments";
import { mediaReady } from '../utils/imageUtilities';

class Handpose extends EventEmitter {
/**
Expand Down Expand Up @@ -43,14 +44,6 @@ class Handpose extends EventEmitter {
this.model = await handposeCore.load(this.config);
this.modelReady = true;

if (this.video && this.video.readyState === 0) {
await new Promise(resolve => {
this.video.onloadeddata = () => {
resolve();
};
});
}

if (this.video) {
this.predict();
}
Expand All @@ -66,6 +59,7 @@ class Handpose extends EventEmitter {
if (!image) {
throw new Error("No input image found.");
}
await mediaReady(image, false);
const { flipHorizontal } = this.config;
const predictions = await this.model.estimateHands(image, flipHorizontal);
const result = predictions;
Expand Down
18 changes: 2 additions & 16 deletions src/ImageClassifier/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import handleArguments from "../utils/handleArguments";
import * as darknet from "./darknet";
import * as doodlenet from "./doodlenet";
import callCallback from "../utils/callcallback";
import { imgToTensor } from "../utils/imageUtilities";
import { imgToTensor, mediaReady } from "../utils/imageUtilities";

const DEFAULTS = {
mobilenet: {
Expand Down Expand Up @@ -134,21 +134,7 @@ class ImageClassifier {
async classifyInternal(imgToPredict, numberOfClasses) {
// Wait for the model to be ready
await this.ready;
await tf.nextFrame();

if (imgToPredict instanceof HTMLVideoElement && imgToPredict.readyState === 0) {
const video = imgToPredict;
// Wait for the video to be ready
await new Promise(resolve => {
video.onloadeddata = () => resolve();
});
}

if (this.video && this.video.readyState === 0) {
await new Promise(resolve => {
this.video.onloadeddata = () => resolve();
});
}
await mediaReady(imgToPredict, true);

// Process the images
const imageResize = [IMAGE_SIZE, IMAGE_SIZE];
Expand Down
11 changes: 4 additions & 7 deletions src/UNET/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import * as tf from '@tensorflow/tfjs';
import callCallback from '../utils/callcallback';
import generatedImageResult from '../utils/generatedImageResult';
import handleArguments from "../utils/handleArguments";
import { mediaReady } from '../utils/imageUtilities';

const DEFAULTS = {
modelPath: 'https://raw.githubusercontent.com/zaidalyafeai/HostedModels/master/unet-128/model.json',
Expand All @@ -33,8 +34,8 @@ class UNET {
modelPath: typeof options.modelPath !== 'undefined' ? options.modelPath : DEFAULTS.modelPath,
imageSize: typeof options.imageSize !== 'undefined' ? options.imageSize : DEFAULTS.imageSize,
returnTensors: typeof options.returnTensors !== 'undefined' ? options.returnTensors : DEFAULTS.returnTensors,

};
this.video = video;
this.ready = callCallback(this.loadModel(), callback);
}

Expand All @@ -46,17 +47,13 @@ class UNET {

async segment(inputOrCallback, cb) {
const { image, callback } = handleArguments(this.video, inputOrCallback, cb);
await this.ready;
return callCallback(this.segmentInternal(image), callback);
}

async segmentInternal(imgToPredict) {
// Wait for the model to be ready
// Wait for the model to be ready and video input to be loaded
await this.ready;
// skip asking for next frame if it's not video
if (imgToPredict instanceof HTMLVideoElement) {
await tf.nextFrame();
}
await mediaReady(imgToPredict, true);
this.isPredicting = true;

const {
Expand Down
15 changes: 14 additions & 1 deletion src/utils/handleArguments.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@ export const isVideo = (img) => {
img instanceof HTMLVideoElement;
}

/**
 * Determine whether a value is an HTMLAudioElement.
 * Guards against environments (e.g. Node, web workers) where the
 * HTMLAudioElement constructor does not exist.
 * @param {any} img
 * @returns {img is HTMLAudioElement}
 */
export const isAudio = (img) => {
  if (typeof HTMLAudioElement === 'undefined') {
    return false;
  }
  return img instanceof HTMLAudioElement;
}

/**
* Check if a variable is an HTMLCanvasElement.
* @param {any} img
Expand Down Expand Up @@ -203,7 +213,10 @@ class ArgHelper {
});
}
}
// TODO: handle audio elements and p5.sound
// TODO: handle p5.sound
if (isAudio(arg)) {
this.set({ audio: arg });
}
// Check for arrays
else if (Array.isArray(arg)) {
this.set({ array: arg });
Expand Down
44 changes: 42 additions & 2 deletions src/utils/imageUtilities.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,16 @@
// https://opensource.org/licenses/MIT

import * as tf from '@tensorflow/tfjs';
import { getImageElement, isCanvas, isImageData, isImageElement, isP5Image } from "./handleArguments";
import {
getImageElement,
isAudio,
isCanvas,
isImageData,
isImageElement,
isImg,
isP5Image,
isVideo
} from "./handleArguments";
import p5Utils from './p5Utils';

// Resize video elements
Expand Down Expand Up @@ -162,12 +171,43 @@ function imgToPixelArray(img) {
return Array.from(imgData.data);
}

/**
 * Extract common logic from models accepting video input.
 * Makes sure that the video/audio/image data has loaded.
 * Optionally can wait for the next frame every time the function is called.
 * Will resolve immediately if the input is undefined or a different element type.
 * Rejects if the media element fires an 'error' event while waiting.
 * @param {InputImage | undefined} input - media element to wait on, or anything else (ignored).
 * @param {boolean} nextFrame - when true, also await tf.nextFrame() for video/audio inputs
 *   so the browser can paint the current frame before pixels are read.
 * @returns {Promise<void>}
 */
async function mediaReady(input, nextFrame) {
  if (input && (isVideo(input) || isAudio(input))) {
    if (nextFrame) {
      // Yield to the browser so the current frame can render before reading pixels.
      await tf.nextFrame();
    }
    // readyState 0 (HAVE_NOTHING) means no media data has loaded yet.
    if (input.readyState === 0) {
      await new Promise((resolve, reject) => {
        // Detach both listeners whichever event fires first, so repeated calls
        // on the same element cannot accumulate stale handlers.
        const onLoaded = () => {
          cleanup();
          resolve();
        };
        const onError = () => {
          cleanup();
          reject(input.error);
        };
        const cleanup = () => {
          input.removeEventListener('loadeddata', onLoaded);
          input.removeEventListener('error', onError);
        };
        input.addEventListener('loadeddata', onLoaded);
        input.addEventListener('error', onError);
      });
    }
  } else if (input && isImg(input)) {
    // An <img> reports complete === true once loading has finished
    // (successfully or not); only wait while a load is still in flight.
    if (!input.complete) {
      await new Promise((resolve, reject) => {
        const onLoad = () => {
          cleanup();
          resolve();
        };
        const onError = () => {
          cleanup();
          // Reject with an Error rather than the raw DOM event for a clearer message.
          reject(new Error(`Failed to load image${input.src ? ` from ${input.src}` : ''}`));
        };
        const cleanup = () => {
          input.removeEventListener('load', onLoad);
          input.removeEventListener('error', onError);
        };
        input.addEventListener('load', onLoad);
        input.addEventListener('error', onError);
      });
    }
  }
}

export {
array3DToImage,
processVideo,
cropImage,
imgToTensor,
isInstanceOfSupportedElement,
flipImage,
imgToPixelArray
imgToPixelArray,
mediaReady
};

0 comments on commit c3123ca

Please sign in to comment.