demo.py #91

kanybekasanbekov · 2022-12-02T02:14:21Z

I have implemented a small script for a real-time demo from a webcam. I hope it will be helpful to someone. My overall verdict is that this repo is not useful for real-time demos, for the following reasons:

Inference speed is relatively slow (about 5–6 FPS on CPU).
You have to integrate face detection yourself; otherwise, the model will predict key points even on frames that contain no face.
The face has to be right in front of the camera. If your face is a bit far from the camera, the model fails to predict the key points correctly.

import os
import argparse
import time
import cv2
import torch
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
import lib.models as models
from lib.config import config, update_config
from lib.core.evaluation import decode_preds
import numpy as np


def parse_args():
    """Parse the demo's command-line options and apply them to ``config``.

    Two options are accepted, both with defaults:

    * ``--cfg``: experiment configuration filename.
    * ``--model-file``: path to the model parameters.

    The parsed namespace is passed to ``update_config`` together with the
    global ``config`` object before being returned.

    Returns:
        The ``argparse.Namespace`` holding ``cfg`` and ``model_file``.
    """
    cli = argparse.ArgumentParser(description='Real-time webcam demo')

    cli.add_argument(
        '--cfg',
        type=str,
        default='experiments/wflw/face_alignment_wflw_hrnet_w18.yaml',
        help='experiment configuration filename',
    )
    cli.add_argument(
        '--model-file',
        type=str,
        default='hrnetv2_pretrained/HR18-WFLW.pth',
        help='model parameters',
    )

    parsed = cli.parse_args()
    update_config(config, parsed)
    return parsed


def _preprocess(frame, mean, std):
    """Convert an HxWx3 BGR frame into a normalized 1x3xHxW float32 tensor.

    Args:
        frame: Image array as returned by OpenCV (BGR channel order).
        mean: Per-channel mean, length 3, applied after scaling to [0, 1].
        std: Per-channel standard deviation, length 3.

    Returns:
        A ``torch.Tensor`` with a leading batch dimension of 1.
    """
    # OpenCV delivers BGR, while the mean/std constants used here are the
    # usual ImageNet statistics in RGB order, so reorder the channels first.
    # NOTE(review): assumes the network was trained on RGB input - confirm
    # against the training data pipeline.
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    normalized = (rgb.astype(np.float32) / 255.0 - mean) / std
    return torch.from_numpy(normalized).permute(2, 0, 1).unsqueeze(0)


def main():
    """Run the landmark model on webcam frames and display the result.

    Builds the network from ``config``, loads the weights given by
    ``--model-file``, then loops: grab a frame, resize it to 256x256, run the
    model, decode the predicted points, draw them together with an FPS
    counter, and show the frame. The loop ends when Esc is pressed or when
    the camera stops delivering frames.

    Raises:
        IOError: If the webcam cannot be opened.
    """
    args = parse_args()

    config.defrost()
    config.MODEL.INIT_WEIGHTS = False
    config.freeze()
    model = models.get_face_alignment_net(config)

    # map_location lets a checkpoint that was saved from a GPU load on a
    # CPU-only machine; the model itself stays on the CPU in this demo.
    state_dict = torch.load(args.model_file, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()

    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    cap = cv2.VideoCapture(0)

    # Check if the webcam is opened correctly
    if not cap.isOpened():
        raise IOError("Cannot open webcam")

    try:
        while True:
            start = time.time()
            ret, frame = cap.read()
            if not ret or frame is None:
                # A failed grab yields no image; stop instead of crashing
                # inside cv2.resize.
                break

            frame = cv2.resize(frame, (256, 256))
            batch = _preprocess(frame, mean, std)

            # Inference only: skip autograd bookkeeping.
            with torch.no_grad():
                output = model(batch)
            score_map = output.data.cpu()
            preds = decode_preds(
                score_map,
                torch.tensor([[128.0, 128.0]]),
                torch.tensor([1.28]),
                [64, 64],
            )

            for x, y in preds[0]:
                cv2.circle(frame, (int(x), int(y)), 1, (0, 0, 255), 2)

            # Guard against a zero interval on coarse clocks.
            elapsed = max(time.time() - start, 1e-6)
            fps = 1 / elapsed
            cv2.putText(
                frame,
                text=f'FPS: {round(fps,2)}',
                org=(10, 30),
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=0.7,
                color=(0, 200, 0),
                thickness=2,
            )
            cv2.imshow('Input', frame)

            # Mask to the low byte so the Esc check works on platforms where
            # waitKey returns extra high bits.
            if cv2.waitKey(1) & 0xFF == 27:
                break
    finally:
        # Always free the camera and close the window, even on error.
        cap.release()
        cv2.destroyAllWindows()

    
# Start the webcam demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

kanybekasanbekov mentioned this issue Dec 2, 2022

I want to detect landmarks in real time #90

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

demo.py #91

demo.py #91

kanybekasanbekov commented Dec 2, 2022 •

edited

Loading

demo.py #91

demo.py #91

Comments

kanybekasanbekov commented Dec 2, 2022 • edited Loading

kanybekasanbekov commented Dec 2, 2022 •

edited

Loading