Skip to content

run demo_video.py error #16

@lujuncong2000

Description

@lujuncong2000

Hello! I modified demo_video.py as follows:
import argparse
import os
import random

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import gradio as gr

import lavis.tasks as tasks

from lavis.common.config import Config
from lavis.common.dist_utils import get_rank
from lavis.common.registry import registry
from lavis.conversation.conversation import Chat, CONV_VISION_MS, CONV_VISION_MS_TEXT
# imports modules for registration

from lavis.datasets.builders import *
from lavis.models import *
from lavis.processors import *
from lavis.runners import *
from lavis.tasks import *

def parse_args():
    """Parse the command-line arguments of the demo script.

    Recognized options:
        --cfg-path: path to the configuration file
            (default ``lavis/projects/blip2/eval/demo.yaml``).
        --gpu-id: integer index of the GPU to load the model on (default 0).
        --options: one or more ``key=value`` overrides (default ``None``).

    Returns:
        argparse.Namespace: with attributes ``cfg_path``, ``gpu_id`` and
        ``options``.
    """
    parser = argparse.ArgumentParser(description="Demo")
    parser.add_argument("--cfg-path", default="lavis/projects/blip2/eval/demo.yaml", help="path to configuration file.")
    parser.add_argument("--gpu-id", type=int, default=0, help="specify the gpu to load the model.")
    parser.add_argument(
        "--options",
        nargs="+",
        help="override some settings in the used config, the key-value pair "
        "in xxx=yyy format will be merged into config file (deprecate), "
        "change to --cfg-options instead.",
    )
    return parser.parse_args()

def setup_seeds(config):
    """Seed the Python, NumPy and PyTorch random number generators.

    The seed is ``config.run_cfg.seed`` offset by ``get_rank()``, so each
    rank gets a different seed.

    Args:
        config: object exposing ``run_cfg.seed`` (a number).
    """
    seed = config.run_cfg.seed + get_rank()

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Turn off cuDNN benchmarking and request deterministic algorithms so
    # that repeated runs with the same seed are reproducible.
    cudnn.benchmark = False
    cudnn.deterministic = True

# Module-level initialization: parse the CLI, load the config, build the
# model, task, datasets and visual processor, then create the Chat object
# that upload_vid() uses.
print('Initializing Chat')
args = parse_args()
cfg = Config(args)

model_config = cfg.model_cfg
# NOTE(review): presumably tells the model which GPU to use for 8-bit
# loading -- confirm against the model's from_config.
model_config.device_8bit = args.gpu_id
model_cls = registry.get_model_class(model_config.arch)
print("model_cls", model_cls)
model = model_cls.from_config(model_config).to('cuda:{}'.format(args.gpu_id))

#TODO
task = tasks.setup_task(cfg)
dataset = task.build_datasets(cfg)

# Dump the config object's attributes for debugging. The dunder name is
# required here: the config object has no plain ``dict`` attribute.
print(cfg.__dict__)
pre_cfg = cfg.config.preprocess

# Build the evaluation-time visual processor named in the preprocess config.
vis_processor_cfg = pre_cfg.vis_processor.eval
print(vis_processor_cfg.__dict__)
vis_processor = registry.get_processor_class(vis_processor_cfg.name).from_config(vis_processor_cfg)
#vis_processors, txt_processors = load_preprocess(pre_cfg)
#vis_processor = vis_processors['eval']
chat = Chat(model, vis_processor, task=task, dataset=dataset, device='cuda:{}'.format(args.gpu_id))
print('Initialization Finished')

def upload_vid(gr_vid, temperature=0.1, input_splits=""):
    """Upload one video to the chat and return a detailed description of it.

    Args:
        gr_vid: the video, forwarded unchanged to
            ``chat.upload_video_ms_standalone`` (presumably a file path --
            confirm against that method).
        temperature: sampling temperature forwarded to ``chat.answer``.
        input_splits: split specification forwarded to the upload call; the
            value ``'Automatic detection'`` is translated to ``''``.

    Returns:
        The ``[0][0]`` element of the value returned by ``chat.answer``.
        The same value is also printed together with ``gr_vid``.
    """
    # Every video gets a fresh conversation state and frame list.
    chat_state = CONV_VISION_MS.copy()
    if input_splits == 'Automatic detection':
        input_splits = ''
    img_list = []
    # The call's return value is not needed; it is made for its effect on
    # the chat object and on chat_state / img_list.
    chat.upload_video_ms_standalone(gr_vid, chat_state, img_list, input_splits=input_splits)
    chat.ask("Please describe this video in detail.", chat_state)
    summary = chat.answer(conv=chat_state,
                          num_beams=1,
                          temperature=temperature,
                          max_new_tokens=650,
                          max_length=2048)[0][0]
    print(gr_vid, summary)
    return summary

import os

# Directory whose entries are described one by one.
video_path = "/media/cv/09C1B27DA5EB573A/ASIT"
video_child_path = os.listdir(video_path)

for v_id in video_child_path:
    # os.listdir returns bare entry names, so join each one with the
    # directory and pass that single path (not the whole listing) on.
    upload_vid(os.path.join(video_path, v_id))

But the error is:
Initialization Finished
/hy-tmp/Shot2Story-temp/code/lavis/conversation/conversation.py:210: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /opt/conda/conda-bld/pytorch_1646755903507/work/torch/csrc/utils/tensor_numpy.cpp:178.)
resized_frame = torch.from_numpy(frame).permute(2, 0, 1)
ERROR:pyscenedetect:VideoManager is deprecated and will be removed.
INFO:pyscenedetect:Loaded 1 video, framerate: 59.940 FPS, resolution: 1920 x 1080
INFO:pyscenedetect:Downscale factor set to 7, effective resolution: 274 x 154
INFO:pyscenedetect:Detecting scenes...
ERROR:pyscenedetect:base_timecode argument is deprecated and has no effect.
Scenes from /hy-tmp/dataset/ASIT/gBR_sFM_c01_d04_mBR0_ch01.mp4:
New scene detection results 1 0
flexible_sampling
Traceback (most recent call last):
File "demo.py", line 83, in <module>
upload_video(video_path)
File "demo.py", line 70, in upload_video
llm_message = chat.upload_video_ms_standalone(gr_vid, chat_state, img_list, input_splits=input_splits)
File "/hy-tmp/Shot2Story-temp/code/lavis/conversation/conversation.py", line 645, in upload_video_ms_standalone
self.samples = get_split(video, self.vis_processor.transform, dataset, self.transnet_model, self.asr_model, sampling='headtail', input_splits=input_splits)
File "/hy-tmp/Shot2Story-temp/code/lavis/conversation/conversation.py", line 467, in get_split
frms = transform(frms)
File "/usr/local/miniconda3/envs/shot2story/lib/python3.8/site-packages/torchvision/transforms/transforms.py", line 95, in __call__
img = t(img)
File "/hy-tmp/Shot2Story-temp/code/lavis/processors/transforms_video.py", line 129, in __call__
return F.normalize(clip, self.mean, self.std, self.inplace)
File "/hy-tmp/Shot2Story-temp/code/lavis/processors/functional_video.py", line 108, in normalize
clip.sub_(mean[:, None, None, None]).div_(std[:, None, None, None])
RuntimeError: The size of tensor a (224) must match the size of tensor b (3) at non-singleton dimension 0

What should I do? I need your help. Also, when I try your online demo, it doesn't work. Thank you very much!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions