-
Notifications
You must be signed in to change notification settings - Fork 7
Description
Hello! I modified demo_video.py as follows:
import argparse
import os
import random
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import gradio as gr
import lavis.tasks as tasks
from lavis.common.config import Config
from lavis.common.dist_utils import get_rank
from lavis.common.registry import registry
from lavis.conversation.conversation import Chat, CONV_VISION_MS, CONV_VISION_MS_TEXT
# imports modules for registration
from lavis.datasets.builders import *
from lavis.models import *
from lavis.processors import *
from lavis.runners import *
from lavis.tasks import *
def parse_args(argv=None):
    """Parse the demo's command-line options.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which
            is what the original zero-argument call did.

    Returns:
        argparse.Namespace with the attributes ``cfg_path``, ``gpu_id``
        and ``options``.
    """
    parser = argparse.ArgumentParser(description="Demo")
    parser.add_argument(
        "--cfg-path",
        default="lavis/projects/blip2/eval/demo.yaml",
        help="path to configuration file.",
    )
    parser.add_argument(
        "--gpu-id",
        type=int,
        default=0,
        help="specify the gpu to load the model.",
    )
    parser.add_argument(
        "--options",
        nargs="+",
        help="override some settings in the used config, the key-value pair "
        "in xxx=yyy format will be merged into config file (deprecate), "
        "change to --cfg-options instead.",
    )
    return parser.parse_args(argv)
def setup_seeds(config):
    """Seed the Python, NumPy and PyTorch RNGs and set the cuDNN flags.

    The seed used is ``config.run_cfg.seed`` plus the value returned by
    ``get_rank()``.

    Args:
        config: Object exposing ``run_cfg.seed``.
    """
    base_seed = config.run_cfg.seed
    rank_seed = base_seed + get_rank()

    # Same seed for every generator, applied in the original order.
    for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
        seed_rng(rank_seed)

    cudnn.deterministic = True
    cudnn.benchmark = False
# Module-level initialization: parse the CLI, build the model, task, dataset
# and visual processor, then wrap them in the Chat object that upload_vid uses.
print('Initializing Chat')
args = parse_args()
cfg = Config(args)
model_config = cfg.model_cfg
model_config.device_8bit = args.gpu_id
# Look up the model class registered under the configured architecture name.
model_cls = registry.get_model_class(model_config.arch)
print("model_cls", model_cls)
model = model_cls.from_config(model_config).to('cuda:{}'.format(args.gpu_id))
#TODO
task = tasks.setup_task(cfg)
dataset = task.build_datasets(cfg)
# NOTE(review): the pasted text read `cfg.dict`; the double underscores appear
# to have been stripped by markdown rendering, so `__dict__` is restored here.
print(cfg.__dict__)
pre_cfg = cfg.config.preprocess
vis_processor_cfg = pre_cfg.vis_processor.eval
print(vis_processor_cfg.__dict__)
# Build the eval-time visual processor from its registered class.
vis_processor = registry.get_processor_class(vis_processor_cfg.name).from_config(vis_processor_cfg)
#vis_processors, txt_processors = load_preprocess(pre_cfg)
#vis_processor = vis_processors['eval']
chat = Chat(model, vis_processor, task=task, dataset=dataset, device='cuda:{}'.format(args.gpu_id))
print('Initialization Finished')
def upload_vid(gr_vid, temperature=0.1, input_splits=""):
    """Send one video to the module-level ``chat`` and return its description.

    Args:
        gr_vid: Video handle passed unchanged to
            ``chat.upload_video_ms_standalone`` (presumably a file path;
            confirm against the caller).
        temperature: Forwarded to ``chat.answer`` as ``temperature``.
        input_splits: Forwarded to the upload call; the value
            'Automatic detection' is replaced by an empty string first.

    Returns:
        ``chat.answer(...)[0][0]``, which is also printed next to ``gr_vid``.
    """
    conversation = CONV_VISION_MS.copy()
    splits = '' if input_splits == 'Automatic detection' else input_splits
    frames = []

    # The upload call's return value was never used, so it is not bound here.
    chat.upload_video_ms_standalone(gr_vid, conversation, frames, input_splits=splits)
    chat.ask("Please describe this video in detail.", conversation)

    answer = chat.answer(
        conv=conversation,
        num_beams=1,
        temperature=temperature,
        max_new_tokens=650,
        max_length=2048,
    )
    summary = answer[0][0]
    print(gr_vid, summary)
    return summary
import os

# Directory whose entries are fed to upload_vid one at a time.
video_path = "/media/cv/09C1B27DA5EB573A/ASIT"
video_child_path = os.listdir(video_path)
for v_id in video_child_path:
    # os.listdir yields bare entry names, so join the directory back on and
    # pass a single video per call (the whole list used to be passed instead).
    upload_vid(os.path.join(video_path, v_id))
But I get the following error:
Initialization Finished
/hy-tmp/Shot2Story-temp/code/lavis/conversation/conversation.py:210: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /opt/conda/conda-bld/pytorch_1646755903507/work/torch/csrc/utils/tensor_numpy.cpp:178.)
resized_frame = torch.from_numpy(frame).permute(2, 0, 1)
ERROR:pyscenedetect:VideoManager is deprecated and will be removed.
INFO:pyscenedetect:Loaded 1 video, framerate: 59.940 FPS, resolution: 1920 x 1080
INFO:pyscenedetect:Downscale factor set to 7, effective resolution: 274 x 154
INFO:pyscenedetect:Detecting scenes...
ERROR:pyscenedetect:base_timecode argument is deprecated and has no effect.
Scenes from /hy-tmp/dataset/ASIT/gBR_sFM_c01_d04_mBR0_ch01.mp4:
New scene detection results 1 0
flexible_sampling
Traceback (most recent call last):
File "demo.py", line 83, in <module>
upload_video(video_path)
File "demo.py", line 70, in upload_video
llm_message = chat.upload_video_ms_standalone(gr_vid, chat_state, img_list, input_splits=input_splits)
File "/hy-tmp/Shot2Story-temp/code/lavis/conversation/conversation.py", line 645, in upload_video_ms_standalone
self.samples = get_split(video, self.vis_processor.transform, dataset, self.transnet_model, self.asr_model, sampling='headtail', input_splits=input_splits)
File "/hy-tmp/Shot2Story-temp/code/lavis/conversation/conversation.py", line 467, in get_split
frms = transform(frms)
File "/usr/local/miniconda3/envs/shot2story/lib/python3.8/site-packages/torchvision/transforms/transforms.py", line 95, in __call__
img = t(img)
File "/hy-tmp/Shot2Story-temp/code/lavis/processors/transforms_video.py", line 129, in __call__
return F.normalize(clip, self.mean, self.std, self.inplace)
File "/hy-tmp/Shot2Story-temp/code/lavis/processors/functional_video.py", line 108, in normalize
clip.sub_(mean[:, None, None, None]).div_(std[:, None, None, None])
RuntimeError: The size of tensor a (224) must match the size of tensor b (3) at non-singleton dimension 0
What should I do? I need your help. Also, when I try your online demo, it doesn't work. Thank you very much!