diff --git a/.github/workflows/dockerImageBuild.yml b/.github/workflows/dockerImageBuild.yml index 3089eb0..6c4755e 100644 --- a/.github/workflows/dockerImageBuild.yml +++ b/.github/workflows/dockerImageBuild.yml @@ -3,6 +3,7 @@ name: build_docker on: release: types: [created] # 表示在创建新的 Release 时触发 + workflow_dispatch: jobs: build_docker: diff --git a/app/test/test_gemini.py b/app/test/test_gemini.py index 916d4d9..aa96a39 100644 --- a/app/test/test_gemini.py +++ b/app/test/test_gemini.py @@ -5,7 +5,10 @@ os.environ["HTTP_PROXY"] = config.proxy.get("http") os.environ["HTTPS_PROXY"] = config.proxy.get("https") -genai.configure(api_key=config.app.get("vision_gemini_api_key")) -model = genai.GenerativeModel("gemini-1.5-flash") -response = model.generate_content("直接回复我文本'当前网络可用'") -print(response.text) +genai.configure(api_key="") +model = genai.GenerativeModel("gemini-1.5-pro") + + +for i in range(50): + response = model.generate_content("直接回复我文本'当前网络可用'") + print(i, response.text) diff --git a/app/utils/script_generator.py b/app/utils/script_generator.py index ad6d3a8..e36064a 100644 --- a/app/utils/script_generator.py +++ b/app/utils/script_generator.py @@ -56,6 +56,7 @@ def generate_script(self, scene_description: str, word_count: int) -> str: 当前画面描述:{scene_description} 请确保新生成的文案与上文自然衔接,保持叙事的连贯性和趣味性。 +不要出现除了文案以外的其他任何内容; 严格字数要求:{word_count}字,允许误差±5字。""" messages = [ @@ -328,7 +329,9 @@ def __init__(self, model_name: str, api_key: str = None, base_url: str = None, p self.generator = OpenAIGenerator(model_name, self.api_key, self.prompt, self.base_url) def _get_default_prompt(self) -> str: - return f"""你是一位极具幽默感的短视频脚本创作大师,擅长用"温和的违反"制造笑点,让{self.video_theme}视频既有趣又富有传播力。你的任务是将视频画面描述转化为能在社交平台疯狂传播的爆款口播文案。 + return f""" + 你是一位极具幽默感的短视频脚本创作大师,擅长用"温和的违反"制造笑点,让主题为 《{self.video_theme}》 的视频既有趣又富有传播力。 +你的任务是将视频画面描述转化为能在社交平台疯狂传播的爆款口播文案。 目标受众:热爱生活、追求独特体验的18-35岁年轻人 文案风格:基于HKRR理论 + 段子手精神 @@ -342,17 +345,16 @@ def _get_default_prompt(self) -> str: 【爆款内容四要素】 【快乐元素 Happy】 -1. 
用调侃的语气描述建造过程中的"笨手笨脚" +1. 用调侃的语气描述画面 2. 巧妙植入网络流行梗,增加内容的传播性 3. 适时自嘲,展现真实且有趣的一面 【知识价值 Knowledge】 -1. 用段子手的方式解释专业知识(比如:"这根木头不是一般的木头,它比我前任还难搞...") -2. 把复杂的建造技巧转化为生动有趣的比喻 -3. 在幽默中传递实用的野外生存技能 +1. 用段子手的方式解释专业知识 +2. 在幽默中传递实用的生活常识 【情感共鸣 Resonance】 -1. 描述"真实但夸张"的建造困境 +1. 描述"真实但夸张"的环境描述 2. 把对自然的感悟融入俏皮话中 3. 用接地气的表达方式拉近与观众距离 @@ -366,10 +368,10 @@ def _get_default_prompt(self) -> str: 2. 使用恰当的连接词和过渡语,确保叙事流畅 3. 保持人物视角和语气的一致性 4. 避免重复上一段已经提到的信息 -5. 确保情节和建造过程的逻辑连续性 +5. 确保情节的逻辑连续性 我会按顺序提供多段视频画面描述。请创作既搞笑又能火爆全网的口播文案。 -记住:要敢于用"温和的违反"制造笑点,但要把握好尺度,让观众在轻松愉快中感受野外建造的乐趣。""" +记住:要敢于用"温和的违反"制造笑点,但要把握好尺度,让观众在轻松愉快中感受到乐趣。""" def calculate_duration_and_word_count(self, time_range: str) -> int: try: diff --git a/app/utils/utils.py b/app/utils/utils.py index bcebe4c..307823c 100644 --- a/app/utils/utils.py +++ b/app/utils/utils.py @@ -393,11 +393,7 @@ def cut_video(params, progress_callback=None): video_script_list = st.session_state['video_clip_json'] time_list = [i['timestamp'] for i in video_script_list] - - total_clips = len(time_list) - print("time_list", time_list) - def clip_progress(current, total): progress = int((current / total) * 100) if progress_callback: @@ -414,17 +410,11 @@ def clip_progress(current, total): raise ValueError("裁剪视频失败") st.session_state['subclip_videos'] = subclip_videos - - print("list:", subclip_videos) - for i, video_script in enumerate(video_script_list): - print(i) - print(video_script) try: video_script['path'] = subclip_videos[video_script['timestamp']] except KeyError as err: logger.error(f"裁剪视频失败: {err}") - # raise ValueError(f"裁剪视频失败: {err}") return task_id, subclip_videos diff --git a/app/utils/vision_analyzer.py b/app/utils/vision_analyzer.py index 8024729..06342d7 100644 --- a/app/utils/vision_analyzer.py +++ b/app/utils/vision_analyzer.py @@ -55,7 +55,7 @@ async def _generate_content_with_retry(self, prompt, batch): async def analyze_images(self, images: Union[List[str], List[PIL.Image.Image]], prompt: str, - batch_size: int = 5) -> List[Dict]: + 
batch_size: int) -> List[Dict]: """批量分析多张图片""" try: # 加载图片 @@ -82,6 +82,8 @@ async def analyze_images(self, results = [] total_batches = (len(images) + batch_size - 1) // batch_size + logger.debug(f"共 {total_batches} 个批次,每批次 {batch_size} 张图片") + with tqdm(total=total_batches, desc="分析进度") as pbar: for i in range(0, len(images), batch_size): batch = images[i:i + batch_size] diff --git a/config.example.toml b/config.example.toml index 52432d2..e6b3919 100644 --- a/config.example.toml +++ b/config.example.toml @@ -13,7 +13,7 @@ ########### Vision NarratoAPI Key narrato_api_key = "" - narrato_api_url = "https://narratoapi.scsmtech.cn/api/v1" + narrato_api_url = "https://narratoinsight.scsmtech.cn/api/v1" narrato_vision_model = "gemini-1.5-flash" narrato_vision_key = "" narrato_llm_model = "gpt-4o" diff --git a/webui/components/basic_settings.py b/webui/components/basic_settings.py index 960587a..adeca9e 100644 --- a/webui/components/basic_settings.py +++ b/webui/components/basic_settings.py @@ -116,7 +116,7 @@ def render_vision_llm_settings(tr): st.subheader(tr("Vision Model Settings")) # 视频分析模型提供商选择 - vision_providers = ['Gemini', 'NarratoAPI'] + vision_providers = ['Gemini', 'NarratoAPI(待发布)', 'QwenVL(待发布)'] saved_vision_provider = config.app.get("vision_llm_provider", "Gemini").lower() saved_provider_index = 0 @@ -141,7 +141,18 @@ def render_vision_llm_settings(tr): # 渲染视觉模型配置输入框 st_vision_api_key = st.text_input(tr("Vision API Key"), value=vision_api_key, type="password") - st_vision_base_url = st.text_input(tr("Vision Base URL"), value=vision_base_url) + + # 当选择 Gemini 时禁用 base_url 输入 + if vision_provider.lower() == 'gemini': + st_vision_base_url = st.text_input( + tr("Vision Base URL"), + value=vision_base_url, + disabled=True, + help=tr("Gemini API does not require a base URL") + ) + else: + st_vision_base_url = st.text_input(tr("Vision Base URL"), value=vision_base_url) + st_vision_model_name = st.text_input(tr("Vision Model Name"), value=vision_model_name) # 
在配置输入框后添加测试按钮 @@ -204,7 +215,7 @@ def render_vision_llm_settings(tr): # tr("Vision Model API Key"), # value=config.app.get("narrato_vision_key", ""), # type="password", - # help="用于视频分析的模型 API Key" + # help="用于视频分析的模 API Key" # ) # # if narrato_vision_model: @@ -247,6 +258,76 @@ def render_vision_llm_settings(tr): # st.session_state['narrato_batch_size'] = narrato_batch_size +def test_text_model_connection(api_key, base_url, model_name, provider, tr): + """测试文本模型连接 + + Args: + api_key: API密钥 + base_url: 基础URL + model_name: 模型名称 + provider: 提供商名称 + + Returns: + bool: 连接是否成功 + str: 测试结果消息 + """ + import requests + + try: + # 构建统一的测试请求(遵循OpenAI格式) + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json" + } + + # 如果没有指定base_url,使用默认值 + if not base_url: + if provider.lower() == 'openai': + base_url = "https://api.openai.com/v1" + elif provider.lower() == 'moonshot': + base_url = "https://api.moonshot.cn/v1" + elif provider.lower() == 'deepseek': + base_url = "https://api.deepseek.com/v1" + + # 构建测试URL + test_url = f"{base_url.rstrip('/')}/chat/completions" + + # 特殊处理Gemini + if provider.lower() == 'gemini': + import google.generativeai as genai + try: + genai.configure(api_key=api_key) + model = genai.GenerativeModel(model_name or 'gemini-pro') + model.generate_content("直接回复我文本'当前网络可用'") + return True, tr("Gemini model is available") + except Exception as e: + return False, f"{tr('Gemini model is not available')}: {str(e)}" + + # 构建测试消息 + test_data = { + "model": model_name, + "messages": [ + {"role": "user", "content": "直接回复我文本'当前网络可用'"} + ], + "max_tokens": 10 + } + + # 发送测试请求 + response = requests.post( + test_url, + headers=headers, + json=test_data, + timeout=10 + ) + + if response.status_code == 200: + return True, tr("Text model is available") + else: + return False, f"{tr('Text model is not available')}: HTTP {response.status_code}" + + except Exception as e: + return False, f"{tr('Connection failed')}: {str(e)}" + def 
render_text_llm_settings(tr): """渲染文案生成模型设置""" st.subheader(tr("Text Generation Model Settings")) @@ -279,6 +360,22 @@ def render_text_llm_settings(tr): st_text_base_url = st.text_input(tr("Text Base URL"), value=text_base_url) st_text_model_name = st.text_input(tr("Text Model Name"), value=text_model_name) + # 添加测试按钮 + if st.button(tr("Test Connection"), key="test_text_connection"): + with st.spinner(tr("Testing connection...")): + success, message = test_text_model_connection( + api_key=st_text_api_key, + base_url=st_text_base_url, + model_name=st_text_model_name, + provider=text_provider, + tr=tr + ) + + if success: + st.success(message) + else: + st.error(message) + # 保存文本模型配置 if st_text_api_key: config.app[f"text_{text_provider}_api_key"] = st_text_api_key diff --git a/webui/components/review_settings.py b/webui/components/review_settings.py index 513f938..932ec9b 100644 --- a/webui/components/review_settings.py +++ b/webui/components/review_settings.py @@ -2,6 +2,7 @@ import os from loguru import logger + def render_review_panel(tr): """渲染视频审查面板""" with st.expander(tr("Video Check"), expanded=False): @@ -26,10 +27,11 @@ def render_review_panel(tr): with cols[col]: render_video_item(tr, video_list, subclip_videos, index) + def render_video_item(tr, video_list, subclip_videos, index): """渲染单个视频项""" video_script = video_list[index] - + # 显示时间戳 timestamp = video_script.get('timestamp', '') st.text_area( @@ -39,7 +41,7 @@ def render_video_item(tr, video_list, subclip_videos, index): disabled=True, key=f"timestamp_{index}" ) - + # 显示视频播放器 video_path = subclip_videos.get(timestamp) if video_path and os.path.exists(video_path): @@ -50,7 +52,7 @@ def render_video_item(tr, video_list, subclip_videos, index): st.error(f"无法加载视频: {os.path.basename(video_path)}") else: st.warning(tr("视频文件未找到")) - + # 显示画面描述 st.text_area( tr("Picture Description"), @@ -59,7 +61,7 @@ def render_video_item(tr, video_list, subclip_videos, index): disabled=True, key=f"picture_{index}" ) - + # 
显示旁白文本 narration = st.text_area( tr("Narration"), @@ -71,15 +73,16 @@ def render_video_item(tr, video_list, subclip_videos, index): if narration != video_script.get('narration', ''): video_script['narration'] = narration st.session_state['video_clip_json'] = video_list - + # 显示剪辑模式 ost = st.selectbox( tr("Clip Mode"), - options=range(1, 10), - index=video_script.get('OST', 1) - 1, - key=f"ost_{index}" + options=range(0, 3), + index=video_script.get('OST', 0), + key=f"ost_{index}", + help=tr("0: Keep the audio only, 1: Keep the original sound only, 2: Keep the original sound and audio") ) # 保存修改后的剪辑模式 - if ost != video_script.get('OST', 1): + if ost != video_script.get('OST', 0): video_script['OST'] = ost - st.session_state['video_clip_json'] = video_list \ No newline at end of file + st.session_state['video_clip_json'] = video_list diff --git a/webui/components/script_settings.py b/webui/components/script_settings.py index 50bf4a9..30c23d3 100644 --- a/webui/components/script_settings.py +++ b/webui/components/script_settings.py @@ -417,11 +417,12 @@ def update_progress(progress: float, message: str = ""): asyncio.set_event_loop(loop) # 执行异步分析 + vision_batch_size = st.session_state.get('vision_batch_size') or config.frames.get("vision_batch_size") results = loop.run_until_complete( analyzer.analyze_images( images=keyframe_files, prompt=config.app.get('vision_analysis_prompt'), - batch_size=config.frames.get("vision_batch_size", st.session_state.get('vision_batch_size', 5)) + batch_size=vision_batch_size ) ) loop.close() @@ -437,8 +438,8 @@ def update_progress(progress: float, message: str = ""): if 'error' in result: logger.warning(f"批次 {result['batch_index']} 处理出现警告: {result['error']}") continue - - batch_files = get_batch_files(keyframe_files, result, config.frames.get("vision_batch_size", 5)) + # 获取当前批次的文件列表 + batch_files = get_batch_files(keyframe_files, result, vision_batch_size) logger.debug(f"批次 {result['batch_index']} 处理完成,共 {len(batch_files)} 张图片") 
logger.debug(batch_files) @@ -477,7 +478,7 @@ def update_progress(progress: float, message: str = ""): if 'error' in result: continue - batch_files = get_batch_files(keyframe_files, result, config.frames.get("vision_batch_size", 5)) + batch_files = get_batch_files(keyframe_files, result, vision_batch_size) _, _, timestamp_range = get_batch_timestamps(batch_files, prev_batch_files) frame_content = { @@ -497,15 +498,19 @@ def update_progress(progress: float, message: str = ""): raise Exception("没有有效的帧内容可以处理") # ===================开始生成文案=================== - update_progress(90, "正在生成文案...") + update_progress(80, "正在生成文案...") # 校验配置 api_params = { - 'vision_model_name': vision_model, - 'vision_api_key': vision_api_key, - 'vision_base_url': vision_base_url, - 'text_model_name': text_model, - 'text_api_key': text_api_key, - 'text_base_url': text_base_url + "vision_api_key": vision_api_key, + "vision_model_name": vision_model, + "vision_base_url": vision_base_url or "", + "text_api_key": text_api_key, + "text_model_name": text_model, + "text_base_url": text_base_url or "" + } + headers = { + 'accept': 'application/json', + 'Content-Type': 'application/json' } session = requests.Session() retry_strategy = Retry( @@ -518,25 +523,26 @@ def update_progress(progress: float, message: str = ""): try: response = session.post( f"{config.app.get('narrato_api_url')}/video/config", - params=api_params, + headers=headers, + json=api_params, timeout=30, - verify=True # 启用证书验证 + verify=True ) - except: + except Exception as e: pass - custom_prompt = st.session_state.get('custom_prompt', '') processor = ScriptProcessor( model_name=text_model, api_key=text_api_key, prompt=custom_prompt, + base_url=text_base_url or "", video_theme=st.session_state.get('video_theme', '') ) # 处理帧内容生成脚本 script_result = processor.process_frames(frame_content_list) - # 将结果转换为JSON字符串 + # 将结果转换为JSON字符串 script = json.dumps(script_result, ensure_ascii=False, indent=2) except Exception as e: @@ -561,7 +567,7 @@ def 
update_progress(progress: float, message: str = ""): if not api_key: raise ValueError("未配置 Narrato API Key,请在基础设置中配置") - # 准备API请求 + # 准备API请求 headers = { 'X-API-Key': api_key, 'accept': 'application/json' } diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json index c3c06fd..68b968a 100644 --- a/webui/i18n/zh.json +++ b/webui/i18n/zh.json @@ -63,13 +63,11 @@ "Video Generation Failed": "视频生成失败", "You can download the generated video from the following links": "你可以从以下链接下载生成的视频", "Basic Settings": "**基础设置** (:blue[点击展开])", - "Language": "界面语言", "Pixabay API Key": "Pixabay API Key ([点击获取](https://pixabay.com/api/docs/#api_search_videos)) :red[可以不用配置,如果 Pexels 无法使用,再选择Pixabay]", "Video LLM Provider": "视频转录大模型", "LLM Provider": "大语言模型", "API Key": "API Key (:red[必填,需要到大模型提供商的后台申请])", "Base Url": "Base Url (可选)", - "Account ID": "账户ID (Cloudflare的dash面板url中获取)", "Model Name": "模型名称 (:blue[需要到大模型提供商的后台确认被授权的模型名称])", "Please Enter the LLM API Key": "请先填写大模型 **API Key**", "Please Enter the Pixabay API Key": "请先填写 **Pixabay API Key**", @@ -104,7 +102,6 @@ "Video Script": "视频脚本", "Video Quality": "视频质量", "Custom prompt for LLM, leave empty to use default prompt": "自定义提示词,留空则使用默认提示词", - "Basic Settings": "基础设置", "Proxy Settings": "代理设置", "Language": "界面语言", "HTTP_PROXY": "HTTP 代理", @@ -134,6 +131,9 @@ "gemini model is not available": "Gemini 模型不可用", "NarratoAPI is available": "NarratoAPI 可用", "NarratoAPI is not available": "NarratoAPI 不可用", - "Unsupported provider": "不支持的提供商" + "Unsupported provider": "不支持的提供商", + "0: Keep the audio only, 1: Keep the original sound only, 2: Keep the original sound and audio": "0: 仅保留音频,1: 仅保留原声,2: 保留原声和音频", + "Text model is not available": "文案生成模型不可用", + "Text model is available": "文案生成模型可用" } }