Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
linyqh committed Nov 23, 2024
2 parents cf6df70 + 4ad9c41 commit be20d9d
Show file tree
Hide file tree
Showing 10 changed files with 162 additions and 58 deletions.
1 change: 1 addition & 0 deletions .github/workflows/dockerImageBuild.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ name: build_docker
on:
release:
types: [created] # 表示在创建新的 Release 时触发
workflow_dispatch:

jobs:
build_docker:
Expand Down
11 changes: 7 additions & 4 deletions app/test/test_gemini.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
os.environ["HTTP_PROXY"] = config.proxy.get("http")
os.environ["HTTPS_PROXY"] = config.proxy.get("https")

genai.configure(api_key=config.app.get("vision_gemini_api_key"))
model = genai.GenerativeModel("gemini-1.5-flash")
response = model.generate_content("直接回复我文本'当前网络可用'")
print(response.text)
genai.configure(api_key="")
model = genai.GenerativeModel("gemini-1.5-pro")


for i in range(50):
response = model.generate_content("直接回复我文本'当前网络可用'")
print(i, response.text)
18 changes: 10 additions & 8 deletions app/utils/script_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def generate_script(self, scene_description: str, word_count: int) -> str:
当前画面描述:{scene_description}
请确保新生成的文案与上文自然衔接,保持叙事的连贯性和趣味性。
不要出现除了文案以外的其他任何内容;
严格字数要求:{word_count}字,允许误差±5字。"""

messages = [
Expand Down Expand Up @@ -328,7 +329,9 @@ def __init__(self, model_name: str, api_key: str = None, base_url: str = None, p
self.generator = OpenAIGenerator(model_name, self.api_key, self.prompt, self.base_url)

def _get_default_prompt(self) -> str:
return f"""你是一位极具幽默感的短视频脚本创作大师,擅长用"温和的违反"制造笑点,让{self.video_theme}视频既有趣又富有传播力。你的任务是将视频画面描述转化为能在社交平台疯狂传播的爆款口播文案。
return f"""
你是一位极具幽默感的短视频脚本创作大师,擅长用"温和的违反"制造笑点,让主题为 《{self.video_theme}》 的视频既有趣又富有传播力。
你的任务是将视频画面描述转化为能在社交平台疯狂传播的爆款口播文案。
目标受众:热爱生活、追求独特体验的18-35岁年轻人
文案风格:基于HKRR理论 + 段子手精神
Expand All @@ -342,17 +345,16 @@ def _get_default_prompt(self) -> str:
【爆款内容四要素】
【快乐元素 Happy】
1. 用调侃的语气描述建造过程中的"笨手笨脚"
1. 用调侃的语气描述画面
2. 巧妙植入网络流行梗,增加内容的传播性
3. 适时自嘲,展现真实且有趣的一面
【知识价值 Knowledge】
1. 用段子手的方式解释专业知识(比如:"这根木头不是一般的木头,它比我前任还难搞...")
2. 把复杂的建造技巧转化为生动有趣的比喻
3. 在幽默中传递实用的野外生存技能
1. 用段子手的方式解释专业知识
2. 在幽默中传递实用的生活常识
【情感共鸣 Resonance】
1. 描述"真实但夸张"的建造困境
1. 描述"真实但夸张"的环境描述
2. 把对自然的感悟融入俏皮话中
3. 用接地气的表达方式拉近与观众距离
Expand All @@ -366,10 +368,10 @@ def _get_default_prompt(self) -> str:
2. 使用恰当的连接词和过渡语,确保叙事流畅
3. 保持人物视角和语气的一致性
4. 避免重复上一段已经提到的信息
5. 确保情节和建造过程的逻辑连续性
5. 确保情节的逻辑连续性
我会按顺序提供多段视频画面描述。请创作既搞笑又能火爆全网的口播文案。
记住:要敢于用"温和的违反"制造笑点,但要把握好尺度,让观众在轻松愉快中感受野外建造的乐趣。"""
记住:要敢于用"温和的违反"制造笑点,但要把握好尺度,让观众在轻松愉快中感受到乐趣。"""

def calculate_duration_and_word_count(self, time_range: str) -> int:
try:
Expand Down
10 changes: 0 additions & 10 deletions app/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,11 +393,7 @@ def cut_video(params, progress_callback=None):

video_script_list = st.session_state['video_clip_json']
time_list = [i['timestamp'] for i in video_script_list]

total_clips = len(time_list)

print("time_list", time_list)

def clip_progress(current, total):
progress = int((current / total) * 100)
if progress_callback:
Expand All @@ -414,17 +410,11 @@ def clip_progress(current, total):
raise ValueError("裁剪视频失败")

st.session_state['subclip_videos'] = subclip_videos

print("list:", subclip_videos)

for i, video_script in enumerate(video_script_list):
print(i)
print(video_script)
try:
video_script['path'] = subclip_videos[video_script['timestamp']]
except KeyError as err:
logger.error(f"裁剪视频失败: {err}")
# raise ValueError(f"裁剪视频失败: {err}")

return task_id, subclip_videos

Expand Down
4 changes: 3 additions & 1 deletion app/utils/vision_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ async def _generate_content_with_retry(self, prompt, batch):
async def analyze_images(self,
images: Union[List[str], List[PIL.Image.Image]],
prompt: str,
batch_size: int = 5) -> List[Dict]:
batch_size: int) -> List[Dict]:
"""批量分析多张图片"""
try:
# 加载图片
Expand All @@ -82,6 +82,8 @@ async def analyze_images(self,
results = []
total_batches = (len(images) + batch_size - 1) // batch_size

logger.debug(f"共 {total_batches} 个批次,每批次 {batch_size} 张图片")

with tqdm(total=total_batches, desc="分析进度") as pbar:
for i in range(0, len(images), batch_size):
batch = images[i:i + batch_size]
Expand Down
2 changes: 1 addition & 1 deletion config.example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

########### Vision NarratoAPI Key
narrato_api_key = ""
narrato_api_url = "https://narratoapi.scsmtech.cn/api/v1"
narrato_api_url = "https://narratoinsight.scsmtech.cn/api/v1"
narrato_vision_model = "gemini-1.5-flash"
narrato_vision_key = ""
narrato_llm_model = "gpt-4o"
Expand Down
103 changes: 100 additions & 3 deletions webui/components/basic_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def render_vision_llm_settings(tr):
st.subheader(tr("Vision Model Settings"))

# 视频分析模型提供商选择
vision_providers = ['Gemini', 'NarratoAPI']
vision_providers = ['Gemini', 'NarratoAPI(待发布)', 'QwenVL(待发布)']
saved_vision_provider = config.app.get("vision_llm_provider", "Gemini").lower()
saved_provider_index = 0

Expand All @@ -141,7 +141,18 @@ def render_vision_llm_settings(tr):

# 渲染视觉模型配置输入框
st_vision_api_key = st.text_input(tr("Vision API Key"), value=vision_api_key, type="password")
st_vision_base_url = st.text_input(tr("Vision Base URL"), value=vision_base_url)

# 当选择 Gemini 时禁用 base_url 输入
if vision_provider.lower() == 'gemini':
st_vision_base_url = st.text_input(
tr("Vision Base URL"),
value=vision_base_url,
disabled=True,
help=tr("Gemini API does not require a base URL")
)
else:
st_vision_base_url = st.text_input(tr("Vision Base URL"), value=vision_base_url)

st_vision_model_name = st.text_input(tr("Vision Model Name"), value=vision_model_name)

# 在配置输入框后添加测试按钮
Expand Down Expand Up @@ -204,7 +215,7 @@ def render_vision_llm_settings(tr):
# tr("Vision Model API Key"),
# value=config.app.get("narrato_vision_key", ""),
# type="password",
# help="用于视频分析的模型 API Key"
# help="用于视频分析的模型 API Key"
# )
#
# if narrato_vision_model:
Expand Down Expand Up @@ -247,6 +258,76 @@ def render_vision_llm_settings(tr):
# st.session_state['narrato_batch_size'] = narrato_batch_size


def test_text_model_connection(api_key, base_url, model_name, provider, tr):
"""测试文本模型连接
Args:
api_key: API密钥
base_url: 基础URL
model_name: 模型名称
provider: 提供商名称
Returns:
bool: 连接是否成功
str: 测试结果消息
"""
import requests

try:
# 构建统一的测试请求(遵循OpenAI格式)
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}

# 如果没有指定base_url,使用默认值
if not base_url:
if provider.lower() == 'openai':
base_url = "https://api.openai.com/v1"
elif provider.lower() == 'moonshot':
base_url = "https://api.moonshot.cn/v1"
elif provider.lower() == 'deepseek':
base_url = "https://api.deepseek.com/v1"

# 构建测试URL
test_url = f"{base_url.rstrip('/')}/chat/completions"

# 特殊处理Gemini
if provider.lower() == 'gemini':
import google.generativeai as genai
try:
genai.configure(api_key=api_key)
model = genai.GenerativeModel(model_name or 'gemini-pro')
model.generate_content("直接回复我文本'当前网络可用'")
return True, tr("Gemini model is available")
except Exception as e:
return False, f"{tr('Gemini model is not available')}: {str(e)}"

# 构建测试消息
test_data = {
"model": model_name,
"messages": [
{"role": "user", "content": "直接回复我文本'当前网络可用'"}
],
"max_tokens": 10
}

# 发送测试请求
response = requests.post(
test_url,
headers=headers,
json=test_data,
timeout=10
)

if response.status_code == 200:
return True, tr("Text model is available")
else:
return False, f"{tr('Text model is not available')}: HTTP {response.status_code}"

except Exception as e:
return False, f"{tr('Connection failed')}: {str(e)}"

def render_text_llm_settings(tr):
"""渲染文案生成模型设置"""
st.subheader(tr("Text Generation Model Settings"))
Expand Down Expand Up @@ -279,6 +360,22 @@ def render_text_llm_settings(tr):
st_text_base_url = st.text_input(tr("Text Base URL"), value=text_base_url)
st_text_model_name = st.text_input(tr("Text Model Name"), value=text_model_name)

# 添加测试按钮
if st.button(tr("Test Connection"), key="test_text_connection"):
with st.spinner(tr("Testing connection...")):
success, message = test_text_model_connection(
api_key=st_text_api_key,
base_url=st_text_base_url,
model_name=st_text_model_name,
provider=text_provider,
tr=tr
)

if success:
st.success(message)
else:
st.error(message)

# 保存文本模型配置
if st_text_api_key:
config.app[f"text_{text_provider}_api_key"] = st_text_api_key
Expand Down
23 changes: 13 additions & 10 deletions webui/components/review_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
from loguru import logger


def render_review_panel(tr):
"""渲染视频审查面板"""
with st.expander(tr("Video Check"), expanded=False):
Expand All @@ -26,10 +27,11 @@ def render_review_panel(tr):
with cols[col]:
render_video_item(tr, video_list, subclip_videos, index)


def render_video_item(tr, video_list, subclip_videos, index):
"""渲染单个视频项"""
video_script = video_list[index]

# 显示时间戳
timestamp = video_script.get('timestamp', '')
st.text_area(
Expand All @@ -39,7 +41,7 @@ def render_video_item(tr, video_list, subclip_videos, index):
disabled=True,
key=f"timestamp_{index}"
)

# 显示视频播放器
video_path = subclip_videos.get(timestamp)
if video_path and os.path.exists(video_path):
Expand All @@ -50,7 +52,7 @@ def render_video_item(tr, video_list, subclip_videos, index):
st.error(f"无法加载视频: {os.path.basename(video_path)}")
else:
st.warning(tr("视频文件未找到"))

# 显示画面描述
st.text_area(
tr("Picture Description"),
Expand All @@ -59,7 +61,7 @@ def render_video_item(tr, video_list, subclip_videos, index):
disabled=True,
key=f"picture_{index}"
)

# 显示旁白文本
narration = st.text_area(
tr("Narration"),
Expand All @@ -71,15 +73,16 @@ def render_video_item(tr, video_list, subclip_videos, index):
if narration != video_script.get('narration', ''):
video_script['narration'] = narration
st.session_state['video_clip_json'] = video_list

# 显示剪辑模式
ost = st.selectbox(
tr("Clip Mode"),
options=range(1, 10),
index=video_script.get('OST', 1) - 1,
key=f"ost_{index}"
options=range(0, 3),
index=video_script.get('OST', 0),
key=f"ost_{index}",
help=tr("0: Keep the audio only, 1: Keep the original sound only, 2: Keep the original sound and audio")
)
# 保存修改后的剪辑模式
if ost != video_script.get('OST', 1):
if ost != video_script.get('OST', 0):
video_script['OST'] = ost
st.session_state['video_clip_json'] = video_list
st.session_state['video_clip_json'] = video_list
Loading

0 comments on commit be20d9d

Please sign in to comment.