From df45f11115051929d6296a0c138b99472abf497f Mon Sep 17 00:00:00 2001
From: Jun-Howie <62869005+Jun-Howie@users.noreply.github.com>
Date: Fri, 10 Jan 2025 17:24:10 +0800
Subject: [PATCH] FEAT: Support Marco-o1 (#2749)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: JunHowie
Co-authored-by: 赵俊豪
---
 xinference/model/llm/llm_family.json         | 59 ++++++++++++++++++
 .../model/llm/llm_family_modelscope.json     | 61 +++++++++++++++++++
 xinference/model/llm/vllm/core.py            |  1 +
 3 files changed, 121 insertions(+)

diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
index 030c911dc3..5088e0ef46 100644
--- a/xinference/model/llm/llm_family.json
+++ b/xinference/model/llm/llm_family.json
@@ -8990,6 +8990,65 @@
             "<|endoftext|>"
         ]
     },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "marco-o1",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "tools"
+        ],
+        "model_description": "Marco-o1: Towards Open Reasoning Models for Open-Ended Solutions",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "AIDC-AI/Marco-o1"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "Q2_K",
+                    "Q3_K_L",
+                    "Q3_K_M",
+                    "Q3_K_S",
+                    "Q4_0",
+                    "Q4_1",
+                    "Q4_K_M",
+                    "Q4_K_S",
+                    "Q5_0",
+                    "Q5_1",
+                    "Q5_K_M",
+                    "Q5_K_S",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_id": "QuantFactory/Marco-o1-GGUF",
+                "model_file_name_template": "Marco-o1.{quantization}.gguf"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n\n你是一个经过良好训练的AI助手,你的名字是Marco-o1.由阿里国际数字商业集团的AI Business创造.\n \n## 重要!!!!!\n当你回答问题时,你的思考应该在<Thought>内完成,<Output>内输出你的结果。\n<Thought>应该尽可能是英文,但是有2个特例,一个是对原文中的引用,另一个是是数学应该使用markdown格式,<Output>内的输出需要遵循用户输入的语言。\n <|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [
+            151643,
+            151644,
+            151645
+        ],
+        "stop": [
+            "<|endoftext|>",
+            "<|im_start|>",
+            "<|im_end|>"
+        ]
+    },
     {
         "version": 1,
         "context_length": 4096,
diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json
index 0908d314a2..762bcdb690 100644
--- a/xinference/model/llm/llm_family_modelscope.json
+++ b/xinference/model/llm/llm_family_modelscope.json
@@ -6722,6 +6722,67 @@
             "<|im_end|>",
             "<|endoftext|>"
         ]
+    },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "marco-o1",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "tools"
+        ],
+        "model_description": "Marco-o1: Towards Open Reasoning Models for Open-Ended Solutions",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "AIDC-AI/Marco-o1",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "Q2_K",
+                    "Q3_K_L",
+                    "Q3_K_M",
+                    "Q3_K_S",
+                    "Q4_0",
+                    "Q4_1",
+                    "Q4_K_M",
+                    "Q4_K_S",
+                    "Q5_0",
+                    "Q5_1",
+                    "Q5_K_M",
+                    "Q5_K_S",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_file_name_template": "Marco-o1.{quantization}.gguf",
+                "model_hub": "modelscope",
+                "model_id": "QuantFactory/Marco-o1-GGUF"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n\n你是一个经过良好训练的AI助手,你的名字是Marco-o1.由阿里国际数字商业集团的AI Business创造.\n \n## 重要!!!!!\n当你回答问题时,你的思考应该在<Thought>内完成,<Output>内输出你的结果。\n<Thought>应该尽可能是英文,但是有2个特例,一个是对原文中的引用,另一个是是数学应该使用markdown格式,<Output>内的输出需要遵循用户输入的语言。\n <|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [
+            151643,
+            151644,
+            151645
+        ],
+        "stop": [
+            "<|endoftext|>",
+            "<|im_start|>",
+            "<|im_end|>"
+        ]
     },
     {
         "version": 1,
diff --git a/xinference/model/llm/vllm/core.py b/xinference/model/llm/vllm/core.py
index e0556b80d2..a2a1ece6c0 100644
--- a/xinference/model/llm/vllm/core.py
+++ b/xinference/model/llm/vllm/core.py
@@ -156,6 +156,7 @@ class VLLMGenerateConfig(TypedDict, total=False):
     VLLM_SUPPORTED_MODELS.append("qwen2.5-coder")
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen2.5-coder-instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("QwQ-32B-Preview")
+    VLLM_SUPPORTED_CHAT_MODELS.append("marco-o1")
 
 if VLLM_INSTALLED and vllm.__version__ >= "0.3.2":
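
Usage sketch (reviewer commentary, not part of the patch): one way to exercise
this change end to end through Xinference's RESTful client. Assumptions: a
local supervisor is running (e.g. started with xinference-local) on the default
port 9997, the client is a recent version whose chat() takes OpenAI-style
messages, and the endpoint, engine casing, and sample question below are
illustrative only.

from xinference.client import RESTfulClient

# Connect to a running Xinference supervisor (endpoint is an assumption).
client = RESTfulClient("http://127.0.0.1:9997")

# Launch the pytorch build registered in llm_family.json above; serving it with
# vLLM is possible because this patch appends "marco-o1" to
# VLLM_SUPPORTED_CHAT_MODELS in xinference/model/llm/vllm/core.py.
model_uid = client.launch_model(
    model_name="marco-o1",
    model_engine="vllm",
    model_format="pytorch",
    model_size_in_billions=7,
    quantization="none",
)

model = client.get_model(model_uid)
completion = model.chat(
    messages=[{"role": "user", "content": "How many r's are in 'strawberry'?"}]
)
# Per the chat_template's system prompt, the reply should put its reasoning
# inside <Thought> and the final answer inside <Output>.
print(completion["choices"][0]["message"]["content"])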
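
Template sanity check (also commentary, not part of the patch): the
chat_template added above can be rendered standalone with jinja2 to inspect the
exact prompt the model receives. The string below is copied from the JSON
entry, with the JSON "\n" escapes written as Python newline escapes; the sample
message is made up.

from jinja2 import Template

# chat_template copied from the marco-o1 entry above; Jinja string literals
# tolerate the embedded real newlines produced by the "\n" escapes.
CHAT_TEMPLATE = (
    "{% for message in messages %}"
    "{% if loop.first and messages[0]['role'] != 'system' %}"
    "{{ '<|im_start|>system\n\n"
    "你是一个经过良好训练的AI助手,你的名字是Marco-o1."
    "由阿里国际数字商业集团的AI Business创造.\n \n## 重要!!!!!\n"
    "当你回答问题时,你的思考应该在<Thought>内完成,<Output>内输出你的结果。\n"
    "<Thought>应该尽可能是英文,但是有2个特例,一个是对原文中的引用,"
    "另一个是是数学应该使用markdown格式,<Output>内的输出需要遵循用户输入的语言。\n"
    " <|im_end|>\n' }}{% endif %}"
    "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
)

prompt = Template(CHAT_TEMPLATE).render(
    messages=[{"role": "user", "content": "1 + 1 = ?"}],
    add_generation_prompt=True,
)
# Prints the system prompt (with the <Thought>/<Output> rules), the user turn,
# and a trailing '<|im_start|>assistant' generation header.
print(prompt)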