Skip to content

Commit 6df686b

Browse files
authored
server : refactor oai_parser_opt, move it to server_chat_params (#18937)
* server_chat_params
* move chat format into CLI
* use meta whenever possible
* clean up, no more chatml fallback
1 parent 1706a6d commit 6df686b

File tree

8 files changed

+112
-103
lines changed

8 files changed

+112
-103
lines changed

common/chat.cpp

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -601,18 +601,18 @@ bool common_chat_templates_was_explicit(const struct common_chat_templates * tmp
601601
return tmpls->has_explicit_template;
602602
}
603603

604-
const char * common_chat_templates_source(const struct common_chat_templates * tmpls, const char * variant) {
605-
if (variant != nullptr) {
606-
if (strcmp(variant, "tool_use") == 0) {
604+
std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant) {
605+
if (!variant.empty()) {
606+
if (variant == "tool_use") {
607607
if (tmpls->template_tool_use) {
608-
return tmpls->template_tool_use->source().c_str();
608+
return tmpls->template_tool_use->source();
609609
}
610-
return nullptr;
610+
return "";
611611
} else {
612-
LOG_DBG("%s: unknown template variant: %s\n", __func__, variant);
612+
LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str());
613613
}
614614
}
615-
return tmpls->template_default->source().c_str();
615+
return tmpls->template_default->source();
616616
}
617617

618618
common_chat_templates_ptr common_chat_templates_init(

common/chat.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -191,7 +191,7 @@ common_chat_templates_ptr common_chat_templates_init(
191191
const std::string & eos_token_override = "");
192192

193193
bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);
194-
const char * common_chat_templates_source(const struct common_chat_templates * tmpls, const char * variant = nullptr);
194+
std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant = "");
195195

196196

197197
struct common_chat_params common_chat_templates_apply(

tools/cli/cli.cpp

Lines changed: 27 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -71,14 +71,16 @@ struct cli_context {
7171

7272
std::string generate_completion(result_timings & out_timings) {
7373
server_response_reader rd = ctx_server.get_response_reader();
74+
auto formatted = format_chat();
7475
{
7576
// TODO: reduce some copies here in the future
7677
server_task task = server_task(SERVER_TASK_TYPE_COMPLETION);
77-
task.id = rd.get_new_id();
78-
task.index = 0;
79-
task.params = defaults; // copy
80-
task.cli_input = messages; // copy
81-
task.cli_files = input_files; // copy
78+
task.id = rd.get_new_id();
79+
task.index = 0;
80+
task.params = defaults; // copy
81+
task.cli_prompt = formatted.prompt; // copy
82+
task.cli_files = input_files; // copy
83+
task.cli = true;
8284
rd.post_task({std::move(task)});
8385
}
8486

@@ -156,6 +158,26 @@ struct cli_context {
156158
return content;
157159
}
158160
}
161+
162+
common_chat_params format_chat() {
163+
auto meta = ctx_server.get_meta();
164+
auto & chat_params = meta.chat_params;
165+
166+
common_chat_templates_inputs inputs;
167+
inputs.messages = common_chat_msgs_parse_oaicompat(messages);
168+
inputs.tools = {}; // TODO
169+
inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;
170+
inputs.json_schema = ""; // TODO
171+
inputs.grammar = ""; // TODO
172+
inputs.use_jinja = chat_params.use_jinja;
173+
inputs.parallel_tool_calls = false;
174+
inputs.add_generation_prompt = true;
175+
inputs.reasoning_format = chat_params.reasoning_format;
176+
inputs.enable_thinking = chat_params.enable_thinking;
177+
178+
// Apply chat template to the list of messages
179+
return common_chat_templates_apply(chat_params.tmpls.get(), inputs);
180+
}
159181
};
160182

161183
int main(int argc, char ** argv) {

tools/server/server-common.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -831,7 +831,7 @@ static void handle_media(
831831
// used by /chat/completions endpoint
832832
json oaicompat_chat_params_parse(
833833
json & body, /* openai api json semantics */
834-
const oaicompat_parser_options & opt,
834+
const server_chat_params & opt,
835835
std::vector<raw_buffer> & out_files)
836836
{
837837
json llama_params;
@@ -1012,7 +1012,7 @@ json oaicompat_chat_params_parse(
10121012
}
10131013

10141014
// Apply chat template to the list of messages
1015-
auto chat_params = common_chat_templates_apply(opt.tmpls, inputs);
1015+
auto chat_params = common_chat_templates_apply(opt.tmpls.get(), inputs);
10161016

10171017
/* Append assistant prefilled message */
10181018
if (prefill_assistant_message) {

tools/server/server-common.h

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -274,25 +274,25 @@ std::vector<server_tokens> tokenize_input_prompts(
274274
// OAI utils
275275
//
276276

277-
// used by /completions endpoint
278-
json oaicompat_completion_params_parse(const json & body);
279-
280-
struct oaicompat_parser_options {
277+
struct server_chat_params {
281278
bool use_jinja;
282279
bool prefill_assistant;
283280
common_reasoning_format reasoning_format;
284-
std::map<std::string,std::string> chat_template_kwargs;
285-
common_chat_templates * tmpls;
281+
std::map<std::string, std::string> chat_template_kwargs; // mapping key --> json value
282+
common_chat_templates_ptr tmpls;
286283
bool allow_image;
287284
bool allow_audio;
288285
bool enable_thinking = true;
289286
std::string media_path;
290287
};
291288

289+
// used by /completions endpoint
290+
json oaicompat_completion_params_parse(const json & body);
291+
292292
// used by /chat/completions endpoint
293293
json oaicompat_chat_params_parse(
294294
json & body, /* openai api json semantics */
295-
const oaicompat_parser_options & opt,
295+
const server_chat_params & opt,
296296
std::vector<raw_buffer> & out_files);
297297

298298
// convert Anthropic Messages API format to OpenAI Chat Completions API format

0 commit comments

Comments
 (0)