@@ -71,14 +71,16 @@ struct cli_context {
7171
7272 std::string generate_completion (result_timings & out_timings) {
7373 server_response_reader rd = ctx_server.get_response_reader ();
74+ auto formatted = format_chat ();
7475 {
7576 // TODO: reduce some copies here in the future
7677 server_task task = server_task (SERVER_TASK_TYPE_COMPLETION);
77- task.id = rd.get_new_id ();
78- task.index = 0 ;
79- task.params = defaults; // copy
80- task.cli_input = messages; // copy
81- task.cli_files = input_files; // copy
78+ task.id = rd.get_new_id ();
79+ task.index = 0 ;
80+ task.params = defaults; // copy
81+ task.cli_prompt = formatted.prompt ; // copy
82+ task.cli_files = input_files; // copy
83+ task.cli = true ;
8284 rd.post_task ({std::move (task)});
8385 }
8486
@@ -156,6 +158,26 @@ struct cli_context {
156158 return content;
157159 }
158160 }
161+
162+ common_chat_params format_chat () {
163+ auto meta = ctx_server.get_meta ();
164+ auto & chat_params = meta.chat_params ;
165+
166+ common_chat_templates_inputs inputs;
167+ inputs.messages = common_chat_msgs_parse_oaicompat (messages);
168+ inputs.tools = {}; // TODO
169+ inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;
170+ inputs.json_schema = " " ; // TODO
171+ inputs.grammar = " " ; // TODO
172+ inputs.use_jinja = chat_params.use_jinja ;
173+ inputs.parallel_tool_calls = false ;
174+ inputs.add_generation_prompt = true ;
175+ inputs.reasoning_format = chat_params.reasoning_format ;
176+ inputs.enable_thinking = chat_params.enable_thinking ;
177+
178+ // Apply chat template to the list of messages
179+ return common_chat_templates_apply (chat_params.tmpls .get (), inputs);
180+ }
159181};
160182
161183int main (int argc, char ** argv) {
0 commit comments