Qwen3 response_schema might not be correct

The Qwen3 response schema used below is taken from the transformers test suite:

https://github.com/huggingface/transformers/blob/8598421b5120a6a57efe9f57fbfda9bfed29a4dc/tests/utils/test_chat_parsing_utils.py#L149-L181

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Checkpoint used for the reproduction; the tokenizer and model are both loaded from it.
checkpoint = "Qwen/Qwen3-0.6B"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint, dtype="auto", device_map="auto")

# taken from test
# (copied from the transformers test file linked above; kept verbatim so the report
# reproduces the behaviour of the upstream schema)
qwen3_schema = {
    # Top-level pattern, in order: an optional thinking section terminated by </think>
    # (the opening <think> is itself optional), an optional single
    # <tool_call>...</tool_call> section, then optional trailing content.
    "x-regex": r"^(?:(?:<think>)?\s*(?P<thinking>.+?)\s*</think>)?\s*(?:<tool_call>(?P<tool_calls>.*?)\s*</tool_call>)?\s*(?P<content>.+?)?\s*$",
    "type": "object",
    "properties": {
        "role": {"const": "assistant"},
        "content": {"type": "string"},
        "thinking": {"type": "string"},
        "tool_calls": {
            "x-regex-iterator": r"^(.*)$",  # We have already extracted tool calls and there can only be one, so just make it a list
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "type": {"const": "function"},
                    "function": {
                        "type": "object",
                        "properties": {
                            # NOTE(review): `name` and `arguments` are extracted from
                            # `<function=...>` / `<parameter=...>` markup, whereas the generated
                            # text printed below holds a JSON object inside <tool_call>.
                            # Presumably this mismatch is why parsing yields an empty
                            # `function` dict -- confirm against the parser.
                            "name": {"type": "string", "x-regex": r"<function=(\w+)>"},
                            "arguments": {
                                "type": "object",
                                "x-regex-key-value": r"<parameter=(?P<key>\w+)>\n(?P<value>.*?)\n</parameter>",
                                "additionalProperties": {
                                    "x-parser": "json",
                                    "x-parser-args": {"allow_non_json": True},
                                },
                            },
                        },
                    },
                },
            },
        },
    },
}

# Attach the schema to the tokenizer; parse_response is called on this tokenizer later.
tokenizer.response_schema = qwen3_schema


def multiply(a: int, b: int) -> int:
    """
    Multiplies two integers.

    Args:
        a: The first integer.
        b: The second integer.

    Returns:
        The product of the two integers.
    """
    # NOTE(review): this function is passed as tools=[multiply] to apply_chat_template
    # below; presumably its signature and docstring are used to describe the tool to
    # the model, so keep both unchanged when editing this reproduction -- confirm.
    return a * b


# Single-turn conversation that asks the model to call the tool.
messages = [{"role": "user", "content": "Use the tool to multiply 3 and 4."}]

# Render the chat template with the tool attached and thinking disabled, tokenized
# into PyTorch tensors; the result is indexed by key ("input_ids") below.
processed = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_tensors="pt",
    tools=[multiply],
    enable_thinking=False,
)
input_ids = processed["input_ids"].to(model.device)
# Take row 0 and drop the first input_ids.shape[1] positions (the prompt length),
# so only the newly generated tokens are decoded.
outputs = model.generate(input_ids, max_new_tokens=1024)[0, input_ids.shape[1] :]
# decode() is called with no extra arguments; the printed text below still contains
# the <|im_end|> token.
out_text = tokenizer.decode(outputs)
print(out_text)
```

```
<tool_call>
{"name": "multiply", "arguments": {"a": 3, "b": 4}}
</tool_call><|im_end|>
```

```python
# Parse the decoded generation using the response_schema assigned to the tokenizer above.
parsed = tokenizer.parse_response(out_text)
print(parsed)
```

```
{'role': 'assistant', 'content': '<|im_end|>', 'tool_calls': [{'type': 'function', 'function': {}}]}
```

Interestingly, if I use `smollm_schema` instead, it seems to work fine:

```python
# NOTE(review): `smollm_schema` is not defined anywhere in this report; presumably it
# is the SmolLM schema from the same upstream test file as qwen3_schema -- confirm.
tokenizer.response_schema = smollm_schema
# Re-parse the same generated text (out_text) with the other schema for comparison.
parsed = tokenizer.parse_response(out_text)
print(parsed)
```

```
{'role': 'assistant', 'content': '<|im_end|>', 'tool_calls': [{'type': 'function', 'function': {'name': 'multiply', 'arguments': {'a': 3, 'b': 4}}}]}
```

cc @Rocketknight1 

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Qwen3 response_schema might not be correct #42220

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

```python
qwen3_schema = {
    "x-regex": r"^(?:(?:<think>)?\s*(?P<thinking>.+?)\s*</think>)?\s*(?:<tool_call>(?P<tool_calls>.*?)\s*</tool_call>)?\s*(?P<content>.+?)?\s*$",
    "type": "object",
    "properties": {
        "role": {"const": "assistant"},
        "content": {"type": "string"},
        "thinking": {"type": "string"},
        "tool_calls": {
            "x-regex-iterator": r"^(.*)$",  # We have already extracted tool calls and there can only be one, so just make it a list
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "type": {"const": "function"},
                    "function": {
                        "type": "object",
                        "properties": {
                            "name": {"type": "string", "x-regex": r"<function=(\w+)>"},
                            "arguments": {
                                "type": "object",
                                "x-regex-key-value": r"<parameter=(?P<key>\w+)>\n(?P<value>.*?)\n</parameter>",
                                "additionalProperties": {
                                    "x-parser": "json",
                                    "x-parser-args": {"allow_non_json": True},
                                },
                            },
                        },
                    },
                },
            },
        },
    },
}
```

Qwen3 response_schema might not be correct #42220

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions