Skip to content

vllm.entrypoints.openai.tool_parsers.mistral_tool_parser

ALPHANUMERIC module-attribute

ALPHANUMERIC = ascii_letters + digits

logger module-attribute

logger = init_logger(__name__)

MistralToolCall

Bases: ToolCall

Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
class MistralToolCall(ToolCall):
    # Mistral rejects the default OpenAI-style ids, so generate a compliant
    # one whenever the caller does not supply an id explicitly.
    id: str = Field(default_factory=lambda: MistralToolCall.generate_random_id())

    @staticmethod
    def generate_random_id():
        """Return a random tool-call id in Mistral's required format.

        Mistral tool call ids must be alphanumeric and exactly 9 characters:
        https://github.com/mistralai/mistral-common/blob/21ee9f6cee3441e9bb1e6ed2d10173f90bd9b94b/src/mistral_common/protocol/instruct/validator.py#L299
        """
        random_chars = choices(ALPHANUMERIC, k=9)
        return "".join(random_chars)

    @staticmethod
    def is_valid_id(id: str) -> bool:
        """Check whether ``id`` satisfies Mistral's 9-char alphanumeric rule."""
        return len(id) == 9 and id.isalnum()

id class-attribute instance-attribute

id: str = Field(
    default_factory=lambda: generate_random_id()
)

generate_random_id staticmethod

generate_random_id()
Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
@staticmethod
def generate_random_id():
    """Return a random 9-character alphanumeric tool-call id."""
    # Mistral Tool Call Ids must be alphanumeric with a length of 9.
    # https://github.com/mistralai/mistral-common/blob/21ee9f6cee3441e9bb1e6ed2d10173f90bd9b94b/src/mistral_common/protocol/instruct/validator.py#L299
    return "".join(choices(ALPHANUMERIC, k=9))

is_valid_id staticmethod

is_valid_id(id: str) -> bool
Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
@staticmethod
def is_valid_id(id: str) -> bool:
    return id.isalnum() and len(id) == 9

MistralToolParser

Bases: ToolParser

Tool call parser for Mistral 7B Instruct v0.3, intended for use with `mistral_common` and the examples/tool_chat_template_mistral.jinja template.

Used when --enable-auto-tool-choice --tool-call-parser mistral are all set

Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
(line-number gutter 70–586 from the rendered source listing omitted)
class MistralToolParser(ToolParser):
    """
    Tool call parser for Mistral 7B Instruct v0.3, intended for use with
    - [`mistral_common`](https://github.com/mistralai/mistral-common/)
    - the examples/tool_chat_template_mistral.jinja template.

    Used when --enable-auto-tool-choice --tool-call-parser mistral are all set
    """

    def __init__(self, tokenizer: TokenizerLike):
        """Set up parsing state and locate the [TOOL_CALLS] token.

        Raises:
            RuntimeError: if the tokenizer vocabulary has no [TOOL_CALLS]
                token, in which case tool detection cannot work.
        """
        super().__init__(tokenizer)

        if not isinstance(self.model_tokenizer, MistralTokenizer):
            logger.info("Non-Mistral tokenizer detected when using a Mistral model...")

        # initialize properties used for state when parsing tool calls in
        # streaming mode
        self.current_tool_id: int = -1
        self.streaming_state: StreamingState = StreamingState.WAITING_FOR_TOOL_START

        # For streaming pre v11 tokenizer tool calls
        self.current_tool_name: str | None = None
        self.current_tool_mistral_id: str | None = None
        self.starting_new_tool = False
        if _is_pre_v11_tokeniser(self.model_tokenizer):
            # ijson push-parser coroutine that advances self.streaming_state
            # as JSON events are observed in the streamed text.
            self.parse_coro = ijson.parse_coro(
                self.update_stream_state_pre_v11_tokenizer()
            )

        self.bot_token = "[TOOL_CALLS]"
        self.bot_token_id = self.vocab.get(self.bot_token)
        self.tool_call_regex = re.compile(r"\[{.*}\]", re.DOTALL)
        if not _is_pre_v11_tokeniser(self.model_tokenizer):
            # v11+ format: a function name immediately followed by a JSON
            # object of arguments, e.g. `add{"a": 3.5, "b": 4}`.
            self.fn_name_regex = re.compile(
                r"([a-zA-Z0-9_-]+)(\{[\s\S]*?\}+)", re.DOTALL
            )
        else:
            self.fn_name_regex = None

        if self.bot_token_id is None:
            raise RuntimeError(
                "Mistral Tool Parser could not locate the tool call token in "
                "the tokenizer!"
            )

    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
        """Tweak the request so the tool-call token survives detokenization."""
        request = super().adjust_request(request)
        if (
            not isinstance(self.model_tokenizer, MistralTokenizer)
            and request.tools
            and request.tool_choice != "none"
        ):
            # Do not skip special tokens when using chat template
            # with Mistral parser as TOOL_CALL token is needed
            # for tool detection.
            # Note: we don't want skip_special_tokens=False
            # with MistralTokenizer as it is incompatible
            request.skip_special_tokens = False
        return request

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """
        Extract the tool calls from a complete model response. Requires
        find-and-replacing single quotes with double quotes for JSON parsing,
        make sure your tool call arguments don't ever include quotes!
        """

        # case -- if a tool call token is not present, return a text response
        if self.bot_token not in model_output:
            return ExtractedToolCallInformation(
                tools_called=False, tool_calls=[], content=model_output
            )

        # first remove the BOT token
        tool_content = model_output.replace(self.bot_token, "").strip()

        try:
            # we first try to directly load the json as parsing very nested
            # jsons is difficult
            try:
                if self.fn_name_regex:
                    # v11+ tokenizer: each [TOOL_CALLS]-separated segment may
                    # hold one or more `name{json}` pairs.
                    function_call_arr = []
                    for single_tool_content in model_output.split(self.bot_token):
                        matches = self.fn_name_regex.findall(single_tool_content)

                        for match in matches:
                            fn_name = match[0]
                            args = match[1]

                            # fn_name is encoded outside serialized json dump
                            # only arguments are serialized
                            function_call_arr.append(
                                {"name": fn_name, "arguments": json.loads(args)}
                            )
                else:
                    # pre-v11 tokenizer: the payload is a JSON array of
                    # {"name": ..., "arguments": ...} objects.
                    function_call_arr = json.loads(tool_content)
            except json.JSONDecodeError:
                # use a regex to find the part corresponding to the tool call.
                # NOTE: This use case should not happen if the model is trained
                # correctly. It's an easy possible fix so it's included, but
                # can be brittle for very complex / highly nested tool calls
                raw_tool_call = self.tool_call_regex.findall(tool_content)[0]
                function_call_arr = json.loads(raw_tool_call)

            # Tool Call
            tool_calls: list[MistralToolCall] = [
                MistralToolCall(
                    type="function",
                    function=FunctionCall(
                        name=raw_function_call["name"],
                        # function call args are JSON but as a string
                        arguments=json.dumps(
                            raw_function_call["arguments"], ensure_ascii=False
                        ),
                    ),
                )
                for raw_function_call in function_call_arr
            ]

            # get any content before  the tool call
            content = model_output.split(self.bot_token)[0]
            return ExtractedToolCallInformation(
                tools_called=True,
                tool_calls=tool_calls,
                content=content if len(content) > 0 else None,
            )

        except Exception:
            logger.exception("Error in extracting tool call from response.")
            # return information to just treat the tool call as regular JSON
            return ExtractedToolCallInformation(
                tools_called=False, tool_calls=[], content=tool_content
            )

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """Dispatch a streamed delta to the tokenizer-specific extractor.

        Returns a DeltaMessage with plain content until [TOOL_CALLS] is seen,
        then incremental tool-call deltas; None when nothing can be emitted.
        """
        if self.bot_token_id not in current_token_ids:
            # if the tool call token is not in the tokens generated so far,
            # append output to contents since it's not a tool
            return DeltaMessage(content=delta_text)

        # if the tool call token IS in the tokens generated so far, that
        # means we're parsing as tool calls now
        try:
            if _is_pre_v11_tokeniser(self.model_tokenizer):
                return self._extract_tool_calls_streaming_pre_v11_tokenizer(
                    delta_text=delta_text,
                    delta_token_ids=delta_token_ids,
                )
            else:
                return self._extract_tool_calls_streaming(
                    delta_text=delta_text, delta_token_ids=delta_token_ids
                )
        except Exception:
            logger.exception("Error trying to handle streaming tool call.")
            return None

    def _extract_tool_calls_streaming(
        self,
        delta_text: str,
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
        """
        Extracts tool calls for Mistral models
        doing tool calls of the following format:
        `[TOOL_CALLS]add{"a": 3.5, "b": 4}`
        """
        additional_content: str = ""
        if self.streaming_state == StreamingState.WAITING_FOR_TOOL_START:
            # this is the first tool call
            assert self.bot_token_id in delta_token_ids
            if not delta_text.startswith(self.bot_token):
                # Text preceding [TOOL_CALLS] is plain content; re-anchor the
                # delta so it starts at the BOT token.
                additional_content += delta_text.split(self.bot_token)[0]
                delta_text = self.bot_token + "".join(
                    delta_text.split(self.bot_token)[1:]
                )

        delta_tool_calls = self._generate_delta_tool_call(delta_text)
        if not additional_content and len(delta_tool_calls) == 0:
            if self.streaming_state in [
                StreamingState.PARSING_ARGUMENTS,
                StreamingState.PARSING_ARGUMENTS_COMPLETED,
                StreamingState.TOOL_COMPLETE,
                StreamingState.ALL_TOOLS_COMPLETE,
            ]:
                # Return an empty DeltaMessage once the tool calls are all done
                # so that finish_reason gets set.
                return DeltaMessage()
            else:
                # return None when the tool is not likely to be finished
                # This can occur when the name is being parsed for example
                # and we wait for the name to be complete
                # before sending the function name
                return None

        delta = DeltaMessage()
        if additional_content:
            delta.content = additional_content
        if len(delta_tool_calls) > 0:
            delta.tool_calls = delta_tool_calls

        # HACK: serving_chat.py inspects the internal state of tool parsers
        # when determining its final streaming delta, automatically
        # adding autocompleted JSON.
        # These two lines avoid that nonsense while ensuring finish_reason
        # is set to tool_calls when at least one tool is called.
        if delta_tool_calls and not self.prev_tool_call_arr:
            self.prev_tool_call_arr = [{"arguments": {}}]
        return delta

    def _generate_delta_tool_call(self, delta_text: str) -> list[DeltaToolCall]:
        """Turn a raw v11+ text delta into zero or more DeltaToolCalls.

        Advances self.streaming_state as the name / `{`-delimited arguments
        are recognized; recurses when a new [TOOL_CALLS] token appears
        mid-delta (back-to-back tool calls in one chunk).
        """
        if delta_text == "" or delta_text is None:
            return []
        delta_function_name = None
        tool_id = None
        if self.streaming_state not in [
            StreamingState.PARSING_NAME,
            StreamingState.PARSING_ARGUMENTS,
        ] and delta_text.startswith(self.bot_token):
            # A BOT token outside name/argument parsing opens a new tool call.
            self.current_tool_id += 1
            self.streaming_state = StreamingState.PARSING_NAME
            delta_text = delta_text.replace(self.bot_token, "", 1)
        if self.streaming_state == StreamingState.PARSING_NAME:
            if self.current_tool_name is None:
                self.current_tool_name = ""
            # The name stops where the arguments start
            # And the arguments start with the `{` char
            if "{" in delta_text:
                tool_id = MistralToolCall.generate_random_id()
                delta_function_name = delta_text.split("{")[0]
                self.current_tool_name += delta_function_name
                delta_text = delta_text[len(delta_function_name) :]
                self.streaming_state = StreamingState.PARSING_ARGUMENTS
            else:
                # we want to send the tool name once it's complete
                self.current_tool_name += delta_text
                return []
        if self.streaming_state == StreamingState.PARSING_ARGUMENTS:
            next_function_text = None
            if self.bot_token in delta_text:
                # current tool call is over
                delta_arguments = ""
                delta_arguments += delta_text.split(self.bot_token)[0]
                next_function_text = delta_text[len(delta_arguments) :]
                self.streaming_state = StreamingState.TOOL_COMPLETE
            else:
                delta_arguments = delta_text
            ret = []
            if self.current_tool_name or delta_arguments:
                ret += [
                    DeltaToolCall(
                        index=self.current_tool_id,
                        type="function",
                        id=tool_id,
                        function=DeltaFunctionCall(
                            name=self.current_tool_name, arguments=delta_arguments
                        ).model_dump(exclude_none=True),
                    )
                ]
                # Name has been emitted; only argument deltas follow.
                self.current_tool_name = None
            if next_function_text:
                # Recurse to handle the next tool call packed in this delta.
                ret += self._generate_delta_tool_call(next_function_text)
            return ret
        # Should not happen
        return []

    @ijson.coroutine
    def update_stream_state_pre_v11_tokenizer(self):
        """ijson event sink that maps JSON parse events to StreamingState.

        Receives (prefix, event, value) tuples pushed by parse_coro and
        mutates self.streaming_state / self.current_tool_name accordingly.
        """
        while True:
            (prefix, event, value) = yield

            if prefix == "item" and event == "start_map":
                self.streaming_state = StreamingState.WAITING_FOR_TOOL_KEY
            if prefix == "item" and event == "map_key" and value == "name":
                self.streaming_state = StreamingState.PARSING_NAME
            if prefix == "item.name" and event == "string":
                self.current_tool_name = value
                self.streaming_state = StreamingState.PARSING_NAME_COMPLETED
            if prefix == "item" and event == "map_key" and value == "arguments":
                self.streaming_state = StreamingState.WAITING_FOR_ARGUMENTS_START
            if prefix == "item.arguments" and event == "start_map":
                self.streaming_state = StreamingState.PARSING_ARGUMENTS
            if prefix == "item.arguments" and event == "end_map":
                self.streaming_state = StreamingState.PARSING_ARGUMENTS_COMPLETED
            if prefix == "item" and event == "end_map":
                self.streaming_state = StreamingState.TOOL_COMPLETE
            if prefix == "" and event == "end_array":
                self.streaming_state = StreamingState.ALL_TOOLS_COMPLETE

    def _extract_tool_calls_streaming_pre_v11_tokenizer(
        self,
        delta_text: str,
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
        """
        Extracts tool calls for Mistral models
        doing tool calls of the following format:
        `[TOOL_CALLS][{"name": "add", "arguments":{"a": 3.5, "b": 4}}`
        """
        assert self.parse_coro is not None
        content = None
        delta_tool_calls: list[DeltaToolCall] = []
        current_tool_call: DeltaToolCall = DeltaToolCall(
            index=self.current_tool_id, type="function"
        )
        current_tool_call_modified = False
        if self.bot_token_id in delta_token_ids:
            # this is the first tool call
            if not delta_text.startswith(self.bot_token):
                content = delta_text.split(self.bot_token)[0]
            delta_text = "".join(delta_text.split(self.bot_token)[1:])

        # Cut smartly the delta text to catch the ijson events
        # as ijson does not give us the index in the text at each event.
        # We need to cut so that we know
        # where in the text the events are emitted from.
        while len(delta_text) > 0:
            streaming_state_before_parse = self.streaming_state

            if self.streaming_state == StreamingState.WAITING_FOR_TOOL_START:
                delta_to_be_parsed, delta_text = self._split_delta(
                    delta_text=delta_text,
                    stop_after_opening_curly_braces=1,
                )
            elif self.streaming_state == StreamingState.WAITING_FOR_TOOL_KEY:
                # Wait until another key is sent
                # or the current tool is completed
                delta_to_be_parsed, delta_text = self._split_delta(
                    delta_text=delta_text,
                    stop_after_colon=1,
                    stop_after_opening_curly_braces=1,
                    # if the tool ends, we want to separate
                    # at the start of the next tool
                )
            elif self.streaming_state == StreamingState.PARSING_NAME:
                delta_to_be_parsed, delta_text = self._split_delta(
                    delta_text=delta_text,
                    stop_after_comma=1,
                    stop_after_closing_brackets=1,
                )
            elif self.streaming_state == StreamingState.WAITING_FOR_ARGUMENTS_START:
                delta_to_be_parsed, delta_text = self._split_delta(
                    delta_text=delta_text,
                    stop_after_opening_curly_braces=1,
                )
            elif self.streaming_state == StreamingState.PARSING_ARGUMENTS:
                delta_to_be_parsed, delta_text = self._split_delta(
                    delta_text=delta_text,
                    stop_after_closing_curly_braces=1,
                    # we could be more clever
                    # by listening to item.arguments.* start_map events
                    # and know how many curly braces we can allow
                )
            elif self.streaming_state in [
                StreamingState.PARSING_ARGUMENTS_COMPLETED,
                StreamingState.PARSING_NAME_COMPLETED,
            ]:
                delta_to_be_parsed, delta_text = self._split_delta(
                    delta_text=delta_text,
                    stop_after_closing_curly_braces=1,
                    stop_after_closing_brackets=1,
                )
            elif self.streaming_state == StreamingState.TOOL_COMPLETE:
                delta_to_be_parsed, delta_text = self._split_delta(
                    delta_text=delta_text,
                    stop_after_opening_curly_braces=1,
                    stop_after_closing_brackets=1,
                )
            elif self.streaming_state == StreamingState.ALL_TOOLS_COMPLETE:
                # Everything after the closing `]` is plain content.
                content = delta_text
                delta_text = ""
            else:
                delta_to_be_parsed = delta_text
                delta_text = ""

            if self.streaming_state != StreamingState.ALL_TOOLS_COMPLETE:
                # Push the chunk into the ijson coroutine; this may advance
                # self.streaming_state via update_stream_state_pre_v11_tokenizer.
                self.parse_coro.send(delta_to_be_parsed.encode("utf-8"))

            # Given the parsed text and the possible streaming state change,
            # let's add to the tool delta
            if (
                (streaming_state_before_parse != self.streaming_state)
                and streaming_state_before_parse
                in [StreamingState.WAITING_FOR_TOOL_START, StreamingState.TOOL_COMPLETE]
                and self.streaming_state
                not in [
                    StreamingState.ALL_TOOLS_COMPLETE,
                    StreamingState.TOOL_COMPLETE,
                    StreamingState.WAITING_FOR_TOOL_START,
                ]
            ):
                # starting a new tool call
                if current_tool_call_modified:
                    # Flush the previous tool call before opening a new one.
                    if self.current_tool_mistral_id is not None:
                        current_tool_call.id = self.current_tool_mistral_id
                        self.current_tool_mistral_id = None
                    delta_tool_calls.append(current_tool_call)
                current_tool_call_modified = False
                self.current_tool_id += 1
                self.current_tool_mistral_id = MistralToolCall.generate_random_id()
                current_tool_call = DeltaToolCall(
                    index=self.current_tool_id,
                    type="function",
                )
            if current_tool_call.function is None:
                current_tool_call.function = DeltaFunctionCall()

            if self.current_tool_name is not None:
                # we have the complete tool name
                current_tool_call_modified = True
                current_tool_call.function.name = self.current_tool_name
                self.current_tool_name = None
            if self.streaming_state == StreamingState.PARSING_NAME_COMPLETED:
                self.streaming_state = StreamingState.WAITING_FOR_TOOL_KEY
            if self.streaming_state in [
                StreamingState.PARSING_ARGUMENTS,
                StreamingState.PARSING_ARGUMENTS_COMPLETED,
            ]:
                if self.streaming_state == StreamingState.PARSING_ARGUMENTS_COMPLETED:
                    self.streaming_state = StreamingState.WAITING_FOR_TOOL_KEY
                # the delta_to_be_parsed is part of arguments.
                current_tool_call_modified = True
                if current_tool_call.function.arguments is None:
                    current_tool_call.function.arguments = delta_to_be_parsed
                else:
                    current_tool_call.function.arguments += delta_to_be_parsed
                if streaming_state_before_parse != StreamingState.PARSING_ARGUMENTS:
                    # It's the first chunk of arg. let's lstrip it
                    current_tool_call.function.arguments = (
                        current_tool_call.function.arguments.lstrip()
                    )

        if current_tool_call_modified:
            # Flush the in-progress tool call accumulated in this delta.
            if self.current_tool_mistral_id is not None:
                current_tool_call.id = self.current_tool_mistral_id
                self.current_tool_mistral_id = None
            delta_tool_calls.append(current_tool_call)

        # HACK: serving_chat.py inspects the internal state of tool parsers
        # when determining its final streaming delta, automatically
        # adding autocompleted JSON.
        # These two lines avoid that nonsense while ensuring finish_reason
        # is set to tool_calls when at least one tool is called.
        if delta_tool_calls and not self.prev_tool_call_arr:
            self.prev_tool_call_arr = [{"arguments": {}}]

        if content or len(delta_tool_calls) > 0:
            delta_message = DeltaMessage()
            if content:
                delta_message.content = content
            if len(delta_tool_calls) > 0:
                delta_message.tool_calls = delta_tool_calls
            return delta_message
        else:
            if self.streaming_state == StreamingState.ALL_TOOLS_COMPLETE:
                return DeltaMessage()
            else:
                return None

    def _split_delta(
        self,
        delta_text: str,
        stop_after_quotes: int = -1,
        stop_after_opening_curly_braces: int = -1,
        stop_after_closing_curly_braces: int = -1,
        stop_after_closing_brackets: int = -1,
        stop_after_colon: int = -1,
        stop_after_comma=-1,
    ) -> tuple[str, str]:
        """Split ``delta_text`` right after the first counted delimiter.

        Each ``stop_after_*`` argument is a countdown of how many of that
        character to consume before cutting; -1 (the default) means that
        character never triggers a cut. Returns (chunk_up_to_and_including
        the triggering char, remainder); if no countdown reaches zero the
        whole text is returned with an empty remainder.
        """
        delta_to_be_parsed = ""
        for i, c in enumerate(delta_text):
            if c in ['"', "'"]:
                delta_to_be_parsed += c
                stop_after_quotes -= 1
                if stop_after_quotes == 0:
                    return (delta_to_be_parsed, delta_text[i + 1 :])
            elif c == "{":
                delta_to_be_parsed += c
                stop_after_opening_curly_braces -= 1
                if stop_after_opening_curly_braces == 0:
                    return (delta_to_be_parsed, delta_text[i + 1 :])
            elif c == "}":
                delta_to_be_parsed += c
                stop_after_closing_curly_braces -= 1
                if stop_after_closing_curly_braces == 0:
                    return (delta_to_be_parsed, delta_text[i + 1 :])
            elif c == "]":
                delta_to_be_parsed += c
                stop_after_closing_brackets -= 1
                if stop_after_closing_brackets == 0:
                    return (delta_to_be_parsed, delta_text[i + 1 :])
            elif c == ":":
                delta_to_be_parsed += c
                stop_after_colon -= 1
                if stop_after_colon == 0:
                    return (delta_to_be_parsed, delta_text[i + 1 :])
            elif c == ",":
                delta_to_be_parsed += c
                stop_after_comma -= 1
                if stop_after_comma == 0:
                    return (delta_to_be_parsed, delta_text[i + 1 :])
            else:
                delta_to_be_parsed += c

        return (delta_to_be_parsed, "")

bot_token instance-attribute

bot_token = '[TOOL_CALLS]'

bot_token_id instance-attribute

bot_token_id = get(bot_token)

current_tool_id instance-attribute

current_tool_id: int = -1

current_tool_mistral_id instance-attribute

current_tool_mistral_id: str | None = None

current_tool_name instance-attribute

current_tool_name: str | None = None

fn_name_regex instance-attribute

fn_name_regex = compile(
    "([a-zA-Z0-9_-]+)(\\{[\\s\\S]*?\\}+)", DOTALL
)

parse_coro instance-attribute

parse_coro = parse_coro(
    update_stream_state_pre_v11_tokenizer()
)

starting_new_tool instance-attribute

starting_new_tool = False

streaming_state instance-attribute

tool_call_regex instance-attribute

tool_call_regex = compile('\\[{.*}\\]', DOTALL)

__init__

__init__(tokenizer: TokenizerLike)
Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
def __init__(self, tokenizer: TokenizerLike):
    """Set up parsing state and locate the [TOOL_CALLS] token.

    Raises:
        RuntimeError: if the tokenizer vocabulary has no [TOOL_CALLS] token.
    """
    super().__init__(tokenizer)

    if not isinstance(self.model_tokenizer, MistralTokenizer):
        logger.info("Non-Mistral tokenizer detected when using a Mistral model...")

    # initialize properties used for state when parsing tool calls in
    # streaming mode
    self.current_tool_id: int = -1
    self.streaming_state: StreamingState = StreamingState.WAITING_FOR_TOOL_START

    # For streaming pre v11 tokenizer tool calls
    self.current_tool_name: str | None = None
    self.current_tool_mistral_id: str | None = None
    self.starting_new_tool = False
    if _is_pre_v11_tokeniser(self.model_tokenizer):
        # ijson push-parser coroutine that drives the streaming state machine
        self.parse_coro = ijson.parse_coro(
            self.update_stream_state_pre_v11_tokenizer()
        )

    self.bot_token = "[TOOL_CALLS]"
    self.bot_token_id = self.vocab.get(self.bot_token)
    self.tool_call_regex = re.compile(r"\[{.*}\]", re.DOTALL)
    if not _is_pre_v11_tokeniser(self.model_tokenizer):
        # v11+ format: `name{...json args...}` pairs after [TOOL_CALLS]
        self.fn_name_regex = re.compile(
            r"([a-zA-Z0-9_-]+)(\{[\s\S]*?\}+)", re.DOTALL
        )
    else:
        self.fn_name_regex = None

    if self.bot_token_id is None:
        raise RuntimeError(
            "Mistral Tool Parser could not locate the tool call token in "
            "the tokenizer!"
        )

_extract_tool_calls_streaming

_extract_tool_calls_streaming(
    delta_text: str, delta_token_ids: Sequence[int]
) -> DeltaMessage | None

Extracts tool calls for Mistral models doing tool calls of the following format: [TOOL_CALLS]add{"a": 3.5, "b": 4}

Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
def _extract_tool_calls_streaming(
    self,
    delta_text: str,
    delta_token_ids: Sequence[int],
) -> DeltaMessage | None:
    """
    Extracts tool calls for Mistral models
    doing tool calls of the following format:
    `[TOOL_CALLS]add{"a": 3.5, "b": 4}`
    """
    additional_content: str = ""
    if self.streaming_state == StreamingState.WAITING_FOR_TOOL_START:
        # this is the first tool call
        assert self.bot_token_id in delta_token_ids
        if not delta_text.startswith(self.bot_token):
            # Text preceding [TOOL_CALLS] is plain content; re-anchor the
            # delta so it starts at the BOT token.
            additional_content += delta_text.split(self.bot_token)[0]
            delta_text = self.bot_token + "".join(
                delta_text.split(self.bot_token)[1:]
            )

    delta_tool_calls = self._generate_delta_tool_call(delta_text)
    if not additional_content and len(delta_tool_calls) == 0:
        if self.streaming_state in [
            StreamingState.PARSING_ARGUMENTS,
            StreamingState.PARSING_ARGUMENTS_COMPLETED,
            StreamingState.TOOL_COMPLETE,
            StreamingState.ALL_TOOLS_COMPLETE,
        ]:
            # Return an empty DeltaMessage once the tool calls are all done
            # so that finish_reason gets set.
            return DeltaMessage()
        else:
            # return None when the tool is not likely to be finished
            # This can occur when the name is being parsed for example
            # and we wait for the name to be complete
            # before sending the function name
            return None

    delta = DeltaMessage()
    if additional_content:
        delta.content = additional_content
    if len(delta_tool_calls) > 0:
        delta.tool_calls = delta_tool_calls

    # HACK: serving_chat.py inspects the internal state of tool parsers
    # when determining its final streaming delta, automatically
    # adding autocompleted JSON.
    # These two lines avoid that nonsense while ensuring finish_reason
    # is set to tool_calls when at least one tool is called.
    if delta_tool_calls and not self.prev_tool_call_arr:
        self.prev_tool_call_arr = [{"arguments": {}}]
    return delta

_extract_tool_calls_streaming_pre_v11_tokenizer

_extract_tool_calls_streaming_pre_v11_tokenizer(
    delta_text: str, delta_token_ids: Sequence[int]
) -> DeltaMessage | None

Extracts tool calls for Mistral models doing tool calls of the following format: [TOOL_CALLS][{"name": "add", "arguments":{"a": 3.5, "b": 4}}

Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
def _extract_tool_calls_streaming_pre_v11_tokenizer(
    self,
    delta_text: str,
    delta_token_ids: Sequence[int],
) -> DeltaMessage | None:
    """
    Extracts tool calls for Mistral models
    doing tool calls of the following format:
    `[TOOL_CALLS][{"name": "add", "arguments":{"a": 3.5, "b": 4}}`

    The delta text is sliced at structural JSON characters and each slice
    is fed to the ijson coroutine (``self.parse_coro``), whose events
    advance ``self.streaming_state``.  The state transitions observed
    around each slice decide what is appended to the outgoing tool-call
    deltas.  Returns None while waiting for more text (e.g. while a tool
    name is still incomplete).
    """
    assert self.parse_coro is not None
    content = None
    delta_tool_calls: list[DeltaToolCall] = []
    # Tool-call delta accumulated from this text delta; only emitted
    # if it is actually modified below.
    current_tool_call: DeltaToolCall = DeltaToolCall(
        index=self.current_tool_id, type="function"
    )
    current_tool_call_modified = False
    if self.bot_token_id in delta_token_ids:
        # this is the first tool call
        if not delta_text.startswith(self.bot_token):
            # text preceding [TOOL_CALLS] is regular content
            content = delta_text.split(self.bot_token)[0]
        delta_text = "".join(delta_text.split(self.bot_token)[1:])

    # Cut smartly the delta text to catch the ijson events
    # as ijson does not give us the index in the text at each event.
    # We need to cut so that we know
    # where in the text the events are emitted from.
    while len(delta_text) > 0:
        streaming_state_before_parse = self.streaming_state

        if self.streaming_state == StreamingState.WAITING_FOR_TOOL_START:
            delta_to_be_parsed, delta_text = self._split_delta(
                delta_text=delta_text,
                stop_after_opening_curly_braces=1,
            )
        elif self.streaming_state == StreamingState.WAITING_FOR_TOOL_KEY:
            # Wait until another key is sent
            # or the current tool is completed
            delta_to_be_parsed, delta_text = self._split_delta(
                delta_text=delta_text,
                stop_after_colon=1,
                stop_after_opening_curly_braces=1,
                # if the tool ends, we want to separate
                # at the start of the next tool
            )
        elif self.streaming_state == StreamingState.PARSING_NAME:
            delta_to_be_parsed, delta_text = self._split_delta(
                delta_text=delta_text,
                stop_after_comma=1,
                stop_after_closing_brackets=1,
            )
        elif self.streaming_state == StreamingState.WAITING_FOR_ARGUMENTS_START:
            delta_to_be_parsed, delta_text = self._split_delta(
                delta_text=delta_text,
                stop_after_opening_curly_braces=1,
            )
        elif self.streaming_state == StreamingState.PARSING_ARGUMENTS:
            delta_to_be_parsed, delta_text = self._split_delta(
                delta_text=delta_text,
                stop_after_closing_curly_braces=1,
                # we could be more clever
                # by listening to item.arguments.* start_map events
                # and know how many curly braces we can allow
            )
        elif self.streaming_state in [
            StreamingState.PARSING_ARGUMENTS_COMPLETED,
            StreamingState.PARSING_NAME_COMPLETED,
        ]:
            delta_to_be_parsed, delta_text = self._split_delta(
                delta_text=delta_text,
                stop_after_closing_curly_braces=1,
                stop_after_closing_brackets=1,
            )
        elif self.streaming_state == StreamingState.TOOL_COMPLETE:
            delta_to_be_parsed, delta_text = self._split_delta(
                delta_text=delta_text,
                stop_after_opening_curly_braces=1,
                stop_after_closing_brackets=1,
            )
        elif self.streaming_state == StreamingState.ALL_TOOLS_COMPLETE:
            # Past the closing `]` of the tool array: the remainder of the
            # delta is plain content, not tool-call text.
            content = delta_text
            delta_text = ""
        else:
            delta_to_be_parsed = delta_text
            delta_text = ""

        if self.streaming_state != StreamingState.ALL_TOOLS_COMPLETE:
            # Feed the slice to ijson; this may advance streaming_state.
            self.parse_coro.send(delta_to_be_parsed.encode("utf-8"))

        # Given the parsed text and the possible streaming state change,
        # let's add to the tool delta
        if (
            (streaming_state_before_parse != self.streaming_state)
            and streaming_state_before_parse
            in [StreamingState.WAITING_FOR_TOOL_START, StreamingState.TOOL_COMPLETE]
            and self.streaming_state
            not in [
                StreamingState.ALL_TOOLS_COMPLETE,
                StreamingState.TOOL_COMPLETE,
                StreamingState.WAITING_FOR_TOOL_START,
            ]
        ):
            # starting a new tool call
            if current_tool_call_modified:
                # flush the previous tool call's pending delta first
                if self.current_tool_mistral_id is not None:
                    current_tool_call.id = self.current_tool_mistral_id
                    self.current_tool_mistral_id = None
                delta_tool_calls.append(current_tool_call)
            current_tool_call_modified = False
            self.current_tool_id += 1
            self.current_tool_mistral_id = MistralToolCall.generate_random_id()
            current_tool_call = DeltaToolCall(
                index=self.current_tool_id,
                type="function",
            )
        if current_tool_call.function is None:
            current_tool_call.function = DeltaFunctionCall()

        if self.current_tool_name is not None:
            # we have the complete tool name
            current_tool_call_modified = True
            current_tool_call.function.name = self.current_tool_name
            self.current_tool_name = None
        if self.streaming_state == StreamingState.PARSING_NAME_COMPLETED:
            self.streaming_state = StreamingState.WAITING_FOR_TOOL_KEY
        if self.streaming_state in [
            StreamingState.PARSING_ARGUMENTS,
            StreamingState.PARSING_ARGUMENTS_COMPLETED,
        ]:
            if self.streaming_state == StreamingState.PARSING_ARGUMENTS_COMPLETED:
                self.streaming_state = StreamingState.WAITING_FOR_TOOL_KEY
            # the delta_to_be_parsed is part of arguments.
            current_tool_call_modified = True
            if current_tool_call.function.arguments is None:
                current_tool_call.function.arguments = delta_to_be_parsed
            else:
                current_tool_call.function.arguments += delta_to_be_parsed
            if streaming_state_before_parse != StreamingState.PARSING_ARGUMENTS:
                # It's the first chunk of arg. let's lstrip it
                current_tool_call.function.arguments = (
                    current_tool_call.function.arguments.lstrip()
                )

    if current_tool_call_modified:
        # flush the last (possibly partial) tool-call delta
        if self.current_tool_mistral_id is not None:
            current_tool_call.id = self.current_tool_mistral_id
            self.current_tool_mistral_id = None
        delta_tool_calls.append(current_tool_call)

    # HACK: serving_chat.py inspects the internal state of tool parsers
    # when determining its final streaming delta, automatically
    # adding autocompleted JSON.
    # These two lines avoid that nonsense while ensuring finish_reason
    # is set to tool_calls when at least one tool is called.
    if delta_tool_calls and not self.prev_tool_call_arr:
        self.prev_tool_call_arr = [{"arguments": {}}]

    if content or len(delta_tool_calls) > 0:
        delta_message = DeltaMessage()
        if content:
            delta_message.content = content
        if len(delta_tool_calls) > 0:
            delta_message.tool_calls = delta_tool_calls
        return delta_message
    else:
        if self.streaming_state == StreamingState.ALL_TOOLS_COMPLETE:
            # empty delta so finish_reason still gets set downstream
            return DeltaMessage()
        else:
            return None

_generate_delta_tool_call

_generate_delta_tool_call(
    delta_text: str,
) -> list[DeltaToolCall]
Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
def _generate_delta_tool_call(self, delta_text: str) -> list[DeltaToolCall]:
    """
    Turn one piece of streamed text into zero or more ``DeltaToolCall``s.

    Expects the layout where the function name precedes its JSON
    arguments (e.g. ``[TOOL_CALLS]add{"a": 3}``) -- presumably the
    post-v11 tokenizer format; confirm against the caller's dispatch.
    Mutates ``self.streaming_state``, ``self.current_tool_id`` and
    ``self.current_tool_name``, and recurses when the delta also
    contains the start of the next tool call.
    """
    if delta_text == "" or delta_text is None:
        return []
    delta_function_name = None
    tool_id = None
    # A leading BOT token outside of name/argument parsing opens a new call.
    if self.streaming_state not in [
        StreamingState.PARSING_NAME,
        StreamingState.PARSING_ARGUMENTS,
    ] and delta_text.startswith(self.bot_token):
        self.current_tool_id += 1
        self.streaming_state = StreamingState.PARSING_NAME
        delta_text = delta_text.replace(self.bot_token, "", 1)
    if self.streaming_state == StreamingState.PARSING_NAME:
        if self.current_tool_name is None:
            self.current_tool_name = ""
        # The name stops where the arguments start
        # And the arguments start with the `{` char
        if "{" in delta_text:
            tool_id = MistralToolCall.generate_random_id()
            delta_function_name = delta_text.split("{")[0]
            self.current_tool_name += delta_function_name
            delta_text = delta_text[len(delta_function_name) :]
            self.streaming_state = StreamingState.PARSING_ARGUMENTS
        else:
            # we want to send the tool name once it's complete
            self.current_tool_name += delta_text
            return []
    if self.streaming_state == StreamingState.PARSING_ARGUMENTS:
        next_function_text = None
        if self.bot_token in delta_text:
            # current tool call is over
            delta_arguments = ""
            delta_arguments += delta_text.split(self.bot_token)[0]
            next_function_text = delta_text[len(delta_arguments) :]
            self.streaming_state = StreamingState.TOOL_COMPLETE
        else:
            delta_arguments = delta_text
        ret: list[DeltaToolCall] = []
        if self.current_tool_name or delta_arguments:
            ret += [
                DeltaToolCall(
                    index=self.current_tool_id,
                    type="function",
                    id=tool_id,
                    function=DeltaFunctionCall(
                        name=self.current_tool_name, arguments=delta_arguments
                    ).model_dump(exclude_none=True),
                )
            ]
            self.current_tool_name = None
        if next_function_text:
            # the remainder starts the next tool call; recurse on it
            ret += self._generate_delta_tool_call(next_function_text)
        return ret
    # Should not happen
    return []

_split_delta

_split_delta(
    delta_text: str,
    stop_after_quotes: int = -1,
    stop_after_opening_curly_braces: int = -1,
    stop_after_closing_curly_braces: int = -1,
    stop_after_closing_brackets: int = -1,
    stop_after_colon: int = -1,
    stop_after_comma=-1,
) -> tuple[str, str]
Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
def _split_delta(
    self,
    delta_text: str,
    stop_after_quotes: int = -1,
    stop_after_opening_curly_braces: int = -1,
    stop_after_closing_curly_braces: int = -1,
    stop_after_closing_brackets: int = -1,
    stop_after_colon: int = -1,
    stop_after_comma=-1,
) -> tuple[str, str]:
    delta_to_be_parsed = ""
    for i, c in enumerate(delta_text):
        if c in ['"', "'"]:
            delta_to_be_parsed += c
            stop_after_quotes -= 1
            if stop_after_quotes == 0:
                return (delta_to_be_parsed, delta_text[i + 1 :])
        elif c == "{":
            delta_to_be_parsed += c
            stop_after_opening_curly_braces -= 1
            if stop_after_opening_curly_braces == 0:
                return (delta_to_be_parsed, delta_text[i + 1 :])
        elif c == "}":
            delta_to_be_parsed += c
            stop_after_closing_curly_braces -= 1
            if stop_after_closing_curly_braces == 0:
                return (delta_to_be_parsed, delta_text[i + 1 :])
        elif c == "]":
            delta_to_be_parsed += c
            stop_after_closing_brackets -= 1
            if stop_after_closing_brackets == 0:
                return (delta_to_be_parsed, delta_text[i + 1 :])
        elif c == ":":
            delta_to_be_parsed += c
            stop_after_colon -= 1
            if stop_after_colon == 0:
                return (delta_to_be_parsed, delta_text[i + 1 :])
        elif c == ",":
            delta_to_be_parsed += c
            stop_after_comma -= 1
            if stop_after_comma == 0:
                return (delta_to_be_parsed, delta_text[i + 1 :])
        else:
            delta_to_be_parsed += c

    return (delta_to_be_parsed, "")

adjust_request

adjust_request(
    request: ChatCompletionRequest,
) -> ChatCompletionRequest
Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
    """
    Apply the base-class adjustments, then force special tokens to be
    kept in the output whenever tools are requested on a non-Mistral
    tokenizer (the chat-template path needs the literal TOOL_CALL token
    for tool detection).
    """
    request = super().adjust_request(request)

    uses_mistral_tokenizer = isinstance(self.model_tokenizer, MistralTokenizer)
    tools_requested = bool(request.tools) and request.tool_choice != "none"

    # Note: we deliberately do NOT set skip_special_tokens=False for
    # MistralTokenizer, as the two are incompatible.
    if tools_requested and not uses_mistral_tokenizer:
        request.skip_special_tokens = False
    return request

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation

Extract the tool calls from a complete model response. This requires find-and-replacing single quotes with double quotes for JSON parsing, so make sure your tool call arguments never include quotes.

Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """
    Extract the tool calls from a complete (non-streaming) model
    response.  The fallback path find-and-replaces single quotes with
    double quotes for JSON parsing, so tool-call arguments must never
    contain quotes themselves.
    """
    # Without a [TOOL_CALLS] token the whole output is plain text.
    if self.bot_token not in model_output:
        return ExtractedToolCallInformation(
            tools_called=False, tool_calls=[], content=model_output
        )

    # Drop every BOT token before attempting to parse the JSON payload.
    tool_content = model_output.replace(self.bot_token, "").strip()

    try:
        try:
            if self.fn_name_regex:
                # Name is encoded outside the serialized JSON dump;
                # only the arguments are a JSON string.
                function_call_arr = []
                for single_tool_content in model_output.split(self.bot_token):
                    for match in self.fn_name_regex.findall(single_tool_content):
                        # group 0 = function name, group 1 = serialized args
                        function_call_arr.append(
                            {"name": match[0], "arguments": json.loads(match[1])}
                        )
            else:
                # Payload is directly a JSON array of tool calls.
                function_call_arr = json.loads(tool_content)
        except json.JSONDecodeError:
            # use a regex to find the part corresponding to the tool call.
            # NOTE: This use case should not happen if the model is trained
            # correctly. It's an easy possible fix so it's included, but
            # can be brittle for very complex / highly nested tool calls
            raw_tool_call = self.tool_call_regex.findall(tool_content)[0]
            function_call_arr = json.loads(raw_tool_call)

        # Build the tool-call objects; arguments are re-serialized
        # because the API represents them as a JSON string.
        tool_calls: list[MistralToolCall] = []
        for raw_function_call in function_call_arr:
            serialized_arguments = json.dumps(
                raw_function_call["arguments"], ensure_ascii=False
            )
            tool_calls.append(
                MistralToolCall(
                    type="function",
                    function=FunctionCall(
                        name=raw_function_call["name"],
                        arguments=serialized_arguments,
                    ),
                )
            )

        # Anything the model emitted before the first BOT token is content.
        content = model_output.split(self.bot_token)[0]
        return ExtractedToolCallInformation(
            tools_called=True,
            tool_calls=tool_calls,
            content=content if len(content) > 0 else None,
        )

    except Exception:
        logger.exception("Error in extracting tool call from response.")
        # Degrade gracefully: surface the token-stripped text as content.
        return ExtractedToolCallInformation(
            tools_called=False, tool_calls=[], content=tool_content
        )

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> DeltaMessage | None
Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> DeltaMessage | None:
    """
    Streaming entry point: until a [TOOL_CALLS] token has been
    generated, forward deltas as plain content; afterwards, dispatch to
    the tokenizer-generation-specific tool-call parser.
    """
    # No tool-call token generated so far -> still plain content.
    if self.bot_token_id not in current_token_ids:
        return DeltaMessage(content=delta_text)

    try:
        # Pick the parser matching this tokenizer's tool-call format.
        if _is_pre_v11_tokeniser(self.model_tokenizer):
            parse = self._extract_tool_calls_streaming_pre_v11_tokenizer
        else:
            parse = self._extract_tool_calls_streaming
        return parse(delta_text=delta_text, delta_token_ids=delta_token_ids)
    except Exception:
        logger.exception("Error trying to handle streaming tool call.")
        return None

update_stream_state_pre_v11_tokenizer

update_stream_state_pre_v11_tokenizer()
Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
@ijson.coroutine
def update_stream_state_pre_v11_tokenizer(self):
    """
    ijson event-sink coroutine: receives ``(prefix, event, value)``
    tuples for the pre-v11 tool-call JSON array and advances
    ``self.streaming_state`` accordingly (also capturing the complete
    tool name).  The guarded combinations are mutually exclusive, so a
    single branch fires per event.
    """
    while True:
        prefix, event, value = yield

        if prefix == "" and event == "end_array":
            # closing `]` of the top-level tool-call array
            self.streaming_state = StreamingState.ALL_TOOLS_COMPLETE
        elif prefix == "item":
            if event == "start_map":
                # `{` opening a tool-call object
                self.streaming_state = StreamingState.WAITING_FOR_TOOL_KEY
            elif event == "end_map":
                # `}` closing a tool-call object
                self.streaming_state = StreamingState.TOOL_COMPLETE
            elif event == "map_key":
                if value == "name":
                    self.streaming_state = StreamingState.PARSING_NAME
                elif value == "arguments":
                    self.streaming_state = (
                        StreamingState.WAITING_FOR_ARGUMENTS_START
                    )
        elif prefix == "item.name" and event == "string":
            # full name value is available in one event
            self.current_tool_name = value
            self.streaming_state = StreamingState.PARSING_NAME_COMPLETED
        elif prefix == "item.arguments":
            if event == "start_map":
                self.streaming_state = StreamingState.PARSING_ARGUMENTS
            elif event == "end_map":
                self.streaming_state = StreamingState.PARSING_ARGUMENTS_COMPLETED

StreamingState

Bases: Enum

Enum for tracking the current streaming parsing state.

Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
class StreamingState(Enum):
    """States of the incremental tool-call parser while streaming."""

    # No tool-call object has been opened yet.
    WAITING_FOR_TOOL_START = auto()
    # Inside a tool object, waiting for the next key ("name"/"arguments")
    # or for the object to close.
    WAITING_FOR_TOOL_KEY = auto()
    # Reading the value of the "name" key.
    PARSING_NAME = auto()
    PARSING_NAME_COMPLETED = auto()
    # Saw the "arguments" key; waiting for its opening `{`.
    WAITING_FOR_ARGUMENTS_START = auto()
    # Accumulating the arguments object.
    PARSING_ARGUMENTS = auto()
    PARSING_ARGUMENTS_COMPLETED = auto()
    # The current tool object closed.
    TOOL_COMPLETE = auto()
    # The top-level tool array closed; remaining text is plain content.
    ALL_TOOLS_COMPLETE = auto()

ALL_TOOLS_COMPLETE class-attribute instance-attribute

ALL_TOOLS_COMPLETE = auto()

PARSING_ARGUMENTS class-attribute instance-attribute

PARSING_ARGUMENTS = auto()

PARSING_ARGUMENTS_COMPLETED class-attribute instance-attribute

PARSING_ARGUMENTS_COMPLETED = auto()

PARSING_NAME class-attribute instance-attribute

PARSING_NAME = auto()

PARSING_NAME_COMPLETED class-attribute instance-attribute

PARSING_NAME_COMPLETED = auto()

TOOL_COMPLETE class-attribute instance-attribute

TOOL_COMPLETE = auto()

WAITING_FOR_ARGUMENTS_START class-attribute instance-attribute

WAITING_FOR_ARGUMENTS_START = auto()

WAITING_FOR_TOOL_KEY class-attribute instance-attribute

WAITING_FOR_TOOL_KEY = auto()

WAITING_FOR_TOOL_START class-attribute instance-attribute

WAITING_FOR_TOOL_START = auto()

_is_pre_v11_tokeniser

_is_pre_v11_tokeniser(
    model_tokenizer: TokenizerLike,
) -> bool
Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
def _is_pre_v11_tokeniser(model_tokenizer: TokenizerLike) -> bool:
    """Return True unless the tokenizer is a MistralTokenizer of version >= 11."""
    is_v11_or_later = (
        isinstance(model_tokenizer, MistralTokenizer)
        and model_tokenizer.version >= 11
    )
    return not is_v11_or_later