Skip to content

vllm.parser

Modules:

Name Description
abstract_parser
minimax_m2_parser

MiniMax M2 Parser - A unified parser for MiniMax M2 models.

parser_manager

_PARSERS_TO_REGISTER module-attribute

_PARSERS_TO_REGISTER = {
    "minimax_m2": ("minimax_m2_parser", "MiniMaxM2Parser")
}

__all__ module-attribute

__all__ = [
    "Parser",
    "DelegatingParser",
    "ParserManager",
    "_WrappedParser",
]

DelegatingParser

Bases: Parser

A Parser implementation that delegates to separate ReasoningParser and ToolParser instances.

This is the recommended base class for creating model-specific parsers that combine existing reasoning and tool parser implementations. Subclasses should set self._reasoning_parser and self._tool_parser in their __init__ method.

If either parser is None, the corresponding methods will return default values (no reasoning extraction, no tool calls).

Source code in vllm/parser/abstract_parser.py
class DelegatingParser(Parser):
    """
    A Parser that forwards its work to optional ReasoningParser and
    ToolParser delegates.

    This is the recommended base class for model-specific parsers that
    combine existing reasoning and tool parser implementations. Subclasses
    are expected to assign `self._reasoning_parser` and `self._tool_parser`
    inside their `__init__` method.

    A missing (None) delegate makes the corresponding methods fall back to
    neutral defaults: no reasoning is extracted and no tool calls are
    reported.
    """

    def extract_reasoning(
        self,
        model_output: str,
        request: ChatCompletionRequest | ResponsesRequest,
    ) -> tuple[str | None, str | None]:
        """Split `model_output` into (reasoning, content) via the delegate."""
        delegate = self._reasoning_parser
        if delegate is not None:
            return delegate.extract_reasoning(model_output, request)
        # No reasoning delegate: the whole output is plain content.
        return None, model_output

    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
        """Stream reasoning extraction through the delegate, if present."""
        delegate = self._reasoning_parser
        if delegate is None:
            # Without a delegate, pass the delta through as plain content.
            return DeltaMessage(content=delta_text)
        return delegate.extract_reasoning_streaming(
            previous_text,
            current_text,
            delta_text,
            previous_token_ids,
            current_token_ids,
            delta_token_ids,
        )

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """Extract tool calls via the delegate; report none when absent."""
        delegate = self._tool_parser
        if delegate is not None:
            return delegate.extract_tool_calls(model_output, request)
        return ExtractedToolCallInformation(
            tools_called=False, tool_calls=[], content=model_output
        )

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """Stream tool-call extraction through the delegate, if present."""
        delegate = self._tool_parser
        if delegate is None:
            return None
        return delegate.extract_tool_calls_streaming(
            previous_text,
            current_text,
            delta_text,
            previous_token_ids,
            current_token_ids,
            delta_token_ids,
            request,
        )

extract_reasoning

extract_reasoning(
    model_output: str,
    request: ChatCompletionRequest | ResponsesRequest,
) -> tuple[str | None, str | None]
Source code in vllm/parser/abstract_parser.py
def extract_reasoning(
    self,
    model_output: str,
    request: ChatCompletionRequest | ResponsesRequest,
) -> tuple[str | None, str | None]:
    if self._reasoning_parser is None:
        return None, model_output
    return self._reasoning_parser.extract_reasoning(model_output, request)

extract_reasoning_streaming

extract_reasoning_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
) -> DeltaMessage | None
Source code in vllm/parser/abstract_parser.py
def extract_reasoning_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
) -> DeltaMessage | None:
    if self._reasoning_parser is None:
        return DeltaMessage(content=delta_text)
    return self._reasoning_parser.extract_reasoning_streaming(
        previous_text,
        current_text,
        delta_text,
        previous_token_ids,
        current_token_ids,
        delta_token_ids,
    )

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation
Source code in vllm/parser/abstract_parser.py
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    if self._tool_parser is None:
        return ExtractedToolCallInformation(
            tools_called=False, tool_calls=[], content=model_output
        )
    return self._tool_parser.extract_tool_calls(model_output, request)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> DeltaMessage | None
Source code in vllm/parser/abstract_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> DeltaMessage | None:
    if self._tool_parser is None:
        return None
    return self._tool_parser.extract_tool_calls_streaming(
        previous_text,
        current_text,
        delta_text,
        previous_token_ids,
        current_token_ids,
        delta_token_ids,
        request,
    )

Parser

Abstract Parser class that unifies ReasoningParser and ToolParser into a single interface for parsing model output.

This class provides a unified way to handle both reasoning extraction (e.g., chain-of-thought content in `<think>` tags) and tool call extraction (e.g., function calls in XML/JSON format) from model outputs.

Subclasses can either:

1. Override the abstract methods directly for custom parsing logic
2. Set `reasoning_parser` and `tool_parser` properties to delegate to existing parser implementations

Class Attributes

reasoning_parser_cls: The ReasoningParser class to use (for compatibility with code that needs the class, not instance). tool_parser_cls: The ToolParser class to use (for compatibility with code that needs the class, not instance).

Source code in vllm/parser/abstract_parser.py
class Parser:
    """
    Abstract Parser class that unifies ReasoningParser and ToolParser into
    a single interface for parsing model output.

    This class provides a unified way to handle both reasoning extraction
    (e.g., chain-of-thought content in <think> tags) and tool call extraction
    (e.g., function calls in XML/JSON format) from model outputs.

    Subclasses can either:
    1. Override the abstract methods directly for custom parsing logic
    2. Set `reasoning_parser` and `tool_parser` properties to delegate to
       existing parser implementations

    Class Attributes:
        reasoning_parser_cls: The ReasoningParser class to use (for compatibility
            with code that needs the class, not instance).
        tool_parser_cls: The ToolParser class to use (for compatibility with
            code that needs the class, not instance).
    """

    # Class-level parser classes for compatibility with existing patterns
    # Subclasses should override these if they use specific parser classes
    # NOTE(review): this class does not inherit from abc.ABC, so the
    # @abstractmethod markers below are documentation-only — Python will not
    # block instantiation of a subclass that leaves them unimplemented.
    reasoning_parser_cls: type[ReasoningParser] | None = None
    tool_parser_cls: type[ToolParser] | None = None

    def __init__(self, tokenizer: TokenizerLike, *args, **kwargs):
        """
        Initialize the Parser.

        Args:
            tokenizer: The tokenizer used by the model. This is required for
                token-based parsing operations.
        """
        self.model_tokenizer = tokenizer
        # Optional delegate parsers; populated by subclasses or through the
        # writable properties below. Both default to "no parser".
        self._reasoning_parser: ReasoningParser | None = None
        self._tool_parser: ToolParser | None = None

    @cached_property
    def vocab(self) -> dict[str, int]:
        """Get the vocabulary mapping from tokens to IDs."""
        # Cached: the tokenizer's vocabulary is fetched once per instance.
        return self.model_tokenizer.get_vocab()

    @property
    def reasoning_parser(self) -> ReasoningParser | None:
        """The underlying reasoning parser, if any."""
        return self._reasoning_parser

    @reasoning_parser.setter
    def reasoning_parser(self, parser: ReasoningParser | None) -> None:
        self._reasoning_parser = parser

    @property
    def tool_parser(self) -> ToolParser | None:
        """The underlying tool parser, if any."""
        return self._tool_parser

    @tool_parser.setter
    def tool_parser(self, parser: ToolParser | None) -> None:
        self._tool_parser = parser

    # ========== Reasoning Parser Methods ==========

    @abstractmethod
    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        """
        Check if the reasoning content ends in the input_ids.

        Used by structured engines like `xgrammar` to check if the
        reasoning content ends in the model output.

        Args:
            input_ids: The token IDs of the model output.

        Returns:
            True if the reasoning content ends in the input_ids.
        """

    def is_reasoning_end_streaming(
        self, input_ids: list[int], delta_ids: list[int]
    ) -> bool:
        """
        Check if the reasoning content ends during a decode step.

        Args:
            input_ids: The entire model output token IDs.
            delta_ids: The last few computed tokens at the current decode step.

        Returns:
            True if the reasoning content ends in the delta_ids.
        """
        # Default implementation ignores delta_ids and re-checks the full
        # output; subclasses may override with an incremental check.
        return self.is_reasoning_end(input_ids)

    @abstractmethod
    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        """
        Extract content token IDs from the input_ids.

        This extracts the non-reasoning content (e.g., everything after
        the </think> tag).

        Args:
            input_ids: The token IDs of the model output.

        Returns:
            The extracted content token IDs.
        """

    @abstractmethod
    def extract_reasoning(
        self,
        model_output: str,
        request: ChatCompletionRequest | ResponsesRequest,
    ) -> tuple[str | None, str | None]:
        """
        Extract reasoning content from a complete model-generated string.

        Used for non-streaming responses where we have the entire model
        response available before sending to the client.

        Args:
            model_output: The complete model-generated string.
            request: The request object used to generate the output.

        Returns:
            A tuple of (reasoning_content, response_content).
        """

    @abstractmethod
    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
        """
        Extract reasoning content from a streaming delta message.

        Args:
            previous_text: Text from all previous tokens.
            current_text: Text including the current delta.
            delta_text: The new text in this delta.
            previous_token_ids: Token IDs from previous generation.
            current_token_ids: All token IDs including current.
            delta_token_ids: The new token IDs in this delta.

        Returns:
            A DeltaMessage with reasoning and/or content fields, or None.
        """

    # ========== Tool Parser Methods ==========

    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
        """
        Adjust the request parameters for tool calling.

        Can be overridden by subclasses to modify request parameters
        (e.g., setting structured output schemas for tool calling).

        Args:
            request: The original request.

        Returns:
            The adjusted request.
        """
        # Default: pass the request through unmodified.
        return request

    @abstractmethod
    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """
        Extract tool calls from a complete model-generated string.

        Used for non-streaming responses.

        Args:
            model_output: The complete model-generated string.
            request: The request object used to generate the output.

        Returns:
            ExtractedToolCallInformation containing the tool calls.
        """

    @abstractmethod
    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """
        Extract tool calls from a streaming delta message.

        Args:
            previous_text: Text from all previous tokens.
            current_text: Text including the current delta.
            delta_text: The new text in this delta.
            previous_token_ids: Token IDs from previous generation.
            current_token_ids: All token IDs including current.
            delta_token_ids: The new token IDs in this delta.
            request: The request object.

        Returns:
            A DeltaMessage with tool_calls field, or None.
        """
_reasoning_parser instance-attribute

_reasoning_parser: ReasoningParser | None = None

_tool_parser instance-attribute

_tool_parser: ToolParser | None = None

model_tokenizer instance-attribute

model_tokenizer = tokenizer

reasoning_parser property writable

reasoning_parser: ReasoningParser | None

The underlying reasoning parser, if any.

reasoning_parser_cls class-attribute instance-attribute

reasoning_parser_cls: type[ReasoningParser] | None = None

tool_parser property writable

tool_parser: ToolParser | None

The underlying tool parser, if any.

tool_parser_cls class-attribute instance-attribute

tool_parser_cls: type[ToolParser] | None = None

vocab cached property

vocab: dict[str, int]

Get the vocabulary mapping from tokens to IDs.

__init__

__init__(tokenizer: TokenizerLike, *args, **kwargs)

Initialize the Parser.

Parameters:

Name Type Description Default
tokenizer TokenizerLike

The tokenizer used by the model. This is required for token-based parsing operations.

required
Source code in vllm/parser/abstract_parser.py
def __init__(self, tokenizer: TokenizerLike, *args, **kwargs):
    """
    Initialize the Parser.

    Args:
        tokenizer: The tokenizer used by the model. This is required for
            token-based parsing operations.
    """
    self.model_tokenizer = tokenizer
    self._reasoning_parser: ReasoningParser | None = None
    self._tool_parser: ToolParser | None = None

adjust_request

adjust_request(
    request: ChatCompletionRequest,
) -> ChatCompletionRequest

Adjust the request parameters for tool calling.

Can be overridden by subclasses to modify request parameters (e.g., setting structured output schemas for tool calling).

Parameters:

Name Type Description Default
request ChatCompletionRequest

The original request.

required

Returns:

Type Description
ChatCompletionRequest

The adjusted request.

Source code in vllm/parser/abstract_parser.py
def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
    """
    Adjust the request parameters for tool calling.

    Can be overridden by subclasses to modify request parameters
    (e.g., setting structured output schemas for tool calling).

    Args:
        request: The original request.

    Returns:
        The adjusted request.
    """
    return request

extract_content_ids abstractmethod

extract_content_ids(input_ids: list[int]) -> list[int]

Extract content token IDs from the input_ids.

This extracts the non-reasoning content (e.g., everything after the `</think>` tag).

Parameters:

Name Type Description Default
input_ids list[int]

The token IDs of the model output.

required

Returns:

Type Description
list[int]

The extracted content token IDs.

Source code in vllm/parser/abstract_parser.py
@abstractmethod
def extract_content_ids(self, input_ids: list[int]) -> list[int]:
    """
    Extract content token IDs from the input_ids.

    This extracts the non-reasoning content (e.g., everything after
    the </think> tag).

    Args:
        input_ids: The token IDs of the model output.

    Returns:
        The extracted content token IDs.
    """

extract_reasoning abstractmethod

extract_reasoning(
    model_output: str,
    request: ChatCompletionRequest | ResponsesRequest,
) -> tuple[str | None, str | None]

Extract reasoning content from a complete model-generated string.

Used for non-streaming responses where we have the entire model response available before sending to the client.

Parameters:

Name Type Description Default
model_output str

The complete model-generated string.

required
request ChatCompletionRequest | ResponsesRequest

The request object used to generate the output.

required

Returns:

Type Description
tuple[str | None, str | None]

A tuple of (reasoning_content, response_content).

Source code in vllm/parser/abstract_parser.py
@abstractmethod
def extract_reasoning(
    self,
    model_output: str,
    request: ChatCompletionRequest | ResponsesRequest,
) -> tuple[str | None, str | None]:
    """
    Extract reasoning content from a complete model-generated string.

    Used for non-streaming responses where we have the entire model
    response available before sending to the client.

    Args:
        model_output: The complete model-generated string.
        request: The request object used to generate the output.

    Returns:
        A tuple of (reasoning_content, response_content).
    """

extract_reasoning_streaming abstractmethod

extract_reasoning_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
) -> DeltaMessage | None

Extract reasoning content from a streaming delta message.

Parameters:

Name Type Description Default
previous_text str

Text from all previous tokens.

required
current_text str

Text including the current delta.

required
delta_text str

The new text in this delta.

required
previous_token_ids Sequence[int]

Token IDs from previous generation.

required
current_token_ids Sequence[int]

All token IDs including current.

required
delta_token_ids Sequence[int]

The new token IDs in this delta.

required

Returns:

Type Description
DeltaMessage | None

A DeltaMessage with reasoning and/or content fields, or None.

Source code in vllm/parser/abstract_parser.py
@abstractmethod
def extract_reasoning_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
) -> DeltaMessage | None:
    """
    Extract reasoning content from a streaming delta message.

    Args:
        previous_text: Text from all previous tokens.
        current_text: Text including the current delta.
        delta_text: The new text in this delta.
        previous_token_ids: Token IDs from previous generation.
        current_token_ids: All token IDs including current.
        delta_token_ids: The new token IDs in this delta.

    Returns:
        A DeltaMessage with reasoning and/or content fields, or None.
    """

extract_tool_calls abstractmethod

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation

Extract tool calls from a complete model-generated string.

Used for non-streaming responses.

Parameters:

Name Type Description Default
model_output str

The complete model-generated string.

required
request ChatCompletionRequest

The request object used to generate the output.

required

Returns:

Type Description
ExtractedToolCallInformation

ExtractedToolCallInformation containing the tool calls.

Source code in vllm/parser/abstract_parser.py
@abstractmethod
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """
    Extract tool calls from a complete model-generated string.

    Used for non-streaming responses.

    Args:
        model_output: The complete model-generated string.
        request: The request object used to generate the output.

    Returns:
        ExtractedToolCallInformation containing the tool calls.
    """

extract_tool_calls_streaming abstractmethod

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> DeltaMessage | None

Extract tool calls from a streaming delta message.

Parameters:

Name Type Description Default
previous_text str

Text from all previous tokens.

required
current_text str

Text including the current delta.

required
delta_text str

The new text in this delta.

required
previous_token_ids Sequence[int]

Token IDs from previous generation.

required
current_token_ids Sequence[int]

All token IDs including current.

required
delta_token_ids Sequence[int]

The new token IDs in this delta.

required
request ChatCompletionRequest

The request object.

required

Returns:

Type Description
DeltaMessage | None

A DeltaMessage with tool_calls field, or None.

Source code in vllm/parser/abstract_parser.py
@abstractmethod
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> DeltaMessage | None:
    """
    Extract tool calls from a streaming delta message.

    Args:
        previous_text: Text from all previous tokens.
        current_text: Text including the current delta.
        delta_text: The new text in this delta.
        previous_token_ids: Token IDs from previous generation.
        current_token_ids: All token IDs including current.
        delta_token_ids: The new token IDs in this delta.
        request: The request object.

    Returns:
        A DeltaMessage with tool_calls field, or None.
    """

is_reasoning_end abstractmethod

is_reasoning_end(input_ids: list[int]) -> bool

Check if the reasoning content ends in the input_ids.

Used by structured engines like xgrammar to check if the reasoning content ends in the model output.

Parameters:

Name Type Description Default
input_ids list[int]

The token IDs of the model output.

required

Returns:

Type Description
bool

True if the reasoning content ends in the input_ids.

Source code in vllm/parser/abstract_parser.py
@abstractmethod
def is_reasoning_end(self, input_ids: list[int]) -> bool:
    """
    Check if the reasoning content ends in the input_ids.

    Used by structured engines like `xgrammar` to check if the
    reasoning content ends in the model output.

    Args:
        input_ids: The token IDs of the model output.

    Returns:
        True if the reasoning content ends in the input_ids.
    """

is_reasoning_end_streaming

is_reasoning_end_streaming(
    input_ids: list[int], delta_ids: list[int]
) -> bool

Check if the reasoning content ends during a decode step.

Parameters:

Name Type Description Default
input_ids list[int]

The entire model output token IDs.

required
delta_ids list[int]

The last few computed tokens at the current decode step.

required

Returns:

Type Description
bool

True if the reasoning content ends in the delta_ids.

Source code in vllm/parser/abstract_parser.py
def is_reasoning_end_streaming(
    self, input_ids: list[int], delta_ids: list[int]
) -> bool:
    """
    Check if the reasoning content ends during a decode step.

    Args:
        input_ids: The entire model output token IDs.
        delta_ids: The last few computed tokens at the current decode step.

    Returns:
        True if the reasoning content ends in the delta_ids.
    """
    return self.is_reasoning_end(input_ids)

ParserManager

Central registry for Parser implementations.

Supports two registration modes:
  - Eager registration via `register_module`
  - Lazy registration via `register_lazy_module`
Source code in vllm/parser/parser_manager.py
class ParserManager:
    """
    Central registry for Parser implementations.

    Supports two registration modes:
      - Eager registration via `register_module`
      - Lazy registration via `register_lazy_module`
    """

    # Process-wide registries: these are class attributes mutated in place,
    # so all callers share the same state.
    parsers: dict[str, type[Parser]] = {}
    lazy_parsers: dict[str, tuple[str, str]] = {}  # name -> (module_path, class_name)

    @classmethod
    def get_parser_internal(cls, name: str) -> type[Parser]:
        """
        Retrieve a registered or lazily registered Parser class.

        Args:
            name: The registered name of the parser.

        Returns:
            The Parser class.

        Raises:
            KeyError: If no parser is found under the given name.
        """
        # Eagerly registered (or previously loaded-and-cached) parsers win.
        if name in cls.parsers:
            return cls.parsers[name]

        if name in cls.lazy_parsers:
            return cls._load_lazy_parser(name)

        registered = ", ".join(cls.list_registered())
        raise KeyError(f"Parser '{name}' not found. Available parsers: {registered}")

    @classmethod
    def _load_lazy_parser(cls, name: str) -> type[Parser]:
        """Import and register a lazily loaded parser."""
        # Local import, presumably to avoid a circular import at module
        # load time — confirm against vllm.parser.__init__.
        from vllm.parser.abstract_parser import Parser

        module_path, class_name = cls.lazy_parsers[name]
        try:
            mod = importlib.import_module(module_path)
            parser_cls = getattr(mod, class_name)
            if not issubclass(parser_cls, Parser):
                raise TypeError(
                    f"{class_name} in {module_path} is not a Parser subclass."
                )
            cls.parsers[name] = parser_cls  # cache
            return parser_cls
        except Exception as e:
            # Log with traceback, then propagate to the caller.
            logger.exception(
                "Failed to import lazy parser '%s' from %s: %s",
                name,
                module_path,
                e,
            )
            raise

    @classmethod
    def _register_module(
        cls,
        module: type[Parser],
        module_name: str | list[str] | None = None,
        force: bool = True,
    ) -> None:
        """Register a Parser class immediately."""
        from vllm.parser.abstract_parser import Parser

        if not issubclass(module, Parser):
            raise TypeError(
                f"module must be subclass of Parser, but got {type(module)}"
            )

        if module_name is None:
            module_names = [module.__name__]
        elif isinstance(module_name, str):
            module_names = [module_name]
        elif is_list_of(module_name, str):
            module_names = module_name
        else:
            raise TypeError("module_name must be str, list[str], or None.")

        # NOTE(review): force defaults to True, so re-registering an existing
        # name silently overwrites the previous entry.
        for name in module_names:
            if not force and name in cls.parsers:
                existed = cls.parsers[name]
                raise KeyError(f"{name} is already registered at {existed.__module__}")
            cls.parsers[name] = module

    @classmethod
    def register_lazy_module(cls, name: str, module_path: str, class_name: str) -> None:
        """
        Register a lazy module mapping for delayed import.

        Example:
            ParserManager.register_lazy_module(
                name="minimax_m2",
                module_path="vllm.parser.minimax_m2_parser",
                class_name="MiniMaxM2Parser",
            )
        """
        cls.lazy_parsers[name] = (module_path, class_name)

    @classmethod
    def register_module(
        cls,
        name: str | list[str] | None = None,
        force: bool = True,
        module: type[Parser] | None = None,
    ) -> type[Parser] | Callable[[type[Parser]], type[Parser]]:
        """
        Register a Parser class.

        Can be used as a decorator or called directly.

        Usage:
            @ParserManager.register_module("my_parser")
            class MyParser(Parser):
                ...

        Or:
            ParserManager.register_module(module=MyParser)
        """
        if not isinstance(force, bool):
            raise TypeError(f"force must be a boolean, but got {type(force)}")

        # Immediate registration
        if module is not None:
            cls._register_module(module=module, module_name=name, force=force)
            return module

        # Decorator usage
        # NOTE(review): the decorator path registers *lazily* (it records the
        # module path and class name in lazy_parsers) even though the class is
        # already imported; `force` is not consulted on this path.
        def _decorator(obj: type[Parser]) -> type[Parser]:
            module_path = obj.__module__
            class_name = obj.__name__

            if isinstance(name, str):
                names = [name]
            elif is_list_of(name, str):
                names = name
            else:
                names = [class_name]

            for n in names:
                cls.lazy_parsers[n] = (module_path, class_name)

            return obj

        return _decorator

    @classmethod
    def list_registered(cls) -> list[str]:
        """Return names of all registered parsers."""
        return sorted(set(cls.parsers.keys()) | set(cls.lazy_parsers.keys()))

    @classmethod
    def import_parser(cls, plugin_path: str) -> None:
        """Import a user-defined parser from an arbitrary path."""
        module_name = os.path.splitext(os.path.basename(plugin_path))[0]
        # Best-effort: import failures are logged with traceback but not
        # re-raised.
        try:
            import_from_path(module_name, plugin_path)
        except Exception:
            logger.exception(
                "Failed to load module '%s' from %s.", module_name, plugin_path
            )

    @classmethod
    def get_tool_parser(
        cls,
        tool_parser_name: str | None = None,
        enable_auto_tools: bool = False,
        model_name: str | None = None,
    ) -> type[ToolParser] | None:
        """Get the tool parser based on the name."""
        from vllm.tool_parsers import ToolParserManager

        parser: type[ToolParser] | None = None
        # Tool parsing is only active when auto tool choice is enabled and a
        # parser name was provided.
        if not enable_auto_tools or tool_parser_name is None:
            return parser
        logger.info('"auto" tool choice has been enabled.')

        try:
            if (
                tool_parser_name == "pythonic"
                and model_name
                and model_name.startswith("meta-llama/Llama-3.2")
            ):
                logger.warning(
                    "Llama3.2 models may struggle to emit valid pythonic tool calls"
                )
            parser = ToolParserManager.get_tool_parser(tool_parser_name)
        except Exception as e:
            raise TypeError(
                "Error: --enable-auto-tool-choice requires "
                f"tool_parser:'{tool_parser_name}' which has not "
                "been registered"
            ) from e
        return parser

    @classmethod
    def get_reasoning_parser(
        cls,
        reasoning_parser_name: str | None,
    ) -> type[ReasoningParser] | None:
        """Get the reasoning parser based on the name."""
        from vllm.reasoning import ReasoningParserManager

        parser: type[ReasoningParser] | None = None
        if not reasoning_parser_name:
            return None
        try:
            parser = ReasoningParserManager.get_reasoning_parser(reasoning_parser_name)
            # NOTE(review): assert is stripped under `python -O`; this relies
            # on the manager raising (not returning None) for unknown names.
            assert parser is not None
        except Exception as e:
            raise TypeError(f"{reasoning_parser_name=} has not been registered") from e
        return parser

    @classmethod
    def get_parser(
        cls,
        tool_parser_name: str | None = None,
        reasoning_parser_name: str | None = None,
        enable_auto_tools: bool = False,
        model_name: str | None = None,
    ) -> type[Parser] | None:
        """
        Get a unified Parser that handles both reasoning and tool parsing.

        This method checks if a unified Parser exists that can handle both
        reasoning extraction and tool call parsing. If no unified parser
        exists, it creates a DelegatingParser that wraps the individual
        reasoning and tool parsers.

        Args:
            tool_parser_name: The name of the tool parser.
            reasoning_parser_name: The name of the reasoning parser.
            enable_auto_tools: Whether auto tool choice is enabled.
            model_name: The model name for parser-specific warnings.

        Returns:
            A Parser class, or None if neither parser is specified.
        """
        from vllm.parser.abstract_parser import _WrappedParser

        if not tool_parser_name and not reasoning_parser_name:
            return None

        # Strategy 1: If both names match, check for a unified parser with that name
        if tool_parser_name and tool_parser_name == reasoning_parser_name:
            try:
                parser = cls.get_parser_internal(tool_parser_name)
                logger.info(
                    "Using unified parser '%s' for both reasoning and tool parsing.",
                    tool_parser_name,
                )
                return parser
            except KeyError:
                pass  # No unified parser with this name

        # Strategy 2: Check for parser with either name
        for name in [tool_parser_name, reasoning_parser_name]:
            if name:
                try:
                    parser = cls.get_parser_internal(name)
                    logger.info(
                        "Using unified parser '%s' for reasoning and tool parsing.",
                        name,
                    )
                    return parser
                except KeyError:
                    pass

        # Strategy 3: Create a DelegatingParser with the individual parser classes
        reasoning_parser_cls = cls.get_reasoning_parser(reasoning_parser_name)
        tool_parser_cls = cls.get_tool_parser(
            tool_parser_name, enable_auto_tools, model_name
        )

        if reasoning_parser_cls is None and tool_parser_cls is None:
            return None

        # Set the class-level attributes on the imported _WrappedParser
        # NOTE(review): this mutates _WrappedParser's class attributes, a
        # process-wide side effect — a later call with different parser names
        # overwrites the configuration of any previously returned class.
        _WrappedParser.reasoning_parser_cls = reasoning_parser_cls
        _WrappedParser.tool_parser_cls = tool_parser_cls

        return _WrappedParser
lazy_parsers class-attribute instance-attribute

lazy_parsers: dict[str, tuple[str, str]] = {}

parsers class-attribute instance-attribute

parsers: dict[str, type[Parser]] = {}

_load_lazy_parser classmethod

_load_lazy_parser(name: str) -> type[Parser]

Import and register a lazily loaded parser.

Source code in vllm/parser/parser_manager.py
@classmethod
def _load_lazy_parser(cls, name: str) -> type[Parser]:
    """Resolve a lazily registered parser: import it, validate it, cache it."""
    from vllm.parser.abstract_parser import Parser

    module_path, class_name = cls.lazy_parsers[name]
    try:
        candidate = getattr(importlib.import_module(module_path), class_name)
        # Reject anything that is not actually a Parser implementation.
        if not issubclass(candidate, Parser):
            raise TypeError(
                f"{class_name} in {module_path} is not a Parser subclass."
            )
    except Exception as e:
        logger.exception(
            "Failed to import lazy parser '%s' from %s: %s",
            name,
            module_path,
            e,
        )
        raise
    # Cache the resolved class so later lookups skip the import entirely.
    cls.parsers[name] = candidate
    return candidate

_register_module classmethod

_register_module(
    module: type[Parser],
    module_name: str | list[str] | None = None,
    force: bool = True,
) -> None

Register a Parser class immediately.

Source code in vllm/parser/parser_manager.py
@classmethod
def _register_module(
    cls,
    module: type[Parser],
    module_name: str | list[str] | None = None,
    force: bool = True,
) -> None:
    """Register a Parser class in the eager registry under one or more names."""
    from vllm.parser.abstract_parser import Parser

    if not issubclass(module, Parser):
        raise TypeError(
            f"module must be subclass of Parser, but got {type(module)}"
        )

    # Normalize the name argument into a list of registry keys.
    if module_name is None:
        names = [module.__name__]
    elif isinstance(module_name, str):
        names = [module_name]
    elif is_list_of(module_name, str):
        names = list(module_name)
    else:
        raise TypeError("module_name must be str, list[str], or None.")

    for key in names:
        # Without force, refuse to silently shadow an existing registration.
        if not force and key in cls.parsers:
            existed = cls.parsers[key]
            raise KeyError(f"{key} is already registered at {existed.__module__}")
        cls.parsers[key] = module

get_parser classmethod

get_parser(
    tool_parser_name: str | None = None,
    reasoning_parser_name: str | None = None,
    enable_auto_tools: bool = False,
    model_name: str | None = None,
) -> type[Parser] | None

Get a unified Parser that handles both reasoning and tool parsing.

This method checks if a unified Parser exists that can handle both reasoning extraction and tool call parsing. If no unified parser exists, it creates a DelegatingParser that wraps the individual reasoning and tool parsers.

Parameters:

Name Type Description Default
tool_parser_name str | None

The name of the tool parser.

None
reasoning_parser_name str | None

The name of the reasoning parser.

None
enable_auto_tools bool

Whether auto tool choice is enabled.

False
model_name str | None

The model name for parser-specific warnings.

None

Returns:

Type Description
type[Parser] | None

A Parser class, or None if neither parser is specified.

Source code in vllm/parser/parser_manager.py
@classmethod
def get_parser(
    cls,
    tool_parser_name: str | None = None,
    reasoning_parser_name: str | None = None,
    enable_auto_tools: bool = False,
    model_name: str | None = None,
) -> type[Parser] | None:
    """
    Resolve a unified Parser class for both reasoning and tool parsing.

    Resolution order:
      1. If both names are identical, look for a unified parser under
         that single name.
      2. Otherwise try each provided name in turn (tool parser first).
      3. Fall back to a delegating wrapper built from the individual
         reasoning/tool parser classes.

    Args:
        tool_parser_name: The name of the tool parser.
        reasoning_parser_name: The name of the reasoning parser.
        enable_auto_tools: Whether auto tool choice is enabled.
        model_name: The model name for parser-specific warnings.

    Returns:
        A Parser class, or None if neither parser is specified.
    """
    from vllm.parser.abstract_parser import _WrappedParser

    if not (tool_parser_name or reasoning_parser_name):
        return None

    # Strategy 1: identical names -> a single unified parser may exist.
    if tool_parser_name and tool_parser_name == reasoning_parser_name:
        try:
            unified = cls.get_parser_internal(tool_parser_name)
        except KeyError:
            pass  # No unified parser with this name
        else:
            logger.info(
                "Using unified parser '%s' for both reasoning and tool parsing.",
                tool_parser_name,
            )
            return unified

    # Strategy 2: a unified parser registered under either name.
    for candidate_name in (tool_parser_name, reasoning_parser_name):
        if not candidate_name:
            continue
        try:
            unified = cls.get_parser_internal(candidate_name)
        except KeyError:
            continue
        logger.info(
            "Using unified parser '%s' for reasoning and tool parsing.",
            candidate_name,
        )
        return unified

    # Strategy 3: wrap the individual parser classes in a delegating parser.
    reasoning_parser_cls = cls.get_reasoning_parser(reasoning_parser_name)
    tool_parser_cls = cls.get_tool_parser(
        tool_parser_name, enable_auto_tools, model_name
    )

    if reasoning_parser_cls is None and tool_parser_cls is None:
        return None

    # _WrappedParser instantiates these class-level attributes on construction.
    _WrappedParser.reasoning_parser_cls = reasoning_parser_cls
    _WrappedParser.tool_parser_cls = tool_parser_cls

    return _WrappedParser

get_parser_internal classmethod

get_parser_internal(name: str) -> type[Parser]

Retrieve a registered or lazily registered Parser class.

Parameters:

Name Type Description Default
name str

The registered name of the parser.

required

Returns:

Type Description
type[Parser]

The Parser class.

Raises:

Type Description
KeyError

If no parser is found under the given name.

Source code in vllm/parser/parser_manager.py
@classmethod
def get_parser_internal(cls, name: str) -> type[Parser]:
    """
    Look up a Parser class by its registered name.

    Eagerly registered parsers are returned directly; lazily registered
    ones are imported (and cached) on first access.

    Args:
        name: The registered name of the parser.

    Returns:
        The Parser class.

    Raises:
        KeyError: If no parser is found under the given name.
    """
    try:
        return cls.parsers[name]
    except KeyError:
        pass

    if name in cls.lazy_parsers:
        return cls._load_lazy_parser(name)

    registered = ", ".join(cls.list_registered())
    raise KeyError(f"Parser '{name}' not found. Available parsers: {registered}")

get_reasoning_parser classmethod

get_reasoning_parser(
    reasoning_parser_name: str | None,
) -> type[ReasoningParser] | None

Get the reasoning parser based on the name.

Source code in vllm/parser/parser_manager.py
@classmethod
def get_reasoning_parser(
    cls,
    reasoning_parser_name: str | None,
) -> type[ReasoningParser] | None:
    """Get the reasoning parser class registered under the given name.

    Args:
        reasoning_parser_name: Registered name of the reasoning parser,
            or None/empty string to disable reasoning parsing.

    Returns:
        The ReasoningParser class, or None when no name was given.

    Raises:
        TypeError: If no reasoning parser is registered under the name.
    """
    from vllm.reasoning import ReasoningParserManager

    if not reasoning_parser_name:
        return None
    try:
        parser = ReasoningParserManager.get_reasoning_parser(reasoning_parser_name)
    except Exception as e:
        raise TypeError(f"{reasoning_parser_name=} has not been registered") from e
    # Validate explicitly instead of `assert`, which is stripped under `-O`.
    if parser is None:
        raise TypeError(f"{reasoning_parser_name=} has not been registered")
    return parser

get_tool_parser classmethod

get_tool_parser(
    tool_parser_name: str | None = None,
    enable_auto_tools: bool = False,
    model_name: str | None = None,
) -> type[ToolParser] | None

Get the tool parser based on the name.

Source code in vllm/parser/parser_manager.py
@classmethod
def get_tool_parser(
    cls,
    tool_parser_name: str | None = None,
    enable_auto_tools: bool = False,
    model_name: str | None = None,
) -> type[ToolParser] | None:
    """Get the tool parser class for the given name, if auto tools are on."""
    from vllm.tool_parsers import ToolParserManager

    # Tool parsing is only relevant when auto tool choice is enabled
    # and a parser name was actually provided.
    if tool_parser_name is None or not enable_auto_tools:
        return None
    logger.info('"auto" tool choice has been enabled.')

    try:
        if (
            tool_parser_name == "pythonic"
            and model_name
            and model_name.startswith("meta-llama/Llama-3.2")
        ):
            # Known model-family caveat for the pythonic tool-call format.
            logger.warning(
                "Llama3.2 models may struggle to emit valid pythonic tool calls"
            )
        return ToolParserManager.get_tool_parser(tool_parser_name)
    except Exception as e:
        raise TypeError(
            "Error: --enable-auto-tool-choice requires "
            f"tool_parser:'{tool_parser_name}' which has not "
            "been registered"
        ) from e

import_parser classmethod

import_parser(plugin_path: str) -> None

Import a user-defined parser from an arbitrary path.

Source code in vllm/parser/parser_manager.py
@classmethod
def import_parser(cls, plugin_path: str) -> None:
    """Import a user-defined parser module from an arbitrary file path."""
    # Derive the module name from the file name, minus its extension.
    base = os.path.basename(plugin_path)
    module_name = os.path.splitext(base)[0]
    try:
        import_from_path(module_name, plugin_path)
    except Exception:
        # Best-effort: a broken plugin is logged but does not abort startup.
        logger.exception(
            "Failed to load module '%s' from %s.", module_name, plugin_path
        )

list_registered classmethod

list_registered() -> list[str]

Return names of all registered parsers.

Source code in vllm/parser/parser_manager.py
@classmethod
def list_registered(cls) -> list[str]:
    """Return the sorted, de-duplicated names of all registered parsers."""
    # Merge eager and lazy registrations; a name may appear in both.
    all_names: set[str] = set(cls.parsers)
    all_names.update(cls.lazy_parsers)
    return sorted(all_names)

register_lazy_module classmethod

register_lazy_module(
    name: str, module_path: str, class_name: str
) -> None

Register a lazy module mapping for delayed import.

Example

ParserManager.register_lazy_module(
    name="minimax_m2",
    module_path="vllm.parser.minimax_m2_parser",
    class_name="MiniMaxM2Parser",
)

Source code in vllm/parser/parser_manager.py
@classmethod
def register_lazy_module(cls, name: str, module_path: str, class_name: str) -> None:
    """
    Record a (module_path, class_name) mapping for delayed import.

    The actual import happens the first time the parser is looked up,
    keeping startup cheap.

    Example:
        ParserManager.register_lazy_module(
            name="minimax_m2",
            module_path="vllm.parser.minimax_m2_parser",
            class_name="MiniMaxM2Parser",
        )
    """
    # Later registrations under the same name overwrite earlier ones.
    cls.lazy_parsers[name] = (module_path, class_name)

register_module classmethod

register_module(
    name: str | list[str] | None = None,
    force: bool = True,
    module: type[Parser] | None = None,
) -> type[Parser] | Callable[[type[Parser]], type[Parser]]

Register a Parser class.

Can be used as a decorator or called directly.

Usage

@ParserManager.register_module("my_parser")
class MyParser(Parser):
    ...

Or

ParserManager.register_module(module=MyParser)

Source code in vllm/parser/parser_manager.py
@classmethod
def register_module(
    cls,
    name: str | list[str] | None = None,
    force: bool = True,
    module: type[Parser] | None = None,
) -> type[Parser] | Callable[[type[Parser]], type[Parser]]:
    """
    Register a Parser class, either directly or as a class decorator.

    Usage:
        @ParserManager.register_module("my_parser")
        class MyParser(Parser):
            ...

    Or:
        ParserManager.register_module(module=MyParser)
    """
    if not isinstance(force, bool):
        raise TypeError(f"force must be a boolean, but got {type(force)}")

    # Direct call: register the class eagerly and hand it back.
    if module is not None:
        cls._register_module(module=module, module_name=name, force=force)
        return module

    # Decorator call: record a lazy (module, class) mapping so the
    # decorated class is only imported when first requested.
    def _decorator(parser_cls: type[Parser]) -> type[Parser]:
        if isinstance(name, str):
            keys = [name]
        elif is_list_of(name, str):
            keys = name
        else:
            keys = [parser_cls.__name__]

        target = (parser_cls.__module__, parser_cls.__name__)
        for key in keys:
            cls.lazy_parsers[key] = target

        return parser_cls

    return _decorator

_WrappedParser

Bases: DelegatingParser

A DelegatingParser subclass that instantiates parsers from class attributes.

This class is used to dynamically create a parser that wraps individual ReasoningParser and ToolParser classes. The class attributes reasoning_parser_cls and tool_parser_cls should be set before instantiation.

Usage

_WrappedParser.reasoning_parser_cls = MyReasoningParser
_WrappedParser.tool_parser_cls = MyToolParser
parser = _WrappedParser(tokenizer)

Source code in vllm/parser/abstract_parser.py
class _WrappedParser(DelegatingParser):
    """
    A DelegatingParser that builds its sub-parsers from class attributes.

    Set the class attributes `reasoning_parser_cls` and `tool_parser_cls`
    before constructing an instance; `__init__` then instantiates whichever
    of them is non-None with the supplied tokenizer.

    Usage:
        _WrappedParser.reasoning_parser_cls = MyReasoningParser
        _WrappedParser.tool_parser_cls = MyToolParser
        parser = _WrappedParser(tokenizer)
    """

    # Configured by ParserManager.get_parser before instantiation.
    reasoning_parser_cls: type[ReasoningParser] | None = None
    tool_parser_cls: type[ToolParser] | None = None

    def __init__(self, tokenizer: TokenizerLike):
        super().__init__(tokenizer)
        # Only instantiate the parsers that were actually configured.
        reasoning_cls = type(self).reasoning_parser_cls
        if reasoning_cls is not None:
            self._reasoning_parser = reasoning_cls(tokenizer)
        tool_cls = type(self).tool_parser_cls
        if tool_cls is not None:
            self._tool_parser = tool_cls(tokenizer)

_reasoning_parser instance-attribute

_reasoning_parser = reasoning_parser_cls(tokenizer)

_tool_parser instance-attribute

_tool_parser = tool_parser_cls(tokenizer)

reasoning_parser_cls class-attribute instance-attribute

reasoning_parser_cls: type[ReasoningParser] | None = None

tool_parser_cls class-attribute instance-attribute

tool_parser_cls: type[ToolParser] | None = None

__init__

__init__(tokenizer: TokenizerLike)
Source code in vllm/parser/abstract_parser.py
def __init__(self, tokenizer: TokenizerLike):
    """Instantiate the configured reasoning/tool parsers for this wrapper."""
    super().__init__(tokenizer)
    # Build each sub-parser only when its class attribute was configured.
    reasoning_cls = type(self).reasoning_parser_cls
    if reasoning_cls is not None:
        self._reasoning_parser = reasoning_cls(tokenizer)
    tool_cls = type(self).tool_parser_cls
    if tool_cls is not None:
        self._tool_parser = tool_cls(tokenizer)

register_lazy_parsers

register_lazy_parsers()
Source code in vllm/parser/__init__.py
def register_lazy_parsers():
    """Register every built-in parser with ParserManager as a lazy import."""
    for parser_name, (file_name, class_name) in _PARSERS_TO_REGISTER.items():
        # Built-in parser modules all live under the vllm.parser package.
        ParserManager.register_lazy_module(
            parser_name, f"vllm.parser.{file_name}", class_name
        )