vllm.model_executor.models.mistral_large_3_eagle ¶
EagleMistralLarge3ForCausalLM ¶
Bases: MistralLarge3ForCausalLM
Source code in vllm/model_executor/models/mistral_large_3_eagle.py
model_cls instance-attribute ¶
model_cls = partial(
    EagleMistralLarge3Model, start_layer_id=target_layer_num
)
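The `partial` pins `start_layer_id` to the target model's decoder-layer count, so the draft layers are numbered after the target's layers. A minimal sketch of the pattern, with an invented layer count:

```python
from functools import partial

from vllm.model_executor.models.mistral_large_3_eagle import (
    EagleMistralLarge3Model,
)

# Invented value: number of decoder layers in the target model.
target_layer_num = 61

# Binding start_layer_id up front lets shared base-class code build the draft
# model with a plain `model_cls(vllm_config=..., prefix=...)` call, without
# knowing about the extra keyword argument.
model_cls = partial(EagleMistralLarge3Model, start_layer_id=target_layer_num)
```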
quant_config instance-attribute ¶
quant_config = get_quantization_config(
    draft_model_config, load_config
)
remapping class-attribute instance-attribute ¶
remapping = remapping | {
    "eagle_linear\\.weight": "model.fc.weight",
    "eagle_linear\\.qscale_act": "model.fc.input_scale",
    "eagle_linear\\.qscale_weight": "model.fc.weight_scale",
}
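`remapping` extends the parent table with regex patterns that route the draft checkpoint's standalone EAGLE fusion weight and its FP8 scales onto the in-model `model.fc` parameters. A minimal sketch of how such a regex table can be applied to checkpoint tensor names; the `remap_name` helper is illustrative, not vLLM's loader:

```python
import re

remapping = {
    r"eagle_linear\.weight": "model.fc.weight",
    r"eagle_linear\.qscale_act": "model.fc.input_scale",
    r"eagle_linear\.qscale_weight": "model.fc.weight_scale",
}

def remap_name(name: str) -> str:
    """Return the in-model parameter name for a checkpoint tensor name."""
    for pattern, target in remapping.items():
        if re.fullmatch(pattern, name):
            return target
    return name

assert remap_name("eagle_linear.qscale_weight") == "model.fc.weight_scale"
```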
__init__ ¶
__init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/mistral_large_3_eagle.py
embed_input_ids ¶
embed_input_ids(
    input_ids: Tensor,
    multimodal_embeddings: NestedTensors | None = None,
    is_multimodal: Tensor | None = None,
) -> Tensor
Source code in vllm/model_executor/models/mistral_large_3_eagle.py
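`embed_input_ids` embeds the draft tokens and, when a multimodal mask is supplied, overwrites the flagged positions with the precomputed multimodal embeddings. A self-contained sketch of that merge, assuming the multimodal embeddings are already flattened to one row per flagged position (an illustration, not the actual implementation):

```python
import torch

def embed_input_ids_sketch(
    embed_tokens: torch.nn.Embedding,
    input_ids: torch.Tensor,                         # (num_tokens,)
    multimodal_embeddings: torch.Tensor | None = None,
    is_multimodal: torch.Tensor | None = None,       # bool mask, (num_tokens,)
) -> torch.Tensor:
    # Text tokens go through the vocabulary embedding.
    inputs_embeds = embed_tokens(input_ids)
    # Flagged positions are replaced by the precomputed multimodal rows.
    if multimodal_embeddings is not None and is_multimodal is not None:
        inputs_embeds[is_multimodal] = multimodal_embeddings.to(inputs_embeds.dtype)
    return inputs_embeds
```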
forward ¶
forward(
    input_ids: Tensor,
    positions: Tensor,
    hidden_states: Tensor,
    inputs_embeds: Tensor | None = None,
) -> tuple[Tensor, Tensor]
Source code in vllm/model_executor/models/mistral_large_3_eagle.py
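The drafter's `forward` takes the draft tokens plus the hidden states produced by the target model and returns two tensors; in EAGLE-style drafters these are typically the states fed to the LM head for draft logits and the states carried into the next speculative step. A shape-level sketch of the contract, with invented sizes:

```python
import torch

num_tokens, hidden_size = 8, 4096
input_ids = torch.randint(0, 32_000, (num_tokens,))
positions = torch.arange(num_tokens)
target_hidden = torch.randn(num_tokens, hidden_size)  # from the target model

# hidden_for_logits, hidden_for_next_step = model.forward(
#     input_ids, positions, target_hidden
# )
# Both outputs are expected to be (num_tokens, hidden_size) tensors.
```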
get_input_embeddings ¶
get_input_embeddings(
    input_ids: Tensor,
    multimodal_embeddings: MultiModalEmbeddings | None = None,
    *,
    is_multimodal: Tensor | None = None,
    handle_oov_mm_token: bool = False,
) -> Tensor
Source code in vllm/model_executor/models/mistral_large_3_eagle.py
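`get_input_embeddings` is the standard vLLM multimodal embedding hook; `handle_oov_mm_token` covers the case where multimodal placeholder ids lie outside the text vocabulary and would otherwise break the embedding lookup. One way such placeholders can be neutralized before the lookup is sketched below; the clamping strategy is an assumption, not necessarily what vLLM does:

```python
import torch

def clamp_oov_placeholders(
    input_ids: torch.Tensor,
    is_multimodal: torch.Tensor,   # True where a multimodal placeholder sits
    vocab_size: int,
) -> torch.Tensor:
    # Placeholder ids may exceed the text vocab; point them at id 0 so the
    # embedding lookup cannot fail.  Those rows are overwritten with the real
    # multimodal embeddings afterwards (see embed_input_ids above).
    oov = is_multimodal & (input_ids >= vocab_size)
    return torch.where(oov, torch.zeros_like(input_ids), input_ids)
```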
load_weights ¶
Source code in vllm/model_executor/models/mistral_large_3_eagle.py
EagleMistralLarge3Model ¶
Bases: DeepseekV2Model
Source code in vllm/model_executor/models/mistral_large_3_eagle.py
embed_tokens instance-attribute ¶
embed_tokens = VocabParallelEmbedding(
    vocab_size,
    hidden_size,
    quant_config=quant_config,
    prefix=f"{prefix}.embed_tokens",
)
fc instance-attribute ¶
fc = RowParallelLinear(
    hidden_size * 2,
    hidden_size,
    bias=False,
    input_is_parallel=False,
    quant_config=quant_config,
    return_bias=False,
)
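`fc` takes `hidden_size * 2` input features because EAGLE-style drafters concatenate each draft token's embedding with the target model's hidden state at that position and fuse the pair back down to `hidden_size` before the decoder layer. A plain-PyTorch sketch of that fusion, with an unsharded `nn.Linear` standing in for `RowParallelLinear`:

```python
import torch
import torch.nn as nn

hidden_size = 4096                                 # invented size
fc = nn.Linear(hidden_size * 2, hidden_size, bias=False)

inputs_embeds = torch.randn(8, hidden_size)        # draft-token embeddings
target_hidden = torch.randn(8, hidden_size)        # target-model hidden states

# Concatenate along the feature dimension, then fuse back to hidden_size.
fused = fc(torch.cat([inputs_embeds, target_hidden], dim=-1))
assert fused.shape == (8, hidden_size)
```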
layers instance-attribute ¶
layers = ModuleList(
    [
        DeepseekV2DecoderLayer(
            vllm_config=vllm_config,
            prefix=maybe_prefix(
                prefix, f"layers.{i + start_layer_id}"
            ),
        )
        for i in range(num_hidden_layers)
    ]
)
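`start_layer_id` shifts the indices used in the layer prefixes, so the draft's decoder layers are registered after the target model's layers instead of colliding with its `layers.0`, `layers.1`, and so on. A small sketch of the resulting names, assuming `maybe_prefix` joins a non-empty prefix and name with a dot (an assumption about that helper):

```python
def maybe_prefix_sketch(prefix: str, name: str) -> str:
    # Assumed behaviour: join with '.' unless the prefix is empty.
    return f"{prefix}.{name}" if prefix else name

start_layer_id = 61        # invented: the target model has 61 decoder layers
num_hidden_layers = 1      # EAGLE drafters typically add a single layer

names = [
    maybe_prefix_sketch("model", f"layers.{i + start_layer_id}")
    for i in range(num_hidden_layers)
]
print(names)               # ['model.layers.61']
```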
make_empty_intermediate_tensors instance-attribute ¶
make_empty_intermediate_tensors = (
    make_empty_intermediate_tensors_factory(
        ["hidden_states", "residual"], hidden_size
    )
)
__init__ ¶
__init__(
    *,
    vllm_config: VllmConfig,
    prefix: str = "",
    start_layer_id: int = 0,
)
Source code in vllm/model_executor/models/mistral_large_3_eagle.py
forward ¶
forward(
    input_ids: Tensor,
    positions: Tensor,
    hidden_states: Tensor,
    inputs_embeds: Tensor | None = None,
) -> Tensor
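Putting the pieces together, the inner model's `forward` follows the usual EAGLE data flow: embed the draft tokens unless `inputs_embeds` is already provided, fuse them with the target hidden states through `fc`, and run the result through the offset decoder layers. A simplified, self-contained sketch with plain PyTorch modules standing in for vLLM's parallel layers (an illustration of the data flow, not the actual source):

```python
import torch
import torch.nn as nn

class EagleDraftSketch(nn.Module):
    def __init__(self, vocab_size: int, hidden_size: int, num_layers: int = 1):
        super().__init__()
        self.embed_tokens = nn.Embedding(vocab_size, hidden_size)
        self.fc = nn.Linear(hidden_size * 2, hidden_size, bias=False)
        # Stand-in for DeepseekV2DecoderLayer; a real layer would also use
        # `positions` for rotary embeddings.
        self.layers = nn.ModuleList(
            [
                nn.TransformerEncoderLayer(hidden_size, nhead=8, batch_first=True)
                for _ in range(num_layers)
            ]
        )
        self.norm = nn.LayerNorm(hidden_size)  # stand-in for RMSNorm

    def forward(
        self,
        input_ids: torch.Tensor,
        positions: torch.Tensor,
        hidden_states: torch.Tensor,
        inputs_embeds: torch.Tensor | None = None,
    ) -> torch.Tensor:
        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)
        # EAGLE fusion: concatenate draft-token embeddings with the target
        # model's hidden states, then project back to hidden_size.
        x = self.fc(torch.cat([inputs_embeds, hidden_states], dim=-1))
        for layer in self.layers:
            x = layer(x.unsqueeze(0)).squeeze(0)
        return self.norm(x)

# Usage:
# drafter = EagleDraftSketch(vocab_size=32_000, hidden_size=256)
# out = drafter(torch.randint(0, 32_000, (8,)), torch.arange(8),
#               torch.randn(8, 256))
```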