vllm.entrypoints.openai.basic.api_router

logger module-attribute

logger = init_logger(__name__)

router module-attribute

router = APIRouter()

base

base(request: Request) -> OpenAIServing
Source code in vllm/entrypoints/openai/basic/api_router.py
def base(request: Request) -> OpenAIServing:
    # Reuse the existing instance
    return tokenization(request)
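
For illustration, a minimal sketch of how this accessor might be used inside a route handler on the same router; the /ping route, its name, and its body are hypothetical and not part of this module.

from fastapi import Request
from fastapi.responses import JSONResponse

# Hypothetical route: resolve the shared OpenAIServing instance from the
# request's app state and report whether it is available.
@router.get("/ping")
async def ping(raw_request: Request):
    serving = base(raw_request)  # same instance returned by tokenization()
    return JSONResponse(content={"serving_ready": serving is not None})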

engine_client

engine_client(request: Request) -> EngineClient
Source code in vllm/entrypoints/openai/basic/api_router.py
def engine_client(request: Request) -> EngineClient:
    return request.app.state.engine_client
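
A sketch of how this accessor is typically consumed; the /health route and the check_health() call mirror common vLLM server code but are assumptions here, not part of this module.

from fastapi import Request, Response

# Hypothetical health-style route: fetch the engine client from app state and
# ask it to verify the engine is responsive (check_health() is an assumption).
@router.get("/health")
async def health(raw_request: Request) -> Response:
    await engine_client(raw_request).check_health()
    return Response(status_code=200)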

get_server_load_metrics async

get_server_load_metrics(request: Request)
Source code in vllm/entrypoints/openai/basic/api_router.py
@router.get("/load")
async def get_server_load_metrics(request: Request):
    # This endpoint returns the current server load metrics.
    # It tracks requests utilizing the GPU from the following routes:
    # - /v1/responses
    # - /v1/responses/{response_id}
    # - /v1/responses/{response_id}/cancel
    # - /v1/messages
    # - /v1/chat/completions
    # - /v1/completions
    # - /v1/audio/transcriptions
    # - /v1/audio/translations
    # - /v1/embeddings
    # - /pooling
    # - /classify
    # - /score
    # - /v1/score
    # - /rerank
    # - /v1/rerank
    # - /v2/rerank
    return JSONResponse(content={"server_load": request.app.state.server_load_metrics})
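
A hedged client-side example of querying this endpoint against a running server; the host and the default port 8000 are assumptions.

import requests

# Hypothetical check of the current server load on a locally running vLLM server.
resp = requests.get("http://localhost:8000/load")
resp.raise_for_status()
print(resp.json())  # e.g. {"server_load": 0}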

register_basic_api_routers

register_basic_api_routers(app: FastAPI)
Source code in vllm/entrypoints/openai/basic/api_router.py
def register_basic_api_routers(app: FastAPI):
    app.include_router(router)
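
A minimal sketch of wiring the router into an app and exercising the basic endpoints in-process; the import path follows the source location shown above, and attaching a plain counter as server_load_metrics is an assumption standing in for the state the real server populates at startup.

from fastapi import FastAPI
from fastapi.testclient import TestClient

# Assumed import path, taken from the source location shown above.
from vllm.entrypoints.openai.basic.api_router import register_basic_api_routers

app = FastAPI()
# The real server sets this during startup; a plain counter stands in here.
app.state.server_load_metrics = 0

register_basic_api_routers(app)

client = TestClient(app)
print(client.get("/version").json())  # {"version": "<installed vLLM version>"}
print(client.get("/load").json())     # {"server_load": 0}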

show_version async

show_version()
Source code in vllm/entrypoints/openai/basic/api_router.py
@router.get("/version")
async def show_version():
    ver = {"version": VLLM_VERSION}
    return JSONResponse(content=ver)

tokenization

tokenization(request: Request) -> OpenAIServingTokenization
Source code in vllm/entrypoints/openai/basic/api_router.py
def tokenization(request: Request) -> OpenAIServingTokenization:
    return request.app.state.openai_serving_tokenization