@router.get("/load")
async def get_server_load_metrics(request: Request):
    # Report the current server load metric as a JSON payload of the form
    # {"server_load": <value>}.
    #
    # This handler only reads the value from the application state
    # (request.app.state.server_load_metrics); the value itself is
    # maintained outside this function.
    #
    # The metric tracks requests utilizing the GPU from these routes:
    #   - /v1/responses
    #   - /v1/responses/{response_id}
    #   - /v1/responses/{response_id}/cancel
    #   - /v1/messages
    #   - /v1/chat/completions
    #   - /v1/completions
    #   - /v1/audio/transcriptions
    #   - /v1/audio/translations
    #   - /v1/embeddings
    #   - /pooling
    #   - /classify
    #   - /score
    #   - /v1/score
    #   - /rerank
    #   - /v1/rerank
    #   - /v2/rerank
    app_state = request.app.state
    payload = {"server_load": app_state.server_load_metrics}
    return JSONResponse(content=payload)