# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Minimal Gradio demo for real-time speech transcription using the vLLM Realtime API.

Start the vLLM server first:
    vllm serve mistralai/Voxtral-Mini-3B-Realtime-2602 --enforce-eager

Then run this script:
    python openai_realtime_microphone_client.py --host localhost --port 8000

Use --share to create a public Gradio link.

Requirements: websockets, numpy, gradio
"""

import argparse
import asyncio
import base64
import json
import queue
import threading

import gradio as gr
import numpy as np
import websockets

# Sample rate (Hz) the realtime endpoint expects; microphone audio is
# resampled to this rate before being sent.
SAMPLE_RATE = 16_000

# Global state shared between the Gradio callbacks (main thread) and the
# WebSocket handler (background thread).
audio_queue: queue.Queue = queue.Queue()  # base64-encoded PCM16 chunks to send
transcription_text = ""  # accumulated transcript shown in the UI
is_running = False  # cleared to stop the background sender loop
ws_url = ""  # set from CLI args in __main__
model = ""  # set from CLI args in __main__


async def websocket_handler():
    """Connect to the realtime WebSocket and stream audio / receive transcript.

    Runs two concurrent tasks: one drains ``audio_queue`` and forwards chunks
    to the server, the other appends ``transcription.delta`` payloads to the
    global ``transcription_text``.
    """
    global transcription_text, is_running
    async with websockets.connect(ws_url) as ws:
        # Wait for session.created
        await ws.recv()
        # Validate model
        await ws.send(json.dumps({"type": "session.update", "model": model}))
        # Signal ready
        await ws.send(json.dumps({"type": "input_audio_buffer.commit"}))

        async def send_audio():
            # Forward queued audio chunks until the UI signals a stop.
            loop = asyncio.get_running_loop()
            while is_running:
                try:
                    # Blocking queue.get (with timeout) runs in the default
                    # executor so the event loop stays responsive.
                    chunk = await loop.run_in_executor(
                        None, lambda: audio_queue.get(timeout=0.1)
                    )
                    await ws.send(
                        json.dumps(
                            {"type": "input_audio_buffer.append", "audio": chunk}
                        )
                    )
                except queue.Empty:
                    # No audio queued yet; poll again.
                    continue

        async def receive_transcription():
            global transcription_text
            async for message in ws:
                data = json.loads(message)
                if data.get("type") == "transcription.delta":
                    transcription_text += data["delta"]

        await asyncio.gather(send_audio(), receive_transcription())


def start_websocket():
    """Run the WebSocket handler on a fresh event loop (background thread)."""
    global is_running
    is_running = True
    try:
        # asyncio.run creates, runs, and properly closes a new event loop —
        # safe here because this function runs in its own thread.
        asyncio.run(websocket_handler())
    except Exception as e:
        # Best-effort demo: report the failure and let the thread exit.
        print(f"WebSocket error: {e}")


def start_recording():
    """Start the transcription service.

    Returns Gradio updates: disable Start, enable Stop, clear the textbox.
    """
    global transcription_text
    transcription_text = ""
    thread = threading.Thread(target=start_websocket, daemon=True)
    thread.start()
    return gr.update(interactive=False), gr.update(interactive=True), ""


def stop_recording():
    """Stop the transcription service.

    Returns Gradio updates: enable Start, disable Stop, keep the final text.
    """
    global is_running
    is_running = False
    return gr.update(interactive=True), gr.update(interactive=False), transcription_text


def process_audio(audio):
    """Convert a Gradio microphone chunk to base64 PCM16 and queue it for sending.

    Args:
        audio: ``(sample_rate, np.ndarray)`` tuple from a streaming
            ``gr.Audio`` component, or ``None``.

    Returns:
        The current accumulated transcription (echoed into the textbox).
    """
    global transcription_text
    if audio is None or not is_running:
        return transcription_text
    sample_rate, audio_data = audio
    # Convert to mono if stereo
    if audio_data.ndim > 1:
        audio_data = audio_data.mean(axis=1)
    # Normalize to float in [-1, 1]
    if audio_data.dtype == np.int16:
        audio_float = audio_data.astype(np.float32) / 32767.0
    else:
        audio_float = audio_data.astype(np.float32)
    # Resample to 16kHz if needed (simple linear interpolation)
    if sample_rate != SAMPLE_RATE:
        num_samples = int(len(audio_float) * SAMPLE_RATE / sample_rate)
        audio_float = np.interp(
            np.linspace(0, len(audio_float) - 1, num_samples),
            np.arange(len(audio_float)),
            audio_float,
        )
    # Convert to PCM16 and base64 encode; clip first so float input outside
    # [-1, 1] saturates instead of wrapping around in int16.
    pcm16 = (np.clip(audio_float, -1.0, 1.0) * 32767).astype(np.int16)
    b64_chunk = base64.b64encode(pcm16.tobytes()).decode("utf-8")
    audio_queue.put(b64_chunk)
    return transcription_text


# Gradio interface
with gr.Blocks(title="Real-time Speech Transcription") as demo:
    gr.Markdown("# Real-time Speech Transcription")
    gr.Markdown("Click **Start** and speak into your microphone.")
    with gr.Row():
        start_btn = gr.Button("Start", variant="primary")
        stop_btn = gr.Button("Stop", variant="stop", interactive=False)
    audio_input = gr.Audio(sources=["microphone"], streaming=True, type="numpy")
    transcription_output = gr.Textbox(label="Transcription", lines=5)

    start_btn.click(
        start_recording, outputs=[start_btn, stop_btn, transcription_output]
    )
    stop_btn.click(
        stop_recording, outputs=[start_btn, stop_btn, transcription_output]
    )
    audio_input.stream(
        process_audio, inputs=[audio_input], outputs=[transcription_output]
    )

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Realtime WebSocket Transcription with Gradio"
    )
    parser.add_argument(
        "--model",
        type=str,
        default="mistralai/Voxtral-Mini-3B-Realtime-2602",
        help="Model that is served and should be pinged.",
    )
    parser.add_argument(
        "--host", type=str, default="localhost", help="vLLM server host"
    )
    parser.add_argument("--port", type=int, default=8000, help="vLLM server port")
    parser.add_argument(
        "--share", action="store_true", help="Create public Gradio link"
    )
    args = parser.parse_args()

    ws_url = f"ws://{args.host}:{args.port}/v1/realtime"
    model = args.model
    demo.launch(share=args.share)