
    /jc                       S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKJ	r	  S SK
Jr  S SKJr  S SKrS SKrS SKrS SKJr  S SKJrJrJrJrJrJrJrJr  S SKJrJr  S S	KJ r J!r!  S S
K"J#r#  S SK$J%r%  SSK&J'r'  SSK(J)r)J*r*  SSKJ+r+  Sr,Sr-Sr.Sr/\	 " S S5      5       r0 " S S\Rb                  5      r1 " S S\Rd                  5      r2g)    )annotationsN)	dataclass)Any)	urlencode)rtc)DEFAULT_API_CONNECT_OPTIONSAPIConnectionErrorAPIConnectOptionsAPIStatusErrorAPITimeoutErrorLanguageCodesttutils)	NOT_GIVEN
NotGivenOr)AudioBufferis_given)TranscriptionVerbose)SessionTurnDetection   logger)GroqAudioModels	STTModels)AsyncAzureADTokenProviderX        ?i]  c                  Z    \ rS rSr% S\S'   S\S'   S\S'   S\S	'   \rS
\S'   \rS
\S'   Srg)_STTOptionsA   STTModels | strmodelr   languagebooldetect_languager   turn_detectionNotGivenOr[str]promptnoise_reduction_type N)	__name__
__module____qualname____firstlineno____annotations__r   r(   r)   __static_attributes__r*       K/app/agent/.venv/lib/python3.13/site-packages/livekit/plugins/openai/stt.pyr   r   A   s/    (('FO',5/5r1   r   c                  4  ^  \ rS rSrSSS\\\\\SSS.
                   SU 4S jjjr\SS j5       r\SS	 j5       r\	SSS\\\SSSSSSSSSSSS
.                                   SS jj5       r
\	S\\SSS\S.               SS jj5       r\	S\SSSS\S.               SS jj5       r\\S.     S S jjr\\\\\\S.             S!S jjrS"S jrS#S jrS$S jr\S.       S%S jjrSrU =r$ )&STTK   enFzgpt-4o-mini-transcribeN)
r#   r%   r"   r(   r&   r)   base_urlapi_keyclientuse_realtimec       
        $  > [         TU ]  [        R                  " XSS9S9  U(       a  Sn[	        U5      (       d  SSSSS	.n[        [        U5      UUUUS
9U l        [	        U5      (       a  X`R                  l        [	        U5      (       a  U(       d  [        S5      eU	=(       dx    [        R                  " S[	        U5      (       a  UOS[	        U5      (       a  UOS[        R                  " [        R                  " SSSSS9S[        R                  " SSSS9S9S9U l        [         R"                  [$           " 5       U l        SU l        [*        R,                  [.        R0                     " [2        U R4                  U R6                  S9U l        g)a  
Create a new instance of OpenAI STT.

Args:
    language: The language code to use for transcription (e.g., "en" for English).
    detect_language: Whether to automatically detect the language.
    model: The OpenAI model to use for transcription.
    prompt: Optional text prompt to guide the transcription. Only supported for whisper-1.
    turn_detection: When using realtime transcription, this controls how model detects the user is done speaking.
        Final transcripts are generated only after the turn is over. See: https://platform.openai.com/docs/guides/realtime-vad
    noise_reduction_type: Type of noise reduction to apply. "near_field" or "far_field"
        This isn't needed when using LiveKit's noise cancellation.
    base_url: Custom base URL for OpenAI API.
    api_key: Your OpenAI API key. If not provided, will use the OPENAI_API_KEY environment variable.
    client: Optional pre-configured OpenAI AsyncClient instance.
    use_realtime: Whether to use the realtime transcription API. (default: False)
F)	streaminginterim_resultsaligned_transcript)capabilities 
server_vadr   r   i^  )type	thresholdprefix_padding_mssilence_duration_ms)r#   r%   r"   r(   r&   zYOpenAI API key is required, either as argument or set OPENAI_API_KEY environment variabler   N      .@      @connectreadwritepoolT2   x   )max_connectionsmax_keepalive_connectionskeepalive_expiry)timeoutfollow_redirectslimits)max_retriesr8   r7   http_client)max_session_duration
connect_cbclose_cb)super__init__r   STTCapabilitiesr   r   r   _optsr)   
ValueErroropenaiAsyncClienthttpxTimeoutLimits_clientweakrefWeakSetSpeechStream_streams_sessionr   ConnectionPoolaiohttpClientWebSocketResponse_max_session_duration_connect_ws	_close_ws_pool)selfr#   r%   r"   r(   r&   r)   r7   r8   r9   r:   	__class__s              r2   r[   STT.__init__L   sm   @ 	,,&Y^ 	 	

 H''$ %('*	N !!(++)

 ()).BJJ+GW7 
  
!3!3'00Gd!)(!3!3X))dCcR!%||$&.0%(	"
  576:))'*I*IJ!6''^^

r1   c                .    U R                   R                  $ N)r]   r"   rq   s    r2   r"   	STT.model   s    zzr1   c                `    U R                   R                  R                  R                  S5      $ )Nutf-8)rd   	_base_urlnetlocdecoderv   s    r2   providerSTT.provider   s#    ||%%,,33G<<r1   )r#   r%   r"   r(   r&   r)   azure_endpointazure_deploymentapi_versionr8   azure_ad_tokenazure_ad_token_providerorganizationprojectr7   r:   rR   c                    [         R                  " SUUUU	U
UUUUU(       a  UO[        R                  " SSSSS9S9n[	        U UUUUUUUS9$ )a  
Create a new instance of Azure OpenAI STT.

This automatically infers the following arguments from their corresponding environment variables if they are not provided:
- `api_key` from `AZURE_OPENAI_API_KEY`
- `organization` from `OPENAI_ORG_ID`
- `project` from `OPENAI_PROJECT_ID`
- `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN`
- `api_version` from `OPENAI_API_VERSION`
- `azure_endpoint` from `AZURE_OPENAI_ENDPOINT`
r   rF   rG   rH   )rU   r   r   r   r8   r   r   r   r   r7   rR   )r#   r%   r"   r(   r&   r)   r9   r:   )r_   AsyncAzureOpenAIra   rb   r4   )r#   r%   r"   r(   r&   r)   r   r   r   r8   r   r   r   r   r7   r:   rR   azure_clients                     r2   
with_azureSTT.with_azure   st    B ..)-#)$;% t#SsK
  +)!5%	
 		
r1   zwhisper-large-v3-turbo)r"   r8   r7   r9   r#   r%   r(   c                    [        U5      (       a  UO[        R                  R                  S5      nU(       d  [	        S5      e[        U5      (       d  Sn[        U UUUUUUSS9$ )z
Create a new instance of Groq STT.

``api_key`` must be set to your Groq API key, either using the argument or by setting
the ``GROQ_API_KEY`` environmental variable.
GROQ_API_KEYzGroq API key is requiredzhttps://api.groq.com/openai/v1Fr"   r8   r7   r9   r#   r%   r(   r:   r   osenvirongetr^   r4   )r"   r8   r7   r9   r#   r%   r(   groq_api_keys           r2   	with_groqSTT.with_groq   sf    " #+7"3"3w9W788!!7H +	
 		
r1   z0https://oai.endpoints.kepler.ai.cloud.ovh.net/v1c                    [        U5      (       a  UO[        R                  R                  S5      nU(       d  [	        S5      e[        U UUUUUUSS9$ )z
Create a new instance of OVHcloud AI Endpoints STT.

``api_key`` must be set to your OVHcloud AI Endpoints API key, either using the argument or by setting
the ``OVHCLOUD_API_KEY`` environmental variable.
OVHCLOUD_API_KEYz)OVHcloud AI Endpoints API key is requiredFr   r   )r"   r8   r7   r9   r#   r%   r(   ovhcloud_api_keys           r2   with_ovhcloudSTT.with_ovhcloud
  sX    " '/w&7&77RZZ^^L^=_HII$+	
 		
r1   )r#   conn_optionsc                   [        U5      (       a  [        U5      U R                  l        [	        U U R
                  US9nU R                  R                  U5        U$ )N)r   rL   r   )r   r   r]   r#   rg   rp   rh   add)rq   r#   r   streams       r2   r   
STT.stream*  sP     H".x"8DJJ%

 	&!r1   )r"   r#   r%   r(   r&   r)   c               4   [        U5      (       a  XR                  l        [        U5      (       a  [        U5      U R                  l        [        U5      (       a*  X0R                  l        [        S5      U R                  l        [        U5      (       a  X@R                  l        [        U5      (       a  XPR                  l        [        U5      (       a  X`R                  l        U R                   H$  n[        U5      (       d  M  UR                  US9  M&     g)aT  
Update the options for the speech stream. Most options are updated at the
connection level. SpeechStreams will be recreated when options are updated.

Args:
    language: The language to transcribe in.
    detect_language: Whether to automatically detect the language.
    model: The model to use for transcription.
    prompt: Optional text prompt to guide the transcription. Only supported for whisper-1.
    turn_detection: When using realtime, this controls how model detects the user is done speaking.
    noise_reduction_type: Type of noise reduction to apply. "near_field" or "far_field"
r@   r#   N)r   r]   r"   r   r#   r%   r(   r&   r)   rh   update_options)rq   r"   r#   r%   r(   r&   r)   r   s           r2   r   STT.update_options:  s    , E??$JJH".x"8DJJO$$)8JJ&".r"2DJJF &JJN##(6JJ%()).BJJ+mmF!!%%x%8 $r1   c                  #    [        U R                  R                  5      (       a  U R                  R                  OSnSU R                  R                  0nU(       a  X#S'   U R                  R                  (       a#  U R                  R                  R                  US'   S[
        S.UU R                  R                  S.nU R                  R                  (       a  SU R                  R                  0US	'   S
SSU0S.S.nSS0nSSU R                  R                   3S.n[        U R                  R                  5      R                  S5       S[        U5       3nUR                  S5      (       a  UR                  SSS5      nU R!                  5       n	["        R$                  " U	R'                  XS9U5      I S h  vN n
U
R)                  U5      I S h  vN   U
$  N N7f)Nr@   r"   r(   r#   z	audio/pcm)rB   rate)formattranscriptionr&   rB   noise_reductionzsession.updater   inputrB   audio)rB   sessionintentzLiveKit AgentszBearer )z
User-AgentAuthorization/z
/realtime?httpwsr   )headers)r   r]   r(   r"   r#   SAMPLE_RATEr&   r)   rd   r8   strr7   rstripr   
startswithreplace_ensure_sessionasynciowait_for
ws_connect	send_json)rq   rR   r(   transcription_configinput_configrealtime_configquery_paramsr   urlr   r   s              r2   rn   STT._connect_wsb  s    &.tzz/@/@&A&A""rTZZ%%0
 -3*::/3zz/B/B/K/K , $# 2"jj77(
 ::**/5tzz7V7V.WL*+ %'\+
 o(
 +&t||';';&<=
 T\\**+22378
9\CZB[\>>&!!++fdA.C&&(##G$6$6s$6$LgVVll?+++	 W+s$   F;G!=G>G!GG!G!c                @   #    UR                  5       I S h  vN   g  N7fru   )close)rq   r   s     r2   ro   STT._close_ws  s     hhjs   c                    U R                   (       d#  [        R                  R                  5       U l         U R                   $ ru   )ri   r   http_contexthttp_sessionrv   s    r2   r   STT._ensure_session  s)    }}!..;;=DM}}r1   r   c                 #     [        U5      (       a  [        U5      U R                  l        [        R
                  " U5      R                  5       n[        U R                  R                  5      (       a  U R                  R                  O[        R                  nSnU R                  R                  S:X  a  SnU R                  R                  R                  R                  SUS4U R                  R                  U R                  R                  (       a   U R                  R                  R                  OSUU[        R                   " SUR"                  S9S	9I S h  vN n[$        R&                  " UR(                  U R                  R                  S
9n[+        U[,        5      (       a+  UR                  (       a  [        UR                  5      Ul        [$        R.                  " [$        R0                  R2                  U/S9$  N! [        R4                   a    [5        5       S e[        R6                   a:  n	[7        U	R8                  U	R:                  U	R<                  U	R>                  S9S eS n	A	f[@         a  n	[C        5       U	eS n	A	ff = f7f)Njsonz	whisper-1verbose_jsonzfile.wavz	audio/wavr@      )rI   )filer"   r#   r(   response_formatrR   textr#   rB   alternatives)status_code
request_idbody)"r   r   r]   r#   r   combine_audio_framesto_wav_bytesr(   r_   omitr"   rd   r   transcriptionscreatera   rb   rR   r   
SpeechDatar   
isinstancer   SpeechEventSpeechEventTypeFINAL_TRANSCRIPTr   r   messager   r   r   	Exceptionr	   )
rq   bufferr#   r   datar(   r   respsdes
             r2   _recognize_implSTT._recognize_impl  s    (	.!!&28&<

#++F3@@BD*24::3D3D*E*ETZZ&&6;;FFzz;.'++::AA
 jj&&9=9L9L,,55RT &b,2F2FG B  D TYY9L9LMB$ 455$--*4==9??((99 T #, %% 	.!#-$$ 	 		q}}TUTZTZ  	.$&A-	.sH   JEH HB$H JH 2J
85I--J
:JJ

J)rd   r]   rp   ri   rh   )r#   r   r%   r$   r"   r!   r(   r'   r&    NotGivenOr[SessionTurnDetection]r)   r'   r7   r'   r8   r'   r9   openai.AsyncClient | Noner:   r$   )returnr   )$r#   r   r%   r$   r"   r!   r(   r'   r&   r   r)   r'   r   
str | Noner   r   r   r   r8   r   r   r   r   z AsyncAzureADTokenProvider | Noner   r   r   r   r7   r   r:   r$   rR   zhttpx.Timeout | Noner   r4   )r"   zGroqAudioModels | strr8   r'   r7   r'   r9   r   r#   r   r%   r$   r(   r'   r   r4   )r"   r   r8   r'   r7   r   r9   r   r#   r   r%   r$   r(   r'   r   r4   )r#   r'   r   r
   r   rg   )r"   z-NotGivenOr[STTModels | GroqAudioModels | str]r#   r'   r%   zNotGivenOr[bool]r(   r'   r&   r   r)   r'   r   None)rR   floatr   aiohttp.ClientWebSocketResponser   r   r   r   )r   zaiohttp.ClientSession)r   r   r#   r'   r   r
   r   zstt.SpeechEvent)r+   r,   r-   r.   r   r[   propertyr"   r}   staticmethodr   r   r   r   r   r   rn   ro   r   r   r0   __classcell__rr   s   @r2   r4   r4   K   s     %!9"+;D09$-#,,0"U
 U
 	U

 U
  U
 9U
 .U
 "U
 !U
 *U
 U
 U
n     = =   %!9"+;D09%)'+"&"%)DH#'"#"(,%9
9
 9
 	9

  9
 99
 .9
 #9
 %9
  9
 9
 #9
 "B9
 !9
 9
  !9
" #9
$ &%9
& 
'9
 9
v  (@#,$-,0 %"+ 
$ 
 ! 
 "	 

 * 
  
  
   
 
 
  
D  .#,J,0 %"+

 !
 	

 *
 
 
  
 

 
D %.*E	 " (	
 
& @I$-,5"+;D09&9 =&9 "	&9
 *&9  &9 9&9 .&9 
&9P.` %.	/./. "	/.
 (/. 
/. /.r1   r4   c                  ~   ^  \ rS rSr        SU 4S jjr    SS jr\R                  " \S9S	S j5       r	Sr
U =r$ )
rg   i  c                  > [         TU ]  X[        S9  X0l        UR                  R
                  U l        SU l        [        R                  " 5       U l
        g )N)r   r   sample_rater@   )rZ   r[   r   rp   r]   r#   	_language_request_idr   Event_reconnect_event)rq   r   r   rL   rr   s       r2   r[   SpeechStream.__init__  sC     	SU
++ 'r1   c                   [        U5      U l        U R                  R                  5         U R                  R                  5         g ru   )r   r   rp   
invalidater   set)rq   r#   s     r2   r   SpeechStream.update_options  s2    
 &h/

!!#r1   r   c                  ^ ^
#    Sm
[         R                  " [        S9SU
U 4S jj5       n[         R                  " [        S9SU
U 4S jj5       n Sm
T R                  R	                  T R
                  R                  S9 IS h  vN nT R                  T R                  R                  T R                  R                  5        [        R                  " U" U5      5      [        R                  " U" U5      5      /n[        R                  " U6 n[        R                  " T R                  R                  5       5      n [        R                  " XV4[        R                  S9I S h  vN u  pxU H  n	X:w  d  M
  U	R!                  5         M     Xg;  a\   [         R"                  R$                  " / UQUP76 I S h  vN   UR'                  5         UR)                  5         S S S 5      IS h  vN   g T R                  R+                  5         [         R"                  R$                  " / UQUP76 I S h  vN   UR'                  5         UR)                  5         S S S 5      IS h  vN   GM   GN GN N N NA! [         R"                  R$                  " / UQUP76 I S h  vN    UR'                  5         UR)                  5         f = f Nc! , IS h  vN  (       d  f       Nx= f7f)NFr   c                  >#    [         R                  R                  [        [        [        S-  S9nTR
                    S h  vN n/ n[        U[        R                  5      (       a9  UR                  UR                  UR                  R                  5       5      5        O:[        UTR                  5      (       a  UR                  UR                  5       5        U H\  nS[        R                   " UR                  R                  5       5      R#                  S5      S.nU R%                  U5      I S h  vN   M^     GM   N N
 Smg 7f)N   )r   num_channelssamples_per_channelzinput_audio_buffer.appendry   r   T)r   r   AudioByteStreamr   NUM_CHANNELS	_input_chr   r   
AudioFrameextendrK   r   tobytes_FlushSentinelflushbase64	b64encoder|   r   )r   audio_bstreamr   framesframeencoded_frame
closing_wsrq   s         r2   	send_task$SpeechStream._run.<locals>.send_task  s    
 "KK77')$/2$5 8 M #nn 6d/1dCNN33MM-"5"5dii6G6G6I"JKd&9&9::MM-"5"5"78#E ;!'!1!1%**2D2D2F!G!N!Nw!W%M ,,}555 $6 6 - Js;   ;EEE EC.E4E5E EEEc                  >#    SnSn[         R                   " 5       n0 n U R                  5       I S h  vN nUR                  [        R                  R
                  [        R                  R                  [        R                  R                  4;   aB  T(       a  g [        SU R                  =(       d    SSUR                  < SUR                  < 3S9eUR                  [        R                  R                  :w  a$  [        R                  " SUR                  5        GM   [        R                   " UR                  5      nUR#                  S	5      nUS
:X  a,  UR#                  SS5      nUR#                  SS5      n	SU	0XH'   GOUS:X  a2  UR#                  SS5      nUR#                  SS5      n
X;   a  XU   S'   GOUS:X  a  UR#                  SS5      nU(       a  X-  n[         R                   " 5       U-
  [$        :  ay  TR&                  R)                  [*        R,                  " [*        R.                  R0                  [*        R2                  " UTR4                  S9/S95        [         R                   " 5       nGOUS:X  Ga  SnUR#                  SS5      nUR#                  SS5      nU(       ad  TR&                  R)                  [*        R,                  " [*        R.                  R6                  [*        R2                  " UTR4                  S9/S95        SnX;   a8  XH   nUR#                  SS5      nUR#                  SS5      nUU:  a  UU-
  S-  nXH	 UR#                  S0 5      nUR#                  SS5      nUR#                  SS5      nTR&                  R)                  [*        R,                  " [*        R.                  R8                  / [*        R:                  " UUUS9S95        [         R                   " 5       U-
  [<        :  aL  [        R>                  " S5        TR@                  RC                  U 5        TRD                  RG                  5         g GM2   GN! [H         a    [        RJ                  " S5         N(f = f7f) Nr@   r   z2OpenAI Realtime STT connection closed unexpectedlyz	msg.data=z msg.extra=)r   r   r   z!unexpected OpenAI message type %srB   z!input_audio_buffer.speech_starteditem_idaudio_start_msstart_msz!input_audio_buffer.speech_stoppedaudio_end_msend_msz1conversation.item.input_audio_transcription.deltadeltar   r   z5conversation.item.input_audio_transcription.completed
transcriptg        g     @@usageinput_tokensoutput_tokens)audio_durationr!  r"  )rB   r   recognition_usagez-resetting Realtime STT session due to timeoutz failed to process OpenAI message)&timereceiverB   rk   	WSMsgTypeCLOSEDCLOSECLOSINGr   
close_coder   extraTEXTr   warningr   loadsr   _delta_transcript_interval	_event_chsend_nowaitr   r   r   INTERIM_TRANSCRIPTr   r   r   RECOGNITION_USAGERecognitionUsagerm   inforp   remover   r   r   	exception)r   current_textlast_interim_atconnected_atitem_audio_timingmsgr   msg_typer  r  r  r  r  r#  timingr  r  r   r!  r"  r  rq   s                       r2   	recv_task$SpeechStream._run.<locals>.recv_task  s     L%&O99;L;=JJL(88%%,,%%++%%--  
 " ) T$&MM$7R ){,CII<8  88w00555NN#FQTI::chh/D#xx/H#FF"&((9b"9)-2BA)F6@.5Q)2!%HH"&((9b"9'+xx'B"7COg6x@!%XX $" 5 (1L#yy{_<?YY $ : :$'OO-0-@-@-S-S,/NN5A9=-.6*%&
!" 37))+!%\\')%)XXlB%?
"&((9b"9% NN66 #),)<)<)M)M(+1;59^^)*2&!"
 *-"7%6%?F'-zz*a'@H%+ZZ!%<F%02882Cv1M 1 : !%" 5',yy'C(-		/1(E22OO%(%8%8%J%J-/252F2F3A1=2?3"
  99;58MM"KK(WX JJ--b1 11557"M (N ! I$$%GHIs[   0Q;QC-Q;"A!Q Q;6Q ;Q;=B<Q 9Q;;GQ Q; Q85Q;7Q88Q;)rR   )return_whenr   )r   log_exceptionsr   rp   
connection_conn_optionsrR   _report_connection_acquiredlast_acquire_timelast_connection_reusedr   create_taskgatherr   waitFIRST_COMPLETEDresultaiogracefully_cancelcancelr8  clear)rq   r  r@  r   taskstasks_groupwait_reconnect_taskdone_taskr  s   `         @r2   _runSpeechStream._run  sU    
			V	,	 
-	4 
		V	,o	I 
-o	Ib Jzz,,T5G5G5O5O,PPTV00JJ00$**2S2S ''	"6''	"6 &nne4&-&9&9$:O:O:T:T:V&W#,$+LL$:$+$;$;% GD !%6 KKM !% +6  ))55RuR>QRRR&&())+9 QPP0 ))//1))55RuR>QRRR&&())+9 QP P S5 Q4 S%))55RuR>QRRR&&())+9 QPPPs   A?KI"KB4K <'I.#I%$I.6I.%K 5I(6$K K%I*&K+I.%K *I,+$K KJ>K%I.(K *K,K .&J;J
&J;;K >K KK	KK)r   rp   r   r   )r   r4   r   r
   rL   z5utils.ConnectionPool[aiohttp.ClientWebSocketResponse]r   r   )r#   r   r   r   )r   r   )r+   r,   r-   r.   r[   r   r   rC  r   rX  r0   r   r   s   @r2   rg   rg     sk    0 0 (	0
 D0 
0$ $ 
	$ (n, )n,r1   rg   )3
__future__r   r   r  r   r   r%  re   dataclassesr   typingr   urllib.parser   rk   ra   r_   livekitr   livekit.agentsr   r	   r
   r   r   r   r   r   livekit.agents.typesr   r   livekit.agents.utilsr   r   openai.types.audior   =openai.types.beta.realtime.transcription_session_update_paramr   logr   modelsr   r   r   rm   r0  r   r  r   r4   rg   r*   r1   r2   <module>rf     s    #    	   !  "    	 	 	 7 3  . ,      6 6 6.#'' .DG,3## G,r1   