o
    i*_                     @  s  U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZmZmZmZ d dlZd dlmZ d dlmZ ddlmZmZ dd	lmZmZmZmZmZ dd
lmZ ddlm Z m!Z!m"Z"m#Z#m$Z$ ddlm%Z% ddl&m'Z' ed Z(ed Z)ed Z*ed Z+G dd deddZ,G dd deddZ-G dd deddZ.G dd deddZ/ed Z0G dd deddZ1e1e2B Z3d9d$d%Z4d:d)d*Z5e(e)B e*B e+B ed+ B Z6ed, Z7d,Z8d-e9d.< d/Z:d0e9d1< d2Z;eG d3d4 d4Z<G d5d6 d6ej=Z=G d7d8 d8ej>Z>dS );    )annotationsN)	dataclassreplace)AnyLiteral	TypedDictoverload)Required)rtc   )sttutils)APIConnectionErrorAPIErrorAPIStatusErrorAPITimeoutErrorcreate_api_error_from_httplogger)DEFAULT_API_CONNECT_OPTIONS	NOT_GIVENAPIConnectOptions
NotGivenOrTimedString)is_given   )create_access_token)zdeepgram/flux-generalzdeepgram/flux-general-enzdeepgram/nova-3zdeepgram/nova-3-medicalzdeepgram/nova-2zdeepgram/nova-2-medicalz deepgram/nova-2-conversationalaizdeepgram/nova-2-phonecall)zcartesia/ink-whisper)zassemblyai/universal-streamingz+assemblyai/universal-streaming-multilingual)zelevenlabs/scribe_v2_realtimec                   @  s   e Zd ZU ded< ded< dS )CartesiaOptionsfloat
min_volumemax_silence_duration_secsN__name__
__module____qualname____annotations__ r&   r&   ^/var/www/html/livekit_bhavya/venv/lib/python3.10/site-packages/livekit/agents/inference/stt.pyr   5   s   
 r   F)totalc                   @  sf   e Zd ZU ded< ded< ded< ded< ded< ded	< d
ed< ded< ded< ded< ded< dS )DeepgramOptionsboolfiller_wordsinterim_resultsintendpointing	punctuatesmart_formatzlist[tuple[str, float]]keywordszstr | list[str]keytermprofanity_filternumeralsmip_opt_out
vad_eventsNr!   r&   r&   r&   r'   r)   :   s   
 r)   c                   @  s6   e Zd ZU ded< ded< ded< ded< ded	< d
S )AssemblyaiOptionsr*   format_turnsr    end_of_turn_confidence_thresholdr-   &min_end_of_turn_silence_when_confidentmax_turn_silencez	list[str]keyterms_promptNr!   r&   r&   r&   r'   r7   H   s   
 r7   c                   @  s>   e Zd ZU ded< ded< ded< ded< ded	< ded
< dS )ElevenlabsOptionszLiteral['manual', 'vad']commit_strategyr*   include_timestampsr   vad_silence_threshold_secsvad_thresholdr-   min_speech_duration_msmin_silence_duration_msNr!   r&   r&   r&   r'   r=   P   s   
 r=   )	multiendeesfrjaptzhhic                   @  s$   e Zd ZU dZded< 	 ded< dS )FallbackModelzA fallback model with optional extra configuration.

    Extra fields are passed through to the provider.

    Example:
        >>> FallbackModel(model="deepgram/nova-3", extra_kwargs={"keyterm": ["livekit"]})
    zRequired[str]modeldict[str, Any]extra_kwargsN)r"   r#   r$   __doc__r%   r&   r&   r&   r'   rM   \   s   
 rM   rN   strreturntuple[str, NotGivenOr[str]]c                 C  s:   t }| d }dkr| |d d  }| d | } | |fS )N:r   )r   rfind)rN   languageidxr&   r&   r'   _parse_model_stringo   s
   rZ   fallback+list[FallbackModelType] | FallbackModelTypelist[FallbackModel]c                   s0   d	dd t | tr fdd| D S  | gS )
NrN   FallbackModelTyperS   rM   c                 S  s$   t | trt| \}}t|dS | S )N)rN   )
isinstancerR   rZ   rM   )rN   name_r&   r&   r'   _make_fallbackz   s   

z+_normalize_fallback.<locals>._make_fallbackc                   s   g | ]} |qS r&   r&   .0mrb   r&   r'   
<listcomp>   s    z'_normalize_fallback.<locals>.<listcomp>)rN   r^   rS   rM   )r_   list)r[   r&   rf   r'   _normalize_fallbackw   s   


ri   auto	pcm_s16leSTTEncodingDEFAULT_ENCODINGi>  r-   DEFAULT_SAMPLE_RATEz&https://agent-gateway.livekit.cloud/v1c                   @  s^   e Zd ZU ded< ded< ded< ded< d	ed
< d	ed< d	ed< ded< ded< ded< dS )
STTOptionsNotGivenOr[STTModels | str]rN   NotGivenOr[str]rX   rl   encodingr-   sample_raterR   base_urlapi_key
api_secretrO   rP   zNotGivenOr[list[FallbackModel]]r[   NotGivenOr[APIConnectOptions]conn_optionsNr!   r&   r&   r&   r'   ro      s   
 ro   c                      sp  e Zd Zeeeeeeedeeed
dDddZeeeeeeedeeed
dEddZeeeeeeedeeed
dFddZeeeeeeedeeed
dGd"dZeeeeeeedeeed
dHd%dZefeeeeeedeeed
dI fd(dZedJd)d*ZedKd+d,Z	edKd-d.Z
dLd0d1Zed2dMd7d8Zeed9dNd<d=Zeed>dOd?d@Zed2dPdBdCZ  ZS )QSTTN)
rX   rt   rr   rs   ru   rv   http_sessionrP   r[   rx   rN   CartesiaModelsrX   rq   rt   rr   NotGivenOr[STTEncoding]rs   NotGivenOr[int]ru   rv   rz   aiohttp.ClientSession | NonerP   NotGivenOr[CartesiaOptions]r[   7NotGivenOr[list[FallbackModelType] | FallbackModelType]rx   rw   rS   Nonec       
         C     d S Nr&   selfrN   rX   rt   rr   rs   ru   rv   rz   rP   r[   rx   r&   r&   r'   __init__      zSTT.__init__DeepgramModelsNotGivenOr[DeepgramOptions]c       
         C  r   r   r&   r   r&   r&   r'   r      r   AssemblyAIModelsNotGivenOr[AssemblyaiOptions]c       
         C  r   r   r&   r   r&   r&   r'   r      r   ElevenlabsModelsNotGivenOr[ElevenlabsOptions]c       
         C  r   r   r&   r   r&   r&   r'   r      r   rR   NotGivenOr[dict[str, Any]]c       
         C  r   r   r&   r   r&   r&   r'   r      r   rp   fNotGivenOr[dict[str, Any] | CartesiaOptions | DeepgramOptions | AssemblyaiOptions | ElevenlabsOptions]c       
           sB  t  jtjdddddd t|r)t|tr)t|\}}|}t|r)t|s)|}t|r/|ntj	
dt}t|r<|n	tdtdd	}|sLtd
t|rR|n	tdtdd	}|sbtdt}t|
rlt|
}t||t|ru|ntt|r||nt|||t|	rt|	ni |t|r|ntd
| _|| _tjt  | _dS )a?  Livekit Cloud Inference STT

        Args:
            model (STTModels | str, optional): STT model to use, in "provider/model[:language]" format.
            language (str, optional): Language of the STT model.
            encoding (STTEncoding, optional): Encoding of the STT model.
            sample_rate (int, optional): Sample rate of the STT model.
            base_url (str, optional): LIVEKIT_URL, if not provided, read from environment variable.
            api_key (str, optional): LIVEKIT_API_KEY, if not provided, read from environment variable.
            api_secret (str, optional): LIVEKIT_API_SECRET, if not provided, read from environment variable.
            http_session (aiohttp.ClientSession, optional): HTTP session to use.
            extra_kwargs (dict, optional): Extra kwargs to pass to the STT model.
            fallback (FallbackModelType, optional): Fallback models - either a list of model names,
                a list of FallbackModel instances.
            conn_options (APIConnectOptions, optional): Connection options for request attempts.
        TwordF)	streamingr,   aligned_transcriptoffline_recognize)capabilitiesLIVEKIT_INFERENCE_URLLIVEKIT_INFERENCE_API_KEYLIVEKIT_API_KEY zUapi_key is required, either as argument or set LIVEKIT_API_KEY environmental variableLIVEKIT_INFERENCE_API_SECRETLIVEKIT_API_SECRETz[api_secret is required, either as argument or set LIVEKIT_API_SECRET environmental variable)
rN   rX   rr   rs   rt   ru   rv   rP   r[   rx   N)superr   r   STTCapabilitiesr   r_   rR   rZ   osenvirongetDEFAULT_BASE_URLgetenv
ValueErrorr   ri   ro   rm   rn   dictr   _opts_sessionweakrefWeakSetSpeechStream_streams)r   rN   rX   rt   rr   rs   ru   rv   rz   rP   r[   rx   parsed_modelparsed_languagelk_base_url
lk_api_keylk_api_secretfallback_models	__class__r&   r'   r      sd   %
c                 C  s   t |\}}| ||dS )zCreate a STT instance from a model string

        Args:
            model (str): STT model to use, in "provider/model[:language]" format

        Returns:
            STT: STT instance
        rN   rX   )rZ   )clsrN   
model_namerX   r&   r&   r'   from_model_string[  s   
zSTT.from_model_stringc                 C  s   t | jjr
| jjS dS )Nunknown)r   r   rN   r   r&   r&   r'   rN   h  s   z	STT.modelc                 C  s   dS )Nlivekitr&   r   r&   r&   r'   providerl  s   zSTT.provideraiohttp.ClientSessionc                 C  s   | j s	tj | _ | j S r   )r   r   http_contextrz   r   r&   r&   r'   _ensure_sessionp  s   zSTT._ensure_sessionrX   bufferutils.AudioBufferr   stt.SpeechEventc                  s
   t d)NzNLiveKit Inference STT does not support batch recognition, use stream() instead)NotImplementedError)r   r   rX   rx   r&   r&   r'   _recognize_implu  s   zSTT._recognize_impl)rX   rx   NotGivenOr[STTLanguages | str]r   c                C  s*   | j |d}t| ||d}| j| |S )z)Create a streaming transcription session.r   )r   optsrx   )_sanitize_optionsr   r   add)r   rX   rx   optionsstreamr&   r&   r'   r     s   z
STT.streamr   c                C  s>   t |r|| j_t |r|| j_| jD ]	}|j||d qdS )z!Update STT configuration options.r   N)r   r   rN   rX   r   update_options)r   rN   rX   r   r&   r&   r'   r     s   
zSTT.update_optionsro   c                C  s   t | j}t|r||_|S )zFCreate a sanitized copy of options with language override if provided.)r   r   r   rX   )r   rX   r   r&   r&   r'   r     s   
zSTT._sanitize_options)rN   r{   rX   rq   rt   rq   rr   r|   rs   r}   ru   rq   rv   rq   rz   r~   rP   r   r[   r   rx   rw   rS   r   )rN   r   rX   rq   rt   rq   rr   r|   rs   r}   ru   rq   rv   rq   rz   r~   rP   r   r[   r   rx   rw   rS   r   )rN   r   rX   rq   rt   rq   rr   r|   rs   r}   ru   rq   rv   rq   rz   r~   rP   r   r[   r   rx   rw   rS   r   )rN   r   rX   rq   rt   rq   rr   r|   rs   r}   ru   rq   rv   rq   rz   r~   rP   r   r[   r   rx   rw   rS   r   )rN   rR   rX   rq   rt   rq   rr   r|   rs   r}   ru   rq   rv   rq   rz   r~   rP   r   r[   r   rx   rw   rS   r   )rN   rp   rX   rq   rt   rq   rr   r|   rs   r}   ru   rq   rv   rq   rz   r~   rP   r   r[   r   rx   rw   rS   r   )rN   rR   rS   ry   )rS   rR   )rS   r   )r   r   rX   rq   rx   r   rS   r   )rX   r   rx   r   rS   r   rN   rp   rX   r   rS   r   )rX   r   rS   ro   )r"   r#   r$   r   r   r   classmethodr   propertyrN   r   r   r   r   r   r   r   __classcell__r&   r&   r   r'   ry      s    b
	ry   c                      sN   e Zd Zd fd	d
ZeeddddZdddZd ddZd!ddZ  Z	S )"r   r   ry   r   ro   rx   r   rS   r   c                  sN   t  j|||jd || _| | _ttd| _	t
 | _d| _d| _d S )N)r   rx   rs   stt_request_Fr   )r   r   rs   r   r   r   rR   r   	shortuuid_request_idasyncioEvent_reconnect_event	_speaking_speech_duration)r   r   r   rx   r   r&   r'   r     s   


zSpeechStream.__init__r   rN   rp   rX   r   c                C  s.   t |r|| j_t |r|| j_| j  dS )z'Update streaming transcription options.N)r   r   rN   rX   r   set)r   rN   rX   r&   r&   r'   r     s
   zSpeechStream.update_optionsc           
   	     s  d t jtdd fdd}t jtdd fd	d
}d}	 z I dH }t||t||g}tj| }tj }zdtj||ftj	dI dH \}}|D ]
}	|	|kra|	
  qW||vrW t jjg ||R  I dH  |  |  W |dur| I dH  dS dS j  W t jjg ||R  I dH  |  |  nt jjg ||R  I dH  |  |  w W |dur| I dH  n|dur| I dH  w w q")z&Main loop for streaming transcription.Fr   wsaiohttp.ClientWebSocketResponserS   r   c           	        s   t jjjjdjjd d}j2 zO3 d H W }g }t|tjr+|	|
|j nt|jr8|	|  |D ]'} j|j7  _|j }t|d}d|d}| t|I d H  q:q6 d dd	i}| t|I d H  d S )
Nr      )rs   num_channelssamples_per_channelzutf-8input_audio)typeaudioTr   zsession.finalize)r   r   AudioByteStreamr   rs   	_input_chr_   r
   
AudioFrameextendpushdata_FlushSentinelflushr   durationtobytesbase64	b64encodedecodesend_strjsondumps)	r   audio_bstreamevframesframeaudio_bytesbase64_audio	audio_msgfinalize_msg
closing_wsr   r&   r'   	send_task  s4   

z$SpeechStream._run.<locals>.send_taskc                   s   	 |   I d H }|jtjjtjjtjjfv r$ sjjrd S t	dd|jtjj
kr3td|j qt|j}|d}|dkrCn4|dkrOj|dd	 n(|d
kr[j|dd	 n|dkr`n|dkren|dkrqtd|j td| q)NTz4LiveKit Inference STT connection closed unexpectedly)messagez0unexpected LiveKit Inference STT message type %sr   zsession.createdinterim_transcriptF)is_finalfinal_transcriptzsession.finalizedzsession.closederrorz&LiveKit Inference STT returned error: z:received unexpected message from LiveKit Inference STT: %s)receiver   aiohttp	WSMsgTypeCLOSEDCLOSECLOSINGr   closedr   TEXTr   warningr   loadsr   r   _process_transcriptr   )r   msgr   msg_typer   r&   r'   	recv_task  sD   
z$SpeechStream._run.<locals>.recv_taskNT)return_when)r   r   rS   r   )r   log_exceptionsr   _connect_wsr   create_taskgatherr   waitFIRST_COMPLETEDresultaiogracefully_cancelcancel	exceptioncloseclear)
r   r   r  r   taskstasks_groupwait_reconnect_taskdonera   taskr&   r   r'   _run  s\   

 &


zSpeechStream._runr   c              
     s  dt | jj| jj| jjdi}| jjr!| jjdkr!| jj|d< | jjr-| jj|d d< | jjr@dd | jjD }d|i|d	< | jjrQ| jjj	| jjj
d
|d< | jj}|dra|ddd}ddt| jj| jj i}z*t| jj| d| jj |d| jj	I dH }d|d< |t|I dH  W |S  tjy } z	t|j|jd|d}~w tjy } ztd|d}~w tj y } zt!d|d}~ww )z/Connect to the LiveKit Inference STT WebSocket.settings)rs   rr   extrarj   rN   rX   c                 S  s"   g | ]}| d | ddqS )rN   rP   )rN   r"  )r   rc   r&   r&   r'   rg   D  s    z,SpeechStream._connect_ws.<locals>.<listcomp>modelsr[   )timeoutretries
connection)zhttp://zhttps://httpr   r   AuthorizationzBearer z/stt?model=)headersNzsession.creater   )statusz+LiveKit Inference STT connection timed out.z*failed to connect to LiveKit Inference STT)"rR   r   rs   rr   rP   rN   rX   r[   rx   r$  	max_retryrt   
startswithr   r   ru   rv   r   wait_forr   
ws_connect_conn_optionsr   r   r   r   ClientResponseErrorr   r   r*  TimeoutErrorr   ClientConnectorErrorr   )r   paramsr#  rt   r)  r   er&   r&   r'   r  3  sZ   





zSpeechStream._connect_wsr   r   r   r*   c              	     sx  | d j}| dd}| d jjpd}| dg pg }|s%|s%d S  js9d _tjtjjd} j	
| tj| j| d	d
  j| d	d
 | dd
 | dd| fdd|D d}|r jd
kr j	
tjtjj|tj jdd d
 _tjtjj||gd}	 j	
|	  jrd _tjtjjd}
 j	
|
 d S d S tjtjj||gd}	 j	
|	 d S )N
request_id
transcriptr   rX   rE   wordsT)r   startr   r   
confidenceg      ?c                   sL   g | ]"}t |d d|dd j |dd j  j|dddqS )r   r   r8  r   endr9  g        )text
start_timeend_timestart_time_offsetr9  )r   r   r>  )rd   r   r   r&   r'   rg   {  s    

z4SpeechStream._process_transcript.<locals>.<listcomp>)rX   r<  r=  r9  r;  r7  )audio_duration)r   r5  recognition_usage)r   r5  alternativesF)r   r   r   rX   r   r   SpeechEventSpeechEventTypeSTART_OF_SPEECH	_event_chsend_nowait
SpeechDatar>  r   RECOGNITION_USAGERecognitionUsageFINAL_TRANSCRIPTEND_OF_SPEECHINTERIM_TRANSCRIPT)r   r   r   r5  r;  rX   r7  start_eventspeech_dataevent	end_eventr&   r   r'   r	  g  sb   


	z SpeechStream._process_transcript)r   ry   r   ro   rx   r   rS   r   r   )rS   r   )rS   r   )r   r   r   r*   rS   r   )
r"   r#   r$   r   r   r   r   r  r	  r   r&   r&   r   r'   r     s    

n4r   )rN   rR   rS   rT   )r[   r\   rS   r]   )?
__future__r   r   r   r   r   r   dataclassesr   r   typingr   r   r   r   r   typing_extensionsr	   r   r
   r   r   r   _exceptionsr   r   r   r   r   logr   typesr   r   r   r   r   r   _utilsr   r   r{   r   r   r   r)   r7   r=   STTLanguagesrM   rR   r^   rZ   ri   	STTModelsrl   rm   r%   rn   r   ro   ry   r   r&   r&   r&   r'   <module>   sl    
	

  