o
    iW                     @  s  U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZmZmZmZ d dlZd dlmZ ddlmZmZmZ ddlmZmZmZmZmZ dd	lmZ dd
lmZm Z m!Z!m"Z" ddlm#Z# ddl$m%Z% ed Z&ed Z'ed Z(ed Z)ed Z*e&e'B e(B e)B e*B Z+d9ddZ,G dd deZ-e-e.B Z/d:ddZ0G d d! d!ed"d#Z1G d$d% d%ed"d#Z2G d&d' d'ed"d#Z3G d(d) d)ed"d#Z4G d*d+ d+ed"d#Z5ed, Z6d,Z7d-e8d.< d/Z9d0e8d1< d2Z:eG d3d4 d4Z;G d5d6 d6ej<Z<G d7d8 d8ej=Z=dS );    )annotationsN)	dataclassreplace)AnyLiteral	TypedDictoverload)NotRequired   )tokenizettsutils)APIConnectionErrorAPIErrorAPIStatusErrorAPITimeoutErrorcreate_api_error_from_http)logger)DEFAULT_API_CONNECT_OPTIONS	NOT_GIVENAPIConnectOptions
NotGivenOr)is_given   )create_access_token)cartesiazcartesia/sonic-3zcartesia/sonic-2zcartesia/sonic-turbozcartesia/sonic)deepgramzdeepgram/aurazdeepgram/aura-2)
elevenlabszelevenlabs/eleven_flash_v2zelevenlabs/eleven_flash_v2_5zelevenlabs/eleven_turbo_v2zelevenlabs/eleven_turbo_v2_5z!elevenlabs/eleven_multilingual_v2)rimezrime/arcanazrime/mistv2)inworldzinworld/inworld-tts-1.5-maxzinworld/inworld-tts-1.5-minizinworld/inworld-tts-1-maxzinworld/inworld-tts-1modelstrreturntuple[str, str | None]c                 C  s:   d}|  d }dkr| |d d }| d| } | |fS )zParse a model string into a model and voice
    Args:
        model (str): Model string to parse
    Returns:
        tuple[str, str | None]: Model and voice (voice is None if not specified)
    N:r   )rfind)r    voiceidx r)   ^/var/www/html/livekit_bhavya/venv/lib/python3.10/site-packages/livekit/agents/inference/tts.py_parse_model_string?   s
   r+   c                   @  s.   e Zd ZU dZded< 	 ded< 	 ded< dS )FallbackModelzA fallback model with optional extra configuration.

    Extra fields are passed through to the provider.

    Example:
        >>> FallbackModel(model="cartesia/sonic", voice="")
    r!   r    r'   zNotRequired[dict[str, Any]]extra_kwargsN)__name__
__module____qualname____doc____annotations__r)   r)   r)   r*   r,   M   s   
 r,   fallback+list[FallbackModelType] | FallbackModelTypelist[FallbackModel]c                   s0   d	dd t | tr fdd| D S  | gS )
Nr    FallbackModelTyper"   r,   c                 S  s2   t | trt| \}}t||r|dS ddS | S )N r    r'   )
isinstancer!   r+   r,   )r    
model_namer'   r)   r)   r*   _make_fallbackf   s   
z+_normalize_fallback.<locals>._make_fallbackc                   s   g | ]} |qS r)   r)   .0mr;   r)   r*   
<listcomp>m   s    z'_normalize_fallback.<locals>.<listcomp>)r    r6   r"   r,   )r9   list)r3   r)   r?   r*   _normalize_fallbackc   s   


rB   c                   @  s&   e Zd ZU ded< ded< ded< dS )CartesiaOptionsr!   emotionz!Literal['slow', 'normal', 'fast']speedfloatvolumeNr.   r/   r0   r2   r)   r)   r)   r*   rC   r   s   
 rC   F)totalc                   @     e Zd ZdS )DeepgramOptionsNr.   r/   r0   r)   r)   r)   r*   rK   x       rK   c                   @  s   e Zd ZU ded< ded< dS )ElevenlabsOptionsintinactivity_timeoutzLiteral['auto', 'off', 'on']apply_text_normalizationNrH   r)   r)   r)   r*   rN   |   s   
 rN   c                   @  rJ   )RimeOptionsNrL   r)   r)   r)   r*   rR      rM   rR   c                   @  rJ   )InworldOptionsNrL   r)   r)   r)   r*   rS      rM   rS   	pcm_s16leTTSEncodingDEFAULT_ENCODINGi]  rO   DEFAULT_SAMPLE_RATEz&https://agent-gateway.livekit.cloud/v1c                   @  sf   e Zd ZU ded< ded< ded< ded< ded	< d
ed< d
ed< d
ed< ded< ded< ded< dS )_TTSOptionsTTSModels | strr    NotGivenOr[str]r'   languagerU   encodingrO   sample_rater!   base_urlapi_key
api_secretzdict[str, Any]r-   zNotGivenOr[list[FallbackModel]]r3   NotGivenOr[APIConnectOptions]conn_optionsNrH   r)   r)   r)   r*   rX      s   
 rX   c                      s  e Zd ZeeeeeeeedeeeddOddZeeeeeeeedeeeddPddZeeeeeeeedeeeddQd dZeeeeeeeedeeeddRd#dZeeeeeeeedeeeddSd&dZeeeeeeeedeeeddTd)dZeeeeeeedeeeddU fd,dZedVd-d.ZedWd/d0Z	edWd1d2Z
dXd6d7ZdYd9d:ZdZd<d=Zd[d>d?Zeeeed@d\dBdCZedDd]dHdIZedDd^dKdLZd[dMdNZ  ZS )_TTSN)r'   r[   r\   r]   r^   r_   r`   http_sessionr-   r3   rb   r    CartesiaModelsr'   rZ   r[   r\   NotGivenOr[TTSEncoding]r]   NotGivenOr[int]r^   r_   r`   rd   aiohttp.ClientSession | Noner-   NotGivenOr[CartesiaOptions]r3   7NotGivenOr[list[FallbackModelType] | FallbackModelType]rb   ra   r"   Nonec                C     d S Nr)   selfr    r'   r[   r\   r]   r^   r_   r`   rd   r-   r3   rb   r)   r)   r*   __init__      zTTS.__init__DeepgramModelsNotGivenOr[DeepgramOptions]c                C  rl   rm   r)   rn   r)   r)   r*   rp      rq   ElevenlabsModelsNotGivenOr[ElevenlabsOptions]c                C  rl   rm   r)   rn   r)   r)   r*   rp      rq   
RimeModelsNotGivenOr[RimeOptions]c                C  rl   rm   r)   rn   r)   r)   r*   rp      rq   InworldModelsNotGivenOr[InworldOptions]c                C  rl   rm   r)   rn   r)   r)   r*   rp      rq   r!   NotGivenOr[dict[str, Any]]c                C  rl   rm   r)   rn   r)   r)   r*   rp      rq   rY   qNotGivenOr[dict[str, Any] | CartesiaOptions | DeepgramOptions | ElevenlabsOptions | RimeOptions | InworldOptions]c                  s^  t |r|nt}t jtjddd|dd t|tr-t|\}}|}|dur-t |s-|}t |r3|nt	j
dt}t |r@|n	t	dt	d	d
}|sPtdt |rV|n	t	dt	dd
}|sftdt}t |rpt|}t|||t |rz|nt||||t |
rt|
ni |t |r|ntd| _|	| _tjtj | j| jddd| _tjt   | _!dS )a  Livekit Cloud Inference TTS

        Args:
            model (TTSModels | str): TTS model to use, in "provider/model[:voice]" format
            voice (str, optional): Voice to use, use a default one if not provided
            language (str, optional): Language of the TTS model.
            encoding (TTSEncoding, optional): Encoding of the TTS model.
            sample_rate (int, optional): Sample rate of the TTS model.
            base_url (str, optional): LIVEKIT_URL, if not provided, read from environment variable.
            api_key (str, optional): LIVEKIT_API_KEY, if not provided, read from environment variable.
            api_secret (str, optional): LIVEKIT_API_SECRET, if not provided, read from environment variable.
            http_session (aiohttp.ClientSession, optional): HTTP session to use.
            extra_kwargs (dict, optional): Extra kwargs to pass to the TTS model.
            fallback (FallbackModelType, optional): Fallback models - either a list of model names,
                a list of FallbackModel instances.
            conn_options (APIConnectOptions, optional): Connection options for request attempts.
        TF)	streamingaligned_transcriptr   )capabilitiesr]   num_channelsNLIVEKIT_INFERENCE_URLLIVEKIT_INFERENCE_API_KEYLIVEKIT_API_KEYr7   zUapi_key is required, either as argument or set LIVEKIT_API_KEY environmental variableLIVEKIT_INFERENCE_API_SECRETLIVEKIT_API_SECRETz[api_secret is required, either as argument or set LIVEKIT_API_SECRET environmental variable)r    r'   r[   r\   r]   r^   r_   r`   r-   r3   rb   i,  )
connect_cbclose_cbmax_session_durationmark_refreshed_on_get)"r   rW   superrp   r   TTSCapabilitiesr9   r!   r+   osenvirongetDEFAULT_BASE_URLgetenv
ValueErrorr   rB   rX   rV   dictr   _opts_sessionr   ConnectionPoolaiohttpClientWebSocketResponse_connect_ws	_close_ws_poolweakrefWeakSetSynthesizeStream_streams)ro   r    r'   r[   r\   r]   r^   r_   r`   rd   r-   r3   rb   parsed_modelparsed_voicelk_base_url
lk_api_keylk_api_secretfallback_models	__class__r)   r*   rp     sn   (

c                 C  s$   t |\}}| ||r|dS tdS )zCreate a TTS instance from a model string

        Args:
            model (str): TTS model to use, in "provider/model[:voice_id]" format

        Returns:
            TTS: TTS instance
        r8   )r+   r   )clsr    r'   r)   r)   r*   from_model_string|  s   
zTTS.from_model_stringc                 C  s   | j jS rm   )r   r    ro   r)   r)   r*   r      s   z	TTS.modelc                 C  s   dS )Nlivekitr)   r   r)   r)   r*   provider  s   zTTS.providertimeoutrF   aiohttp.ClientWebSocketResponsec           	   
     s  |   }| jj}|dr|ddd}ddt| jj| jj i}d }zt	|j
| d| jj |d|I d H }W n8 tjyR } z	t|j|jd	|d }~w tjyc } ztd
|d }~w tjyt } ztd|d }~ww dt| jj| jj| jjd}| jjr| jj|d< | jjr| jj|d< | jjr| jj|d< | jjrdd | jjD }d|i|d< | jjr| jjj| jjjd|d< z|t !|I d H  W |S  t"y } z|# I d H  td|d }~ww )N)zhttp://zhttps://httpwsr   AuthorizationzBearer z/tts?model=)headersstatusz+LiveKit Inference TTS connection timed out.z*failed to connect to LiveKit Inference TTSzsession.create)typer]   r\   extrar'   r    r[   c                 S  s,   g | ]}| d | d| di dqS )r    r'   r-   )r    r'   r   )r   r<   r)   r)   r*   r@     s    
z#TTS._connect_ws.<locals>.<listcomp>modelsr3   )r   retries
connectionz>failed to send session.create message to LiveKit Inference TTS)$_ensure_sessionr   r^   
startswithr   r   r_   r`   asynciowait_for
ws_connectr    r   ClientResponseErrorr   messager   TimeoutErrorr   ClientConnectorErrorr   r!   r]   r\   r-   r'   r[   r3   rb   r   	max_retrysend_strjsondumps	Exceptionclose)	ro   r   sessionr^   r   r   eparamsr   r)   r)   r*   r     sn   




zTTS._connect_wsr   c                   s   |  I d H  d S rm   )r   )ro   r   r)   r)   r*   r     s   zTTS._close_wsaiohttp.ClientSessionc                 C  s   | j s	tj | _ | j S rm   )r   r   http_contextrd   r   r)   r)   r*   r     s   zTTS._ensure_sessionc                 C  s   | j   d S rm   )r   prewarmr   r)   r)   r*   r     s   zTTS.prewarm)r'   r    r[   r-   NotGivenOr[TTSModels | str]c                C  sN   t |r|| j_t |r|| j_t |r|| j_t |r%| jj| dS dS )a  
        Args:
            voice (str, optional): Voice.
            model (TTSModels | str, optional): TTS model to use.
            language (str, optional): Language code for the TTS model.
            extra_kwargs (dict, optional): Extra kwargs to pass to the TTS model.
        N)r   r   r    r'   r[   r-   update)ro   r'   r    r[   r-   r)   r)   r*   update_options  s   zTTS.update_optionsrb   textr   tts.ChunkedStreamc                C  s   | j ||dS )Nr   )_synthesize_with_stream)ro   r   rb   r)   r)   r*   
synthesize  s   zTTS.synthesizer   c                C  s   t | |d}| j| |S N)r   rb   )r   r   add)ro   rb   streamr)   r)   r*   r     s   z
TTS.streamc                   s>   t | jD ]	}| I d H  q| j  | j I d H  d S rm   )rA   r   acloseclearr   )ro   r   r)   r)   r*   r     s
   
z
TTS.aclose)r    re   r'   rZ   r[   rZ   r\   rf   r]   rg   r^   rZ   r_   rZ   r`   rZ   rd   rh   r-   ri   r3   rj   rb   ra   r"   rk   )r    rr   r'   rZ   r[   rZ   r\   rf   r]   rg   r^   rZ   r_   rZ   r`   rZ   rd   rh   r-   rs   r3   rj   rb   ra   r"   rk   )r    rt   r'   rZ   r[   rZ   r\   rf   r]   rg   r^   rZ   r_   rZ   r`   rZ   rd   rh   r-   ru   r3   rj   rb   ra   r"   rk   )r    rv   r'   rZ   r[   rZ   r\   rf   r]   rg   r^   rZ   r_   rZ   r`   rZ   rd   rh   r-   rw   r3   rj   rb   ra   r"   rk   )r    rx   r'   rZ   r[   rZ   r\   rf   r]   rg   r^   rZ   r_   rZ   r`   rZ   rd   rh   r-   ry   r3   rj   rb   ra   r"   rk   )r    r!   r'   rZ   r[   rZ   r\   rf   r]   rg   r^   rZ   r_   rZ   r`   rZ   rd   rh   r-   rz   r3   rj   rb   ra   r"   rk   )r    rY   r'   rZ   r[   rZ   r\   rf   r]   rg   r^   rZ   r_   rZ   r`   rZ   rd   rh   r-   r{   r3   rj   rb   ra   r"   rk   )r    r!   r"   rc   )r"   r!   )r   rF   r"   r   r   r   r"   rk   )r"   r   r"   rk   )
r'   rZ   r    r   r[   rZ   r-   rz   r"   rk   )r   r!   rb   r   r"   r   )rb   r   r"   r   )r.   r/   r0   r   r   rp   classmethodr   propertyr    r   r   r   r   r   r   r   r   r   r   __classcell__r)   r)   r   r*   rc      s    j

>

rc   c                      s,   e Zd ZdZd fddZdddZ  ZS )r   zStreamed API using websocketsr   rc   rb   r   c                  s&   t  j||d || _t|j| _d S r   )r   rp   _ttsr   r   )ro   r   rb   r   r)   r*   rp   
  s   zSynthesizeStream.__init__output_emittertts.AudioEmitterr"   rk   c              
     s  t  jjjdddd tj  t	
  dfdd}d fdd}d fdd}zpjjjjjd4 I d H T}t	| t	||t	||g}zt	j| I d H  W     I d H  t jj| I d H  n    I d H  t jj| I d H  w W d   I d H  W d S 1 I d H sw   Y  W d S  t	jy   t d  tjy } z	t|j|jdd d }~w ty     ty } zt |d }~ww )Nr   Tz	audio/pcm)
request_idr]   r   r   	mime_typer"   rk   c                    sD    j 2 z3 d H W } t|  jr  q|  q6   d S rm   )	_input_chr9   _FlushSentinelflush	push_text	end_input)data)ro   sent_tokenizer_streamr)   r*   _input_task  s   z*SynthesizeStream._run.<locals>._input_taskr   r   c                   s   i }d|d< 2 zT3 d H W }|  }|jd |d< i }jjr&jj|d< jjr0jj|d< jjr:jj|d< ||d< jjrFjjni |d	<   | t	
|I d H     q	6 dd
i}| t	
|I d H     d S )Ninput_transcriptr    
transcriptr'   r    r[   generation_configr   zsession.flush)copytokenr   r'   r    r[   r-   _mark_startedr   r   r   set)r   base_pktev	token_pktr   end_pkt)input_sent_eventro   r   r)   r*   _sentence_stream_task&  s.   
z4SynthesizeStream._run.<locals>._sentence_stream_taskc                   s&  d }   I d H  	 | jjjdI d H }|jtjjtjjtjj	fv r*t
dd|jtjjkr9td|j q
t|j}|d}|d u rT|d urT|}j|d |dd	kr\n6|dd
krpt|d }| n"|ddkr}  d S |ddkrtd|j td| q)NTr   z&Gateway connection closed unexpectedly)r   z"unexpected Gateway message type %s
session_id)
segment_idr   zsession.createdoutput_audioaudiodoneerrorz&LiveKit Inference TTS returned error: zunexpected message %s)waitreceive_conn_optionsr   r   r   	WSMsgTypeCLOSEDCLOSECLOSINGr   TEXTr   warningr   loadsr   r   start_segmentbase64	b64decodepushr   r   )r   current_session_idmsgr   r   b64data)r   r   r   ro   r)   r*   
_recv_task@  sB   
z)SynthesizeStream._run.<locals>._recv_taskr   r   r   r   ) r   	shortuuid
initializer   r]   r   basicSentenceTokenizerr   r   Eventr   r   r   r  r   create_taskgatherr   r   aiogracefully_cancelr   r   r   r   r   r   r   r   r   r   )ro   r   r   r   r  r   tasksr   r)   )r   r   r   ro   r   r*   _run  sP   	&
2zSynthesizeStream._run)r   rc   rb   r   )r   r   r"   rk   )r.   r/   r0   r1   rp   r  r   r)   r)   r   r*   r     s    r   )r    r!   r"   r#   )r3   r4   r"   r5   )>
__future__r   r   r  r   r   r   dataclassesr   r   typingr   r   r   r   r   typing_extensionsr	   r7   r   r   r   _exceptionsr   r   r   r   r   logr   typesr   r   r   r   r   _utilsr   re   rr   rt   rv   rx   	TTSModelsr+   r,   r!   r6   rB   rC   rK   rN   rR   rS   rU   rV   r2   rW   r   rX   rc   r   r)   r)   r)   r*   <module>   sf    

  j