o
    iHu                     @  sh  d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZmZ d dlmZ d dlmZ dd	lmZmZmZmZ dd
lmZ ddlmZmZ ddlmZmZ ddlm Z m!Z! ddlm"Z" ddl#m$Z$ ddl%m&Z& er~ddl'm(Z( dZ)dZ*e	G dd dZ+e	G dd dZ,G dd deZ-ed e-B Z.	 G dd deZ/G dd dZ0dS )     )annotationsN)AsyncIterable)	dataclass)TYPE_CHECKINGAnyLiteralProtocol)trace)rtc   )llmsttutilsvadlogger)trace_typestracer)	NOT_GIVEN
NotGivenOr)aiois_given   )io)_set_participant_attributes)ModelSettings)AgentSession      c                   @  sH   e Zd ZU ded< 	 ded< ded< ded< ded	< ded
< ded< dS )_EndOfTurnInfobool
skip_replystrnew_transcriptfloattranscript_confidencefloat | Nonestarted_speaking_atstopped_speaking_attranscription_delayend_of_turn_delayN__name__
__module____qualname____annotations__ r0   r0   h/var/www/html/livekit_bhavya/venv/lib/python3.10/site-packages/livekit/agents/voice/audio_recognition.pyr       s   
 r   c                   @  s&   e Zd ZU ded< ded< ded< dS )_PreemptiveGenerationInfor"   r#   r$   r%   r&   r'   Nr+   r0   r0   r0   r1   r2   .   s   
 r2   c                   @  sL   e Zd ZedddZedddZdd
dZdddZdddddZdS )_TurnDetectorreturnr"   c                 C     dS Nunknownr0   selfr0   r0   r1   model6      z_TurnDetector.modelc                 C  r5   r6   r0   r8   r0   r0   r1   provider:   r;   z_TurnDetector.providerlanguage
str | Noner&   c                      d S Nr0   r9   r=   r0   r0   r1   unlikely_threshold?       z _TurnDetector.unlikely_thresholdr    c                   r?   r@   r0   rA   r0   r0   r1   supports_language@   rC   z_TurnDetector.supports_languageNtimeoutchat_ctxllm.ChatContextrF   r$   c                  r?   r@   r0   )r9   rG   rF   r0   r0   r1   predict_end_of_turnB   s   z!_TurnDetector.predict_end_of_turnr4   r"   )r=   r>   r4   r&   )r=   r>   r4   r    )rG   rH   rF   r&   r4   r$   )	r,   r-   r.   propertyr:   r<   rB   rD   rI   r0   r0   r0   r1   r3   5   s    

r3   )r   r   realtime_llmmanualc                   @  sb   e Zd Zd ddZd!dd	Zd d
dZd"ddZddd"ddZd#ddZd$ddZ	d%ddZ
dS )&RecognitionHooksevvad.VADEvent | Noner4   Nonec                 C     d S r@   r0   r9   rO   r0   r0   r1   on_start_of_speechX       z#RecognitionHooks.on_start_of_speechvad.VADEventc                 C  rR   r@   r0   rS   r0   r0   r1   on_vad_inference_doneY   rU   z&RecognitionHooks.on_vad_inference_donec                 C  rR   r@   r0   rS   r0   r0   r1   on_end_of_speechZ   rU   z!RecognitionHooks.on_end_of_speechstt.SpeechEventspeakingbool | Nonec                C  rR   r@   r0   r9   rO   rZ   r0   r0   r1   on_interim_transcript[   rU   z&RecognitionHooks.on_interim_transcriptNrZ   c                C  rR   r@   r0   r\   r0   r0   r1   on_final_transcript\   rU   z$RecognitionHooks.on_final_transcriptinfor   r    c                 C  rR   r@   r0   r9   r`   r0   r0   r1   on_end_of_turn]   rU   zRecognitionHooks.on_end_of_turnr2   c                 C  rR   r@   r0   ra   r0   r0   r1   on_preemptive_generation^   rU   z)RecognitionHooks.on_preemptive_generationrH   c                 C  rR   r@   r0   r8   r0   r0   r1   retrieve_chat_ctx`   rU   z"RecognitionHooks.retrieve_chat_ctx)rO   rP   r4   rQ   rO   rV   r4   rQ   )rO   rY   rZ   r[   r4   rQ   )r`   r   r4   r    )r`   r2   r4   rQ   )r4   rH   )r,   r-   r.   rT   rW   rX   r]   r_   rb   rc   rd   r0   r0   r0   r1   rN   W   s    





rN   c                   @  s  e Zd ZdSddZeeeddTddZdUddZdUddZdddVd!d"ZdUd#d$Z	dWd%d&Z
dXd'd(ZdUd)d*Zd+dd,dYd1d2ZedZd4d5Zd[d8d9Zejed:d\d<d=Zd]d^d@dAZejed:d_dHdIZejed:d`dKdLZejed:dadbdQdRZdMS )cAudioRecognitionsessionr   hooksrN   r   io.STTNode | Noner   vad.VAD | Noneturn_detectionTurnDetectionMode | Nonemin_endpointing_delayr$   max_endpointing_delayr4   rQ   c                C  s   || _ || _d | _d | _d | _d | _d | _|| _|| _t	|t
s"|nd | _|| _|| _t	|t
r2|nd | _| jdv | _d| _d | _d| _d | _d | _d | _t | _g | _d| _d| _d| _d | _d | _d | _t  | _!d | _"t | _#d S )Nr   NF )$_session_hooks_audio_input_atask_commit_user_turn_atask
_stt_atask
_vad_atask_end_of_turn_task_min_endpointing_delay_max_endpointing_delay
isinstancer"   _turn_detector_stt_vad_turn_detection_mode_vad_base_turn_detection_user_turn_committed_sample_rate	_speaking_last_final_transcript_time_last_speaking_time_speech_start_timeasyncioEvent_final_transcript_received_final_transcript_confidence_audio_transcript_audio_interim_transcript_audio_preflight_transcript_last_language_stt_ch_vad_chset_tasks_user_turn_span_closing)r9   rg   rh   r   r   rk   rm   rn   r0   r0   r1   __init__d   s>   
zAudioRecognition.__init__)rm   rn   rk   NotGivenOr[float]$NotGivenOr[TurnDetectionMode | None]c                C  s   t |r|| _t |r|| _t |rTt|ts|nd | _t|tr#|nd }| j|krV| j}|| _| jdv | _| jdks?|dkrX| jrL| j	 sL| j
  d | _d| _d S d S d S d S )Nro   rM   F)r   rx   ry   rz   r"   r{   r~   r   rw   donecancelr   )r9   rm   rn   rk   modeprevious_moder0   r0   r1   update_options   s(   



zAudioRecognition.update_optionsc                 C  s   |  | j | | j d S r@   )
update_sttr|   
update_vadr}   r8   r0   r0   r1   start   s   zAudioRecognition.startc                 C  s   |  d  | d  d S r@   )r   r   r8   r0   r0   r1   stop   s   
zAudioRecognition.stopF)skip_sttframertc.AudioFramer   r    c                C  s@   |j | _|s| jd ur| j| | jd ur| j| d S d S r@   )sample_rater   r   send_nowaitr   )r9   r   r   r0   r0   r1   
push_audio   s   
zAudioRecognition.push_audioc                   s   | j   | jd ur| jI d H  tj| j I d H  | jd ur(t| jI d H  | jd ur6t| jI d H  | jd urC| jI d H  d S d S r@   )	r   r   rt   r   cancel_and_waitr   ru   rv   rw   r8   r0   r0   r1   aclose   s   




zAudioRecognition.aclosec                      | _ |rtjtj   _t | j j	 _	d S  j	d urBtt
 j	 fdd  j d  _	d  _d S d S )Nc                       j S r@   r   discard_r9   taskr0   r1   <lambda>       z-AudioRecognition.update_stt.<locals>.<lambda>)r|   r   Chanr
   
AudioFramer   r   create_task	_stt_taskru   r   add_done_callbackr   addr9   r   r0   r   r1   r         


zAudioRecognition.update_sttc                   r   )Nc                   r   r@   r   r   r   r0   r1   r      r   z-AudioRecognition.update_vad.<locals>.<lambda>)r}   r   r   r
   r   r   r   r   	_vad_taskrv   r   r   r   r   )r9   r   r0   r   r1   r      r   zAudioRecognition.update_vadc                 C  s<   d| _ d| _d| _g | _d| _| j}| d  | | d S )Nrp   F)r   r   r   r   r   r|   r   r   r0   r0   r1   clear_user_turn   s   
z AudioRecognition.clear_user_turng       @)stt_flush_durationr!   audio_detachedtranscript_timeoutr   r!   c                  sP   j rj r
d S d fdd}jd urj  t| _d S )Nr4   rQ   c                    sL  j d u st j  dkrmj   rEjrEtjd } tjd|  jd| d}tdtt	
|j }t|D ]}| q=ztjj dI d H  W n tjyl   jrjtjdjd	d
 Y nw jrjtjtjjtjdjdgd j dj  _d_j  }j |d d_!d S )Ng      ?g?s     r   )r   num_channelssamples_per_channelr   rE   z+final transcript not received after timeout)r   interim_transcriptextrarp   )r=   text)typealternatives )r!   T)"r   timer   clearr   intr
   r   maxmathceildurationranger   r   wait_forwaitTimeoutErrorr   r   warningrr   r_   r   SpeechEventSpeechEventTypeFINAL_TRANSCRIPT
SpeechDatar   striprd   copy_run_eou_detectionr   )num_samplessilence_frame
num_framesr   rG   r   r9   r!   r   r   r0   r1   _commit_user_turn  sX   




z<AudioRecognition.commit_user_turn.<locals>._commit_user_turnr4   rQ   )r|   r   is_setrt   r   r   r   )r9   r   r   r   r!   r   r0   r   r1   commit_user_turn   s   
;
z!AudioRecognition.commit_user_turnr"   c                 C  s   | j r| jd | j  S | jS )zV
        Transcript for this turn, including interim transcript if available.
        r   )r   r   r8   r0   r0   r1   current_transcriptF  s   z#AudioRecognition.current_transcriptrO   rY   c           	        s  | j dkr| jr| jd u s| j s|jtjjkrd S |jtjjkr|j	d j
}|j	d j}|j	d j}| jr@|rCt|tkrC|| _|sGd S | jj|| jsS| j dkrV| jnd d || jd}| jrlt | j |d< tjd|d t | _|  jd	| 7  _| j | _| j| | j| jk}d
| _d
| _| j   | jr| jd u rt | _| j!s| jr|r| j"t#| j| jrt$| jt| j nd| j%d | js| j& ' }| (| d S d S d S |jtjj)kr{| jj*|| js| j dkr| jnd d |j	d j
}|j	d j}|j	d j}| jr|r!t|tkr!|| _|s&d S tjd|| jdd t | _| jd	 |  | _|| _| jrM| jd u rRt | _| j dks\| jryt+| j|g }| j"t#| jt$|t| | j%d d S d S |jtjjkr| jj*|| js| j dkr| jnd d |j	d j
| _d S |jtjj,kr| j dkrt-.| /  | j0d  W d    n	1 sw   Y  d| _d| _| jr| jd u rt | _| j& ' }| (| d S |jtjj1kr<| j dkr>t-.| /  | j2d  W d    n	1 sw   Y  d| _| j%d u r*t | _%t | _| jd ur@| j3  d S d S d S d S )NrM   r   r   r^   )user_transcriptr=   transcript_delayzreceived user transcriptr   r   rp   )r#   r%   r'   z"received user preflight transcriptFT)4r~   r   rw   r   r   r   r   INTERIM_TRANSCRIPTr   r   r   r=   
confidencer   lenMIN_LANGUAGE_DETECTION_LENGTHrr   r_   r}   r   r   r   r   debugr   r   lstripr   appendr   r   r   r   r   rc   r2   sumr   rd   r   r   PREFLIGHT_TRANSCRIPTr]   listEND_OF_SPEECHr	   use_span_ensure_user_turn_spanrX   START_OF_SPEECHrT   r   )	r9   rO   
transcriptr=   r   r   transcript_changedrG   confidence_valsr0   r0   r1   _on_stt_eventO  s  














	zAudioRecognition._on_stt_eventr   rV   c                   sT  |j tjjkr=t| jt |j d | j	
| W d    n1 s'w   Y  d| _| jd ur;| j  d S d S |j tjjkrd| j	| |jdkr`t | _| jd u rbt | _d S d S d S |j tjjkrt|   | j	| W d    n1 sw   Y  d| _| js| jdkr| jr| j	  }| | d S d S d S d S )N
start_timeT        Fr   )r   r   VADEventTyper   r	   r   r   r   speech_durationrr   rT   r   rw   r   INFERENCE_DONErW   raw_accumulated_speechr   r   r   rX   r   r~   r   rd   r   r   )r9   rO   rG   r0   r0   r1   _on_vad_event  s<   



zAudioRecognition._on_vad_eventrG   rH   c                   s   j rjsjdkrd S     jdjd jr$jdkr$jnd tjtd	 	 	 dd fdd}j	d urDj	
  t|jjj_	d S )NrM   user)rolecontentr   last_speaking_timer&   last_final_transcript_timespeech_start_timer4   rQ   c                   s  j } }d urjI d H stdj nt| t	dm}d}d }z
 I d H }jI d H }|d urJ||k rJj}W n tyY   td Y nw |tjtt jt d  jddddddjdddddtj|tj|pdtj|tjjpd	i W d    n1 sw   Y  W d    n1 sw   Y  |}| r|| t  7 }|dkrztj j!" |d
I d H  W n
 tj#y   Y nw j$rt%j$t&j$ nd}	d }
d }d }d }|d ur| d ur|d ur|}
| }t'||  d}t |  }j()t*j+|	|pd||
|d}|rQ|tj,j+tj-|	tj.|p2dtj/|p8di |0  d _1d	_+g _$d _2d _3d _4d_5d S )Nz*Turn detector does not support language %seou_detectionr   zError predicting end of turnT)exclude_function_callexclude_instructionsexclude_empty_messageexclude_handoffexclude_config_update)exclude_audioexclude_imageexclude_timestampexclude_metricsr   rp   rE   )r!   r#   r%   r)   r*   r'   r(   F)6rx   r   rD   r   r   r`   r	   r   r   start_as_current_spanrI   rB   ry   	Exception	exceptionset_attributesr   ATTR_CHAT_CTXjsondumpsr   ChatContextitems_EOU_MAX_HISTORY_TURNSr   to_dictATTR_EOU_PROBABILITYATTR_EOU_UNLIKELY_THRESHOLDATTR_EOU_DELAYATTR_EOU_LANGUAGEr   r   r   r   r   r   r   r   r   r   rr   rb   r   r   ATTR_USER_TRANSCRIPTATTR_TRANSCRIPT_CONFIDENCEATTR_TRANSCRIPTION_DELAYATTR_END_OF_TURN_DELAYendr   r   r   r   r   )r  r  r  endpointing_delayuser_turn_spaneou_detection_spanend_of_turn_probabilityrB   extra_sleepconfidence_avgr'   r(   r)   r*   	committedrG   r9   r!   turn_detectorr0   r1   _bounce_eou_task  s   


 /



z=AudioRecognition._run_eou_detection.<locals>._bounce_eou_task)NNN)r  r&   r  r&   r  r&   r4   rQ   )r|   r   r~   r   add_messager{   r   log_exceptionsr   rw   r   r   r   r   r   r   )r9   rG   r!   r-  r0   r+  r1   r   	  s0   


{

z#AudioRecognition._run_eou_detectionstt_node
io.STTNodeaudio_inputAsyncIterable[rtc.AudioFrame]r   asyncio.Task[None] | Nonec                   s   |d urt |I d H  ||t }t|r|I d H }|d u r#d S t|trK|2 z3 d H W }t|tjs?J dt	| | 
|I d H  q*6 d S d S )Nz&STT node must yield SpeechEvent, got: )r   r   r   r   iscoroutinerz   r   r   r   r   r   )r9   r0  r2  r   noderO   r0   r0   r1   r     s    


zAudioRecognition._stt_taskvad.VADc              	     s   |d urt |I d H  | tjtdd fdd}t| }z$2 z3 d H W }| |I d H  q(6 W t |I d H  	 I d H  d S t |I d H  	 I d H  w )Nr   r4   rQ   c                    s$    2 z3 d H W }  |  q6 d S r@   )
push_frame)r   r2  streamr0   r1   _forward  s   z,AudioRecognition._vad_task.<locals>._forwardr   )
r   r   r:  r   r/  r   r   r   r   r   )r9   r   r2  r   r;  forward_taskrO   r0   r9  r1   r     s   
zAudioRecognition._vad_taskNr   r&   
trace.Spanc                 C  s`   | j r| j  r| j S |rt|d nd }tjd|d| _ | jj }r-|jr-t| j |j | j S )Ni ʚ;	user_turnr   )	r   is_recordingr   r   
start_spanrq   _room_iolinked_participantr   )r9   r   start_time_nsroom_ior0   r0   r1   r     s   z'AudioRecognition._ensure_user_turn_span)rg   r   rh   rN   r   ri   r   rj   rk   rl   rm   r$   rn   r$   r4   rQ   )rm   r   rn   r   rk   r   r4   rQ   r   )r   r   r   r    r4   rQ   )r   ri   r4   rQ   )r   rj   r4   rQ   )
r   r    r   r$   r   r$   r!   r    r4   rQ   rJ   )rO   rY   r4   rQ   re   )F)rG   rH   r!   r    r4   rQ   )r0  r1  r2  r3  r   r4  r4   rQ   )r   r7  r2  r3  r   r4  r4   rQ   r@   )r   r&   r4   r=  )r,   r-   r.   r   r   r   r   r   r   r   r   r   r   r   rK   r   r   r   r/  r   r   r   r   r   r   r0   r0   r0   r1   rf   c   s<    
5





K
 
" 


rf   )1
__future__r   r   r  r   r   collections.abcr   dataclassesr   typingr   r   r   r   opentelemetryr	   livekitr
   rp   r   r   r   r   logr   	telemetryr   r   typesr   r   r   r   r   _utilsr   agentr   agent_sessionr   r   r  r   r2   r3   TurnDetectionModerN   rf   r0   r0   r0   r1   <module>   s>    