o
    i                    @  sh  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
 d dlmZ d dlmZmZmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d
dlmZmZmZmZm Z  d
dl!m"Z"m#Z#m$Z$m%Z%m&Z& d
dl'm(Z( d
dl)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ d
dl0m1Z1m2Z2mZ3 d
dl4m5Z5 d
dl6m7Z7m8Z8m9Z9 d
dl:m;Z; ddl<m=Z= ddl>m?Z?m@Z@mAZAmBZB ddlCmDZDmEZEmFZFmGZGmHZH ddlImJZJmKZKmLZLmMZMmNZNmOZO ddlPmQZQmRZRmSZSmTZTmUZUmVZVmWZWmXZXmYZYmZZZm[Z[ ddl\m]Z]m^Z^m_Z_ erd
dlm`Z` ddlambZb ejcd dZdejcd dZeeG d d! d!Zfejcd! d"ZgeG d#d$ d$ZhG d%d deEZidS )&    )annotationsN)AsyncIterable	CoroutineSequence)	dataclass)TYPE_CHECKINGAnycast)contexttrace)rtc)MessageGeneration)Metadata   )llmsttttsutilsvad)FunctionToolInfoRawFunctionToolInfoStopResponseToolFlagget_fnc_tool_nameslogger)
EOUMetrics
LLMMetricsRealtimeModelMetrics
STTMetrics
TTSMetrics
VADMetrics)trace_typestracerr   )split_words)	NOT_GIVENFlushSentinel
NotGivenOr)is_given   )_set_participant_attributes)AgentModelSettings_get_activity_task_info_set_activity_task_info)AudioRecognitionRecognitionHooksTurnDetectionMode_EndOfTurnInfo_PreemptiveGenerationInfo)AgentFalseInterruptionEvent
ErrorEventFunctionToolsExecutedEventMetricsCollectedEventSpeechCreatedEventUserInputTranscribedEvent)ToolExecutionOutput_AudioOutput_TextOutput_TTSGenerationDataperform_audio_forwardingperform_llm_inferenceperform_text_forwardingperform_tool_executionsperform_tts_inferenceremove_instructionsupdate_instructions)DEFAULT_INPUT_DETAILSInputDetailsSpeechHandle)mcp)AgentSessionAgentActivityagents_activityrG   agents_speech_handlec                   @  s   e Zd ZU ded< ded< dS )_OnEnterDatarI   sessionr+   agentN__name__
__module____qualname____annotations__ rU   rU   e/var/www/html/livekit_bhavya/venv/lib/python3.10/site-packages/livekit/agents/voice/agent_activity.pyrM   T   s   
 rM   agents_activity_on_enterc                   @  sF   e Zd ZU ded< ded< ded< ded< d	ed
< ded< ded< dS )_PreemptiveGenerationrG   speech_handlellm.ChatMessageuser_messager3   infollm.ChatContextchat_ctxlist[llm.Tool | llm.Toolset]toolszllm.ToolChoice | Nonetool_choicefloat
created_atNrP   rU   rU   rU   rV   rX   ]   s   
 rX   c                   @  sp  e Zd ZdddZdddZedddZedddZedddZedddZ	edddZ
edddZedddZeddd Zed	d"d#Zed
d%d&Zedd'd(Zedd)d*Zdd-d.Zdd0d1Zd2d3dd7d8Zeeeed9dd@dAZdBdBdCddJdKZddLdMZddNdOZedPddQdRZdBdSddVdWZddXdYZddZd[Zdd\d]Z dd_d`Z!ddadbZ"ddcddZ#ddgdhZ$ddjdkZ%eed2dlddudvZ&eeeeed2e'dwdddZ(dddZ)ddddZ*dddddZ+dddZ,dddZ-ddddZ.e/j0e1ddddZ2dddZ3dddZ4dddZ5dddZ6d ddZ7d!ddZ8dddZ9d"ddZ:d"ddZ;d#ddZ<d$ddZ=dBdd$ddZ>d%ddńZ?d&ddȄZ@e/j0e1dd'dd̄ZAd(dd΄ZBd)ddфZCe/j0e1dd*ddքZDd*dd؄ZEe/j0e1ddBdBdBdBdٜd+ddZFdBdBdBdBdٜd+ddZGe/j0e1ddBdBdd,ddZHe/j0e1ddBdd-ddZIdBdd-ddZJd.ddZK	Bd/d2dd0ddZLed1ddZMed2ddZNed3ddZOed4dd ZPdBS (5  rJ   rO   r+   sessrI   returnNonec                 C  s  ||| _ | _d | _d | _d | _t | _d | _d| _	d| _
d| _d | _g | _t | _| j  d | _d | _d | _d| _t | _d | _d | _g | _d | _g | _g | _d | _d | _t| jtj rm| jj!j"rm| j#smt$dt%| j j"rw| j j"n| jj"}| &|| _'t | _(d S )NFTzthe RealtimeModel uses a server-side turn detection, allow_interruptions cannot be False, disable turn_detection in the RealtimeModel and use VAD on the AgentSession instead))_agent_session_rt_session_realtime_spans_audio_recognitionasyncioLock_lock_tool_choice_started_closed_scheduling_paused_current_speech	_speech_qEvent_user_silence_eventset_paused_speech_false_interruption_timer_cancel_speech_pause_task_stt_eos_received
_q_updated_scheduling_atask_user_turn_completed_atask_speech_tasks_preemptive_generation_drain_blocked_tasks
_mcp_tools_on_enter_task_on_exit_task
isinstancer   RealtimeModelcapabilitiesturn_detectionallow_interruptions
ValueErrorr(   _validate_turn_detection_turn_detection_background_speeches)selfrO   rd   r   rU   rU   rV   __init__j   sR   




	
zAgentActivity.__init__r   TurnDetectionMode | Nonec                 C  sB  |d urt |ts|S t |tr|nd }| j}| j}| j}|dkr*|s*td d }|dkr7|s7td d }t |tjrz|dkrL|jj	sLtd d }|dkrXtd d }n|rm|dkrm|jj	rmtd| d	 d }|jj	sy|ry|d u ryd}n|dkrtd
 d }|s|r|jj
st |tjr| jr|d u rtd |S )Nr   ztturn_detection is set to 'vad', but no VAD model is provided. Pass a VAD instance, e.g. Agent(vad=silero.VAD.load())r   zrturn_detection is set to 'stt', but no STT model is provided. Pass an STT instance, e.g. Agent(stt=deepgram.STT())realtime_llmzturn_detection is set to 'realtime_llm', but the LLM is not a RealtimeModel or the server-side turn detection is not supported/enabled, ignoring the turn_detection settingzcturn_detection is set to 'stt', but the LLM is a RealtimeModel, ignoring the turn_detection settingzturn_detection is set to 'zm', but the LLM is a RealtimeModel and server-side turn detection enabled, ignoring the turn_detection settingzKturn_detection is set to 'realtime_llm', but the LLM is not a RealtimeModelz{VAD is not set. Enabling VAD is recommended when using LLM and non-streaming STT for more responsive interruption handling.)r   strr   r   r   r   warningr   r   r   	streamingLLMr   )r   r   mode	vad_model	stt_model	llm_modelrU   rU   rV   r      sn   

z&AgentActivity._validate_turn_detectionboolc                 C     | j S N)rr   r   rU   rU   rV   scheduling_paused      zAgentActivity.scheduling_pausedc                 C  r   r   )rh   r   rU   rU   rV   rN      r   zAgentActivity.sessionc                 C  r   r   )rg   r   rU   rU   rV   rO      r   zAgentActivity.agentlist[mcp.MCPServer] | Nonec                 C     t | jjr
| jjS | jjS r   )r(   rg   mcp_serversrh   r   rU   rU   rV   r      s
   
zAgentActivity.mcp_serversc                 C     t | jjr
| jjS | jjjS r   )r(   rg   r   rh   optionsr   rU   rU   rV   r     
   
z!AgentActivity.allow_interruptionsrb   c                 C  r   r   )r(   rg   min_endpointing_delayrh   r   r   rU   rU   rV   r     r   z#AgentActivity.min_endpointing_delayc                 C  r   r   )r(   rg   max_endpointing_delayrh   r   r   rU   rU   rV   r     r   z#AgentActivity.max_endpointing_delayllm.RealtimeSession | Nonec                 C  r   r   )ri   r   rU   rU   rV   realtime_llm_session  r   z"AgentActivity.realtime_llm_sessionSpeechHandle | Nonec                 C  r   r   )rs   r   rU   rU   rV   current_speech#  r   zAgentActivity.current_speechr_   c                 C  s   | j j| jj | j S r   )rh   r`   rg   r   r   rU   rU   rV   r`   '  s   zAgentActivity.toolsc                 C  r   r   )r(   rg   min_consecutive_speech_delayrh   r   r   rU   rU   rV   r   -  r   z*AgentActivity.min_consecutive_speech_delayc                 C  s&   t | jjr
| jjn| jjj}|du S )NT)r(   rg   use_tts_aligned_transcriptrh   r   )r   use_aligned_transcriptrU   rU   rV   r   5  s
   

z(AgentActivity.use_tts_aligned_transcriptinstructionsr   c                   s`   || j _tj|| j jd}| j j| | jd ur%| j|I d H  d S t| j j|dd d S )N)r   agent_idTr   add_if_missing)	rg   _instructionsr   AgentConfigUpdateid	_chat_ctxinsertri   rD   )r   r   config_updaterU   rU   rV   rD   ?  s   


z!AgentActivity.update_instructionsr`   c                   s   t t| jj}t t|}t|| pd }t|| pd }tdd |D  }|| j_tj||| jjd}t	|
 |_| jj| | jd ur[| jt	| j
 I d H  t| jtjrr| | jjj|dI d H  d S d S )Nc                 S  s   i | ]}|j |qS rU   )r   ).0toolrU   rU   rV   
<dictcomp>W  s    z.AgentActivity.update_tools.<locals>.<dictcomp>)tools_addedtools_removedr   r`   )rw   r   rg   _toolslistvaluesr   r   r   ToolContextflattenr   r   ri   update_toolsr`   r   r   update_chat_ctxcopy)r   r`   old_tool_namesnew_tool_namesr   r   r   rU   rU   rV   r   P  s&   
 zAgentActivity.update_toolsT)exclude_invalid_function_callsr^   r]   r   c                  s^   |j |r| jntd}|| j_| jd ur$t| | j|I d H  d S t|| jj	dd d S )Nr   Tr   )
r   r`   r%   rg   r   ri   rC   r   rD   r   )r   r^   r   rU   rU   rV   r   k  s   


zAgentActivity.update_chat_ctx)ra   r   r   r   ra   !NotGivenOr[llm.ToolChoice | None]r   NotGivenOr[float]r   $NotGivenOr[TurnDetectionMode | None]c                C  s   t |r|| _| jd ur| jj| jd t |r5| |}| jdks(|dkr5| jd ur5| j  d | _| j	rC| j	j|||d d S d S )Nra   manual)r   r   r   )
r   r(   ro   ri   update_optionsr   r   ry   cancelrk   )r   ra   r   r   r   rU   rU   rV   r   y  s"   






zAgentActivity.update_optionsNrY   namecoroCoroutine[Any, Any, Any]rY   r   
str | Noneasyncio.Task[Any]c                  s   t }d}durt}t d fdd}tj| |dj 	fdd t
d	 durPj dfdd}	| 	fdd t | |ret| S )z
        This method must only be used for tasks that "could" create a new SpeechHandle.
        When draining, every task created with this method will be awaited.
        Nre   r   c               	     s0   t } z I d H W t |  S t |  w r   )otel_contextattachdetach)token)r   current_contextrU   rV   _context_aware_coro  s
   

z>AgentActivity._create_speech_task.<locals>._context_aware_coror   c                   s    j S r   )r   remove_)r   taskrU   rV   <lambda>      z3AgentActivity._create_speech_task.<locals>.<lambda>rY   r   asyncio.Taskrf   c                   s$   t dd  jD r   d S d S )Nc                 s      | ]}|  V  qd S r   done)r   r   rU   rU   rV   	<genexpr>      zRAgentActivity._create_speech_task.<locals>._mark_done_if_needed.<locals>.<genexpr>)all_tasks
_mark_doner   r   rU   rV   _mark_done_if_needed  s   z?AgentActivity._create_speech_task.<locals>._mark_done_if_neededc                   s      S r   )_wake_up_scheduling_taskr   r   rU   rV   r     s    )re   r   )r   r   re   rf   )_AgentActivityContextVarrw   _SpeechHandleContextVarr   get_currentrl   create_taskr   appendadd_done_callbackr.   r   reset)r   r   rY   r   tktk1r   r   rU   )r   r   r   rY   r   rV   _create_speech_task  s&   




z!AgentActivity._create_speech_taskc              	     s~   j 4 I d H   jr	 W d   I d H  d S tjdtj jjid}z}  j_	t
j|dd, t jtjr= j  t jtjrI j  t jtjrU j  W d    n1 s_w   Y    I d H  d _tjdt
|tj jjidtjtdd fdd} j| dd  _}t|dd W |  n|  w W d   I d H  d S 1 I d H sw   Y  d S )Nstart_agent_activity
attributesF)end_on_exitTon_enter)r
   r   r   re   rf   c               	     sJ   t  j jd} zt| } j I d H  W t| d S t| w )N)rN   rO   )rM   rh   rg   _OnEnterContextVarrw   r   r   )datar   r   rU   rV   _traceable_on_enter  s   
z0AgentActivity.start.<locals>._traceable_on_enterAgentTask_on_enterr   inline_taskre   rf   )rn   rp   r#   
start_spanr"   ATTR_AGENT_LABELrO   labelrg   	_activityr   use_spanr   r   r   prewarmr   STTr   TTS_start_sessionstart_as_current_spanset_span_in_contextr   log_exceptionsr   r   r   r.   end)r   r  r  r   rU   r   rV   start  sF   



.zAgentActivity.startc              	     s  | j  s
J dt| jtjr!| jd| j | jd| j t| jtj	r8| jd| j | jd| j t| j
t
jrO| j
d| j | j
d| j t| jtjr^| jd| j | jrtjtdd d	d
 tj fdd| jD ddiI d H }g }t| j|ddD ]\}}t|trtjd| |d q|| q|| _t| jtjrj| j | _| jd| j | jd| j | jd| j | jd| j | jd| j | jd| j t | j!j" z| j#| j!j$I d H  W n tj%y   t&d Y nw z| j'| j!j(I d H  W n tj%y%   t&d Y nw z| j)t*| j+, I d H  W n tj%yG   t&d Y nw tj-t.t/j0f dd| _1| jj2j3si| j
si| j4j5j6ritd n%t| jtjrzt#| j!j"| j!j$dd W n t7y   t&d Y nw tj8| j!j$t9| j+pd | j!j:d}t*| j+, |_;| j!j"<| | = I d H  t>| j4| | jr| j!j?nd | j| j@| jA| jBd| _C| jCD  d S )!Nz/_start_session should only be used when locked.metrics_collectederrorr   
mcp_servermcp.MCPServerre   list[mcp.MCPTool]c                   s$   | j s|  I d H  |  I d H S r   )initialized
initialize
list_tools)r  rU   rU   rV   _list_mcp_tools_task  s   z:AgentActivity._start_session.<locals>._list_mcp_tools_taskc                 3  s    | ]} |V  qd S r   rU   )r   sr  rU   rV   r     r   z/AgentActivity._start_session.<locals>.<genexpr>return_exceptionsTF)strictz%failed to list tools from MCP server )exc_infogeneration_createdinput_speech_startedinput_speech_stopped#input_audio_transcription_completed!failed to update the instructionszfailed to update the chat_ctxzfailed to update the toolsd   )maxsizezaudio output is enabled but RealtimeModel has no audio modality and no TTS is set. Either enable audio modality in the RealtimeModel or set a TTS model.r   )r   r   r   )hooksr   r   r   r   r   )r  r  re   r  )Ern   lockedr   r   r   on_on_metrics_collected	_on_errorr   r  r   r  r   VADr   r   r  r   rl   gatherzipBaseExceptionr  extendr   r   rN   ri   _on_generation_created_on_input_speech_started_on_input_speech_stopped'_on_input_audio_transcription_completedrC   rg   r   rD   r   RealtimeError	exceptionr   r^   r   r   r`   r   BoundedDictr   r   Spanrj   r   audio_outputrh   outputaudior   r   r   r   r   r   _resume_scheduling_taskr/   stt_noder   r   r   rk   r  )r   gatheredr`   r  resinitial_configrU   r  rV   r    s   

"
	zAgentActivity._start_sessiondrain_agent_activityc              	     s   t  }|tj jj tjdtj jjidt	j
tdd fdd} j4 I d H :  j| dd	  _}t|d
d    z jI d H  W n	 tyU   Y nw   I d H  W d   I d H  d S 1 I d H snw   Y  d S )Non_exitr   r   re   rf   c                     s    j  I d H  d S r   )rg   rD  rU   r   rU   rV   _traceable_on_exitz  s   z/AgentActivity.drain.<locals>._traceable_on_exitAgentTask_on_exitr   Tr  r  )r   get_current_spanset_attributer"   r  rg   r  r#   r  r   r  r   rn   r   r   r.   _cancel_preemptive_generation	Exception_pause_scheduling_task)r   current_spanrE  r   rU   r   rV   drains  s*   
.zAgentActivity.drainblocked_tasksrO  list[asyncio.Task] | Nonec                  sZ   | j  s
J d| jrd S d| _|pg | _|   | jd ur+t| jI d H  d S d S )N4_finalize_main_task should only be used when locked.T)rn   r*  rr   r   r   r}   rl   shield)r   rO  rU   rU   rV   rK    s   

z$AgentActivity._pause_scheduling_taskc                   s<   | j  s
J d| jsd S d| _tj|  dd| _d S )NrQ  F_scheduling_taskr   )rn   r*  rr   rl   r   rS  r}   r   rU   rU   rV   r>    s   z%AgentActivity._resume_scheduling_taskc              	     s   | j 4 I d H + tjdtj| jjid}z|  I d H  W |  n|  w W d   I d H  d S 1 I d H s9w   Y  d S )Nresume_agent_activityr   )	rn   r#   r  r"   r  rO   r  r  r  )r   spanrU   rU   rV   resume  s   .zAgentActivity.resumec                 C  s   | j   d S r   )r|   rw   r   rU   rU   rV   r     s   z&AgentActivity._wake_up_scheduling_tasklist[asyncio.Task]c             	     s   | j 4 I d H 4 tjdtj| jjid}z| j|dI d H  |  I d H  W |	  n|	  w W d   I d H  d S 1 I d H sBw   Y  d S )Npause_agent_activityr   rN  )
rn   r#   r  r"   r  rg   r  rK  _close_sessionr  )r   rO  rU  rU   rU   rV   pause  s   .zAgentActivity.pausec                   s  | j  s
J dt| jtjr!| jd| j | jd| j t| jtjr]| j	d ur]| j	d| j
 | j	d| j | j	d| j | j	d| j | j	d| j | j	d| j t| jtjrt| jd| j | jd| j t| jtjr| jd| j | jd| j t| jtjr| jd| j | j	d ur| j	 I d H  | jd ur| j  | jd ur| j I d H  | jrtjdd	 | jD d
diI d H  | j| jddI d H  d | _d S )Nz/_close_session should only be used when locked.r  r  r"  r#  r$  r%  c                 s  r   r   )aclose)r   r  rU   rU   rV   r     r   z/AgentActivity._close_session.<locals>.<genexpr>r  TF)old_task	interrupt)rn   r*  r   r   r   offr,  r-  r   ri   r3  r4  r5  r6  r   r  r   r  r   r.  r[  rj   clearrk   r   rl   r/  _cancel_speech_pauserz   r   rU   rU   rV   rY    sP   




zAgentActivity._close_sessionc              	     s   | j 4 I d H H | jr	 W d   I d H  d S d| _|   |  I d H  tj| jdd I d H  | jd urAtj	
| jI d H  d | j_W d   I d H  d S 1 I d H sVw   Y  d S )NTFforce)rn   rq   rI  rY  rl   r/  _interrupt_background_speechesr}   r   aiocancel_and_waitrg   r	  r   rU   rU   rV   r[    s   

.zAgentActivity.acloseframertc.AudioFramec                 C  sd   | j sd S t| jo| jj o| jjj}|s!| jd ur!| j| | j	d ur0| j	j||d d S d S )N)skip_stt)
rp   r   rs   r   rh   r    discard_audio_if_uninterruptibleri   
push_audiork   )r   rf  should_discardrU   rU   rV   rj    s   

zAgentActivity.push_audiortc.VideoFramec                 C  s(   | j sd S | jd ur| j| d S d S r   )rp   ri   
push_video)r   rf  rU   rU   rV   rm  &  s
   
zAgentActivity.push_video)r=  r   add_to_chat_ctxtextstr | AsyncIterable[str]r=  )NotGivenOr[AsyncIterable[rtc.AudioFrame]]r   NotGivenOr[bool]rn  rG   c                C  s   t |s| js| jjjr| jjjrtdt| jtj	r,| jj
jr,|du r,td t}tjt |r4|n| jd}| jdt|ddd | j| j|||pNd |t d	|d
d}|| j | |tj |S )Nz7trying to generate speech from text without a TTS modelFzthe RealtimeModel uses a server-side turn detection, allow_interruptions cannot be False when using VoiceAgent.say(), disable turn_detection in the RealtimeModel and use VAD on the AgentTask/VoiceAgent instead)r   speech_createdTsayrY   user_initiatedsourcerY   ro  r=  rn  model_settingszAgentActivity.tts_sayr   )r(   r   rh   r<  r=  audio_enabledRuntimeErrorr   r   r   r   r   r   r   r%   rG   creater   emitr8   r   	_tts_taskr,   r   _on_pipeline_reply_done_schedule_speechSPEECH_PRIORITY_NORMAL)r   ro  r=  r   rn  handler   rU   rU   rV   rt  -  sR   	zAgentActivity.say)r[   r^   r   ra   r   schedule_speechinput_detailsr[   "NotGivenOr[llm.ChatMessage | None]"NotGivenOr[llm.ChatContext | None]NotGivenOr[str]NotGivenOr[llm.ToolChoice]r  r  rF   c                C  s  t | jtjr| jjjr|du rtd t}| jd u r tdt	
 }t|s9|d ur9t| }	r9|	jd ur9d}| j}
td  }rt|j| jkrt|j| jkrtg }|
D ]}d }t |tjtjfrc|j}|rl|jtj@ rlqS|| qS|}
tjt|r||n| j|d}| jdt |ddd	 t | jtjr| j!| j"||r|j#nd |pd t$|d
d|dd nEt | jtj%r|rd&| jj'|g}| j!| j(||p| jj)|
t|r|nd |pd t$t*|s| j+d u r|n| j+d
d|dd}|,| j- |r| .|tj/ |S )NFzthe RealtimeModel uses a server-side turn detection, allow_interruptions cannot be False when using VoiceAgent.generate_reply(), disable turn_detection in the RealtimeModel and use VAD on the AgentTask/VoiceAgent insteadz-trying to generate reply without an LLM modelnoner   r  rs  Tgenerate_replyru  r   )rY   
user_inputr   ry  AgentActivity.realtime_replyr   
)rY   r^   r`   new_messager   ry  AgentActivity.pipeline_reply)0r   r   r   r   r   r   r   r%   r{  rl   current_taskr(   r-   function_callr`   r   getrO   rg   rN   rh   RawFunctionToolFunctionToolr\   flagsr   IGNORE_ON_ENTERr   rG   r|  r   r}  r8   r   _realtime_reply_tasktext_contentr,   r   joinr   _pipeline_reply_taskr   r   ro   r   r  r  r  )r   r[   r^   r   ra   r   r  r  r   	task_infor`   on_enter_datafiltered_toolsr   r\   r  rU   rU   rV   _generate_replya  s   



zAgentActivity._generate_replyc                 C  s$   | j d ur| j j  d | _ d S d S r   )r   rY   _cancelr   rU   rU   rV   rI    s   

z+AgentActivity._cancel_preemptive_generationFrb  list[SpeechHandle]c                 C  s0   g }| j D ]}|s|jr||j|d q|S )Nra  )r   r   r   r]  )r   rb  interrupted_speechesspeechrU   rU   rV   rc    s   

z,AgentActivity._interrupt_background_speechesra  asyncio.Future[None]c                  s   |    tjd   | j|d| jdur"| jj|d | j | jD ]\}}}|j|d | q%| jdur@| j  sI 	d  S d	 fdd}D ]}|
| qS S )
zInterrupt the current speech generation and any queued speeches.

        Returns:
            An asyncio.Future that completes when the interruption is fully processed
            and chat context has been updated
        Nra  r   rG   re   rf   c                   s0      stdd D r d  d S d S d S )Nc                 s  r   r   r   )r   r  rU   rU   rV   r     r   zCAgentActivity.interrupt.<locals>.on_playout_done.<locals>.<genexpr>)r   r   
set_resultr   futurer  rU   rV   on_playout_done  s   z0AgentActivity.interrupt.<locals>.on_playout_done)r   rG   re   rf   )rI  rl   Futurerc  rs   r]  r   rt   ri   r  r   )r   rb  r   r  r  rU   r  rV   r]    s$   




zAgentActivity.interruptc                 C  s,   | j r| j   | jd ur| j  d S d S r   )rk   clear_user_turnri   clear_audior   rU   rU   rV   r    s
   

zAgentActivity.clear_user_turntranscript_timeoutstt_flush_durationc                C  sT   d}| j d ur| j   | j  d}| jd usJ | jj| jjj |||d d S )NFT)audio_detachedr  r  
skip_reply)ri   commit_audiorh   r  rk   commit_user_turninputrz  )r   r  r  r  rU   rU   rV   r    s   




zAgentActivity.commit_user_turnr  priorityintc                 C  sz   | j r	|s	td| jr| j rtd d S 	 zt| j| t	
 |f W n
 ty1   Y nw q|  |   d S )NzEcannot schedule new speech, the speech scheduling is draining/pausingzRattempting to schedule a new SpeechHandle, but the scheduling_task is not running.)rr   r{  r}   r   r   r   heapqheappushrt   timeperf_counter_ns	TypeError_mark_scheduledr   )r   r  r  rb  rU   rU   rV   r    s&   

zAgentActivity._schedule_speechr   c                   sD  d}	 | j  I d H  | j   | jrYt| j\}}}| r%d | _q|| _| jdkrDt	
| jt |  I d H  | rDd | _q|  | I d H  d | _t }| jsg }| jD ]}t|}|sltd q^|jspq^||j q^g }| jD ]}|| jv rq|t|}|r|j|v rq||| q|| jrt|dkrd S q)N        Tz-blocked task without activity info; skipping.r   )r|   waitr_  rt   r  heappopr   rs   r   rl   sleepr  _authorize_generation_wait_for_generationr   r-   r   r  rY   r   r   rr   len)r   last_playout_tsr   r  blocked_handlesr   r\   to_waitrU   rU   rV   rS  3  sV   






zAgentActivity._scheduling_taskevHSTTMetrics | TTSMetrics | VADMetrics | LLMMetrics | RealtimeModelMetricsc                 C  sv   t d  }rt|tst|tr|j|_t|tr/| jd ur/| j	|j
d  }r/t|| | jdt|d d S )Nr  metrics)r   r  r   r   r    r   	speech_idr   rj   pop
request_idtrace_utilsrecord_realtime_metricsrh   r}  r7   )r   r  rY   realtime_spanrU   rU   rV   r,  n  s   
z#AgentActivity._on_metrics_collectedr  Cllm.LLMError | stt.STTError | tts.TTSError | llm.RealtimeModelErrorc                 C  s   t |tjrt|| jd}| jd| n>t |tjr*t|| jd}| jd| n)t |tjr?t|| jd}| jd| nt |t	j
rSt|| j	d}| jd| | j| d S )N)r  rw  r  )r   r   LLMErrorr5   rh   r}  RealtimeModelErrorr   STTErrorr   TTSErrorr-  )r   r  error_eventrU   rU   rV   r-  ~  s   zAgentActivity._on_errorr   llm.InputSpeechStartedEventc                 C  sD   | j d u r| jd z|   W d S  ty!   td Y d S w )NspeakingzdRealtimeAPI input_speech_started, but current speech is not interruptable, this should never happen!)r   rh   _update_user_stater]  r{  r   r8  r   r   rU   rU   rV   r4    s   

z&AgentActivity._on_input_speech_startedllm.InputSpeechStoppedEventc                 C  s8   | j d u r| jd |jr| jtddd d S d S )N	listening F
transcriptis_final)r   rh   r  user_transcription_enabled_user_input_transcribedr9   )r   r  rU   rU   rV   r5    s   

z&AgentActivity._on_input_speech_stoppedllm.InputTranscriptionCompletedc                 C  sX   | j t|j|jd |jr*tjd|jg|jd}| jj	j
| | j | d S d S )Nr  user)rolecontentr   )rh   r  r9   r  r  r   ChatMessageitem_idrg   r   itemsr   _conversation_item_added)r   r  msgrU   rU   rV   r6    s   z5AgentActivity._on_input_audio_transcription_completedllm.GenerationCreatedEventc                 C  s|   |j rd S | jrtd d S tj| jtddd}| j	dt
|ddd | j| j||t d	|d
d | |tj d S )NzFskipping new realtime generation, the speech scheduling is not runningr=  modalityr  rs  Fr  ru  )rY   generation_evry  z!AgentActivity.realtime_generationr   )rv  rr   r   r   rG   r|  r   rF   rh   r}  r8   r   _realtime_generation_taskr,   r  r  )r   r  r  rU   rU   rV   r3    s.   
	z$AgentActivity._on_generation_createdc                 C  s&  | j j}|jo|jd u}t| jtjr| jjjrd S | j	d ur:|j
dkr:| jd ur:| jj}tt|dd|j
k r:d S | jd urD| j  | jd ur| jjs| jjr| j| _| jr`| j  d | _|r|| j jjr|| j jjjr|| j jj  | j d d S | jd ur| j  | j  d S d S d S d S )Nr   Tsplit_characterr  )rh   r   resume_false_interruptionfalse_interruption_timeoutr   r   r   r   r   r   min_interruption_wordsrk   current_transcriptr  r$   ri   start_user_activityrs   interruptedr   rx   ry   r   r<  r=  	can_pauserZ  _update_agent_stater]  )r   opt	use_pausero  rU   rU   rV   _interrupt_by_audio_activity  s8   








z*AgentActivity._interrupt_by_audio_activityvad.VADEvent | Nonec                 C  sT   t   }|r||j }| jjd|d | j  d| _| jr(| j  d | _d S d S )Nr  last_speaking_timeF)	r  speech_durationrh   r  rv   r_  r{   ry   r   )r   r  speech_start_timerU   rU   rV   on_start_of_speech  s   



z AgentActivity.on_start_of_speechc                 C  sf   t   }|r||j }nd| _| jjd|d | j  | jr/| jjj	 }d ur1| 
| d S d S d S )NTr  r  )r  silence_durationr{   rh   r  rv   rw   rx   r   r  _start_false_interruption_timer)r   r  speech_end_timetimeoutrU   rU   rV   on_end_of_speech
  s   
zAgentActivity.on_end_of_speechvad.VADEventc                 C  sz   | j dv rd S |j| jjjk}|r"| j dks| jr|jdkr"|   |jr6|j| jjj	d kr6| j
  d S | j
  d S )Nr   r   r   r   r   )r   r  rh   r   min_interruption_durationr{   raw_accumulated_silencer  r  r   rv   r_  rw   )r   r  active_speechrU   rU   rV   on_vad_inference_done  s   


z#AgentActivity.on_vad_inference_donestt.SpeechEventr  bool | Nonec                C  s   t | jtjr| jjjrd S | jt|jd j	|jd j
d|jd jd |jd j
rL| jdvrN|   |du rP| jrR| jjj }d urT| | d S d S d S d S d S d S )Nr   Flanguager  r  
speaker_idr  )r   r   r   r   user_transcriptionrh   r  r9   alternativesr  ro  r  r   r  rx   r   r  r  r   r  r  r  rU   rU   rV   on_interim_transcript8  s(   


	z#AgentActivity.on_interim_transcript)r  c                C  s   t | jtjr| jjjrd S | jt|jd j	|jd j
d|jd jd | jrG| jdvrG|   |du rG| jrG| jjj }d urG| | t| j| jd| _d S )Nr   Tr  r  F)r\  )r   r   r   r   r  rh   r  r9   r  r  ro  r  rk   r   r  rx   r   r  r  rl   r   r`  rz   r  rU   rU   rV   on_final_transcriptT  s(   




z!AgentActivity.on_final_transcriptr\   r3   c              	   C  s   | j jjr| js| jd ur| jjrt| jtjsd S | 	  tj
d|jg|jd}| jj }| j||dtddd}t|||| | j | jt d| _d S )Nr  r  r  transcript_confidenceFr=  r  )r[   r^   r  r  )rY   r[   r\   r^   r`   ra   rc   )rh   r   preemptive_generationrr   rs   r  r   r   r   rI  r  new_transcriptr  rg   r^   r   r  rF   rX   r`   ro   r  r   )r   r\   r[   r^   rY   rU   rU   rV   on_preemptive_generationw  s<   z&AgentActivity.on_preemptive_generationr2   c                 C  s   | j r0|   tjdd|jid | jjr.tjd|jg|j	d}| j
jj| | j| dS | jd urb| jdkrb| jd urb| jjrb| jjsb| jjjdkrbtt|jdd	| jjjk rb|   d
S | j}| j| ||dd| _dS )Nz0skipping user input, speech scheduling is pausedr  extrar  r  Tr   r   r  F'AgentActivity._user_turn_completed_taskr   )rr   rI  r   r   r  rh   _closingr   r  r  rg   r   r  r   r  r   r   rs   r   r  r   r  r  r$   r~   r   _user_turn_completed_task)r   r\   r[   r\  rU   rU   rV   on_end_of_turn  sD   



zAgentActivity.on_end_of_turnr\  asyncio.Task[None] | Nonec                   s  |d ur
|I d H  t j| jdd I d H  tjd|jg|jd}t| jtjrQ| jj	j
r/d S | jd urQ|jrL|jdkrJ| jjj| | j| d S | j  | j }d ur|jsgtjdd|jid d S | | jI d H  | I d H  | jd ur| j  | jrtjd	d|jid | jjr| jjj| | j| d S | jj }t  }z| jj!||d
I d H  W n t"y   Y d S  t#y   t$d Y d S w t  | }t| jtjrd }n| jd u rd S | jrtjdd|jid |r| jjr| jjj| | j| d S i }|j%d ur|j%|d< |j&d ur'|j&|d< |j'd ur2|j'|d< |j(d ur=|j(|d< ||d< |d urI||_)d }	| j* }
r|
j+j|j,kr|
j-|r|
j.| j.kr|
j/| j0kr|
j1}	||
j2_)| j3|	t4j5d tj6ddt |
j7 id n
td |
j18  d | _*|	d u r| j9||t:ddd}	| j;t < kr|	 I d H  d }t| j=t>rt?d| j=d}n| j=d urt?| j=j@| j=jAd}tBt |j(pd|j'pd||	jC|d}| jDdtE|d d S )NFra  r  r  r  zMskipping reply to user input, current speech generation cannot be interruptedr  r  z<skipping on_user_turn_completed, speech scheduling is paused)r  z,error occurred during on_user_turn_completedz9skipping reply to user input, speech scheduling is pausedstarted_speaking_atstopped_speaking_attranscription_delayend_of_turn_delayon_user_turn_completed_delay)r  zusing preemptive generationpreemptive_lead_timezcpreemptive generation enabled but chat context or tools have changed after `on_user_turn_completed`r=  r  )r[   r^   r  unknown)
model_namemodel_providerr  )	timestampend_of_utterance_delayr%  r'  r  metadatar  r  )Frl   r/  rc  r   r  r  r  r   r   r   r   ri   r  rg   r   r  r   rh   r  r  rs   r   r   r   r`  rz   r]  rr   r  r^   r   r  perf_counteron_user_turn_completedr   rJ  r8  r#  r$  r%  r&  r  r   r\   r  is_equivalentr`   ra   ro   rY   r[   r  rG   r  debugrc   r  r  rF   r~   r  r   r   r   modelproviderr   r   r}  r7   )r   r\  r\   r[   r   temp_mutable_chat_ctx
start_timer'  metrics_reportrY   
preemptiver.  eou_metricsrU   rU   rV   r     s   



















r  c                 C  s   | j jS r   )rg   r^   r   rU   rU   rV   retrieve_chat_ctxz  s   zAgentActivity.retrieve_chat_ctxasyncio.Task[None]c                 C  s.   | j s| jr| j r| jd d S d S d S )Nr  )rt   rs   r   rh   r  r  rU   rU   rV   r    s   z%AgentActivity._on_pipeline_reply_done$AsyncIterable[rtc.AudioFrame] | Nonery  r,   c                   s   t jd| jjd/}|tj|j |j }r|tj	| t
 |_| j|||||dI d H  W d    d S 1 s<w   Y  d S )N
agent_turnr
   rx  )r#   r  rh   _root_span_contextrH  r"   ATTR_AGENT_TURN_ID_generation_id_parent_generation_idATTR_AGENT_PARENT_TURN_IDr   r   _agent_turn_context_tts_task_impl)r   rY   ro  r=  rn  ry  rL  	parent_idrU   rU   rV   r~    s"   	

"zAgentActivity._tts_taskc                   s  t jjd}|tjj  jjj	r jjj
nd } jjjr& jjjnd }t g}	jr>|	t j  |	I d H    jr_|tjd tjj|	 I d H  d S d }
d }d }ttrwtjjd}|\}
}nttrdfdd}| }
| }g }d d }d fdd}d }d }|d ur|d u rt  j!j"|| jj#j$d\}}||  j%r҈ j& }r|j'j(s|j'j)s|j*I d H  }r|}
t+||j,d\}}|| nt+||d\}}|| |j-.|  j!/|
|}t0|r|I d H n|}d }|d ur't1||d\}}|| |d u r'|j2.| g |I d H  |d urDt|3 gI d H  t44 }|tjj jrmtjj| I d H  |d urm|5  |3 I d H  |d ury|6 I d H  |r|j7nd}jr|d ur|3 I d H }|d ur|j-8 r|j-9 s|j:d ur|j:}nd}|tj;| |r|ri }|r|j<d ur|j<|d< |rوrو|d< ||d<  j!j=j>d|j|d}?|g  j@|  jjAdkr  jBd d S d S )Nr>  Tr   re   AsyncIterable[str]c                    s    V  d S r   rU   rU   )ro  rU   rV   
_read_text  s   
z0AgentActivity._tts_task_impl.<locals>._read_textfut,asyncio.Future[float] | asyncio.Future[None]rf   c                   B   z
|   pt W n
 ty   Y dS w  jjdjd dS z
            Callback to update the agent state when the first frame is captured:
            1. _AudioOutput.first_frame_fut (float)
            2. _TextOutput.first_text_fut (None)
            Nr  )r6  r   resultr  r1  rh   r  rD  rI  r   rY   r#  rU   rV   _on_first_frame     
z5AgentActivity._tts_task_impl.<locals>._on_first_framenoder  ry  text_transformsr;  
tts_outputtext_outputrw  r  tts_node_ttfbr#  r$  	assistant)r  r  r  r  r  r  )re   rG  rI  rJ  re   rf   )Cr   rG  rD  rH  r"   ATTR_SPEECH_IDr   rh   r<  transcription_enabledtranscriptionrz  r=  rl   ensure_future_wait_for_authorizationr   r   rv   r  wait_if_not_interrupted_clear_authorizationr  ATTR_SPEECH_INTERRUPTEDr   rd  re  r   r   	itertoolsteer   rB   rg   tts_noder   tts_text_transformsr   r   r   aligned_transcriptr   timed_texts_futr>   audio_chfirst_frame_futr   transcription_nodeiscoroutiner@   first_text_futwait_for_playoutr  clear_bufferr[  ro  r   	cancelledsynchronized_transcriptATTR_RESPONSE_TEXTttfbr   add_message_item_addedr  agent_stater  )r   rY   ro  r=  rn  ry  rL  	tr_outputr;  authorization_taskstext_sourceaudio_sourcerf  rH  tasksr$  rQ  	audio_outtts_gen_datatts_taskr   timed_textsforward_tasktr_nodetr_node_resulttext_outforward_textforwarded_textplayback_evassistant_metricsr  rU   )r   rY   r#  ro  rV   rE    s   

















zAgentActivity._tts_task_impl)r  r   _previous_user_metrics_previous_tools_messagesr  llm.ChatMessage | Noner  llm.MetricsReport | Noner  :Sequence[llm.FunctionCall | llm.FunctionCallOutput] | Nonec                  s   t jd| jjd2}	|	tj|j |j }
r|	tj	|
 t
 |_| j||||||||dI d H  W d    d S 1 s?w   Y  d S )Nr=  r>  )rY   r^   r`   ry  r  r   r  r  )r#   r  rh   r?  rH  r"   r@  rA  rB  rC  r   r   rD  _pipeline_reply_task_impl)r   rY   r^   r`   ry  r  r   r  r  rL  rF  rU   rU   rV   r  I  s(   

"z"AgentActivity._pipeline_reply_taskc          8   	     s  ddl m}	 tjjd}
|
tjj |d ur!|
tj	| |r-|
tj
|jp+d  jj }r?|j r?t|
|jj  jjjrI jjjnd } jjjrU jjjnd }| }t|}|d uri|| |d urz	t||dd W n ty   td Y nw g }t j j!|||d\}}|"| t#j$j%&|j'd	}|\}}d }d }d
}|d ur|j(I d H  t) j j*|| jj+j,d\}}|"|  j-r j. }r|j/j0s|j/j1s|j2I d H  }r|}d}t345 }6|gI d H  |}|d urj7r j j8|  j9| |j:}j;r2|
tj<d t#j$j=g ||R  I d H  |> I d H  d S  j?d t34@ g}jArO|"t34 jBC  6|I d H  D  j;r}|
tj<d t#j$j=g ||R  I d H  |> I d H  d S tEE }d;dd} j F|||}t3G|r|I d H n|} d }!d }"| d urtH|| d\}"}!|"|" d d }#d< fdd}$d }%|d ur|d usJ tI||jJd\}&}%|"|& |%jKL|$ n|!d ur|!jML|$ d=fdd}'d>fd d!}(tN j||jO|jP|'|(d"\})}*6g |I d H  |d ur(6t34|Q gI d H  tEE }#i }+|jRd ur9|jR|+d#< |rG|jSd urG|jS|+d$< |#rere|+d%< |#|+d&< |red&|v re|d&  |+d'< |
tj<j; |r} j j8|  jT| |!r|!jUnd},j;rt#j$j=| I d H  |d ur|V  |Q I d H }-|%d ur|%jKW r|%jKX s|-jYd ur|-jY},nd},n|r|!r|!jUstZd( |,r|j[d)|,|jj;||+d*}. j j8|.  j9|. \|.g |
tj]|, j;st^|*jd+kr j?d n jj_d,kr j?d- |> I d H  `  j;r3t#j$=|)I d H  d S  jab z|)I d H  W  jac n jac w t^|*jd+krCjd jj+jed krmtjZd.d/jid0 d S  jfd7  _fg }/g }0d }1d
}2tgg g d1}3|*jD ]?}4|4jhd ur|/"|4ji |0"|4jh |4jjrd|3_k|3jl"|4ji |3jm"|4jh |1d ur|4jnd urtod2 d}2|4jn}1q|1r|2sd|3_p jqd3|3  jr}5|3jpr|1r|2s js|1 d}5|/|0 }6|3jkr-|jtu|6  jv jw|||	|5s	|jOd4krd4nd5d6|,s|nd |6d7d8d9}7|7L jx  jytzj{dd: d S t^|0d+krE j j8|6  jT|6 d S d S d S )?Nr)   )r,   r>  r  Tr   r&  )rT  r^   tool_ctxry  r   FrS  thinking
llm_output"AsyncIterable[str | FlushSentinel]re   rG  c                 S s,   | 2 z3 d H W }t |trq|V  q6 d S r   )r   r&   )r  chunkrU   rU   rV   rH    s   
z;AgentActivity._pipeline_reply_task_impl.<locals>._read_textrX  rI  rJ  rf   c                   rK  rL  rM  rO  rP  rU   rV   rQ    rR  z@AgentActivity._pipeline_reply_task_impl.<locals>._on_first_framerV  fnc_callllm.FunctionCallc                   s   t   | _ | g d S r   )r  rc   rw  r  r   rU   rV   _tool_execution_started_cb  s   
zKAgentActivity._pipeline_reply_task_impl.<locals>._tool_execution_started_cboutr:   c                      | j r | j g d S d S r   fnc_call_outrw  r  r   rU   rV   _tool_execution_completed_cb     zMAgentActivity._pipeline_reply_task_impl.<locals>._tool_execution_completed_cbrN   rY   r  ra   function_streamtool_execution_started_cbtool_execution_completed_cbllm_node_ttftrZ  r#  r$  e2e_latencyU`use_tts_aligned_transcript` is enabled but no agent transcript was returned from ttsr[  )r  r  r   r  rc   r  r   r  r  z.maximum number of function calls steps reachedr  r  function_callsfunction_call_outputs?expected to receive only one AgentTask from the tool executionsfunction_tools_executedr  autor   )rY   r^   r`   ry  r  r  r  r   ra  )r  r  re   rG  r\  r  r  re   rf   r  r:   re   rf   )|rO   r,   r   rG  rD  rH  r"   r]  r   ATTR_INSTRUCTIONSATTR_USER_INPUTr  rh   _room_ioroomisconnectedr*   local_participantr<  rz  r=  r^  r_  r   r   r   r   rD   r   r   r8  r?   rg   llm_noder   r   rd  re  rf  text_chstarted_futrB   rg  r   rh  r   r   r   ri  r   rj  rl   r`  _wait_for_scheduledrb  	scheduledr   r  r  r  rd  re  r[  r  ra  r   rv   r  rc  r  rm  rn  r@   r>   rk  rl  r   ro  rA   ra   function_chrp  ttftru  _tool_items_addedro  rq  r   rr  rs  r   rv  rw  rt  r  rx  _mark_generation_doner   adddiscard	num_stepsmax_tool_steps
_num_stepsr6   r  r  reply_required_reply_requiredr  r  
agent_taskr  _handoff_requiredr}  r   update_agentr  r2  r   r  r  r  rG   r  )8r   rY   r^   r`   ry  r  r   r  r  r,   rL  room_ior;  rY  r  r}  llm_taskllm_gen_datatext_teetts_text_inputtr_inputr  r  read_transcript_from_ttsr   r  wait_for_scheduleduser_metricsrz  reply_started_atrH  r  r  r  text_forward_taskr$  rQ  r~  r  r  r  exe_tasktool_outputr  r  r  r  	new_callsnew_fnc_outputsnew_agent_taskignore_task_switchfnc_executed_evsanitized_outdrainingtool_messagestool_response_taskrU   rP  rV   r  i  s  

























	
Iz'AgentActivity._pipeline_reply_task_impl)r  r   r  c          
   	     sp  | j d us
J dt| g}|jr |t| j  ||I d H  |j	r4t
jj| I d H  |d ur\| j j }|jd|d}| j |I d H  | jjj| | j| | j}t
|jrq| j jttj|jd z1| j j|pxtdI d H }	| j||	||dI d H  W t
|jr|j|kr| j j|d d S d S d S t
|jr|j|kr| j j|d w w w )Nrt_session is not availabler  )r  r  r   r   rY   r  ry  r   ) ri   rl   r`  ra  r   r   rv   r  rb  r  r   rd  re  r^   r   rv  r   rg   r   r  rh   r  ro   r(   ra   r   r	   r   
ToolChoicer  r%   r  )
r   rY   ry  r  r   rz  r^   r  ori_tool_choicer  rU   rU   rV   r    sT   	
	


z"AgentActivity._realtime_reply_taskr  r  c                  s   t jd| jjd.}|tj|j |j }r|tj	| t
 |_| j||||dI d H  W d    d S 1 s;w   Y  d S )Nr=  r>  r  )r#   r  rh   r?  rH  r"   r@  rA  rB  rC  r   r   rD  _realtime_generation_task_impl)r   rY   r  ry  r   rL  rF  rU   rU   rV   r  	  s    	

"z'AgentActivity._realtime_generation_taskc          %        s|  t jjd}|tjj jj}|r"|j	
 r"t||j	j jd us+J dtjtjs6J d|tjdtjjjtjjji jd urVjrV|jj< jjjr`jjjnd jjjrljjjnd tj}t !" g}j#r|$t !j%&  '|I d H  (  j)rt*j+j,| I d H  |tj-d d S d 	d 
dE	fd
d g g dt*j.t/ddF f	dd}	g }
$t j0|	|
dd t*j+j12j3d}|\}$| g dGfdd}$t j0| dd dHfdd}dIfdd}t4j|j5|||d \}}'g I d H  |tj-j) |tj6t78d!d" D  d urh't !9 gI d H  j:d# |tj-j) t;; 
dJ	
fd*d+}t<|
d,kr|
d, nd-\}}}|r|j=nd.}j)rt*j+j, I d H  |rd ur>  9 I d H }|j?}|d ur|j@A r|j@B s|jCd ur|jC}nd.}d,}jjDjEr|jFI d H }jjG|jH|tI|d/ |d0 nr|r|j=st/Jd1 |r#|r#||jH|j)d2}jKjLjM$| N|g jO| |tjP| D ]
}|Q I d H  q%R  j)rCt*j+,|I d H  d S |jSTfd3d4 jUV z|I d H  W jUW njUW w t<|jd,kr jXd57  _Xg }d}tYg g d6}d }d} |jD ]L}!|jZ$|!j[ |j\$|!j] |!j]d ur|$|!j] |!j^rd}d|__jKjLjM$|!j] j`|!j]g |d ur|!jad urt/bd7 d} |!ja}q|r| sd|_cjdd8| je}"|jcr|r| sjf| d}"t<|d,krgjgs	jhr0jgr jgA s jgur jgI d H  nt id,I d H  jgs	jhs	jjjk }#|#jMl| zjm|#I d H  W n tjnyf }$ zt/jJd9d:to|$id; W Y d }$~$nd }$~$ww |j_rjjDjpsjq  jrjstt|"sj5d<krd<nd=d>d?d@dA jutvjwddB d S jjDjpr|j_s|rt/JdCjjx dD d S d S d S d S d S )KNr>  r  zllm is not a realtime modelchatTrI  rJ  re   rf   c                   rK  rL  rM  rO  rP  rU   rV   rQ  R	  rR  zEAgentActivity._realtime_generation_task_impl.<locals>._on_first_frameFr   outputsGlist[tuple[MessageGeneration, _TextOutput | None, _AudioOutput | None]]c                   s^  t jtjs
J g }zj2 z3 d H W }t|dkr$td  n|jI d H }d }d|vrPjrPjj	j
r=td tjj|jd}|\}}| n|j }d }d urd }|d urtjj|jjjd\}	}
jrj }r|j	js|j	js|
jI d H  }r|}d|	 |
j}n)d|v rj|j}t |r|I d H n|}njj	j
rt!d ntd	 |d urt"|d
\}}|| |j#$  j%|}t |r|I d H n|}d }|d urt&|d\}}|| |s|r|j'$  | |||f q6 tj(| I d H  W tjj)| I d H  d S tjj)| I d H  w )Nr   zEexpected to receive only one message generation from the realtime APIr=  zJtext response received from realtime API, falling back to use a TTS model.r   rS  TzText message received from Realtime API with audio modality. This usually happens when text chat context is synced to the API. Try to add a TTS model as fallback or use text modality with TTS instead.zGaudio output is enabled but neither tts nor realtime audio is availablerV  rX  )*r   r   r   message_streamr  r   r   
modalitiesr   r   r;  r   rd  re  rf  text_streamr   	__aiter__rB   rg   rg  rh   r   rh  r   ri  r   rj  rk  realtime_audio_output_nodeaudio_streamrl   rn  r  r>   rl  r   rm  r@   ro  r/  re  )r  forward_tasksr  msg_modalitiesr  rf  tr_text_inputr~  realtime_audio_resultr  r  r   r  realtime_audior  r  r  r  )	rQ  r;  r  ry  r  r   r}  teesrY  rU   rV   _read_messagesj	  s   


	





Z*zDAgentActivity._realtime_generation_task_impl.<locals>._read_messagesz/AgentActivity.realtime_generation.read_messagesr   r   c                    s$    2 z3 d H W }  |  q6 d S r   )r   )fnc)fnc_stream_for_tracingr  rU   rV   _read_fnc_stream	  s   zFAgentActivity._realtime_generation_task_impl.<locals>._read_fnc_streamz1AgentActivity.realtime_generation.read_fnc_streamr  r  c                   s.    | g  jjj|   j| g d S r   )rw  rg   r   r  r   rh   r  r  )r   rY   rU   rV   r  	  s   zPAgentActivity._realtime_generation_task_impl.<locals>._tool_execution_started_cbr  r:   c                   r  r   r  r  r   rU   rV   r  	  r  zRAgentActivity._realtime_generation_task_impl.<locals>._tool_execution_completed_cbr  c                 S  s   g | ]
}|j d dhdqS )typerc   )exclude)
model_dump)r   r  rU   rU   rV   
<listcomp>
  s    z@AgentActivity._realtime_generation_task_impl.<locals>.<listcomp>r  
message_idr   r  r  r   rZ   c                   sH   i }r r |d< |d< t jd|g| |d} d ur |_||_|S )Nr#  r$  r[  )r  r  r   r  )r   r  rc   r  )r  r  r  r  r  )r#  r$  rU   rV   _create_assistant_message
  s   zOAgentActivity._realtime_generation_task_impl.<locals>._create_assistant_messager   )NNNr  i  )r  r  audio_end_msaudio_transcriptr  )r  r  r  c                   s    j dS )Nr  )rh   r  r   r   rU   rV   r   c
  r   z>AgentActivity._realtime_generation_task_impl.<locals>.<lambda>r)   r  r  r  zJfailed to update chat context before generating the function calls resultsr  r  r  r  r   )rY   ry  r  r   ra  z*Tool reply cannot be prevented when using z#, it generates reply automatically.r\  )r  r  re   rf   r  r  r  )r  r   r  r   r  r   re   rZ   )yr   rG  rD  rH  r"   r]  r   rh   r  r  r  r*   r  ri   r   r   r   set_attributesATTR_GEN_AI_OPERATION_NAMEATTR_GEN_AI_PROVIDER_NAMEr4  ATTR_GEN_AI_REQUEST_MODELr3  rj   response_idr<  rz  r=  r^  r_  r   r`   rl   r`  ra  r   r   rv   r  rb  rc  r  r   rd  re  rd  r  r   r   re  rf  r  rA   ra   ATTR_RESPONSE_FUNCTION_CALLSjsondumpsrp  r  r  r  ro  rq  playback_positionrl  r   rr  rs  r   message_truncationr  truncater  r  r   rg   r   r  rw  r  rt  r[  r  first_tool_started_futr   r   r  r  r  r6   r  r  r  r  r  r  r  r  r  r  r}  r   r  rs   rt   r  r^   r   r2  r   r7  r   auto_tool_reply_generationr]  r   r  r,   r  rG   r  _label)%r   rY   r  ry  r   rL  r  r  rz  r  message_outputsfnc_tee
fnc_streamr  r  r  r  r  r  msg_genr  r~  r  r  r  r  r  rf  r  generate_tool_replyr  r  r  r  r  r^   erU   )rQ  r;  r  r  r  ry  r  r   rY   r#  r$  r}  r  rY  rV   r  	  s  


 h














Zz,AgentActivity._realtime_generation_task_implr  c                   s:    j d ur
 j   d fdd} jj| _ d S )Nre   rf   c                    s    j d u s jr j j urd  _ d S d}  jjjrB jjj }rB|jrB j  sB jj	d j j
d |  d} tjddid  jdt| d	 d  _ d  _d S )
NFr  )r   Tz resumed false interrupted speechr  r  agent_false_interruption)resumed)rx   rs   rh   r   r  r<  r=  r  r   r  rD  rV  r   r2  r}  r4   ry   )r  r;  r   r  rU   rV   _on_false_interruption
  s6   


zMAgentActivity._start_false_interruption_timer.<locals>._on_false_interruptionr  )ry   r   rh   _loop
call_later)r   r  r  rU   r  rV   r  
  s   


z-AgentActivity._start_false_interruption_timer)r]  r]  c                  s   |d ur
|I d H  | j d ur| j   d | _ | jsd S |r3| jjs3| jjr3| j  | j I d H  d | _| jjj	rI| jj
jrK| jj
j  d S d S d S r   )ry   r   rx   r  r   r]  r  rh   r   r  r<  r=  rV  )r   r\  r]  rU   rU   rV   r`  
  s(   



z"AgentActivity._cancel_speech_pausevad.VAD | Nonec                 C  r   r   )r(   rg   r   rh   r   rU   rU   rV   r        zAgentActivity.vadstt.STT | Nonec                 C  r   r   )r(   rg   r   rh   r   rU   rU   rV   r     r  zAgentActivity.stt"llm.LLM | llm.RealtimeModel | Nonec                 C  r   r   )r(   rg   r   rh   r   rU   rU   rV   r     r  zAgentActivity.llmtts.TTS | Nonec                 C  r   r   )r(   rg   r   rh   r   rU   rU   rV   r     r  zAgentActivity.tts)rO   r+   rd   rI   re   rf   )r   r   re   r   )re   r   )re   rI   )re   r+   )re   r   )re   rb   )re   r   )re   r   )re   r_   )r   r   re   rf   )r`   r_   re   rf   )r^   r]   r   r   re   rf   )
ra   r   r   r   r   r   r   r   re   rf   )r   r   rY   r   r   r   re   r   r  )rO  rP  re   rf   )rO  rW  re   rf   )rf  rg  re   rf   )rf  rl  re   rf   )
ro  rp  r=  rq  r   rr  rn  r   re   rG   )r[   r  r^   r  r   r  ra   r  r   rr  r  r   r  rF   re   rG   )F)rb  r   re   r  )rb  r   re   r  )r  rb   r  rb   re   rf   )r  rG   r  r  rb  r   re   rf   )r  r  re   rf   )r  r  re   rf   )r   r  re   rf   )r  r  re   rf   )r  r  re   rf   )r  r  re   rf   )r  r  re   rf   )r  r  re   rf   )r  r  r  r  re   rf   )r\   r3   re   rf   )r\   r2   re   r   )r\  r"  r\   r2   re   rf   )re   r]   )r   r;  re   rf   )rY   rG   ro  rp  r=  r<  rn  r   ry  r,   re   rf   )rY   rG   r^   r]   r`   r_   ry  r,   r  r  r   r   r  r  r  r  re   rf   )
rY   rG   ry  r,   r  r   r   r   re   rf   )
rY   rG   r  r  ry  r,   r   r   re   rf   )r  rb   re   rf   r   )r\  r"  r]  r   re   rf   )re   r  )re   r  )re   r  )re   r  )QrQ   rR   rS   r   r   propertyr   rN   rO   r   r   r   r   r   r   r`   r   r   rD   r   r   r%   r   r   r  r  r#   r  rM  rK  r>  rV  r   rZ  rY  r[  rj  rm  rt  rE   r  rI  rc  r]  r  r  r  r   r  r   rS  r,  r-  r4  r5  r6  r3  r  r  r  r  r  r  r  r!  r   r:  r  r~  rE  r  r  r  r  r  r  r`  r   r   r   r   rU   rU   rU   rV   rJ   i   s   >K	"51v17j%
:	-#$
- 2
 
.&  
i
5   6')j
__future__r   rl   contextvarsr  r  r  collections.abcr   r   r   dataclassesr   typingr   r   r	   opentelemetryr
   r   r   livekitr   livekit.agents.llm.realtimer   livekit.agents.metrics.baser   r  r   r   r   r   r   llm.tool_contextr   r   r   r   r   logr   r  r   r   r   r   r    r!   	telemetryr"   r#   r  tokenize.basicr$   typesr%   r&   r'   
utils.miscr(   _utilsr*   rO   r+   r,   r-   r.   audio_recognitionr/   r0   r1   r2   r3   eventsr4   r5   r6   r7   r8   r9   
generationr:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rY   rE   rF   rG   rH   agent_sessionrI   
ContextVarr   r   rM   r   rX   rJ   rU   rU   rU   rV   <module>   sL      4