o
    i0                     @  s  U d dl mZ d dlZd dlZd dlZd dlmZmZ d dlm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d d	lmZmZ d d
lmZ d dlm Z  ddl!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z( ddl)m*Z* ddl+m,Z,m-Z- ddl$m.Z.m/Z/ ddl0m1Z1 ddl2m3Z3m4Z4 ddlm5Z5m6Z6m7Z7m8Z8 ddl9m:Z: ddl!m;Z;m<Z< ddl=m>Z> ddl?m@Z@ ddlAmBZB ddlCmDZD ddlEmFZF ddlGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZPmQZQ ddlRmSZS ddlTmUZU ddlVmWZW dd lXmYZYmZZZ erdd!l#m[Z[m\Z\m]Z] dd"l$m^Z^ dd#l_m`Z` G d$d% d%ed&d'Zad(d(d(d(d)Zbd%ecd*< d&d&d&d&d)Zdd%ecd+< dAd/d0ZeeG d1d2 d2ZfeG d3d4 d4Zged5Zhed6ZieG d7d8 d8eZjG d9d: d:Zkd;d<gZld=ecd>< G d?d@ d@e jmeN eeh ZndS )B    )annotationsN)AsyncIterableSequence)AbstractContextManagernullcontext)Token)	dataclass)TracebackType)TYPE_CHECKINGGenericLiteralProtocolTypeVaroverloadruntime_checkable)contexttrace)	TypedDict)rtc   )cli	inferencellmsttttsutilsvad)APIError)
JobContextget_job_context)AgentHandoffChatContextlogger)trace_typestracer)DEFAULT_API_CONNECT_OPTIONS	NOT_GIVENAPIConnectOptions
NotGivenOr)is_given   )ioroom_io)_set_participant_attributes)Agent)AgentActivity)TurnDetectionMode)ClientEventsHandler)

AgentEvent
AgentStateAgentStateChangedEvent
CloseEventCloseReasonConversationItemAddedEvent
EventTypesUserInputTranscribedEvent	UserStateUserStateChangedEvent)IVRActivity)
RecorderIO)	RunResult)InputDetailsSpeechHandle)	LLMModels	STTModels	TTSModels)mcp)TextTransformsc                   @  s8   e Zd ZU dZded< 	 ded< 	 ded< 	 ded< dS )RecordingOptionsu  Granular control over which recording features are active.

    All keys default to ``True`` when not specified, so ``{"logs": False}``
    means "record everything except logs."

    Can be passed directly to :pymethod:`AgentSession.start(record=...)`:

    * ``record=True``  → all on (backward compatible)
    * ``record=False`` → all off (backward compatible)
    * ``record={"audio": True, "traces": False}`` → granular
    boolaudiotraceslogs
transcriptN)__name__
__module____qualname____doc____annotations__ rR   rR   d/var/www/html/livekit_bhavya/venv/lib/python3.10/site-packages/livekit/agents/voice/agent_session.pyrG   D   s   
 rG   F)totalT)rI   rJ   rK   rL   _RECORDING_ALL_ON_RECORDING_ALL_OFFrecordbool | RecordingOptionsreturnc                 C  s:   t | tr| r	tnt}tdi |S tdi i t| S )NrR   )
isinstancerH   rU   rV   rG   )rW   defaultsrR   rR   rS   _resolve_recording_optionsi   s   
r\   c                   @  s>   e Zd ZU eZded< eZded< eZded< dZded< dS )	SessionConnectOptionsr(   stt_conn_optionsllm_conn_optionstts_conn_options   intmax_unrecoverable_errorsN)	rM   rN   rO   r&   r^   rQ   r_   r`   rc   rR   rR   rR   rS   r]   p   s   
 r]   c                   @  s   e Zd ZU ded< ded< ded< ded< ded< ded	< ded
< ded< ded< ded< ded< ded< ded< ded< ded< dS )AgentSessionOptionsrH   allow_interruptions discard_audio_if_uninterruptiblefloatmin_interruption_durationrb   min_interruption_wordsmin_endpointing_delaymax_endpointing_delaymax_tool_stepsfloat | Noneuser_away_timeoutfalse_interruption_timeoutresume_false_interruptionmin_consecutive_speech_delayzbool | Noneuse_tts_aligned_transcriptpreemptive_generationzSequence[TextTransforms] | Nonetts_text_transformsivr_detectionN)rM   rN   rO   rQ   rR   rR   rR   rS   rd   y   s    
 rd   
Userdata_TRun_Tc                   @  s   e Zd Zd
ddZd	S )_VideoSamplerframertc.VideoFramesessionAgentSessionrY   rH   c                 C     d S NrR   )selfry   r{   rR   rR   rS   __call__   s    z_VideoSampler.__call__Nry   rz   r{   r|   rY   rH   )rM   rN   rO   r   rR   rR   rR   rS   rx      s    rx   c                   @  s(   e Zd ZddddddZdddZdS )VoiceActivityVideoSampler      ?333333?speaking_fps
silent_fpsr   rg   r   c                C  s   || _ || _d | _d S r~   )r   r   _last_sampled_time)r   r   r   rR   rR   rS   __init__   s   
z"VoiceActivityVideoSampler.__init__ry   rz   r{   r|   rY   rH   c                 C  sf   t   }|jdk}|r| jn| j}|dkrdS d| }| jd u r%|| _dS || j |kr1|| _dS dS )Nspeakingr   Fr   T)time
user_stater   r   r   )r   ry   r{   nowis_speaking
target_fpsmin_frame_intervalrR   rR   rS   r      s   

z"VoiceActivityVideoSampler.__call__N)r   rg   r   rg   r   )rM   rN   rO   r   r   rR   rR   rR   rS   r      s    r   filter_markdownfilter_emojizlist[TextTransforms]DEFAULT_TTS_TEXT_TRANSFORMSc                      s4  e Zd Zeeeeeeeedddddddeddddeed	d	ed
edd fd;d<Zd fdAdBZeddDdEZejddGdEZeddIdJZ	eddLdMZ
ed dOdPZed!dRdSZed"dUdVZed#dXdYZed$d[d\Zed%d^d_Zed&dadbZed'dddeZed(dgdhZed)djdkZdld
dmd*dudvZeeedeedwd+ddZed	eedeedd,ddZd	eeeeedd-ddZd.ddZed/ddZd	d
dd0ddZddd1ddZejedd	d
dd0ddZd.ddZ eeedd2ddZ!eeddd3ddZ"eeeeedldd4ddZ#d	dd5ddZ$d.ddZ%dddd6ddńZ&d7ddǄZ'ddd
ddʜd8ddӄZ(ejedd9ddׄZ)d:ddڄZ*ejedd.dd܄Z+ejedd.ddބZ,d.ddZ-d.ddZ.d
d
dd;ddZ/d
dd<ddZ0d=ddZ1d>ddZ2d?ddZ3ed@ddZ4edAddZ5edBddZ6edCddZ7d.ddZ8d.dd	Z9d.d
dZ:d.ddZ;d.ddZ<dDddZ=dEddZ>  Z?S (F  r|   Tg      ?r   g      @ra   g      .@       @g        FN)turn_detectionr   r   r   r   toolsmcp_serversuserdatare   rf   rh   ri   rj   rk   rl   video_samplerrn   ro   rp   rq   rr   rt   rs   ru   conn_optionsloop agent_false_interruption_timeoutr   NotGivenOr[TurnDetectionMode]r   %NotGivenOr[stt.STT | STTModels | str]r   NotGivenOr[vad.VAD]r   9NotGivenOr[llm.LLM | llm.RealtimeModel | LLMModels | str]r   %NotGivenOr[tts.TTS | TTSModels | str]r   (NotGivenOr[list[llm.Tool | llm.Toolset]]r   NotGivenOr[list[mcp.MCPServer]]r   NotGivenOr[Userdata_T]re   rH   rf   rh   rg   ri   rb   rj   rk   rl   r    NotGivenOr[_VideoSampler | None]rn   rm   ro   rp   rq   rr   NotGivenOr[bool]rt   +NotGivenOr[Sequence[TextTransforms] | None]rs   ru   r   !NotGivenOr[SessionConnectOptions]r    asyncio.AbstractEventLoop | Noner   NotGivenOr[float | None]rY   Nonec                  s,  t    |p
t | _t|rtd |}t|s!tddd}|| _	t
 | _t|	|
|||||||||t|r;|nt||t|rD|ndd| _|pMt | _d| _|pUd| _t|trbtj|}t|trmtj|}t|trxtj|}|p{d| _|pd| _|pd| _|pd| _|pd| _t|r|ng | _ d| _!d| _"t#$| j%| j&| _'t#(| j)| j*| j+| _,d| _-d| _.d| _/t0 | _1t0 | _2d| _3d| _4d| _5d| _6d| _7d| _8d	| _9d
| _:d| _;t|r|nd| _<d| _=d| _>d| _?d| _@d| _Ad| _Bd| _Cd| _Dd| _Eg | _FtGH | _Id| _Jd| _KdS )uh  `AgentSession` is the LiveKit Agents runtime that glues together
        media streams, speech/LLM components, and tool orchestration into a
        single real-time voice agent.

        It links audio, video, and text I/O with STT, VAD, TTS, and the LLM;
        handles turn detection, endpointing, interruptions, and multi-step
        tool calls; and exposes everything through event callbacks so you can
        focus on writing function tools and simple hand-offs rather than
        low-level streaming logic.

        Args:
            turn_detection (TurnDetectionMode, optional): Strategy for deciding
                when the user has finifshed speaking.

                * ``"stt"`` – rely on speech-to-text end-of-utterance cues
                * ``"vad"`` – rely on Voice Activity Detection start/stop cues
                * ``"realtime_llm"`` – use server-side detection from a
                  realtime LLM
                * ``"manual"`` – caller controls turn boundaries explicitly
                * ``_TurnDetector`` instance – plug-in custom detector

                If *NOT_GIVEN*, the session chooses the best available mode in
                priority order ``realtime_llm → vad → stt → manual``; it
                automatically falls back if the necessary model is missing.
            stt (stt.STT | str, optional): Speech-to-text backend.
            vad (vad.VAD, optional): Voice-activity detector
            llm (llm.LLM | llm.RealtimeModel | str, optional): LLM or RealtimeModel
            tts (tts.TTS | str, optional): Text-to-speech engine.
            tools (list[llm.FunctionTool | llm.RawFunctionTool], optional): List of
                tools shared by every agent in the agent session.
            mcp_servers (list[mcp.MCPServer], optional): List of MCP servers
                providing external tools for the agent to use.
            userdata (Userdata_T, optional): Arbitrary per-session user data.
            allow_interruptions (bool): Whether the user can interrupt the
                agent mid-utterance. Default ``True``.
            discard_audio_if_uninterruptible (bool): When ``True``, buffered
                audio is dropped while the agent is speaking and cannot be
                interrupted. Default ``True``.
            min_interruption_duration (float): Minimum speech length (s) to
                register as an interruption. Default ``0.5`` s.
            min_interruption_words (int): Minimum number of words to consider
                an interruption, only used if stt enabled. Default ``0``.
            min_endpointing_delay (float): Minimum time-in-seconds since the
                last detected speech before the agent declares the user’s turn
                complete. In VAD mode this effectively behaves like
                max(VAD silence, min_endpointing_delay); in STT mode it is
                applied after the STT end-of-speech signal, so it can be
                additive with the STT provider’s endpointing delay. Default
                ``0.5`` s.
            max_endpointing_delay (float): Maximum time-in-seconds the agent
                will wait before terminating the turn. Default ``3.0`` s.
            max_tool_steps (int): Maximum consecutive tool calls per LLM turn.
                Default ``3``.
            video_sampler (_VideoSampler, optional): Uses
                :class:`VoiceActivityVideoSampler` when *NOT_GIVEN*; that sampler
                captures video at ~1 fps while the user is speaking and ~0.3 fps
                when silent by default.
            user_away_timeout (float, optional): If set, set the user state as
                "away" after this amount of time after user and agent are silent.
                Default ``15.0`` s, set to ``None`` to disable.
            false_interruption_timeout (float, optional): If set, emit an
                `agent_false_interruption` event after this amount of time if
                the user is silent and no user transcript is detected after
                the interruption. Set to ``None`` to disable. Default ``2.0`` s.
            resume_false_interruption (bool): Whether to resume the false interruption
                after the false_interruption_timeout. Default ``True``.
            min_consecutive_speech_delay (float, optional): The minimum delay between
                consecutive speech. Default ``0.0`` s.
            use_tts_aligned_transcript (bool, optional): Whether to use TTS-aligned
                transcript as the input of the ``transcription_node``. Only applies
                if ``TTS.capabilities.aligned_transcript`` is ``True`` or ``streaming``
                is ``False``. When NOT_GIVEN, it's disabled.
            tts_text_transforms (Sequence[TextTransforms], optional): The transforms to apply
                to the tts input text, available built-in transforms: ``"filter_markdown"``, ``"filter_emoji"``.
                Set to ``None`` to disable. When NOT_GIVEN, all filters will be applied.
            preemptive_generation (bool):
                Whether to speculatively begin LLM and TTS requests before an end-of-turn is
                detected. When True, the agent sends inference calls as soon as a user
                transcript is received rather than waiting for a definitive turn boundary. This
                can reduce response latency by overlapping model inference with user audio,
                but may incur extra compute if the user interrupts or revises mid-utterance.
                Defaults to ``False``.
            ivr_detection (bool): Whether to detect if the agent is interacting with an IVR system.
                Default ``False``.
            conn_options (SessionConnectOptions, optional): Connection options for
                stt, llm, and tts.
            loop (asyncio.AbstractEventLoop, optional): Event loop to bind the
                session to. Falls back to :pyfunc:`asyncio.get_event_loop()`.
        zZ`agent_false_interruption_timeout` is deprecated, use `false_interruption_timeout` insteadr   r   r   N)re   rf   rh   ri   rj   rk   rl   rn   ro   rp   rq   rt   rs   ru   rr   Fr   	listeninginitializing)Lsuperr   asyncioget_event_loop_loopr*   r#   warningr   _video_samplerr!   empty	_chat_ctxrd   r   _optsr]   _conn_options_started_turn_detectionrZ   strr   STTfrom_model_stringLLMTTS_stt_vad_llm_tts_mcp_servers_tools_llm_error_counts_tts_error_countsr,   
AgentInput_on_video_input_changed_on_audio_input_changed_inputAgentOutput_on_video_output_changed_on_audio_output_changed_on_text_output_changed_output_forward_audio_atask_forward_video_atask_update_activity_ataskLock_activity_lock_lock_room_io_recorder_io_client_events_handler_agent	_activity_next_activity_user_state_agent_state_user_away_timer	_userdata_closing_task_closing_job_context_cb_registered_global_run_state_user_speaking_span_agent_speaking_span_session_span_root_span_context_session_ctx_token_recorded_eventsrV   copy_recording_options_started_at_ivr_activity)r   r   r   r   r   r   r   r   r   re   rf   rh   ri   rj   rk   rl   r   rn   ro   rp   rq   rr   rt   rs   ru   r   r   r   	__class__rR   rS   r      s   
y













zAgentSession.__init__eventr9   argr3   c                   s   | j | t || d S r~   )r   appendr   emit)r   r   r   r   rR   rS   r     s   zAgentSession.emitrv   c                 C     | j d u r	td| j S )Nz AgentSession userdata is not set)r   
ValueErrorr   rR   rR   rS   r        
zAgentSession.userdatavaluec                 C  s
   || _ d S r~   )r   )r   r   rR   rR   rS   r     s   
TurnDetectionMode | Nonec                 C     | j S r~   )r   r   rR   rR   rS   r        zAgentSession.turn_detectionlist[mcp.MCPServer] | Nonec                 C  r   r~   )r   r   rR   rR   rS   r     r   zAgentSession.mcp_serversio.AgentInputc                 C  r   r~   )r   r   rR   rR   rS   input  r   zAgentSession.inputio.AgentOutputc                 C  r   r~   )r   r   rR   rR   rS   output  r   zAgentSession.outputrd   c                 C  r   r~   )r   r   rR   rR   rS   options  r   zAgentSession.optionsr]   c                 C  r   r~   )r   r   rR   rR   rS   r     r   zAgentSession.conn_optionsllm.ChatContextc                 C  r   r~   )r   r   rR   rR   rS   history  r   zAgentSession.historySpeechHandle | Nonec                 C  s   | j d ur	| j jS d S r~   )r   current_speechr   rR   rR   rS   r    s   zAgentSession.current_speechr;   c                 C  r   r~   )r   r   rR   rR   rS   r     r   zAgentSession.user_stater4   c                 C  r   r~   )r   r   rR   rR   rS   agent_state  r   zAgentSession.agent_stater/   c                 C  r   )NzVoiceAgent isn't running)r   RuntimeErrorr   rR   rR   rS   current_agent  r   zAgentSession.current_agentlist[llm.Tool | llm.Toolset]c                 C  r   r~   )r   r   rR   rR   rS   r     r   zAgentSession.toolstext)input_modalityoutput_type
user_inputr   r  Literal['text', 'audio']r  type[Run_T] | NoneRunResult[Run_T]c                C  s@   | j d ur| j  stdt||d}|| _ | j||d |S )Nnested runs are not supported)r  r  )r  r  )r   doner  r?   generate_reply)r   r  r  r  	run_staterR   rR   rS   run  s   zAgentSession.run)roomroom_optionsrW   room_input_optionsroom_output_optionsagentcapture_runLiteral[True]r  NotGivenOr[rtc.Room]r  NotGivenOr[room_io.RoomOptions]rW   rX   r  $NotGivenOr[room_io.RoomInputOptions]r  %NotGivenOr[room_io.RoomOutputOptions]r?   c                     d S r~   rR   r   r  r  r  r  rW   r  r  rR   rR   rS   start     zAgentSession.start)r  r  r  rW   r  r  Literal[False]c                  r!  r~   rR   r"  rR   rR   rS   r#    r$  #NotGivenOr[bool | RecordingOptions]RunResult | Nonec             	     s  j 4 I dH 6 jr	 W d  I dH  dS t _d}zt }t|s,|jj}W n ty<   t|s:d}Y nw t	|_
|rJ|j
 td _}	jdur`tj d_t|	}
t|
_g _d_d_d_d_t _t }	|	tj|j  |_!"d g }t#j$% }|j&r|j'sj(j)dusj*j)durt+,d |j-j.d nst|r-js-t/j0j1|||d}t22|}j(j)dur|j3rt+,d d|_3j*j)dur|j4rt+,d	 d|_4j*j5dur|j6rt+,d
 d|_6t/j7||d_j8 I dH  t9jd_|: }|r-j;|j< |rj(j)rj*j)rj
d sH|j&r|j=rt>d_j?j(j)j(_)j@j*j)j*_)|j&rj|j=sn|j&stABjj8|jCd d}|D| |jEdu r|_EntFj
G rtdjHjIrtJ_KjLMjKjN |DtAjBjK8 dd |	tjO|jPjQ |	tjR|jjS |	tjT|jjU jr|DtAjB|V dd jWs|Xfdd d_Wd}|rjYdurjYZ stdt[dd}|_Y|DtABj\j!dd ztAj]| I dH  W t^j_j`| I dH  n
t^j_j`| I dH  w jdurGj8 I dH  j(j)durXtAjBa dd_bj(jcduritAjBd dd_ed_"d jrjjfrd=fd"d#}jjfg| d>fd'd(d? fd,d- j(j)ddd. }j(jcddd. } j*j)} j*jc} j*j5}t+hd/d0id1d2 |D pd3d0id4d2 |D pd3 jjjkrj*j)rj*j)jlst+j,d5d6j*j)j id7 t+hd8d0id9d2 |D pd3 |s|r+t+hd:d0id;d2 |D pd3d0id<d2 |D p)d3 |r3|I dH  |W  d  I dH  S 1 I dH sFw   Y  dS )@a  Start the voice agent.

        Create a default RoomIO if the input or output audio is not already set.
        If the console flag is provided, start a ChatCLI.

        Args:
            capture_run: Whether to return a RunResult and capture the run result during session start.
            room: The room to use for input and output
            room_input_options: Options for the room input
            room_output_options: Options for the room output
            record: Whether to record the audio, transcripts, traces, or logs
        NFagent_sessionr   zeagent started with the console subcommand, but input.audio/output.audio is already set, overriding...)r   r{   )r  r  zHRoomIO audio input is enabled but input.audio is already set, ignoring..zJRoomIO audio output is enabled but output.audio is already set, ignoring..zZRoomIO transcription output is enabled but output.transcription is already set, ignoring..)r  r(  r  )r{   r-   rI   )r(  z	audio.ogg)output_pathzOnly one `AgentSession` can be the primary at a time. If you want to ignore primary designation, use session.start(record=False)._ivr_activity_startname_job_ctx_connectc                     s    j tjdS N)reason)_aclose_implr7   JOB_SHUTDOWNrR   r   rR   rS   <lambda>  s    z$AgentSession.start.<locals>.<lambda>Tr  )r  )wait_on_enter_forward_audio_task_forward_video_taskr   _asyncio.Future[None]rY   r   c                   s(    j dkr jdkr   d S d S d S )Nr   )r   r   _set_user_away_timerr6  r   rR   rS   on_room_io_subscribed  s   z1AgentSession.start.<locals>.on_room_io_subscribedinp$io.AudioInput | io.VideoInput | None#list[io.AudioInput | io.VideoInput]c                      | d u rg S | g | j  S r~   )source)r;  )_collect_sourcerR   rS   r@       z+AgentSession.start.<locals>._collect_sourceout6io.TextOutput | io.VideoOutput | io.AudioOutput | None5list[io.VideoOutput | io.AudioOutput | io.TextOutput]c                   r>  r~   )next_in_chain)rB  )_collect_chainrR   rS   rF    rA  z*AgentSession.start.<locals>._collect_chainz*using audio io: %s -> `AgentSession` -> %sz -> c                 S     g | ]	}d |j  d qS `label.0rB  rR   rR   rS   
<listcomp>      z&AgentSession.start.<locals>.<listcomp>z(none)c                 S  rH  rI  rK  rM  rR   rR   rS   rO    rP  z`resume_false_interruption is enabled but audio output does not support pause, it will be ignoredaudio_outputextraz)using transcript io: `AgentSession` -> %sc                 S  rH  rI  rK  rM  rR   rR   rS   rO    rP  z(using video io: %s > `AgentSession` > %sc                 S  rH  rI  rK  rM  rR   rR   rS   rO    rP  c                 S  rH  rI  rK  rM  rR   rR   rS   rO    rP  )r6  r7  rY   r   )r;  r<  rY   r=  )rB  rC  rY   rD  )mr   r   r   r   r   r*   jobenable_recordingr  r\   r   init_recordingr%   
start_spanr   r   otel_contextdetachr   set_span_in_contextattachr   r   r   r   r   get_currentr   get_current_spanset_attributer$   ATTR_AGENT_LABELrL  r   _update_agent_stater   AgentsConsoleget_instanceenabledio_acquiredr   rI   r   r#   r   
acquire_ior   r-   RoomOptions_ensure_optionsr   audio_inputrQ  transcriptiontext_outputRoomIOr#  r2   get_text_input_optionsregister_text_inputtext_input_cbrW   r>   record_inputrecord_outputr   create_tasksession_directoryr   _primary_agent_sessionanyvaluesr  ru   r=   r   r   extendr   ATTR_ROOM_NAMEr  r,  ATTR_JOB_IDidATTR_AGENT_NAME
agent_nameconnectr   add_shutdown_callbackr   r  r?   _update_activitygatherr   aiocancel_and_waitr4  r   videor5  r   subscribed_futadd_done_callbackdebugjoinr   rp   	can_pause)r   r  r  r  r  rW   r  r  job_ctxcurrent_spanctxtasksctext_input_optstaskr  r:  rh  video_inputrQ  video_outputtranscript_outputrR   )rF  r@  r   rS   r#  	  sb  












(

 2c                   s(   | j d u r
td| j  I d H  d S NAgentSession isn't running)r   r  drainr   rR   rR   rS   r    s   
zAgentSession.drainroom_io.RoomIOc                 C  s   | j std| j S )NzDCannot access room_io: the AgentSession was not started with a room.)r   r  r   rR   rR   rS   r-     s
   zAgentSession.room_io)r  errorr/  r7   r  r  Jllm.LLMError | stt.STTError | tts.TTSError | llm.RealtimeModelError | Nonec                C  s&   | j rd S t| j|||d| _ d S N)r  r  r/  )r   r   rq  r0  )r   r/  r  r  rR   rR   rS   _close_soon#  s
   
zAgentSession._close_soon)r  c                C  s   | j d |tjd d S r  )r  r7   USER_INITIATED)r   r  rR   rR   rS   shutdown0  s   zAgentSession.shutdownr"   c             	     s`  | j r
t| j  | j4 I d H  | js"	 W d   I d H  d S d| _|   | jd ur|sFz| jjddI d H  W n	 t	yE   Y nw | j
 I d H  | jjrY| jjI d H  d | j_d | j_d | j_d | j_|tjkr}| jj }d ur}|jddd | j I d H  d | _| jr| j  d | _| jr| j  d | _| jd urtj| jI d H  | jr| j I d H  | jd ur| j I d H  | jr| j  d | _d| _|  dt!||d |   d| _"d	| _#d
| _$d
| _%d | _ | j&r| j& I d H  d | _&| j'r| j' I d H  d | _'W d   I d H  n1 I d H sw   Y  t(j)d|j*|dd d S )NTforcer   )audio_detachedtranscript_timeoutFcloser  r/  r   r   r   zsession closed)r/  r  rR  )+r   rX  r[  r   r   r   _cancel_user_away_timerr   	interruptr  r  r  r   rI   r  r   ri  r7   ERROR_audio_recognitioncommit_user_turnacloser   endr   r   r   r  r  r   r   r   r   r6   r   r   r   r   r   r   r#   r  r   )r   r/  r  r  audio_recognitionrR   rR   rS   r0  3  sv   






*OzAgentSession._aclose_implc                   s   | j tjdI d H  d S r.  )r0  r7   r  r   rR   rR   rS   r    s   zAgentSession.acloserj   rk   r   NotGivenOr[float]$NotGivenOr[TurnDetectionMode | None]c                C  sR   t |r|| j_t |r|| j_t |r|| _| jdur'| jj|||d dS dS )a  
        Update the options for the agent session.

        Args:
            min_endpointing_delay (NotGivenOr[float], optional): The minimum endpointing delay.
            max_endpointing_delay (NotGivenOr[float], optional): The maximum endpointing delay.
            turn_detection (NotGivenOr[TurnDetectionMode | None], optional): Strategy for deciding
                when the user has finished speaking. ``None`` reverts to automatic selection.
        Nr  )r*   r   rj   rk   r   r   update_options)r   rj   rk   r   rR   rR   rS   r    s   

zAgentSession.update_optionsrI   re   add_to_chat_ctxstr | AsyncIterable[str]rI   )NotGivenOr[AsyncIterable[rtc.AudioFrame]]r  rA   c          	      C  s   | j d u r	td| j}| j jr| jn| j }|d u rtdt }t tju r5| j	d ur5tj
| j	dd}|! |j||||d}|rP|| W d    |S W d    |S 1 s[w   Y  |S )Nr  z)AgentSession is closing, cannot use say()Fend_on_exitr  )r   r  r   scheduling_pausedr   r   r   r]  INVALID_SPANr   use_spansay_watch_handle)	r   r
  rI   re   r  r  activityr  handlerR   rR   rS   r    s2   





zAgentSession.say)r  instructionstool_choicere   chat_ctxr  !NotGivenOr[str | llm.ChatMessage]r  NotGivenOr[str]r  NotGivenOr[llm.ToolChoice]r  NotGivenOr[ChatContext]c             
   C  s   | j du r	tdt|trtjd|gdn|}| j}| j jr"| jn| j }	|	du r-tdt	 }
t
 t
ju rD| jdurDt
j| jdd}
|
* |	j|rM|nd||||t|dd	}|rh|| W d   |S W d   |S 1 ssw   Y  |S )
a  Generate a reply for the agent to speak to the user.

        Args:
            user_input (NotGivenOr[str | llm.ChatMessage], optional): The user's input that may influence the reply,
                such as answering a question.
            instructions (NotGivenOr[str], optional): Additional instructions for generating the reply.
            tool_choice (NotGivenOr[llm.ToolChoice], optional): Specifies the external tool to use when
                generating the reply. If generate_reply is invoked within a function_tool, defaults to "none".
            allow_interruptions (NotGivenOr[bool], optional): Indicates whether the user can interrupt this speech.
            chat_ctx (NotGivenOr[ChatContext], optional): The chat context to use for generating the reply.
                Defaults to the chat context of the current agent if not provided.
            input_modality (Literal["text", "audio"], optional): The input mode to use for generating the reply.

        Returns:
            SpeechHandle: A handle to the generated reply.
        Nr  user)rolecontentz4AgentSession is closing, cannot use generate_reply()Fr  )modality)user_messager  r  re   r  input_details)r   r  rZ   r   r   ChatMessager   r  r   r   r   r]  r  r   r  _generate_replyr@   r  )r   r  r  r  re   r  r  r  r  r  r  r  rR   rR   rS   r    s>   



zAgentSession.generate_replyr  r  r7  c                C  s    | j du r	td| j j|dS )zInterrupt the current speech generation.

        Returns:
            An asyncio.Future that completes when the interruption is fully processed
            and chat context has been updated.
        Nr  r  )r   r  r  )r   r  rR   rR   rS   r    s   
zAgentSession.interruptc                 C  s    | j d u r	td| j   d S r  )r   r  clear_user_turnr   rR   rR   rS   r    s   
zAgentSession.clear_user_turnr  stt_flush_durationr  r  c                C  s&   | j du r	td| j j||d dS )a  Commit the user turn and generate a reply.

        Args:
            transcript_timeout (float, optional): The timeout for the final transcript
                to be received after committing the user turn.
                Increase this value if the STT is slow to respond.
            stt_flush_duration (float, optional): The duration of the silence to be appended to the STT
                to flush the buffer and generate the final transcript.

        Raises:
            RuntimeError: If the AgentSession isn't running.
        Nr  r  )r   r  r  )r   r  r  rR   rR   rS   r  #  s
   

zAgentSession.commit_user_turnc                 C  sL   || _ | jr"tj| | j| j dd | _}| j}|r$|| d S d S d S )N_update_activity_taskr+  )r   r   r   rq  r  r   r   r  )r   r  r  r  rR   rR   rS   update_agent9  s   zAgentSession.update_agentr  r#  )previous_activitynew_activityblocked_tasksr3  r  Literal['close', 'pause']r  Literal['start', 'resume']r  list[asyncio.Task] | Noner3  c          
   	     s  | j 4 I d H  || _|dkr1| jr| jjnd }|jd ur*||us&|dkr*tdt|| | _n|dkrB|jd u r>td|j| _| jd urMt	| j | j}| jd urz|dkrj| j
 I d H  | j I d H  n|dkrz| jj|ptg dI d H  | j| _d | _| j}t|r|jjnd | jjjd}	|r|j|	|r|jnd | jjd	 | j|	 |dkr| j I d H  n|dkr| j I d H  W d   I d H  n1 I d H sw   Y  |r| jjd usJ t| jjI d H  d S d S )
Nr#  r  z2cannot start agent: an activity is already runningresumez:cannot resume agent: no existing active activity to resumepause)r  )old_agent_idnew_agent_id)item	old_agent	new_agent)r   r   r   r  r  r0   r   r   rX  r[  r  r  r  r   r    ry  _agent_handoffr   insertr#  r  _on_enter_taskr   shield)
r   r  r  r  r  r3  previous_agentprevious_activity_vr  handoff_itemrR   rR   rS   r~  G  s\   	



(6zAgentSession._update_activityold_taskasyncio.Task[None] | Nonec                   s(   |d ur
|I d H  |  |I d H  d S r~   )r~  )r   r  r  rR   rR   rS   r    s   
z"AgentSession._update_activity_taskCllm.LLMError | stt.STTError | tts.TTSError | llm.RealtimeModelErrorc                   s    j s|jrd S |jdkr  jd7  _ j jjkrd S n|jdkr3  jd7  _ j jjkr3d S t|jt	rCt
d|j  nt
jd|jd d fdd}t j|tjd _  j | d S )N	llm_errorr+   	tts_errorz4AgentSession is closing due to unrecoverable error: z2AgentSession is closing due to unrecoverable error)exc_infor6  asyncio.Task[None]rY   r   c                   s
   d  _ d S r~   )r   r9  r   rR   rS   on_close_done  s   
z-AgentSession._on_error.<locals>.on_close_doner  )r6  r  rY   r   )r   recoverabletyper   r   rc   r   rZ   r  r   r#   r   rq  r0  r7   r  r  )r   r  r  rR   r   rS   	_on_error  s,   

zAgentSession._on_errorc                   sD   | j j}|d u rd S |2 z3 d H W }| jd ur| j| q6 d S r~   )r   rI   r   
push_audio)r   rh  ry   rR   rR   rS   r4    s   
z AgentSession._forward_audio_taskc                   s\   | j j}|d u rd S |2 z3 d H W }| jd ur*| jd ur$| || s$q| j| q6 d S r~   )r   r  r   r   
push_video)r   r  ry   rR   rR   rS   r5    s   
z AgentSession._forward_video_taskc                 C  sR   |    | jjd u rd S | j }r|jr|j sd S | j| jj| jd| _	d S )Naway)
r  r   rn   r   r  r  r   
call_later_update_user_stater   )r   r-   rR   rR   rS   r8    s   
z!AgentSession._set_user_away_timerc                 C  s"   | j d ur| j   d | _ d S d S r~   )r   cancelr   rR   rR   rS   r    s   


z$AgentSession._cancel_user_away_timer)rX  
start_timestaterX  otel_context.Context | Noner  c                C  s   | j |krd S |rt|d nd }|dkr6d| _d| _| jd u r5tjd||d| _| jr5t| j| jj	j
 n| jd urC| j  d | _|dkrQ| jdkrQ|   n|   | j }|| _ | dt||d d S )	N ʚ;r   r   agent_speaking)r   r  r   agent_state_changed	old_state	new_state)r   rb   r   r   r   r%   rW  r   r.   r  local_participantr  r   r8  r  r   r5   )r   r  rX  r  start_time_nsr  rR   rR   rS   r`    s6   





z AgentSession._update_agent_state)last_speaking_timer  c                C  s   | j |krd S |rt|d nd }|dkr2| jd u r2tjd|d| _| jr1| jjr1t| j| jj n| jd urA| jj|d d | _|dkrO| j	dkrO| 
  n|   | j }|| _ | dt||d d S )	Nr  r   user_speaking)r  )end_timer   user_state_changedr  )r   rb   r   r%   rW  r   linked_participantr.   r  r   r8  r  r   r<   )r   r  r  last_speaking_time_nsr  rR   rR   rS   r    s,   



zAgentSession._update_user_stateevr:   c                 C  s*   | j dkr|jr| d | d| d S )Nr  r   user_input_transcribed)r   is_finalr  r   )r   r  rR   rR   rS   _user_input_transcribed/  s   
z$AgentSession._user_input_transcribedmessagellm.ChatMessagec                 C  s"   | j | | dt|d d S )Nconversation_item_added)r  )r   r  r   r8   )r   r	  rR   rR   rS   _conversation_item_added6  s   z%AgentSession._conversation_item_addeditems3Sequence[llm.FunctionCall | llm.FunctionCallOutput]c                 C  s   | j | d S r~   )r   r  )r   r  rR   rR   rS   _tool_items_added:  s   zAgentSession._tool_items_addedstt.STT | Nonec                 C  r   r~   )r   r   rR   rR   rS   r   >  r   zAgentSession.stt"llm.LLM | llm.RealtimeModel | Nonec                 C  r   r~   )r   r   rR   rR   rS   r   B  r   zAgentSession.llmtts.TTS | Nonec                 C  r   r~   )r   r   rR   rR   rS   r   F  r   zAgentSession.ttsvad.VAD | Nonec                 C  r   r~   )r   r   rR   rR   rS   r   J  r   zAgentSession.vadc                 C  6   | j sd S | jd ur| j  tj|  dd| _d S )Nr5  r+  )r   r   r  r   rq  r5  r   rR   rR   rS   r   P     

z$AgentSession._on_video_input_changedc                 C  r  )Nr4  r+  )r   r   r  r   rq  r4  r   rR   rR   rS   r   [  r  z$AgentSession._on_audio_input_changedc                 C  r}   r~   rR   r   rR   rR   rS   r   f     z%AgentSession._on_video_output_changedc                 C  sH   | j r| jjr| jj }r |js"tjdd|jid d S d S d S d S d S )NzZresume_false_interruption is enabled, but the audio output does not support pause, ignoredrQ  rR  )	r   r   rp   r   rI   r  r#   r   rL  )r   rQ  rR   rR   rS   r   i  s   

z%AgentSession._on_audio_output_changedc                 C  r}   r~   rR   r   rR   rR   rS   r   u  r  z$AgentSession._on_text_output_changedc                   s   | S r~   rR   r   rR   rR   rS   
__aenter__z  s   zAgentSession.__aenter__exc_typetype[BaseException] | NoneexcBaseException | Noneexc_tbTracebackType | Nonec                   s   |   I d H  d S r~   )r  )r   r  r  r  rR   rR   rS   	__aexit__}  s   zAgentSession.__aexit__)8r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   re   rH   rf   rH   rh   rg   ri   rb   rj   rg   rk   rg   rl   rb   r   r   rn   rm   ro   rm   rp   rH   rq   rg   rr   r   rt   r   rs   rH   ru   rH   r   r   r   r   r   r   rY   r   )r   r9   r   r3   rY   r   )rY   rv   )r   rv   rY   r   )rY   r   )rY   r   )rY   r   )rY   r   )rY   rd   )rY   r]   )rY   r  )rY   r  )rY   r;   )rY   r4   )rY   r/   )rY   r	  )r  r   r  r  r  r  rY   r  )r  r/   r  r  r  r  r  r  rW   rX   r  r  r  r   rY   r?   )r  r/   r  r%  r  r  r  r  rW   rX   r  r  r  r   rY   r   )r  r/   r  rH   r  r  r  r  rW   r&  r  r  r  r   rY   r'  )rY   r   )rY   r  )r/  r7   r  rH   r  r  rY   r   )r  rH   rY   r   )rj   r  rk   r  r   r  rY   r   )
r
  r  rI   r  re   r   r  rH   rY   rA   )r  r  r  r  r  r  re   r   r  r  r  r  rY   rA   )r  rH   rY   r7  )r  rg   r  rg   rY   r   )r  r/   rY   r   )r  r/   r  r  r  r  r  r  r3  rH   rY   r   )r  r  r  r/   rY   r   )r  r  rY   r   )r  r4   rX  r  r  rm   rY   r   )r  r;   r  rm   rY   r   )r  r:   rY   r   )r	  r
  rY   r   )r  r  rY   r   )rY   r  )rY   r  )rY   r  )rY   r  )rY   r|   )r  r  r  r  r  r  rY   r   )@rM   rN   rO   r'   r   r   propertyr   setterr   r   r   r   r  r   r  r  r   r  r  r   r  r   r#  r  r-   r  r  r   log_exceptionsr#   r0  r  r  r  r  r  r  r  r  r~  r  r  r4  r5  r8  r  r`  r  r  r  r  r   r   r   r   r   r   r   r   r   r  r  __classcell__rR   rR   r   rS   r|      s6    h  
\#%<
C
 
		,$r|   )rW   rX   rY   rG   )o
__future__r   r   r   r   collections.abcr   r   
contextlibr   r   contextvarsr   dataclassesr   typesr	   typingr
   r   r   r   r   r   r   opentelemetryr   rX  r   typing_extensionsr   livekitr    r   r   r   r   r   r   r   _exceptionsr   rT  r   r   r    r!   logr#   	telemetryr$   r%   r&   r'   r(   r)   
utils.miscr*   r,   r-   _utilsr.   r  r/   agent_activityr0   r  r1   client_eventsr2   eventsr3   r4   r5   r6   r7   r8   r9   r:   r;   r<   ivrr=   recorder_ior>   
run_resultr?   speech_handler@   rA   rB   rC   rD   rE   transcription.filtersrF   rG   rU   rQ   rV   r\   r]   rd   rv   rw   rx   r   r   EventEmitterr|   rR   rR   rR   rS   <module>   sv    $
$0
 