o
    is=                     @  s  d dl mZ d dlZd dlZd dlmZmZ d dlmZm	Z	 d dl
mZmZ d dlmZmZ d dlmZ d dlmZmZmZ d d	lmZmZmZ d d
lmZ d dlmZ ddlmZm Z  ddl!m"Z" ddl#m$Z$ ddlm%Z%m&Z&m'Z'm(Z(m)Z) ddl*m+Z+m,Z,m-Z- ddl.m/Z/ eG dd de0eZ1eG dd dZ2eG dd dZ3eG dd dZ4eG dd dZ5G dd deZ6edZ7G d d! d!eej8ed" e7B  ee7 Z9G d#d$ d$eZ:e:Z;dS )%    )annotationsN)ABCabstractmethod)AsyncIterableAsyncIterator)	dataclassfield)Enumunique)TracebackType)GenericLiteralTypeVar)	BaseModel
ConfigDictField)rtc)Metadata   )APIConnectionErrorAPIError)logger)
STTMetrics)DEFAULT_API_CONNECT_OPTIONS	NOT_GIVENAPIConnectOptions
NotGivenOrTimedString)AudioBufferaiois_given)calculate_audio_durationc                   @  s.   e Zd ZdZ	 dZ	 dZ	 dZ	 dZ	 dZdS )SpeechEventTypestart_of_speechinterim_transcriptpreflight_transcriptfinal_transcriptrecognition_usageend_of_speechN)	__name__
__module____qualname__START_OF_SPEECHINTERIM_TRANSCRIPTPREFLIGHT_TRANSCRIPTFINAL_TRANSCRIPTRECOGNITION_USAGEEND_OF_SPEECH r2   r2   X/var/www/html/livekit_bhavya/venv/lib/python3.10/site-packages/livekit/agents/stt/stt.pyr"      s    r"   c                   @  sf   e Zd ZU ded< ded< dZded< dZded< dZded< d	Zd
ed< d	Zded< d	Z	ded< d	S )
SpeechDatastrlanguagetext        float
start_timeend_time
confidenceNz
str | None
speaker_idzbool | Noneis_primary_speakerzlist[TimedString] | Nonewords)
r)   r*   r+   __annotations__r:   r;   r<   r=   r>   r?   r2   r2   r2   r3   r4   4   s   
 r4   c                   @  s   e Zd ZU ded< dS )RecognitionUsager9   audio_durationN)r)   r*   r+   r@   r2   r2   r2   r3   rA   @   s   
 rA   c                   @  s@   e Zd ZU ded< dZded< eedZded< d	Zd
ed< d	S )SpeechEventr"   type r5   
request_id)default_factoryzlist[SpeechData]alternativesNzRecognitionUsage | Noner'   )	r)   r*   r+   r@   rF   r   listrH   r'   r2   r2   r2   r3   rC   F   s
   
 rC   c                   @  sB   e Zd ZU ded< ded< dZded< dZded< dZded	< d
S )STTCapabilitiesbool	streaminginterim_resultsFdiarizationzLiteral['word', 'chunk', False]aligned_transcriptToffline_recognizeN)r)   r*   r+   r@   rN   rO   rP   r2   r2   r2   r3   rJ   N   s   
 rJ   c                   @  sP   e Zd ZU eddZdZded< ded< ded	< ed
ddZded< ded< dS )STTErrorT)arbitrary_types_allowed	stt_errorzLiteral['stt_error']rD   r9   	timestampr5   label.)exclude	ExceptionerrorrK   recoverableN)	r)   r*   r+   r   model_configrD   r@   r   rX   r2   r2   r2   r3   rQ   X   s   
 
rQ   TEventc                      s   e Zd Zd4 fddZed5dd	Zed5d
dZed5ddZed6ddZe	e
dd7ddZe
edd7ddZd8d!d"Ze
edd9d$d%Zd:d&d'Zd;d(d)Zd<d0d1Zd:d2d3Z  ZS )=STTcapabilitiesrJ   returnNonec                  s6   t    || _t| j dt| j | _d| _d S )N.T)super__init___capabilitiesrD   r*   r)   _label_recognize_metrics_needed)selfr]   	__class__r2   r3   rb   i   s   

zSTT.__init__r5   c                 C     | j S N)rd   rf   r2   r2   r3   rU   o      z	STT.labelc                 C     dS )zGet the model name/identifier for this STT instance.

        Returns:
            The model name if available, "unknown" otherwise.

        Note:
            Plugins should override this property to provide their model information.
        unknownr2   rk   r2   r2   r3   models      
z	STT.modelc                 C  rm   )zGet the provider name/identifier for this STT instance.

        Returns:
            The provider name if available, "unknown" otherwise.

        Note:
            Plugins should override this property to provide their provider information.
        rn   r2   rk   r2   r2   r3   provider   rp   zSTT.providerc                 C  ri   rj   )rc   rk   r2   r2   r3   r]      rl   zSTT.capabilities)r6   bufferr   r6   NotGivenOr[str]conn_optionsr   rC   c                     d S rj   r2   )rf   rr   r6   rt   r2   r2   r3   _recognize_impl   s   zSTT._recognize_implr6   rt   c                  sx  t |jd D ]}z:t }| j|||dI d H }| jr@t | }t|jt || jt	|dt
| j| jdd}| d| |W   S  ty }	 zT||}
|jdkr^| j|	dd  ||jkrv| j|	dd td	|jd  d
|	| j|	dd tjd|	 d|
 d| j|d ddd t|
I d H  W Y d }	~	qd }	~	w ty }	 z| j|	dd  d }	~	ww td)N   rw   F
model_namemodel_providerrF   rT   durationrU   rB   streamedmetadatametrics_collectedr   rY   !failed to recognize speech after 	 attemptsTfailed to recognize speech: , retrying in ssttattemptr~   extraunreachable)range	max_retrytimeperf_counterrv   re   r   rF   rd   r!   r   ro   rq   emitr   _interval_for_retry_emit_errorr   r   warningasynciosleeprW   RuntimeError)rf   rr   r6   rt   ir:   eventr}   stt_metricseretry_intervalr2   r2   r3   	recognize   sf   



	zSTT.recognize	api_errorrW   rY   rK   c              	   C  s"   |  dtt | j||d d S NrX   )rT   rU   rX   rY   )r   rQ   r   rd   rf   r   rY   r2   r2   r3   r      s   zSTT._emit_errorRecognizeStreamc                C  s   t d)NzYstreaming is not supported by this STT, please use a different STT or use a StreamAdapter)NotImplementedError)rf   r6   rt   r2   r2   r3   stream   s   z
STT.streamc                   s   dS )z;Close the STT, and every stream/requests associated with itNr2   rk   r2   r2   r3   aclose   s   z
STT.aclosec                      | S rj   r2   rk   r2   r2   r3   
__aenter__      zSTT.__aenter__exc_typetype[BaseException] | NoneexcBaseException | Noneexc_tbTracebackType | Nonec                      |   I d H  d S rj   r   rf   r   r   r   r2   r2   r3   	__aexit__      zSTT.__aexit__c                 C  rm   )z&Pre-warm connection to the STT serviceNr2   rk   r2   r2   r3   prewarm   s   zSTT.prewarm)r]   rJ   r^   r_   )r^   r5   )r^   rJ   )rr   r   r6   rs   rt   r   r^   rC   r   rW   rY   rK   r^   r_   )r6   rs   rt   r   r^   r   r^   r_   )r^   r\   r   r   r   r   r   r   r^   r_   )r)   r*   r+   rb   propertyrU   ro   rq   r]   r   r   rv   r   r   r   r   r   r   r   r   __classcell__r2   r2   rg   r3   r\   d   s0    
;



r\   )r   rX   c                   @  s   e Zd ZG dd dZedd@d
dZedAddZejdBddZe	dCddZ
dCddZdDddZdEdd ZdFd#d$ZdCd%d&ZdCd'd(ZdCd)d*ZdGd,d-ZdHd/d0ZdCd1d2ZdCd3d4ZdId5d6ZdJd=d>Zd?S )Kr   c                   @  s   e Zd ZdZdS )zRecognizeStream._FlushSentinelz$Sentinel to mark when it was flushedN)r)   r*   r+   __doc__r2   r2   r2   r3   _FlushSentinel   s    r   )sample_rater   r\   rt   r   r   NotGivenOr[int]c                  s   | _ | _tjtjtjB    _tjt	   _
tj j
d _ j\ _}tj |dd _d _t   _ j fdd t|rM|nd _d _d _d _dS )	ap  
        Args:
        sample_rate : int or None, optional
            The desired sample rate for the audio input.
            If specified, the audio input will be automatically resampled to match
            the given sample rate before being processed for Speech-to-Text.
            If not provided (None), the input will retain its original sample rate.
        r   zSTT._metrics_task)namer   c                   s
    j  S rj   )	_event_chclose)_rk   r2   r3   <lambda>  s   
 z*RecognizeStream.__init__.<locals>.<lambda>Nr8   )_stt_conn_optionsr   Chanr   
AudioFramer   r   	_input_chrC   r   	itertoolstee_tee_event_aiterr   create_task_metrics_monitor_task_metrics_task_num_retries
_main_task_taskadd_done_callbackr    
_needed_sr
_pushed_sr
_resampler_start_time_offset)rf   r   rt   r   monitor_aiterr2   rk   r3   rb     s    

zRecognizeStream.__init__r^   r9   c                 C  ri   rj   )r   rk   r2   r2   r3   start_time_offset&  rl   z!RecognizeStream.start_time_offsetvaluer_   c                 C  s   |dk rt d|| _d S )Nr   z&start_time_offset must be non-negative)
ValueErrorr   )rf   r   r2   r2   r3   r   *  s   
c                   ru   rj   r2   rk   r2   r2   r3   _run0  r   zRecognizeStream._runc              
     sP  | j j}t }| j|krz|  jt | 7  _t }|  I d H W S  ty } zZ|dkr9| j|dd  | j|krO| j|dd td| j d|| j|dd | j 	| j}t
jd| d| d	| jj| jdd
d t|I d H  |  jd7  _W Y d }~nd }~w ty } z| j|dd  d }~ww | j|ksd S d S )Nr   Fr   r   r   Tr   r   r   r   r   rx   )r   r   r   r   r   r   r   r   r   r   r   r   r   rd   r   r   rW   )rf   max_retrieslast_start_timer   r   r2   r2   r3   r   3  sJ   

zRecognizeStream._main_taskr   rW   rY   rK   c              	   C  s&   | j dtt | j j||d d S r   )r   r   rQ   r   rd   r   r2   r2   r3   r   [  s   zRecognizeStream._emit_errorevent_aiterAsyncIterable[SpeechEvent]c                   s   |2 z@3 dH W }|j tjkr:|jdusJ dt|jt d| jj|jj	dt
| jj| jjdd}| jd| q|j tjkrCd| _q6 dS )	zTask used to collect metricsNz>recognition_usage must be provided for RECOGNITION_USAGE eventr8   Try   r|   r   r   )rD   r"   r0   r'   r   rF   r   r   rd   rB   r   ro   rq   r   r/   r   )rf   r   evr   r2   r2   r3   r   f  s,   z%RecognizeStream._metrics_monitor_taskframertc.AudioFramec                 C  s   |    |   | jr| j|jkrtd|j| _| jr2| j|jkr2| js2tj|j| jtj	j
d| _| jrH| j|}|D ]}| j| q=dS | j| dS )zPush audio to be recognizedz6the sample rate of the input frames must be consistent)qualityN)_check_input_not_ended_check_not_closedr   r   r   r   r   r   AudioResamplerAudioResamplerQualityHIGHpushr   send_nowait)rf   r   framesr2   r2   r3   
push_frame  s$   zRecognizeStream.push_framec                 C  sF   |    |   | jr| j D ]}| j| q| j|   dS )z#Mark the end of the current segmentN)r   r   r   flushr   r   r   )rf   r   r2   r2   r3   r     s   zRecognizeStream.flushc                 C  s   |    | j  dS )z3Mark the end of input, no more audio will be pushedN)r   r   r   rk   r2   r2   r3   	end_input  s   zRecognizeStream.end_inputc                   sN   | j   t| jI dH  | jdurt| jI dH  | j I dH  dS )zClose ths stream immediatelyN)r   r   r   cancel_and_waitr   r   r   r   rk   r2   r2   r3   r     s   

zRecognizeStream.acloserC   c                   sJ   z| j  I d H }W |S  ty$   | j s!| j  }r!|td w rj   )r   	__anext__StopAsyncIterationr   	cancelled	exception)rf   valr   r2   r2   r3   r     s   zRecognizeStream.__anext__AsyncIterator[SpeechEvent]c                 C  s   | S rj   r2   rk   r2   r2   r3   	__aiter__  s   zRecognizeStream.__aiter__c                 C  ,   | j jrt| }t|j d|j dd S )Nr`   z
 is closed)r   closedrD   r   r*   r)   rf   clsr2   r2   r3   r        z!RecognizeStream._check_not_closedc                 C  r   )Nr`   z input ended)r   r   rD   r   r*   r)   r   r2   r2   r3   r     r   z&RecognizeStream._check_input_not_endedc                   r   rj   r2   rk   r2   r2   r3   r     r   zRecognizeStream.__aenter__r   r   r   r   r   r   c                   r   rj   r   r   r2   r2   r3   r     r   zRecognizeStream.__aexit__N)r   r\   rt   r   r   r   )r^   r9   )r   r9   r^   r_   r   r   )r   r   r^   r_   )r   r   r^   r_   )r^   rC   )r^   r   )r^   r   r   )r)   r*   r+   r   r   rb   r   r   setterr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r2   r2   r2   r3   r      s.    
$

(










r   )<
__future__r   r   r   abcr   r   collections.abcr   r   dataclassesr   r   enumr	   r
   typesr   typingr   r   r   pydanticr   r   r   livekitr   livekit.agents.metrics.baser   _exceptionsr   r   logr   metricsr   r   r   r   r   r   utilsr   r   r    utils.audior!   r5   r"   r4   rA   rC   rJ   rQ   r[   EventEmitterr\   r   SpeechStreamr2   r2   r2   r3   <module>   sN    		
  \