o
    H)j3                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlmZmZm	Z	 d dl
Z
ddlmZmZ ddlmZ ddlmZ eeZdZeed	d
ZeeddZeeddZeeddZeedppedppedppdZedd  Z eeddZ!eeddZ"eeddZ#edd  dv Z$G dd  d e%Z&d@d%d&Z'dAd*d+Z(dBd/d0Z)dCd4d5Z*dDd8d9Z+dEd<d=Z,G d>d? d?eZ-dS )F    )annotationsN)AnyDictList   )BaseSTT	STTResult)SarvamConcurrencySlot)wav_duration_seconds   SARVAM_BATCH_RETRIES5SARVAM_RATE_LIMIT_RETRIES8!SARVAM_RATE_LIMIT_BACKOFF_SECONDSSARVAM_POLL_INTERVAL_SECONDS10SARVAM_JOB_TIMEOUT_SECONDSSARVAM_JOB_TIMEOUT_SSARVAM_PENDING_TIMEOUT_S900SARVAM_STT_MODEbatchSARVAM_MIN_AUDIO_SECONDSz0.5SARVAM_SYNC_CHUNK_SECONDS25SARVAM_MIN_DIARIZATION_SEGMENTS4SARVAM_ACCEPT_WEAK_DIARIZATION1)r   trueyesc                   @  s   e Zd ZdZdS )DiarizationQualityErrorzPRaised when STT result looks like fake chunk-based labels, not real diarization.N)__name__
__module____qualname____doc__ r'   r'   A/home/aiteam/pcaa-dev/call-proccessing/stt_pipeline/stt/sarvam.pyr"   %   s    r"   
speaker_idstrreturn
int | Nonec              	   C  s0   zt t| ddW S  ttfy   Y d S w )Nspeaker_ )intr*   replace	TypeError
ValueErrorr)   r'   r'   r(   _speaker_numeric_id)   s
   r4   segmentsList[Dict[str, Any]]Nonec                 C  sx   dd | D }dd |D }|sdS dd t tt|D }| D ]}t|dd}|du r0q!d	||  |d
< q!dS )uH   Show Speaker 1/2/… in the UI regardless of Sarvam's 0- or 1-based IDs.c                 S  s   g | ]
}t |d dqS r)   r.   r4   get.0segr'   r'   r(   
<listcomp>2       z1_apply_display_speaker_labels.<locals>.<listcomp>c                 S     g | ]}|d ur|qS Nr'   r<   nr'   r'   r(   r>   3       Nc                 S  s   i | ]	\}}||d  qS )r   r'   )r<   irC   r'   r'   r(   
<dictcomp>6   s    z1_apply_display_speaker_labels.<locals>.<dictcomp>r)   r.   Speaker speaker)	enumeratesortedsetr4   r:   )r5   numsid_to_displayr=   rC   r'   r'   r(   _apply_display_speaker_labels0   s   rN   resultDict[str, Any]r   c                 C  sX  |  dpd}|  dpi }t|tr| dg ng }g }|D ]I}t| dd}|dr0|nd| }|dd}| dp@d }	|	sFqt| dpMd	}
t| d
pVd	}|d| ||	|
||
|d q|rdd |D }dd |D }|rt	|nd	}|D ]}t
|d }||krdnd|d< qt| |r|d d nd}t|||ddS )N
transcriptr.   diarized_transcriptentriesr)   0r-   start_time_secondsr   end_time_secondsrG   )rH   r)   textstartend
start_timeend_timec                 S  s   g | ]}t |d  qS r3   )r4   r;   r'   r'   r(   r>   W   rD   z(_parse_sarvam_result.<locals>.<listcomp>c                 S  r@   rA   r'   rB   r'   r'   r(   r>   X   rD   agentcustomerrolerY   g        sarvam)rQ   speaker_segmentsdurationprovider)r:   
isinstancedictr*   
startswithr0   stripfloatappendminr4   rN   r   )rO   transcript_textdiarizedrS   ra   entryraw_idr)   numrW   rX   rY   rL   min_numr=   rC   rb   r'   r'   r(   _parse_sarvam_result>   sJ   

rq   exc	Exceptionboolc                 C  s4   t |  }d|v pd|v pd|v pt| dd dkS )N429z
rate limit
rate_limitstatus_codei  )r*   lowergetattr)rr   msgr'   r'   r(   _is_rate_limit_errorh   s   r{   rb   rh   c           	      C  s   |t ks
t| dk rdS tdt|t  d }t| |d kr!dS d}d}| D ]F}t|dp5|dp5d}t|dpC|d	pC|}|| }t|t  d
k s_t|t|t  t   d
k rc|d7 }|t d krm|d7 }q'|t| d ko}|t| d kS )uX   Detect fake diarization from 25s chunked sync (starts at 0, 25, 50… with long blocks).   Fg+?r   r   rX   rZ   rY   r[   g      ?   g      ?g      ?)_CHUNK_SECONDSlenmaxr/   rh   r:   absround)	r5   rb   expected_chunksalignedlong_blocksr=   rX   rY   	block_lenr'   r'   r(    _looks_like_chunk_aligned_blocksr   s"   , r   audio_durationcallidc                 C  s   | j pg }| jp	d std| d|tk rdS |s(td| d|ddt||r>td| dt| d	|dd|d
krYt|tk rYtd| dt| d|dddd |D }|d |dkr{t|dk r}td| d|dddS dS )z;Reject transcripts that lack real Sarvam batch diarization.r.   [z] Empty transcriptNz ] No speaker segments (duration=z.1fs)z"] Chunk-aligned fake diarization (z blocks, duration=-   z] Too few speaker segments (z) for u&   s audio — likely missing diarizationc                 S     h | ]
}t |d dqS r8   r9   r<   sr'   r'   r(   	<setcomp>   r?   z'validate_diarization.<locals>.<setcomp>   r|   z ] Only one speaker detected for zs call)	ra   rQ   rg   r"   _MIN_AUDIO_SECONDSr   r   _MIN_SEGMENTS_LONG_CALLdiscard)rO   r   r   r5   speaker_idsr'   r'   r(   validate_diarization   s8   


r   c                   @  sL   e Zd ZdddZedddZdddZdddZdddZdddZ	dS )	SarvamSTTapi_keyr*   c                 C  s   |st d|| _d S )Nz#SARVAM_SUBSCRIPTION_KEY is required)r2   _api_key)selfr   r'   r'   r(   __init__   s   
zSarvamSTT.__init__r+   c                 C  s   dS )Nr`   r'   )r   r'   r'   r(   provider_name   s   zSarvamSTT.provider_name	audio_urlr   r   c                 C  sR   ddl m} || jd}t| | |||W  d    S 1 s"w   Y  d S )Nr   )SarvamAI)api_subscription_key)sarvamair   r   r	   _transcribe_locked)r   r   r   r   clientr'   r'   r(   
transcribe   s
   
$zSarvamSTT.transcribetmp_pathc                 C  s  d }t tD ]G}z|jjddddd}||grW  n2W q tyM } z$dt|v rB|td k rBtd||d  t	
d d }n W Y d }~qd }~ww |sXtd	t d
|  td||j |jttd}| st|dd p|t|dd p||}td| dd}	|jr|jD ]}
|
jdkr|
jr|
jd j}	 nq|jj|j|	gd}d }|jr|	|jv r|j|	 j}|stdtj|dd}|jdkrtd|j td| t| S )Nzsaaras:v2.5Tr|   zTranslate all speech to English)modelwith_diarizationnum_speakersprompt403r   u0   [%s] Upload attempt %d failed (403), retrying…zAll z upload attempts failedu=   [%s] Sarvam batch job %s started (diarization=on), waiting…)poll_intervaltimeout	job_statestatez#Sarvam job did not succeed (status=)z0.jsonSuccessr   )job_idfilesz$No download URL in Sarvam job resultx   r      zResult download failed: HTTP z+[%s] Batch transcription download complete.)range_MAX_UPLOAD_RETRIESspeech_to_text_translate_job
create_jobupload_filesRuntimeErrorr*   loggerwarningtimesleeprX   infor   wait_until_complete_POLL_INTERVAL_S_JOB_TIMEOUT_Sis_successfulry   job_detailsr   outputs	file_nameget_download_linksdownload_urlsfile_urlrequestsr:   rw   rq   json)r   r   r   r   jobattemptrr   statusr   output_filedetaillinksdl_urlresult_respr'   r'   r(   _transcribe_batch_once   sb   


z SarvamSTT._transcribe_batch_oncer   rh   c                 C  s  d }t tt}t|D ]}zZ| |||}zt||| W n+ tyH }	 ztr=|jp+d	 r=t
d||t|jp8g |	 n W Y d }	~	nd }	~	ww t
d|t|j|jpU|tdd |jD d h  |W   S  tyo     ty }
 zR|
}t|
r||d k rtdtd|  }t
d	||d || t| W Y d }
~
q||d k rtd
d|d  }t
d||d ||
| t| W Y d }
~
q d }
~
ww |r|td)Nr.   z=[%s] Weak diarization accepted (%.1fs audio, %s segments): %sz6[%s] Diarization OK: %s segments, %.1fs, %s speaker(s)c                 S  r   r8   r9   r   r'   r'   r(   r     r?   z.SarvamSTT._transcribe_batch.<locals>.<setcomp>r   g     V@r|   u;   [%s] Batch rate limit (attempt %s/%s), retrying in %.0fs…g      >@r}   u8   [%s] Batch failed (attempt %s/%s): %s — retry in %.0fsz!Sarvam batch transcription failed)r   _MAX_BATCH_RETRIES_MAX_RATE_LIMIT_RETRIESr   r   r   r"   _ACCEPT_WEAK_DIARIZATIONrQ   rg   r   r   r   ra   r   rb   rs   r{   rj   _RATE_LIMIT_BACKOFF_Sr   r   r   )r   r   r   r   r   last_excattemptsr   rO   diar_excrr   wait_sr'   r'   r(   _transcribe_batch   sz   



zSarvamSTT._transcribe_batchc              	   C  s  d }zwt d|t t d|| tj|dd}|jdkr%td|j tjddd	d
}|	|j
 |j}W d    n1 sAw   Y  t|}|tk rWtd|ddtdkrbt d|t | ||||W |rxtj|ryt| S S S |rtj|rt| w w w )Nz3[%s] Sarvam STT mode=%s (diarization via batch API)z[%s] Downloading audio: %sr   r   r   zAudio download failed: HTTP z.wavFz/tmp)suffixdeletedirzAudio too short or empty (z.2fr   r   uA   [%s] SARVAM_STT_MODE=%s ignored — batch+diarization is required)r   r   	_STT_MODEr   r:   rw   r   tempfileNamedTemporaryFilewritecontentnamer
   r   r   r   ospathexistsunlink)r   r   r   r   r   resptmprb   r'   r'   r(   r   &  s4   
zSarvamSTT._transcribe_lockedN)r   r*   )r+   r*   )r   r*   r   r*   r+   r   )r   r*   r   r*   r+   r   )r   r*   r   r*   r   rh   r+   r   )
r#   r$   r%   r   propertyr   r   r   r   r   r'   r'   r'   r(   r      s    



2<r   )r)   r*   r+   r,   )r5   r6   r+   r7   )rO   rP   r+   r   )rr   rs   r+   rt   )r5   r6   rb   rh   r+   rt   )rO   r   r   rh   r   r*   r+   r7   ).
__future__r   loggingr   r   r   typingr   r   r   r   baser   r   concurrencyr	   audio_chunkerr
   	getLoggerr#   r   r   r/   getenvr   r   rh   r   r   r   rg   rx   r   r   r~   r   r   r   r"   r4   rN   rq   r{   r   r   r   r'   r'   r'   r(   <module>   sJ    





*


"