o
    di=                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlZeejj e  ejejd e dZdZd	d
 ZdddZdd Zdd Zdd Zdd ZdS )    N)upload_to_azure_blob)send_openai_analysis)get_db_connection)load_dotenv)levelSARVAM_SUBSCRIPTION_KEYzstorage/audio/c              
   C   s  z|  di }| dg }|s|  ddg dddW S g }t }|D ]M}| dd}|| |d	d
 }||| dd| dd| ddd}t|dkrfd| v p\d| v }	|	radnd|d< nd|d< || q"|r{tdd |D nd}
d
	dd |D }t
dt| dt| d|
dd |p|  dd|t|t|
ddW S  ty } zt
d|  |  ddg dddW  Y d}~S d}~ww )a5  
    Parse Sarvam API response with diarization

    Returns:
        dict with:
        - full_transcript: Complete transcript text
        - speaker_segments: List of segments with speaker, text, timestamps
        - num_speakers: Number of unique speakers
        - duration: Total duration in seconds
    diarized_transcriptentries
transcript r   )full_transcriptspeaker_segmentsnum_speakersduration
speaker_idunknown_ start_time_secondsend_time_seconds)speakerr   text
start_timeend_time   	speaker_0	speaker_1agentcustomerroleNc                 S      g | ]}|d  qS )r    .0sr!   r!   </home/aiteam/pcaa-dev/post call analysis/sarvam_processor.py
<listcomp>U       z-parse_diarized_transcript.<locals>.<listcomp>c                 S   r    )r   r!   r"   r!   r!   r%   r&   X   r'   u   ✅ Parsed diarization: z speakers, z segments, z.1fr$   u'   ❌ Error parsing diarized transcript: )getsetaddreplacetitlelenlowerappendmaxjoinlogginginforound	Exceptionerror)resultdiarized_datar	   r   speakers_setentryr   speaker_namesegmentis_agentr   r   er!   r!   r%   parse_diarized_transcript   sV   





(
r?   c                 C   s   t  }|tjj}| r|  d}|  d}nd}d}|d| d | }|D ]H}z#|d }|d }tj	t
d	| d
}	t|}
|
jdkrZtd| d|
j  W q*t|	d}||
j W d    n1 spw   Y  tj|	std|  W q*t }|std|  W q*|d }|d }|d dd dd tj|	 d |d dd  }t||	std|  W q*t|std|  W q*t||}|std|  W q*t|}|d| d||d t|d |d |d  ||d!d"t|df	 |  t d#|  t!||  zt"|	 t d$|  W n t#yO } zt$d%|	 d&|  W Y d }~nd }~ww W q* t#ys } ztd'|d  d&t|  W Y d }~q*d }~ww |%  |%  d(d)iS )*N_calls_sarvamresponse
7987_calls7987_sarvamresponseSELECT * FROM z WHERE status = 0callidfileUrl
translate_.wav   '   ❌ Failed to download file for callid . Status code: wb+   ❌ File missing after download for callid /   ❌ Failed to initialize Sarvam job for callid job_idoutput_storage_pathinput_storage_path?r   /   .   ❌ Failed to upload file to Azure for callid )   ❌ Failed to start Sarvam job for jobid #   ❌ Failed to get result for jobid z
                INSERT INTO z
                (callid, transcript, speaker_segments, num_speakers, duration, request_id, language, raw_response, status, created_at, updated_at)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW())
            r   r   r   r   language_coder   u.   ✅ Translated transcript inserted for callid u#   🧹 Removed local file for callid u   ⚠️ Could not delete file :    ❌ Exception for callid messagezTranslation batch completed)&r   cursorpymysqlcursors
DictCursorexecutefetchallospathr1   	AUDIO_DIRrequestsr(   status_coder2   r6   openwritecontentexistsinit_sarvam_jobsplitrstripbasenamer   start_sarvam_jobpoll_sarvam_statusr?   jsondumpsstrcommitr3   r   remover5   warningclose)bidconnr\   calls_tablesarvam_response_tablecallscallcall_idfile_url
local_fileresponsefjobrO   
output_url	azure_urlr7   r8   r>   r!   r!   r%   send_to_sarvamn   s   



>




"*r   c               
   C   s   t dd} z#tjd| i dd}|jdkr| W S td|j d|j  W d S  tjj	yG } ztd	t
|  W Y d }~d S d }~ww )
Napplication/jsonAPI-Subscription-KeyzContent-Typez7https://api.sarvam.ai/speech-to-text-translate/job/initTheadersrq   verify   u   ❌ Job init failed:  - u'   ❌ Request exception during job init: )SUBSCRIPTION_KEYre   postrf   rq   r2   r6   r   
exceptionsRequestExceptionrs   )r   r   r>   r!   r!   r%   rk      s   

rk   c              
   C   s   t dd}| ddid}z!tjd||dd}|jdkrW dS td	|j d
|j  W dS  tjjyL } ztdt	|  W Y d }~dS d }~ww )Nr   r   with_diarizationT)rO   job_parametersz2https://api.sarvam.ai/speech-to-text-translate/jobr   rI   u    ❌ Failed to start Sarvam job: r   Fu(   ❌ Request exception during job start: )
r   re   r   rf   r2   r6   r   r   r   rs   )rO   r   datar   r>   r!   r!   r%   ro      s"   
ro   c                 C   s  dt i}tdD ]<}td ztjd|  d|dd}|jdkr| }|d	r\t|d	 d
kr\t	
d|  d|d  d|d	 d
 d   |d dkoZ|d	 d
 d dk}nt	
d|  d|d  d |d dk}|rt	
d|  d td d|v r|dd\}}|d d| }	n|dd }	tj|	dd}
t	
d|
 d|  |
jdkr|
jrz|
 }t	
d|  d|  |W W   S  tjy   t	d|  d |
j  Y W  d S w t	d!|  d"|
j  W  d S t	d#|  d$|
j  W  d S t	
d%|  d&|d   nt	d'|  d(|j  W q tjjyE } zt	d)|  d|  W Y d }~ d S d }~ww t	d*|   d S )+Nr   x   
   z3https://api.sarvam.ai/speech-to-text-translate/job/z/statusT)r   r   rI   job_detailsr   zJob z state: 	job_stater   state	CompletedSuccessz - no job details availableu   ✅ Job z+ completed successfully, fetching result...rR   rT   rS   z/0.json?z/0.json)r   u   ✅ Final response job :u   ✅ Final JSON for job rY   u"   ❌ Failed to decode JSON for job z. Raw response: u   ❌ Empty response for job z. Response: u2   ❌ Failed to get content from output URL for job z. Response status: u   ❌ Job z5 not completed or failed, retrying... Current state: u'   ❌ Failed to check job status for job rK   u/   ❌ Exception while polling job status for job u   ❌ Timeout polling job )r   rangetimesleepre   r(   rf   rq   r-   r2   r3   rl   rm   r   JSONDecodeErrorr6   r   r   rv   )rO   r   r   attemptr   r   job_successbase_urlquery	final_urlfinal_responseparsedr>   r!   r!   r%   rp      sZ   
,"


rp   c                 C   s  t  }|tjj}| d}| d}|d| d| f | }|s>td|  d| d| d |	  |	  d	S t
|trH|d
nd }|dkrjtd|  d| d| d| d	 |	  |	  d	S zzY|d }tjtd|  d}	t|}
|
jdkrtd|  d|
j  W W |	  |	  d	S t|	d}||
j W d    n1 sw   Y  tj|	std|   W W |	  |	  d	S t }|std|   W W |	  |	  d	S |d }|d }|d dd dd tj|	 d |d dd  }t||	s.td|   W W |	  |	  d	S t|sGtd|  W W |	  |	  d	S t||}|sctd |  W W |	  |	  d	S t|}|d!| d"| |d# t !|d$ |d% |d& ||d'd(t"|df	 |d)| d*| f |#  t$d+|   t%| | tj|	rt&|	 t$d,|   W W |	  |	  d-S  t'y } ztd.|  d/t"|  W Y d }~W |	  |	  d	S d }~ww |	  |	  w )0Nr@   rA   rD   z WHERE callid = %su   ⚠️ Call ID z not found in table z (bid=z).Fstatusr   z found in table z) but status=z, expected 0. Skipping.rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   z
            INSERT INTO z
            (callid, transcript, speaker_segments, num_speakers, duration, request_id, language, raw_response, status, created_at, updated_at)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW())
        r   r   r   r   rX   r   zUPDATE z! SET status = 1 WHERE callid = %suA   ✅ Translated transcript inserted and status updated for callid u)   🧹 Deleted local audio file for callid TrZ   rY   )(r   r\   r]   r^   r_   r`   fetchoner2   rv   rw   
isinstancedictr(   rb   rc   r1   rd   re   rf   r6   rg   rh   ri   rj   rk   rl   rm   rn   r   ro   rp   r?   rq   rr   rs   rt   r3   r   ru   r5   )r~   rx   ry   r\   rz   r{   r}   r   r   r   r   r   r   rO   r   r   r7   r8   r>   r!   r!   r%   process_call_id  s   



D=8>0
,
'



r   )N)rb   re   r2   r   r]   rq   azure_uploadr   openai_helperr   	db_configr   dotenvr   urllib3disable_warningsr   InsecureRequestWarningbasicConfigINFOgetenvr   rd   r?   r   rk   ro   rp   r   r!   r!   r!   r%   <module>   s,   	

OY4