o
    .-iq&                     @   sN   d Z ddlZddlmZ ddlmZ G dd dZG dd dZed	d
Z	dS )z
Voice Enhancement Service for ElevenLabs VAD
Filters background noise and enhances human voice frequencies
to improve VAD accuracy and reduce false positives from background noise.

Uses only numpy (no scipy) for maximum compatibility.
    N)Tuple)Logc                	   @   sB   e Zd ZdZddedededefddZd	ejd
ejfddZ	dS )BandpassFilterz;Simple IIR bandpass filter implementation using numpy only.   low_freq	high_freqsample_rateorderc                 C   sJ   || _ || _|| _tdtj | | | _tdtj | | | _dS )z
        Initialize bandpass filter.
        
        Args:
            low_freq: Low cutoff frequency in Hz
            high_freq: High cutoff frequency in Hz
            sample_rate: Sample_rate in Hz
            order: Filter order (default 4)
        g       N)r   r   r   npexppi	low_alpha
high_alpha)selfr   r   r   r	    r   T/home/vmc/milan/call_management/calls/homebook/services/voice_enhancement_service.py__init__   s
   
zBandpassFilter.__init__audioreturnc                 C   s   t |}tdt|D ]}| j||d  ||  ||d    ||< qt |}tdt|D ]}d| j ||  | j||d    ||< q0|S )z
        Apply bandpass filter to audio.
        
        Args:
            audio: Input audio signal
            
        Returns:
            Filtered audio signal
           )r
   copyrangelenr   r   )r   r   Zhigh_passediZ
low_passedr   r   r   apply#   s   
,
*zBandpassFilter.applyN)r   )
__name__
__module____qualname____doc__floatintr   r
   ndarrayr   r   r   r   r   r      s    r   c                   @   s   e Zd ZdZddefddZdejdefdd	Z	dejdefd
dZ
dejdejfddZ		d dejdededejfddZdejdefddZ		d!dedededeeef fddZdd ZdS )"VoiceEnhancementServicea  
    Advanced audio preprocessing to enhance human voice and suppress background noise.
    
    Human voice characteristics:
    - Fundamental frequency: 85-255 Hz (male), 165-255 Hz (female)
    - Formants (resonances): 300-3400 Hz
    - Most energy: 100-4000 Hz
    
    Background noise typically:
    - Low frequency rumble: < 100 Hz
    - High frequency hiss: > 4000 Hz
    - Random/non-periodic patterns
    >  r   c                 C   sx   || _ d| _d| _t| j| j|| _d| _d| _d| _d| _t	
d t	
d| d	 t	
d
| j d| j d	 dS )z
        Initialize voice enhancement service.
        
        Args:
            sample_rate: Audio sample rate in Hz (default 16000 for ElevenLabs)
        d   i  Ng\(\?g{Gz?333333?u:   🎙️ Voice Enhancement Service initialized (numpy-only)z   Sample Rate: z Hzz   Voice Band: -)r   Zvoice_low_freqZvoice_high_freqr   bandpass_filternoise_floor_estimatenoise_floor_alphaenergy_thresholdzero_crossing_rate_thresholdr   info)r   r   r   r   r   r   I   s   
 z VoiceEnhancementService.__init__r   r   c                 C   s   t t |d S )z,Calculate normalized energy of audio signal.   )r
   sqrtmean)r   r   r   r   r   _calculate_energyi   s   z)VoiceEnhancementService._calculate_energyc              	   C   s,   t t t t |d }|t| S )zz
        Calculate zero crossing rate (ZCR).
        Voice has moderate ZCR, noise has very high or very low ZCR.
        r-   )r
   sumabsdiffsignr   )r   r   Zzero_crossingsr   r   r   _calculate_zero_crossing_ratem   s    z5VoiceEnhancementService._calculate_zero_crossing_ratec                 C   s   t j|}t |}t |}| jdu r|| _n| j| j d| j |  | _|d| j  }t |d| }|t d|  }t jj	|t
|d}|S )z
        Apply spectral subtraction to reduce stationary background noise.
        
        Args:
            audio: Input audio signal
            
        Returns:
            Noise-reduced audio signal
        Nr   g       @皙?y              ?)n)r
   fftrfftr2   angler(   r)   maximumr   irfftr   )r   r   Zspectrum	magnitudeZphaseZclean_magnitudeZclean_spectrumZclean_audior   r   r   _spectral_subtractionu   s   



z-VoiceEnhancementService._spectral_subtractionr%         @	thresholdratioc                 C   sF   t |}t |}||k}t || ||| | |   ||< |S )ad  
        Apply dynamic range compression to make quiet speech louder
        while preventing loud sounds from clipping.
        
        Args:
            audio: Input audio signal
            threshold: Compression threshold (0-1)
            ratio: Compression ratio (e.g., 4:1)
            
        Returns:
            Compressed audio signal
        )r
   r2   r   r4   )r   r   r@   rA   Z	abs_audio
compressedmaskr   r   r    _apply_dynamic_range_compression   s   

z8VoiceEnhancementService._apply_dynamic_range_compressionc                 C   s@   |  |}| |}|| jkod|  k o| jk }|S   }|S )z
        Determine if audio chunk contains voice activity.
        
        Args:
            audio: Input audio signal (normalized -1 to 1)
            
        Returns:
            True if voice activity detected, False otherwise
        r6   )r0   r5   r*   r+   )r   r   ZenergyZzcris_voicer   r   r   is_voice_activity   s   



z)VoiceEnhancementService.is_voice_activityTaudio_bytesapply_spectral_subtractionapply_compressionc              
   C   s   z`t j|t jdt j}|d }| |}|s,t |}|d t j }|dfW S | j	|}|r9| 
|}|r@| |}t t |}|dkrR|d|  }|d t j}	|	 dfW S  ty~ }
 ztd|
  |dfW  Y d}
~
S d}
~
ww )a  
        Enhance voice in audio by filtering noise and boosting speech frequencies.
        
        Args:
            audio_bytes: Raw PCM audio bytes (16-bit signed)
            apply_spectral_subtraction: Whether to apply spectral subtraction
            apply_compression: Whether to apply dynamic range compression
            
        Returns:
            Tuple of (enhanced_audio_bytes, is_voice_detected)
        )dtypeg      @Fgffffff?Tu   ❌ Voice enhancement failed: N)r
   
frombufferint16astypefloat32rF   
zeros_liketobytesr'   r   r>   rD   maxr2   	Exceptionr   error)r   rG   rH   rI   r   rE   ZsilenceZsilence_bytesmax_valZaudio_int16er   r   r   enhance_voice   s.   




z%VoiceEnhancementService.enhance_voicec                 C   s   d| _ td dS )zFReset the noise floor estimate (useful when call environment changes).Nu   🔄 Noise floor estimate reset)r(   r   debug)r   r   r   r   reset_noise_floor  s   z)VoiceEnhancementService.reset_noise_floorN)r#   )r%   r?   )TT)r   r   r   r   r    r   r
   r!   r   r0   r5   r>   rD   boolrF   bytesr   rV   rX   r   r   r   r   r"   :   s4     &



8r"   r#   )r   )
r   numpyr
   typingr   services.log_utilsr   r   r"   voice_enhancement_servicer   r   r   r   <module>   s    , T