o
    i	                     @  s   d dl mZ d dlZd dlZd dlmZ d dlmZ ddlm	Z	m
Z
 dgZdd	dddZeG dd dZG dd de
jZdS )    )annotationsN)	dataclass)	blingfire   )token_stream	tokenizerSentenceTokenizerF)retain_formattextstrmin_sentence_lenintr	   boolreturnlist[tuple[str, int, int]]c          
      C  s   t | \}}g }d}|D ]1\}}| || }tdd| }	|	r(t|	|k r)q|r4||||f n||	||f |}q|t| k ri| |d  }|rY|||t| f |S |  }	ri||	|t| f |S )Nr   z	\s*\n+\s* )r   text_to_sentences_with_offsetsresubstriplenappend)
r
   r   r	   _offsetsmerged_sentencesstartendraw_sentencesentence r   c/var/www/html/livekit_bhavya/venv/lib/python3.10/site-packages/livekit/agents/tokenize/blingfire.py_split_sentences   s(   r!   c                   @  s&   e Zd ZU ded< ded< ded< dS )_TokenizerOptionsr   r   stream_context_lenr   r	   N)__name__
__module____qualname____annotations__r   r   r   r    r"   .   s   
 r"   c                   @  s@   e Zd ZdddddddZdddddZdddddZdS )r      
   Fr   r#   r	   r   r   r#   r	   r   r   Nonec                C  s   t |||d| _d S )Nr*   )r"   _config)selfr   r#   r	   r   r   r    __init__6   s
   zSentenceTokenizer.__init__N)languager
   r   r/   
str | None	list[str]c                C  s    dd t || jj| jjdD S )Nc                 S  s   g | ]}|d  qS )r   r   ).0tokr   r   r    
<listcomp>D   s    z.SentenceTokenizer.tokenize.<locals>.<listcomp>r   r	   )r!   r,   r   r	   )r-   r
   r/   r   r   r    tokenizeC   s   zSentenceTokenizer.tokenizetokenizer.SentenceStreamc                C  s,   t jtjt| jj| jjd| jj| jjdS )Nr5   )r   min_token_lenmin_ctx_len)	r   BufferedSentenceStream	functoolspartialr!   r,   r   r	   r#   )r-   r/   r   r   r    streamM   s   zSentenceTokenizer.stream)r   r   r#   r   r	   r   r   r+   )r
   r   r/   r0   r   r1   )r/   r0   r   r7   )r$   r%   r&   r.   r6   r=   r   r   r   r    r   5   s    
)r
   r   r   r   r	   r   r   r   )
__future__r   r;   r   dataclassesr   livekitr    r   r   __all__r!   r"   r   r   r   r   r    <module>   s    