o
    i                     @  s   d dl mZ d dlZd dlmZ ddlmZmZmZm	Z	m
Z
mZ g dZeG dd dZG d	d
 d
ejZG dd dejZdddZdddd ddZd!ddZdS )"    )annotationsN)	dataclass   )_basic_hyphenator_basic_paragraph_basic_sent_basic_wordtoken_stream	tokenizer)SentenceTokenizerWordTokenizerhyphenate_wordtokenize_paragraphsc                   @  s.   e Zd ZU ded< ded< ded< ded< dS )	_TokenizerOptionsstrlanguageintmin_sentence_lenstream_context_lenboolretain_formatN)__name__
__module____qualname____annotations__ r   r   _/var/www/html/livekit_bhavya/venv/lib/python3.10/site-packages/livekit/agents/tokenize/basic.pyr      s
   
 r   c                   @  sB   e Zd ZddddddddZdddddZdddddZdS )r   english   
   Fr   r   r   r   r   r   r   r   r   r   r   returnNonec                C  s   t ||||d| _d S )Nr    )r   _config)selfr   r   r   r   r   r   r   __init__#   s   zSentenceTokenizer.__init__Nr   text
str | None	list[str]c                C  s"   dd t j|| jj| jjdD S )Nc                 S     g | ]}|d  qS r   r   .0tokr   r   r   
<listcomp>3       z.SentenceTokenizer.tokenize.<locals>.<listcomp>r   r   )r   split_sentencesr#   r   r   r$   r'   r   r   r   r   tokenize2   s   zSentenceTokenizer.tokenizetokenizer.SentenceStreamc                C  s.   t jtjtj| jj| jjd| jj| jj	dS )Nr1   r
   min_token_lenmin_ctx_len)
r	   BufferedSentenceStream	functoolspartialr   r2   r#   r   r   r   r$   r   r   r   r   stream<   s   zSentenceTokenizer.stream)
r   r   r   r   r   r   r   r   r!   r"   r'   r   r   r(   r!   r)   )r   r(   r!   r5   r   r   r   r%   r4   r=   r   r   r   r   r   "   s    
r   c                   @  s@   e Zd Zdddddd
dZdddddZdddddZdS )r   TFignore_punctuationsplit_characterr   rA   r   rB   r   r!   r"   c                C  s   || _ || _|| _d S N)_ignore_punctuation_split_character_retain_format)r$   rA   rB   r   r   r   r   r%   I   s   
zWordTokenizer.__init__Nr&   r'   r   r   r(   r)   c                C  s"   dd t j|| j| j| jdD S )Nc                 S  r*   r+   r   r,   r   r   r   r/   U   r0   z*WordTokenizer.tokenize.<locals>.<listcomp>r@   )r   split_wordsrD   rE   rF   r3   r   r   r   r4   T   s   zWordTokenizer.tokenizetokenizer.WordStreamc                C  s&   t jtjtj| j| j| jddddS )Nr@   r   r6   )	r	   BufferedWordStreamr:   r;   r   rG   rD   rE   rF   r<   r   r   r   r=   _   s   zWordTokenizer.stream)rA   r   rB   r   r   r   r!   r"   r>   )r   r(   r!   rH   r?   r   r   r   r   r   H   s    r   wordr   r!   r)   c                 C  s
   t | S rC   )r   r   )rJ   r   r   r   r   l   s   
r   TFrA   rB   r'   rA   r   rB   list[tuple[str, int, int]]c                C  s   t j| ||dS )NrK   )r   rG   )r'   rA   rB   r   r   r   rG   p   s   rG   c                 C  s   dd t | D S )Nc                 S  r*   r+   r   r,   r   r   r   r/   y   s    z'tokenize_paragraphs.<locals>.<listcomp>)r   split_paragraphs)r'   r   r   r   r   x   s   r   )rJ   r   r!   r)   )r'   r   rA   r   rB   r   r!   rL   )r'   r   r!   r)   )
__future__r   r:   dataclassesr    r   r   r   r   r	   r
   __all__r   r   r   r   rG   r   r   r   r   r   <module>   s     &
$