
    /j                        S SK Jr  S SKrS SKJr  SSKJrJrJrJ	r	J
r
Jr  / SQr\ " S S5      5       r " S	 S
\R                  5      r " S S\R                  5      rSS jrSSS.       SS jjrSS jrg)    )annotationsN)	dataclass   )_basic_hyphenator_basic_paragraph_basic_sent_basic_wordtoken_stream	tokenizer)SentenceTokenizerWordTokenizerhyphenate_wordtokenize_paragraphsc                  >    \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	rg
)_TokenizerOptions   strlanguageintmin_sentence_lenstream_context_lenboolretain_format N)__name__
__module____qualname____firstlineno____annotations____static_attributes__r       N/app/agent/.venv/lib/python3.13/site-packages/livekit/agents/tokenize/basic.pyr   r      s    Mr!   r   c                  b    \ rS rSrSSSSS.         SS jjrSS	.SS
 jjrSS	.SS jjrSrg)r   "   english   
   Fr   r   r   r   c               &    [        UUUUS9U l        g )Nr(   )r   _config)selfr   r   r   r   s        r"   __init__SentenceTokenizer.__init__#   s     )-1'	
r!   Nr   c                   [         R                  " UU R                  R                  U R                  R                  S9 Vs/ s H  nUS   PM
     sn$ s  snf )Nr   r   r   )r   split_sentencesr*   r   r   r+   textr   toks       r"   tokenizeSentenceTokenizer.tokenize2   sY     #22!%!>!>"ll88
 F
 	
 
s   Ac                  [         R                  " [        R                  " [        R
                  U R                  R                  U R                  R                  S9U R                  R                  U R                  R                  S9$ )Nr0   r   min_token_lenmin_ctx_len)
r
   BufferedSentenceStream	functoolspartialr   r1   r*   r   r   r   r+   r   s     r"   streamSentenceTokenizer.stream<   sb    22''++!%!>!>"ll88
 ,,7777
 	
r!   )r*   )
r   r   r   r   r   r   r   r   returnNoner3   r   r   
str | NonerA   	list[str])r   rD   rA   ztokenizer.SentenceStreamr   r   r   r   r,   r5   r?   r    r   r!   r"   r   r   "   sa     " ""$#
 
 	

  
 
 

 =A 
 04 	
 	
r!   r   c                  \    \ rS rSrSSSS.       SS jjrSS.SS jjrSS.SS	 jjrS
rg)r   H   TFignore_punctuationsplit_characterr   c               (    Xl         X l        X0l        g N)_ignore_punctuation_split_character_retain_format)r+   rJ   rK   r   s       r"   r,   WordTokenizer.__init__I   s     $6  /+r!   Nr.   c                   [         R                  " UU R                  U R                  U R                  S9 Vs/ s H  nUS   PM
     sn$ s  snf )NrI   r   )r	   split_wordsrN   rO   rP   r2   s       r"   r5   WordTokenizer.tokenizeT   sZ     #..#'#;#; $ 5 5"11	
 F
 	
 
s   A
c          	         [         R                  " [        R                  " [        R
                  U R                  U R                  U R                  S9SSS9$ )NrI   r   r8   )	r
   BufferedWordStreamr<   r=   r	   rS   rN   rO   rP   r>   s     r"   r?   WordTokenizer.stream_   sO    ..''''#'#;#; $ 5 5"11	 	
 		
r!   )rN   rP   rO   )rJ   r   rK   r   r   r   rA   rB   rC   )r   rD   rA   ztokenizer.WordStreamrF   r   r!   r"   r   r   H   sT     $( %#	, !	, 		,
 	, 
	, =A 	
 04 

 

r!   r   c                .    [         R                  " U 5      $ rM   )r   r   )words    r"   r   r   l   s    ++D11r!   TFrJ   rK   c               ,    [         R                  " XUS9$ )NrZ   )r	   rS   )r3   rJ   rK   s      r"   rS   rS   p   s     ""_ r!   c                ^    [         R                  " U 5       Vs/ s H  oS   PM	     sn$ s  snf )Nr   )r   split_paragraphs)r3   r4   s     r"   r   r   x   s)    .??EFEsFEFFFs   *)rY   r   rA   rE   )r3   r   rJ   r   rK   r   rA   zlist[tuple[str, int, int]])r3   r   rA   rE   )
__future__r   r<   dataclassesr    r   r   r   r	   r
   r   __all__r   r   r   r   rS   r   r   r!   r"   <module>rb      s    "  !    #
	33 #
L!
I++ !
H2
 .25
&*DHGr!   