
    /j	                        S SK Jr  S SKrS SKrS SKJr  S SKJr  SSKJ	r	J
r
  S/rSS	.       SS
 jjr\ " S S5      5       r " S S\
R                  5      rg)    )annotationsN)	dataclass)	blingfire   )token_stream	tokenizerSentenceTokenizerF)retain_formatc                  [         R                  " U 5      u  p4/ nSnU Hw  u  p7XU n[        R                  " SSU5      R	                  5       n	U	(       a  [        U	5      U:  a  MG  U(       a  UR                  XU45        OUR                  XU45        UnMy     U[        U 5      :  a\  XS  nU(       a  UR                  X[        U 5      45        U$ UR	                  5       =n	(       a  UR                  X[        U 5      45        U$ )Nr   z	\s*\n+\s* )r   text_to_sentences_with_offsetsresubstriplenappend)
textmin_sentence_lenr
   _offsetsmerged_sentencesstartendraw_sentencesentences
             R/app/agent/.venv/lib/python3.13/site-packages/livekit/agents/tokenize/blingfire.py_split_sentencesr      s     99$?JAE#66,\:@@B3x=+;;##\#$>?##Xc$:;  s4yF|##\#d)$DE  &++--X-##Xc$i$@A    c                  4    \ rS rSr% S\S'   S\S'   S\S'   Srg)	_TokenizerOptions.   intr   stream_context_lenboolr
    N)__name__
__module____qualname____firstlineno____annotations____static_attributes__r%   r   r   r    r    .   s    r   r    c                  \    \ rS rSrSSSS.       SS jjrSS.SS	 jjrSS.SS
 jjrSrg)r	   5      
   Fr   r#   r
   c               $    [        UUUS9U l        g )Nr0   )r    _config)selfr   r#   r
   s       r   __init__SentenceTokenizer.__init__6   s     )-1'
r   N)languagec                   [        UU R                  R                  U R                  R                  S9 Vs/ s H  nUS   PM
     sn$ s  snf )Nr   r
   r   )r   r2   r   r
   )r3   r   r6   toks       r   tokenizeSentenceTokenizer.tokenizeC   sS     (!%!>!>"ll88
 F
 	
 
s   Ac                   [         R                  " [        R                  " [        U R
                  R                  U R
                  R                  S9U R
                  R                  U R
                  R                  S9$ )Nr8   )r   min_token_lenmin_ctx_len)	r   BufferedSentenceStream	functoolspartialr   r2   r   r
   r#   )r3   r6   s     r   streamSentenceTokenizer.streamM   s\    22'' !%!>!>"ll88
 ,,7777
 	
r   )r2   )r   r"   r#   r"   r
   r$   returnNone)r   strr6   
str | NonerD   z	list[str])r6   rG   rD   ztokenizer.SentenceStream)r&   r'   r(   r)   r4   r:   rB   r+   r%   r   r   r	   r	   5   sT     !#"$#
 
  	

 
 

 =A 
 04 	
 	
r   )r   rF   r   r"   r
   r$   rD   zlist[tuple[str, int, int]])
__future__r   r@   r   dataclassesr   livekitr    r   r   __all__r   r    r	   r%   r   r   <module>rM      sz    "  	 !  %  @E
!$8<<   !
	33 !
r   