o
    y	i'R                     @   s   d Z ddlZddlZddlZddlZddlZddlmZmZm	Z	m
Z
 ddlmZ eeZzddlmZ ddlmZ ddlmZ ddlmZ d	ZW n eyY   d
Zed Y nw G dd dZe ZdS )zs
Knowledge Base Service for Voice Bot
Handles FAISS-based document search with S3 storage and business ID mapping.
    N)ListDictOptionalAny)Config)BedrockEmbeddings)PyPDFLoader)RecursiveCharacterTextSplitter)FAISSTFzMFAISS libraries not available. Knowledge base functionality will be disabled.c                   @   s0  e Zd ZdZdd Zdd Zd#dedee d	efd
dZ	d$dededee de
d	eeeef  f
ddZdeeef d	ee fddZd%dededee de
d	ef
ddZd&dee dee fddZd#dedee d	efddZd#dedee d	ee fddZd#dededee fdd Zd	efd!d"ZdS )'KnowledgeBaseServicezLManages FAISS-based document search with S3 storage and business ID mapping.c                 C   sJ   t jot| _d | _d | _d | _d | _i | _| jr| 	  d S t
d d S )Nz6Knowledge base service disabled or FAISS not available)r   ZKNOWLEDGE_BASE_ENABLEDFAISS_AVAILABLEenabled	s3_clientbedrock_clientbedrock_embeddingstext_splittervector_store_cache_initialize_aws_clientsloggerinfoself r   "services/knowledge_base_service.py__init__   s   zKnowledgeBaseService.__init__c              
   C   s   zEt jrt jstd d| _W dS tjdt jt jt jd| _	tjdt jt jt jd| _
tt j| j
d| _tt jt jd	| _td
 W dS  tyc } ztd|  d| _W Y d}~dS d}~ww )z*Initialize AWS clients for S3 and Bedrock.z8AWS credentials not configured. Knowledge base disabled.FNZs3)region_nameaws_access_key_idaws_secret_access_keyzbedrock-runtime)Zservice_namer   r   r   )Zmodel_idclient)Z
chunk_sizeZchunk_overlapz/Knowledge base service initialized successfullyz-Failed to initialize knowledge base service: )r   ZAWS_ACCESS_KEY_IDZAWS_SECRET_ACCESS_KEYr   warningr   boto3r   Z
AWS_REGIONr   r   r   ZFAISS_EMBEDDING_MODELr   r	   ZFAISS_CHUNK_SIZEZFAISS_CHUNK_OVERLAPr   r   	Exceptionerror)r   er   r   r   r   ,   s>   
z,KnowledgeBaseService._initialize_aws_clientsNbusiness_idbot_idreturnc                 C   s  | j s
td dS z&|r| d| n|}|| jv r+td| d| d W dS |r7d| d	| d	}nd| d	}| jjtj|d
}d|vrUtd|  W dS | 	||}|rp|| j|< td| d|  W dS g }|d D ]}|d 
drzX| jjtj|d d}	tjddd}
|
|	d   |
j}W d   n1 sw   Y  t|}| }|D ]}||jd< ||jd< |d |jd< q|| t| W qv ty } ztd|d  d|  W Y d}~qvd}~ww qv|std|  W dS | j|}t|| j}|| j|< | ||| tdt| d| d|  W dS  tyP } ztd| d|  W Y d}~dS d}~ww )z=Load documents for a specific business ID and bot ID from S3."Knowledge base service is disabledF_zDocuments for business 	 and bot z already loadedTindexes//BucketZPrefixContentsz No documents found for business z*Loaded existing vector store for business Keyz.pdfr-   r/   )suffixdeleteZBodyNr$   r%   sourcezError processing PDF : z&No valid documents found for business zLoaded z document chunks for business z%Error loading documents for business ) r   r   r   r   r   r   list_objects_v2r   S3_BUCKET_NAME_load_vector_store_from_s3endswithZ
get_objecttempfileZNamedTemporaryFilewritereadnamer   loadmetadataextendosunlinkr!   r"   r   Zsplit_documentsr
   Zfrom_documentsr   _save_vector_store_to_s3len)r   r$   r%   	cache_keyprefixresponsevector_storeZ	documentsobjZpdf_objZ	temp_fileZtemp_file_pathloaderZpdf_docsdocr#   Z
split_docsr   r   r   load_documents_for_businessS   s   





 
 z0KnowledgeBaseService.load_documents_for_business   querytop_kc                 C   s  | j s
td g S za|r| d| n|}|| jvr$| ||s$g W S | j|}|s:td| d|  g W S |j||d}g }|D ]\}	}
||	j|	j	t
|
d qEtdt| d|d	d
  d |W S  ty } ztd| d| d|  g W  Y d	}~S d	}~ww )z7Search documents for a specific business ID and bot ID.r'   r(   z#No vector store found for business r)   )k)contentr>   scorezFound z relevant documents for query: N2   ...z'Error searching documents for business r4   )r   r   r   r   rK   getZsimilarity_search_with_scoreappendZpage_contentr>   floatr   rC   r!   r"   )r   r$   rM   r%   rN   rD   rG   ZdocsresultsrJ   rQ   r#   r   r   r   search_documents   s6   


$z%KnowledgeBaseService.search_documentsdatac              
   C   s   z[| di  d}|r!|dkr!| r!td|  | W S g d}|D ],}|| di v rS|d | }|rS|dkrS| rStd| d|  |   W S q'td W d	S  tyx } ztd
t|  W Y d	}~d	S d	}~ww )zXExtract business ID from WebSocket data (X-BID from extra_headers) - From previous code.Zextra_headerszX-BIDzNot providedz+Found business ID in WebSocket data X-BID: )bidr$   zbusiness-idzx-bidzx-business-idz$Found business ID in WebSocket data r4   z&No business ID found in WebSocket dataNz2Error extracting business ID from WebSocket data: )rT   stripr   r   r   r!   r"   str)r   rY   r$   Zalternative_keyskeyrZ   r#   r   r   r   'extract_business_id_from_websocket_data   s(   

z<KnowledgeBaseService.extract_business_id_from_websocket_data  	max_charsc                 C   sH  | j sdS z}| j|||dd}|sW dS |jdd d g }d}|D ]@}|d }	|d	 }
|
d
kr1q"d|
dd|	 }|t| |krW|| }|dkrU||d| d   n|| |t|7 }q"d|}tdt| d| d| d|dd  d	 |W S  ty } zt	d| d| d|  W Y d}~dS d}~ww )zUGet relevant context for a query from the knowledge base - Enhanced with bot support.    )rN   c                 S   s   | d S )NrQ   r   )xr   r   r   <lambda>  s    z<KnowledgeBaseService.get_context_for_query.<locals>.<lambda>)r]   r   rP   rQ   g       @z[Score: z.2fz] d   NrS   z

zGenerated context of z characters for business r)   z	 (query: rR   z...)z#Error getting context for business r4   )
r   rX   sortrC   rU   joinr   r   r!   r"   )r   r$   rM   r%   r`   rW   Zcontext_partsZtotal_charsresultrP   rQ   Zcontent_with_scoreZremaining_charscontextr#   r   r   r   get_context_for_query   s<   

0z*KnowledgeBaseService.get_context_for_queryc                    s    rB|r#  d| }|| j v r!| j |= td  d|  dS dS  fdd| j  D }|D ]}| j |= q1td   dS | j   td dS )	zClear the vector store cache.r(   zCleared cache for business r)   c                        g | ]}|   d r|qS r(   
startswith.0r]   r$   r   r   
<listcomp>/       z4KnowledgeBaseService.clear_cache.<locals>.<listcomp>z'Cleared all cache entries for business z Cleared all knowledge base cacheN)r   r   r   keysclear)r   r$   r%   rD   keys_to_remover]   r   rq   r   clear_cache%  s   


z KnowledgeBaseService.clear_cachec           
         sF  | j sdS z||rd  d| d}nd  d}| jjtj|d}d|v r|d D ]}|d drH| jjtj|d d td	|d  d
 q)|r\  d| }|| j	v r[| j	|= n fdd| j	
 D }|D ]}| j	|= qjtd  d|  W dS W dS  ty }	 ztd  d| d|	  W Y d}	~	dS d}	~	ww )zAClear vector store files from S3 for a specific business and bot.Fr*   r+   r,   r.   r/   ).pkl.faissr0   zDeleted z from S3r(   c                    rk   rl   rm   ro   rq   r   r   rr   W  rs   z>KnowledgeBaseService.clear_s3_vector_store.<locals>.<listcomp>z,Cleared all vector store files for business r)   Tz,Error clearing S3 vector store for business r4   N)r   r   r5   r   r6   r8   delete_objectr   r   r   rt   r!   r"   )
r   r$   r%   rE   rF   rH   rD   rv   r]   r#   r   rq   r   clear_s3_vector_store7  sF   

z*KnowledgeBaseService.clear_s3_vector_storec                 C   s$  z|rd| d| d}nd| d}| j jtj|d}d|v rdd |d D }dd |d D }|r|rt }|d d	 }|d d	 }	| j jtj|| d
d | j jtj|	| dd ztjd|| j	dd}
W n[ t
y } zOtd|  td z| j jtj|d | j jtj|	d td W n t
y } ztd|  W Y d}~nd}~ww W Y d}~W d   W dS d}~ww td| d|  |
W  d   W S 1 sw   Y  td| d|  W dS  t
y } ztdt|  W Y d}~dS d}~ww )z"Load existing vector store from S3r*   r+   r,   r.   c                 S      g | ]}|d   dr|qS )r/   rx   r8   rp   rH   r   r   r   rr   t      zCKnowledgeBaseService._load_vector_store_from_s3.<locals>.<listcomp>c                 S   r|   )r/   ry   r}   r~   r   r   r   rr   u  r   r   r/   /vectorstore.pkl)r-   r/   Filename/vectorstore.faissvectorstoreT)
index_namefolder_pathZ
embeddingsZallow_dangerous_deserializationz2Failed to load vector store with standard method: zLClearing incompatible vector store files and will recreate from documents...r0   z/Cleared incompatible vector store files from S3z%Could not delete incompatible files: Nz.Loaded existing vector store for business ID:  and bot ID: z0No existing vector store found for business ID: z&Could not load existing vector store: )r   r5   r   r6   r9   TemporaryDirectoryZdownload_filer
   Z
load_localr   r!   r   r   r   rz   r\   )r   r$   r%   rE   rF   Z	pkl_filesZfaiss_filestemp_dirpkl_key	faiss_keyrG   Z
load_errorZdelete_errorr#   r   r   r   r7   d  s~   



"1z/KnowledgeBaseService._load_vector_store_from_s3rG   c              
   C   s  zft  W}|jd|d |r"d| d| d}d| d| d}nd| d}d| d}| jj| dtj|d | jj| dtj|d td| d	|  W d
   W d
S 1 s_w   Y  W d
S  t	y } zt
dt|  W Y d
}~d
S d
}~ww )zSave vector store to S3r   )r   r   r*   r+   r   r   )r   r-   r/   z*Saved vector store to S3 for business ID: r   Nz!Error saving vector store to S3: )r9   r   Z
save_localr   Zupload_filer   r6   r   r   r!   r"   r\   )r   rG   r$   r%   r   r   r   r#   r   r   r   rB     s6   
&"z-KnowledgeBaseService._save_vector_store_to_s3c                 C   s   | j o| jduS )z-Check if knowledge base service is available.N)r   r   r   r   r   r   is_available  s   z!KnowledgeBaseService.is_available)N)NrL   )Nr_   )NN)__name__
__module____qualname____doc__r   r   r\   r   boolrK   intr   r   r   rX   r^   rj   rw   r{   r7   rB   r   r   r   r   r   r      s    '0a'$0 -M"r   )r   r    r@   ZjsonZloggingr9   typingr   r   r   r   Zconfigr   Z	getLoggerr   r   Zlangchain_awsr   Z$langchain_community.document_loadersr   Zlangchain.text_splitterr	   Z langchain_community.vectorstoresr
   r   ImportErrorr   r   Zknowledge_base_servicer   r   r   r   <module>   s0    
   
@