Spaces:

surfiniaburger
/

aura-mind-glow

Sleeping

App Files Files Community

aura-mind-glow / knowledge_base.py

surfiniaburger

bug

9abd4ea 3 months ago

raw

history blame

3.05 kB

	import os
	from sentence_transformers import SentenceTransformer

	from database import init_db, check_if_indexed, delete_database_and_index, get_db_connection
	from create_index import create_initial_index as build_secure_index
	from search import search as secure_search
	from ingest_document import ingest_pdf

	# Use a CLIP model that can handle both text and images.
	# This name is passed to SentenceTransformer when a KnowledgeBase is constructed.
	MODEL_NAME = 'clip-ViT-B-32'

	class KnowledgeBase:
	    """Facade over the project's database, indexing, ingestion and search helpers.

	    Constructing an instance loads the embedding model named by ``MODEL_NAME``,
	    initialises the database, and builds the initial index from the default
	    text files when ``check_if_indexed()`` reports that no index exists yet.
	    """

	    def __init__(self):
	        """Load the model, initialise the database and index it if needed."""
	        self.model = SentenceTransformer(MODEL_NAME)
	        # Make sure the database is initialised before asking whether it is indexed.
	        init_db()
	        if check_if_indexed():
	            return
	        print("Local knowledge base not found. Building initial knowledge base...")
	        self._build_initial_knowledge_base()

	    def _build_initial_knowledge_base(self):
	        """Read the default documents from disk and hand them to the index builder.

	        The files are looked up in the ``knowledge_base_data`` directory next to
	        this module. Missing files are reported and skipped; if none can be
	        read, a message is printed and nothing is indexed.
	        """
	        data_dir = os.path.join(os.path.dirname(__file__), "knowledge_base_data")
	        default_files = (
	            "healthy_maize_remedy.txt",
	            "maize_phosphorus_deficiency_remedy.txt",
	            "comic_relief.txt",
	        )

	        # Maps each file name to its full text, in the order listed above.
	        loaded = {}
	        for name in default_files:
	            file_path = os.path.join(data_dir, name)
	            try:
	                with open(file_path, 'r', encoding='utf-8') as handle:
	                    loaded[name] = handle.read()
	            except FileNotFoundError:
	                print(f"Warning: Knowledge base file not found, skipping: {file_path}")

	        if not loaded:
	            print("No initial knowledge base documents found to index.")
	            return
	        build_secure_index(loaded)

	    def create_initial_index(self, documents_dict):
	        """Forward ``documents_dict`` unchanged to the external index builder."""
	        build_secure_index(documents_dict)

	    def rebuild_from_default_files(self):
	        """Re-read the default knowledge_base_data files and index them again.

	        This only re-runs the initial build step; it does not itself remove
	        any existing data beforehand.
	        """
	        self._build_initial_knowledge_base()

	    def ingest_pdf(self, file_path, file_name):
	        """Forward a PDF to the external ``ingest_pdf`` function.

	        Inside this method the bare name ``ingest_pdf`` resolves to the
	        module-level import, not to this method.
	        """
	        ingest_pdf(file_path, file_name)

	    def search(self, query, k=1):
	        """Return whatever the external ``secure_search`` yields for ``query`` and ``k``."""
	        return secure_search(query, k)

	def get_retriever():
	    """Create a ``KnowledgeBase`` and return a retriever object wrapping it.

	    The returned object exposes ``get_relevant_documents(query)``, which
	    yields LangChain ``Document`` instances for text search results.
	    """

	    class Retriever:
	        """Small adapter that turns knowledge-base search hits into Documents."""

	        def __init__(self, kb):
	            self.kb = kb

	        def get_relevant_documents(self, query):
	            """Search the knowledge base and return the text hits as Documents."""
	            hits = self.kb.search(query)
	            # Imported lazily, after the search call, on every invocation.
	            from langchain.schema import Document

	            # Only entries whose 'type' is 'text' are wrapped; anything else
	            # (e.g. image results) is left out of the returned list.
	            text_documents = []
	            for hit in hits:
	                if hit['type'] == 'text':
	                    text_documents.append(Document(page_content=hit['content']))
	            return text_documents

	    knowledge_base = KnowledgeBase()
	    return Retriever(knowledge_base)