|
|
|
|
|
|
|
|
from pathlib import Path

import PyPDF2
from langchain import LLMChain
from langchain.chains import RetrievalQA
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import Llama
from langchain.llms import Ollama
from langchain.vectorstores import FAISS
|
|
|
|
|
|
|
|
def extract_text_from_pdfs(pdf_files):
    """Extract the full text of each PDF in *pdf_files*.

    Args:
        pdf_files: Iterable of filesystem paths to PDF files.

    Returns:
        list[str]: One string per input file — the concatenated text of all
        of its pages. May be empty for image-only (scanned) PDFs.
    """
    pdf_texts = []
    for pdf_file in pdf_files:
        with open(pdf_file, 'rb') as file:
            # PdfFileReader / numPages / getPage() / extractText() were the
            # PyPDF2 1.x API and were removed in PyPDF2 3.0 (they now raise
            # DeprecationError). PdfReader + .pages + .extract_text() is the
            # current API. `or ""` guards pages that yield no text.
            reader = PyPDF2.PdfReader(file)
            # join instead of repeated `text +=` — avoids quadratic rebuild.
            text = "".join(page.extract_text() or "" for page in reader.pages)
        pdf_texts.append(text)
    return pdf_texts
|
|
|
|
|
|
|
|
def create_vector_store(texts):
    """Embed *texts* with OpenAI embeddings and index them in FAISS.

    Args:
        texts: List of document strings to index.

    Returns:
        FAISS: A vector store ready for similarity search / retrieval.
    """
    # Embedding model is created per call; FAISS builds the index in memory.
    embedder = OpenAIEmbeddings()
    return FAISS.from_texts(texts, embedder)
|
|
|
|
|
|
|
|
def create_rag_application(pdf_files):
    """Build a retrieval-augmented QA chain over the given PDF files.

    Args:
        pdf_files: Iterable of filesystem paths to PDF documents.

    Returns:
        A LangChain retrieval QA chain that answers questions using the
        PDF contents as retrieved context.
    """
    pdf_texts = extract_text_from_pdfs(pdf_files)
    vector_store = create_vector_store(pdf_texts)

    # NOTE(review): langchain.llms does not ship a `Llama` class (it provides
    # LlamaCpp and Ollama wrappers) — confirm which local backend is intended
    # and substitute the real wrapper here.
    llm = Llama(model_name="llama-3.1")

    # BUG FIX: LLMChain has no `vector_store` parameter (it takes llm +
    # prompt), so the original construction failed validation. RetrievalQA is
    # the chain that wires an LLM to a vector-store retriever for RAG.
    rag_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vector_store.as_retriever(),
    )
    return rag_chain
|
|
|
|
|
|
|
|
# BUG FIX: the original list entry was a directory, not a .pdf file, so
# open(pdf_file, 'rb') raised IsADirectoryError. Collect the PDF documents
# inside that directory instead. (Path is imported at the top of the file.)
_pdf_dir = Path("/Users/teagardan/Documents/Teagardan/Mission USA/CURSOR AI/LANGCHAIN LLM RAG")
pdf_files = sorted(str(p) for p in _pdf_dir.glob("*.pdf"))

rag_application = create_rag_application(pdf_files)
|
|
|
|
|
import ollama |
|
|
|
|
|
|
|
|
def create_rag_application_with_ollama(pdf_files):
    """Build a retrieval-augmented QA chain over *pdf_files*, backed by an
    Ollama-served local model.

    Args:
        pdf_files: Iterable of filesystem paths to PDF documents.

    Returns:
        A LangChain retrieval QA chain that answers questions using the
        PDF contents as retrieved context.
    """
    pdf_texts = extract_text_from_pdfs(pdf_files)
    vector_store = create_vector_store(pdf_texts)

    # BUG FIX: the `ollama` Python client exposes module-level functions
    # (ollama.chat / ollama.generate); it has no `Llama` class, so the
    # original line raised AttributeError. LangChain's Ollama wrapper is the
    # LLM object a chain needs.
    # NOTE(review): Ollama model tags have no dash ("llama3.1") — confirm
    # this matches the tag pulled locally.
    llm = Ollama(model="llama3.1")

    # BUG FIX: LLMChain does not accept a vector_store; RetrievalQA wires the
    # LLM to the store's retriever for RAG.
    rag_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vector_store.as_retriever(),
    )
    return rag_chain
|
|
|
|
|
|
|
|
pdf_files = ["/Users/teagardan/Documents/Teagardan/Mission USA/CURSOR AI/LANGCHAIN LLM RAG"] |
|
|
|
|
|
|
|
|
|