Building a Local Knowledge Base and Agent with an LLM

Here's a detailed guide on how to create a local knowledge base from your files and build an agent that can interact with this knowledge.

Part 1: Building a Local Knowledge Base

Step 1: Set Up Your Environment

# Create a virtual environment
python -m venv llm_kb_env
source llm_kb_env/bin/activate  # On Windows: llm_kb_env\Scripts\activate

# Install necessary packages
pip install langchain langchain_community pypdf python-docx sentence-transformers chromadb
pip install llama-index openai faiss-cpu tiktoken unstructured markdown

Step 2: Organize Your Documents

  1. Create a directory for your document collection
    mkdir -p data/documents
  2. Place your files (code, txt, pdf, etc.) in this directory; the quick check sketched below shows what was collected
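
Before indexing, it helps to confirm the loaders will actually find your files. This small sketch uses only the standard library and the directory created above:

from pathlib import Path
from collections import Counter

# Count files in the collection by extension
counts = Counter(p.suffix for p in Path("data/documents").rglob("*") if p.is_file())
print(counts)  # e.g. Counter({'.txt': 12, '.pdf': 3})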

Step 3: Document Loading and Processing

from langchain_community.document_loaders import (
    TextLoader,
    PyPDFLoader,
    DirectoryLoader,
    UnstructuredMarkdownLoader,
    CSVLoader
)
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Configure loaders for different file types
loaders = {
    "txt": DirectoryLoader("data/documents/", glob="**/*.txt", loader_cls=TextLoader),
    "pdf": DirectoryLoader("data/documents/", glob="**/*.pdf", loader_cls=PyPDFLoader),
    "md": DirectoryLoader("data/documents/", glob="**/*.md", loader_cls=UnstructuredMarkdownLoader),
    "csv": DirectoryLoader("data/documents/", glob="**/*.csv", loader_cls=CSVLoader),
    # Add more file types as needed
}

# Load documents
documents = []
for loader_type, loader in loaders.items():
    try:
        documents.extend(loader.load())
        print(f"Loaded {loader_type} documents")
    except Exception as e:
        print(f"Error loading {loader_type} documents: {e}")

# Split documents into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len
)
chunks = text_splitter.split_documents(documents)
print(f"Split into {len(chunks)} chunks")

Step 4: Create Vector Embeddings

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma, FAISS

# Initialize embeddings
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Option 1: Create a ChromaDB vector store
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model,
    persist_directory="data/chroma_db"
)
vectorstore.persist()  # Save to disk

# Option 2: Create a FAISS vector store (in-memory, can be saved)
faiss_index = FAISS.from_documents(chunks, embedding_model)
faiss_index.save_local("data/faiss_index")
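
In a later session you can reload either store from disk instead of rebuilding it. A sketch for the FAISS index (the extra flag is required on recent langchain versions because the index is unpickled; older versions do not accept it):

faiss_index = FAISS.load_local(
    "data/faiss_index",
    embedding_model,
    allow_dangerous_deserialization=True  # The index file is trusted because we created it ourselves
)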

Part 2: Building an Agent Based on the Knowledge Base

Step 5: Set Up Retrieval System

from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain_community.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# Load your vectorstore
vectorstore = Chroma(persist_directory="data/chroma_db", embedding_function=embedding_model)

# Create a retriever
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})

# Optional: Add contextual compression for better results
llm = ChatOpenAI(temperature=0)  # Requires OPENAI_API_KEY; see Part 3 for a fully local alternative
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=retriever
)

# Create a question-answering chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # Other options: "map_reduce", "refine"
    retriever=retriever,  # Or pass compression_retriever here for compressed context
    return_source_documents=True,
    verbose=True
)
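
It is worth querying the chain directly before wrapping it in an agent. A minimal usage sketch (the question is just a placeholder):

result = qa_chain.invoke({"query": "What does the architecture document say about caching?"})
print(result["result"])
for doc in result["source_documents"]:
    print("Source:", doc.metadata.get("source"))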

Step 6: Create an Agent with Tools

from langchain.agents import Tool, initialize_agent, AgentType
from langchain.memory import ConversationBufferMemory

# Define a search tool using our QA chain
search_tool = Tool(
    name="Document Search",
    func=lambda q: qa_chain.invoke({"query": q})["result"],  # .run fails when return_source_documents=True
    description="Useful for searching information in your documents. Input should be a question."
)

# Optional: Add a calculation tool (LLMMathChain needs the numexpr package)
from langchain.chains import LLMMathChain

llm_math_chain = LLMMathChain.from_llm(llm=llm, verbose=True)
math_tool = Tool(
    name="Calculator",
    func=llm_math_chain.run,
    description="Useful for performing calculations. Input should be a mathematical expression."
)

# Create a memory system
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Initialize the agent
agent = initialize_agent(
    tools=[search_tool, math_tool],
    llm=llm,
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    verbose=True,
    memory=memory
)

Step 7: Create a Simple Interface

def query_agent(query):
    try:
        response = agent.run(input=query)
        return response
    except Exception as e:
        return f"Error: {str(e)}"

# Interactive loop
if __name__ == "__main__":
    print("Welcome to your document assistant! Type 'exit' to quit.")
    while True:
        query = input("Question: ")
        if query.lower() == 'exit':
            break
        response = query_agent(query)
        print(f"Answer: {response}")

Part 3: Using Local LLMs (Optional)

If you want to keep everything local, replace the OpenAI model with a local model:

# Requires: pip install transformers accelerate bitsandbytes
from langchain_community.llms import HuggingFacePipeline
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Load a local model (it must be in Hugging Face transformers format, not GGUF)
model_id = "meta-llama/Llama-2-7b-chat-hf"  # Choose an appropriate model; this one needs approved access on Hugging Face
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    load_in_8bit=True  # For memory efficiency; needs bitsandbytes and a CUDA GPU
)

# Create a pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,   # Budget for the generated answer (max_length would also count the prompt)
    do_sample=True,       # Required for temperature/top_p to take effect
    temperature=0.3,
    top_p=0.95,
    repetition_penalty=1.15
)

# Create LLM
local_llm = HuggingFacePipeline(pipeline=pipe)

# Replace the OpenAI model with your local model in the QA chain and agent
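
As a sketch of what that swap might look like, the chain and agent from Part 2 can be rebuilt with the same arguments, only the llm changed (note that small local models can struggle with the conversational ReAct format, so results vary):

qa_chain = RetrievalQA.from_chain_type(
    llm=local_llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)
agent = initialize_agent(
    tools=[search_tool, math_tool],
    llm=local_llm,
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    verbose=True,
    memory=memory
)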

Part 4: Advanced Features

Adding a Web Search Tool

from langchain_community.utilities import GoogleSearchAPIWrapper

# Needs google-api-python-client plus GOOGLE_API_KEY and GOOGLE_CSE_ID environment variables
search = GoogleSearchAPIWrapper()
web_search_tool = Tool(
    name="Web Search",
    description="Search Google for recent or factual information.",
    func=search.run
)

# Add to your agent's tools list

Metadata Filtering

# When querying the vectorstore
metadata_filter = {"source": "important_document.pdf"}
filtered_docs = vectorstore.similarity_search(
    "your query", 
    k=5, 
    filter=metadata_filter
)
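
The same filter can also be applied through a retriever, so the QA chain only ever sees matching documents; the filter syntax shown here is Chroma's, and other vector stores use slightly different keys:

filtered_retriever = vectorstore.as_retriever(
    search_kwargs={"k": 5, "filter": metadata_filter}
)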

Evaluation

from langchain.evaluation.qa import QAEvalChain

# Create some examples
examples = [
    {"query": "What is X?", "answer": "X is Y according to the documents."}
    # Add more examples
]

# Create an evaluation chain and grade the chain's answers against the examples
eval_chain = QAEvalChain.from_llm(llm)
predictions = [qa_chain.invoke({"query": eg["query"]}) for eg in examples]
graded_outputs = eval_chain.evaluate(examples, predictions)
