Tutorial 9: Advanced RAG Techniques

Overview of Advanced RAG

Basic RAG has some clear limitations: colloquial queries retrieve poorly, retrieved context is noisy, and generated answers are never verified. The advanced techniques below target each stage of the pipeline and can significantly improve performance.

Advanced RAG Techniques

├── Pre-retrieval optimization
│   ├── Query rewriting
│   ├── Query decomposition
│   └── HyDE
│
├── Retrieval optimization
│   ├── Hybrid retrieval
│   ├── Recursive retrieval
│   └── Adaptive retrieval
│
├── Post-retrieval optimization
│   ├── Reranking
│   ├── Context compression
│   └── Deduplication and fusion
│
└── Generation optimization
    ├── Chain of thought
    ├── Self-reflection
    └── Multi-turn dialogue
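
Most of these techniques get their own section below. HyDE (Hypothetical Document Embeddings) does not, so here is a minimal sketch: instead of embedding the short user query, the LLM first writes a hypothetical answer, and that answer's embedding drives the search, since full answers tend to sit closer to real documents in embedding space. The `vectorstore` argument is assumed to be an existing vector store (e.g. Chroma) that supports similarity_search_by_vector.

from langchain_openai import ChatOpenAI, OpenAIEmbeddings

def hyde_retrieve(query: str, vectorstore, k: int = 4) -> list:
    """HyDE: retrieve using the embedding of a hypothetical answer"""
    llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
    embeddings = OpenAIEmbeddings()

    # 1. Generate a hypothetical document that answers the query
    hypothetical = llm.invoke(
        f"Write a short passage that answers the question:\n{query}"
    ).content

    # 2. Embed the hypothetical document and search with that vector
    vector = embeddings.embed_query(hypothetical)
    return vectorstore.similarity_search_by_vector(vector, k=k)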

Query Rewriting

Rewrite the user's original query into a form better suited for retrieval.

from langchain_openai import ChatOpenAI

class QueryRewriter:
    """Query rewriter"""

    def __init__(self):
        self.llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

    def rewrite(self, query: str) -> str:
        """Rewrite a query"""
        prompt = f"""Rewrite the following user query into a form better suited for search.

Original query: {query}

Requirements:
- Remove colloquial phrasing
- Add implied keywords
- Preserve the original meaning

Rewritten query:"""

        response = self.llm.invoke(prompt)
        return response.content.strip()

    def expand(self, query: str, n: int = 3) -> list:
        """Expand a query into several related queries"""
        prompt = f"""Generate {n} related search queries for the following query.

Original query: {query}

Generated queries (one per line):"""

        response = self.llm.invoke(prompt)
        queries = [q.strip() for q in response.content.strip().split('\n') if q.strip()]
        return [query] + queries[:n]

# Usage
rewriter = QueryRewriter()

original = "how do i even learn python"
rewritten = rewriter.rewrite(original)
expanded = rewriter.expand(original)

print(f"Original: {original}")
print(f"Rewritten: {rewritten}")
print(f"Expanded: {expanded}")

Query Decomposition

Decompose a complex question into multiple independently answerable sub-questions.

class QueryDecomposer:
    """Query decomposer"""

    def __init__(self):
        self.llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

    def decompose(self, query: str) -> list:
        """Decompose a complex query"""
        prompt = f"""Decompose the following complex question into several simple sub-questions.

Question: {query}

Requirements:
- Each sub-question should be answerable on its own
- The combined answers should answer the original question
- List them in logical order

Sub-questions (one per line):"""

        response = self.llm.invoke(prompt)
        # Strip leading list markers such as "1." or "-" from each line
        sub_queries = [q.strip().lstrip('0123456789.-) ')
                      for q in response.content.strip().split('\n')
                      if q.strip()]
        return sub_queries

    def answer_with_decomposition(self, query: str, retriever, llm) -> str:
        """Answer each sub-question, then synthesize a final answer"""
        sub_queries = self.decompose(query)

        sub_answers = []
        for sq in sub_queries:
            docs = retriever.get_relevant_documents(sq)
            context = "\n".join([d.page_content for d in docs])

            answer_prompt = f"""Answer the question based on the information below.

Information: {context}
Question: {sq}
Answer:"""

            answer = llm.invoke(answer_prompt).content
            sub_answers.append({"question": sq, "answer": answer})

        # Synthesize the final answer
        qa_text = "\n".join(
            f"Q: {sa['question']}\nA: {sa['answer']}" for sa in sub_answers
        )
        synthesis_prompt = f"""Based on the sub-question answers below, synthesize an answer to the original question.

Original question: {query}

Sub-questions and answers:
{qa_text}

Synthesized answer:"""

        final_answer = llm.invoke(synthesis_prompt).content
        return final_answer

# Usage
decomposer = QueryDecomposer()
complex_query = "Compare Python and Java for machine learning. Which is better for beginners?"
sub_queries = decomposer.decompose(complex_query)

print("Sub-questions:")
for i, sq in enumerate(sub_queries, 1):
    print(f"  {i}. {sq}")

Context Compression

Compress the retrieved documents, keeping only the parts relevant to the question.

from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

class ContextCompressor:
    """Context compressor"""

    def __init__(self):
        self.llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

    def compress(self, documents: list, query: str) -> list:
        """Compress documents down to query-relevant content"""
        compressed = []

        for doc in documents:
            prompt = f"""Extract the information relevant to the question from the document below.

Document: {doc.page_content}

Question: {query}

Extracted relevant information (output "No relevant information" if there is none):"""

            response = self.llm.invoke(prompt).content.strip()

            # Keep the document only if the LLM found something relevant
            if response and "no relevant information" not in response.lower():
                compressed.append({
                    "content": response,
                    "metadata": doc.metadata
                })

        return compressed

# Using LangChain's built-in compression retriever
def create_compression_retriever(base_retriever):
    compressor = LLMChainExtractor.from_llm(
        ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
    )

    return ContextualCompressionRetriever(
        base_compressor=compressor,
        base_retriever=base_retriever
    )
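
A usage sketch, assuming `vectorstore` is an existing Chroma store built in an earlier tutorial:

base_retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
compression_retriever = create_compression_retriever(base_retriever)

# Each retrieved document is trimmed to its query-relevant parts by the LLM
docs = compression_retriever.get_relevant_documents("What are Python's main use cases?")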

Recursive Retrieval

Multi-level retrieval: search the summaries first, then drill down into the detailed content.

class RecursiveRetriever:
    """Recursive retriever"""

    def __init__(self, summary_store, detail_store):
        self.summary_store = summary_store  # Vector store of summaries
        self.detail_store = detail_store    # Vector store of detailed content

    def retrieve(self, query: str, top_k: int = 3) -> list:
        """Recursive retrieval"""
        # 1. Search the summaries first
        summaries = self.summary_store.similarity_search(query, k=top_k)

        # 2. Collect the IDs of the matching documents
        doc_ids = [s.metadata.get("doc_id") for s in summaries]

        # 3. Search the detailed content, restricted to those documents
        all_details = []
        for doc_id in doc_ids:
            details = self.detail_store.similarity_search(
                query,
                k=2,
                filter={"doc_id": doc_id}
            )
            all_details.extend(details)

        return all_details

# Building the hierarchical index
def build_hierarchical_index(documents, embeddings):
    """Build a hierarchical (summary + detail) index"""
    from langchain_community.vectorstores import Chroma

    # Generate one-sentence summaries
    llm = ChatOpenAI(model="gpt-3.5-turbo")
    summaries = []

    for i, doc in enumerate(documents):
        summary_prompt = f"Summarize the following content in one sentence:\n{doc.page_content[:500]}"
        summary = llm.invoke(summary_prompt).content

        # Tag the detail document with the same doc_id so the
        # RecursiveRetriever's metadata filter can find it later
        doc.metadata["doc_id"] = f"doc_{i}"

        summaries.append({
            "content": summary,
            "doc_id": f"doc_{i}",
            "metadata": doc.metadata
        })

    # Create the summary vector store
    summary_store = Chroma.from_texts(
        texts=[s["content"] for s in summaries],
        embedding=embeddings,
        metadatas=[{"doc_id": s["doc_id"]} for s in summaries],
        collection_name="summaries"
    )

    # Create the detail vector store (a separate collection, so the two don't collide)
    detail_store = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        collection_name="details"
    )

    return summary_store, detail_store
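
Putting the two pieces together (a sketch; `documents` and `embeddings` are assumed to come from earlier tutorials):

summary_store, detail_store = build_hierarchical_index(documents, embeddings)
recursive_retriever = RecursiveRetriever(summary_store, detail_store)

# Summaries narrow the search to a few documents; details are fetched per document
results = recursive_retriever.retrieve("How does garbage collection work in Python?")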

Self-Reflective RAG

Have the LLM evaluate its own answer and revise it when necessary.

class SelfReflectiveRAG:
    """Self-reflective RAG"""

    def __init__(self, retriever, llm):
        self.retriever = retriever
        self.llm = llm

    def query(self, question: str, max_iterations: int = 3) -> dict:
        """Query with self-reflection"""
        docs = self.retriever.get_relevant_documents(question)
        context = "\n".join([d.page_content for d in docs])

        # Initial answer
        answer = self._generate_answer(question, context)

        iterations = 0
        for i in range(max_iterations):
            iterations = i + 1

            # Self-evaluation
            evaluation = self._evaluate_answer(question, context, answer)

            if evaluation["is_satisfactory"]:
                break

            # Improve the answer based on the feedback
            answer = self._improve_answer(
                question, context, answer, evaluation["feedback"]
            )

        return {
            "answer": answer,
            "iterations": iterations,
            "context": context
        }

    def _generate_answer(self, question: str, context: str) -> str:
        prompt = f"""Answer the question based on the information below.

Information: {context}
Question: {question}
Answer:"""
        return self.llm.invoke(prompt).content

    def _evaluate_answer(self, question: str, context: str, answer: str) -> dict:
        prompt = f"""Evaluate the quality of the following answer.

Question: {question}
Context: {context}
Answer: {answer}

Evaluation criteria:
1. Is the answer accurate?
2. Is the answer complete?
3. Is the answer grounded in the context?

Output format:
Satisfactory: Yes/No
Feedback: [suggestions for improvement]"""

        response = self.llm.invoke(prompt).content

        # Lightweight parsing of the LLM's free-form evaluation
        first_line = response.split('\n')[0]
        is_satisfactory = "Satisfactory: Yes" in response or "Yes" in first_line
        feedback = response.split("Feedback:")[-1].strip() if "Feedback:" in response else ""

        return {
            "is_satisfactory": is_satisfactory,
            "feedback": feedback
        }

    def _improve_answer(self, question: str, context: str,
                       answer: str, feedback: str) -> str:
        prompt = f"""Improve the answer based on the feedback.

Question: {question}
Context: {context}
Original answer: {answer}
Feedback: {feedback}

Improved answer:"""
        return self.llm.invoke(prompt).content
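
A usage sketch, assuming the same retriever as before:

reflective_rag = SelfReflectiveRAG(
    retriever=retriever,
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
)

result = reflective_rag.query("What are Python's main design principles?")
print(f"Answer: {result['answer']}")
print(f"Iterations: {result['iterations']}")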

Multi-Modal RAG

Retrieve over a mix of images and text.

# A conceptual example
class MultiModalRAG:
    """Multi-modal RAG"""

    def __init__(self, text_store, image_store, vision_model):
        self.text_store = text_store
        self.image_store = image_store
        # vision_model is assumed to expose encode_image/encode_text
        # (a CLIP-style joint text-image embedding model)
        self.vision_model = vision_model

    def query(self, question: str, image=None):
        """Multi-modal query"""
        results = {
            "text_docs": [],
            "images": []
        }

        # Text retrieval
        results["text_docs"] = self.text_store.similarity_search(question)

        # Image retrieval (if an image query is provided)
        if image:
            image_embedding = self.vision_model.encode_image(image)
            results["images"] = self.image_store.search(image_embedding)

        # Text can also retrieve related images via the shared embedding space
        text_embedding = self.vision_model.encode_text(question)
        results["related_images"] = self.image_store.search(text_embedding)

        return results
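
One way to realize the vision_model interface assumed above is a CLIP checkpoint from sentence-transformers, which embeds images and text into the same vector space. A minimal sketch; the VisionEncoder wrapper is hypothetical:

from PIL import Image
from sentence_transformers import SentenceTransformer

class VisionEncoder:
    """Hypothetical wrapper providing encode_image/encode_text"""

    def __init__(self):
        # CLIP maps images and text into a shared embedding space
        self.model = SentenceTransformer("clip-ViT-B-32")

    def encode_image(self, image: Image.Image):
        return self.model.encode(image)

    def encode_text(self, text: str):
        return self.model.encode(text)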

Hands-On: Building an Advanced RAG System

from typing import Dict

class AdvancedRAG:
    """Advanced RAG system"""

    def __init__(self, retriever, llm):
        self.retriever = retriever
        self.llm = llm
        self.query_rewriter = QueryRewriter()
        self.context_compressor = ContextCompressor()

    def query(
        self,
        question: str,
        use_rewrite: bool = True,
        use_compression: bool = True,
        use_reflection: bool = False
    ) -> Dict:
        """Advanced query pipeline"""

        # 1. Query rewriting
        if use_rewrite:
            rewritten_query = self.query_rewriter.rewrite(question)
        else:
            rewritten_query = question

        # 2. Retrieval
        docs = self.retriever.get_relevant_documents(rewritten_query)

        # 3. Context compression
        if use_compression:
            compressed = self.context_compressor.compress(docs, question)
            context = "\n".join([c["content"] for c in compressed])
        else:
            context = "\n".join([d.page_content for d in docs])

        # 4. Answer generation
        answer = self._generate_answer(question, context)

        # 5. Self-reflection (optional)
        if use_reflection:
            answer = self._reflect_and_improve(question, context, answer)

        return {
            "question": question,
            "rewritten_query": rewritten_query,
            "answer": answer,
            "sources": [d.metadata for d in docs]
        }

    def _generate_answer(self, question: str, context: str) -> str:
        prompt = f"""Answer the question based on the information below. If the information is insufficient, say so.

Information:
{context}

Question: {question}

Answer:"""
        return self.llm.invoke(prompt).content

    def _reflect_and_improve(self, question: str, context: str, answer: str) -> str:
        # Simplified self-reflection: one check-and-revise pass
        check_prompt = f"""Check whether the following answer is accurate and complete.

Question: {question}
Context: {context}
Answer: {answer}

If it needs improvement, output the improved answer; otherwise output "OK"."""

        response = self.llm.invoke(check_prompt).content

        # Treat anything other than "OK" as a revised answer
        if not response.strip().startswith("OK"):
            return response
        return answer
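
A usage sketch, again assuming a retriever from an earlier tutorial:

rag = AdvancedRAG(
    retriever=retriever,
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
)

result = rag.query("how do i even learn python", use_reflection=True)
print(f"Rewritten query: {result['rewritten_query']}")
print(f"Answer: {result['answer']}")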

Summary of Key Concepts

| Technique | Purpose | When to use |
|---|---|---|
| Query rewriting | Optimize query phrasing | Colloquial queries |
| Query decomposition | Handle complex questions | Multi-step questions |
| Context compression | Extract key information | Long documents |
| Recursive retrieval | Hierarchical retrieval | Large knowledge bases |
| Self-reflection | Improve answer quality | High-accuracy requirements |

Next Steps

In the final tutorial, we will cover deploying RAG in production.

Tutorial 10: RAG Production Deployment