Tutorial 3: Search Algorithms

What Is a Search Problem?

Many AI problems can be modeled as search problems: starting from an initial state, we take a sequence of actions to reach a goal state.

A search problem consists of:

  • Initial state: where the search starts

  • Actions: the operations available in each state

  • Transition model: how an action changes the state

  • Goal test: checks whether a state is the goal

  • Path cost: the cost of the actions taken

class SearchProblem:
    """Abstract definition of a search problem"""

    def __init__(self, initial_state, goal_state):
        self.initial_state = initial_state
        self.goal_state = goal_state

    def get_actions(self, state):
        """Return the actions available in the given state"""
        raise NotImplementedError

    def get_result(self, state, action):
        """Return the state that results from taking the action"""
        raise NotImplementedError

    def is_goal(self, state):
        """Check whether the state is the goal"""
        return state == self.goal_state

    def get_cost(self, state, action, next_state):
        """Return the cost of the action"""
        return 1  # default cost is 1

A Classic Example: The 8-Puzzle

Initial state:  Goal state:
┌───┬───┬───┐   ┌───┬───┬───┐
│ 1 │ 2 │ 3 │   │ 1 │ 2 │ 3 │
├───┼───┼───┤   ├───┼───┼───┤
│ 4 │   │ 5 │ → │ 4 │ 5 │ 6 │
├───┼───┼───┤   ├───┼───┼───┤
│ 6 │ 7 │ 8 │   │ 7 │ 8 │   │
└───┴───┴───┘   └───┴───┴───┘
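
The diagram above maps directly onto code. Below is a minimal sketch (not part of the original tutorial) of the 8-puzzle modeled with the SearchProblem interface defined earlier, assuming states are stored as 9-element tuples with 0 standing for the blank tile:

class EightPuzzleProblem(SearchProblem):
    """Sketch: the 8-puzzle as a SearchProblem (states are 9-tuples, 0 = blank)"""

    def get_actions(self, state):
        # Moves of the blank tile that stay on the 3x3 board
        row, col = divmod(state.index(0), 3)
        actions = []
        if row > 0: actions.append('up')
        if row < 2: actions.append('down')
        if col > 0: actions.append('left')
        if col < 2: actions.append('right')
        return actions

    def get_result(self, state, action):
        # Swap the blank with the neighbouring tile in the chosen direction
        i = state.index(0)
        offsets = {'up': -3, 'down': 3, 'left': -1, 'right': 1}
        j = i + offsets[action]
        new_state = list(state)
        new_state[i], new_state[j] = new_state[j], new_state[i]
        return tuple(new_state)

# States taken from the diagram above (0 = blank); the resulting problem
# object can be passed to the search functions introduced below
initial = (1, 2, 3, 4, 0, 5, 6, 7, 8)
goal = (1, 2, 3, 4, 5, 6, 7, 8, 0)
puzzle = EightPuzzleProblem(initial, goal)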

Uninformed Search (Blind Search)

1. Breadth-First Search (BFS)

Strategy: expand the shallowest nodes first, searching level by level

from collections import deque

def bfs(problem):
    """Breadth-first search"""
    # Frontier: queue of (state, path) pairs waiting to be explored
    frontier = deque([(problem.initial_state, [])])
    # States that have already been explored
    explored = set()

    while frontier:
        state, path = frontier.popleft()

        # Goal test
        if problem.is_goal(state):
            return path

        # Mark as explored
        explored.add(state)

        # Expand the node
        for action in problem.get_actions(state):
            next_state = problem.get_result(state, action)

            if next_state not in explored:
                frontier.append((next_state, path + [action]))

    return None  # no solution

# Example: a maze problem
class MazeProblem(SearchProblem):
    def __init__(self, maze, start, goal):
        super().__init__(start, goal)
        self.maze = maze
        self.rows = len(maze)
        self.cols = len(maze[0])

    def get_actions(self, state):
        actions = []
        x, y = state
        moves = [('up', -1, 0), ('down', 1, 0), ('left', 0, -1), ('right', 0, 1)]

        for name, dx, dy in moves:
            nx, ny = x + dx, y + dy
            if 0 <= nx < self.rows and 0 <= ny < self.cols:
                if self.maze[nx][ny] != 1:  # 1 marks a wall
                    actions.append(name)
        return actions

    def get_result(self, state, action):
        x, y = state
        moves = {'up': (-1, 0), 'down': (1, 0), 'left': (0, -1), 'right': (0, 1)}
        dx, dy = moves[action]
        return (x + dx, y + dy)

# Test
maze = [
    [0, 0, 0, 0, 0],
    [0, 1, 1, 1, 0],
    [0, 0, 0, 1, 0],
    [0, 1, 0, 0, 0],
    [0, 0, 0, 1, 0]
]

problem = MazeProblem(maze, (0, 0), (4, 4))
path = bfs(problem)
print(f"BFS 找到的路径: {path}")

Properties:

  • ✅ Complete: if a solution exists, it will be found

  • ✅ Optimal: finds the shortest path (when all step costs are equal)

  • ❌ High space complexity: O(b^d), where b is the branching factor and d is the depth of the shallowest solution

2. Depth-First Search (DFS)

Strategy: expand the deepest node first

def dfs(problem, max_depth=float('inf')):
    """Depth-first search (with an optional depth limit)"""

    def recursive_dfs(state, path, visited, depth):
        if problem.is_goal(state):
            return path

        if depth >= max_depth:
            return None

        for action in problem.get_actions(state):
            next_state = problem.get_result(state, action)

            if next_state not in visited:  # avoid revisiting states on the current path
                result = recursive_dfs(
                    next_state,
                    path + [action],
                    visited | {next_state},
                    depth + 1
                )
                if result is not None:
                    return result

        return None

    return recursive_dfs(problem.initial_state, [], {problem.initial_state}, 0)

# Iterative deepening: combines the strengths of BFS and DFS
def iterative_deepening_dfs(problem, max_depth=100):
    """Iterative deepening depth-first search"""
    for depth in range(max_depth):
        result = dfs(problem, depth)
        if result is not None:
            return result
    return None
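
As a quick usage sketch (reusing the maze grid and MazeProblem defined in the BFS example above):

# Sketch: run depth-limited DFS and iterative deepening on the maze from the BFS example
problem = MazeProblem(maze, (0, 0), (4, 4))
print(f"DFS path: {dfs(problem)}")
print(f"IDDFS path: {iterative_deepening_dfs(problem)}")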

Properties:

  • ✅ Low space complexity: O(bm), where m is the maximum depth

  • ❌ Not complete: can get stuck going down an infinite path

  • ❌ Not optimal: may return a longer-than-necessary path

Informed Search (Heuristic Search)

Informed search uses a heuristic function h(n) to estimate the cost from node n to the goal.

1. Greedy Best-First Search

Strategy: always expand the node with the smallest h(n)

import heapq

def greedy_best_first(problem, heuristic):
    """贪婪最佳优先搜索"""
    frontier = [(heuristic(problem.initial_state), problem.initial_state, [])]
    explored = set()

    while frontier:
        _, state, path = heapq.heappop(frontier)

        if problem.is_goal(state):
            return path

        if state in explored:
            continue
        explored.add(state)

        for action in problem.get_actions(state):
            next_state = problem.get_result(state, action)
            if next_state not in explored:
                h = heuristic(next_state)
                heapq.heappush(frontier, (h, next_state, path + [action]))

    return None
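
A short usage sketch (assuming the maze problem from the BFS example), using Manhattan distance to the goal as the heuristic:

# Sketch: greedy best-first search on the maze, guided by distance to the goal (4, 4)
problem = MazeProblem(maze, (0, 0), (4, 4))
h = lambda s: abs(s[0] - 4) + abs(s[1] - 4)
print(f"Greedy best-first path: {greedy_best_first(problem, h)}")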

2. A* Search

Strategy: use f(n) = g(n) + h(n)

  • g(n): the actual cost from the start to n

  • h(n): the estimated cost from n to the goal

  • f(n): the estimated total cost of a solution through n

def astar(problem, heuristic):
    """A* search"""
    # (f value, g value, state, path)
    frontier = [(heuristic(problem.initial_state), 0, problem.initial_state, [])]
    explored = {}  # state -> smallest g value found so far

    while frontier:
        f, g, state, path = heapq.heappop(frontier)

        if problem.is_goal(state):
            return path, g  # return the path and its cost

        # Skip if the state was already reached with a smaller or equal cost
        if state in explored and explored[state] <= g:
            continue
        explored[state] = g

        for action in problem.get_actions(state):
            next_state = problem.get_result(state, action)
            next_g = g + problem.get_cost(state, action, next_state)

            if next_state not in explored or explored[next_state] > next_g:
                h = heuristic(next_state)
                f = next_g + h
                heapq.heappush(frontier, (f, next_g, next_state, path + [action]))

    return None, float('inf')

# Manhattan-distance heuristic
def manhattan_distance(state, goal):
    """Manhattan distance: well suited to grid worlds"""
    return abs(state[0] - goal[0]) + abs(state[1] - goal[1])

# Test A*
problem = MazeProblem(maze, (0, 0), (4, 4))
goal = (4, 4)
heuristic = lambda s: manhattan_distance(s, goal)

path, cost = astar(problem, heuristic)
print(f"A* 找到的路径: {path}")
print(f"路径代价: {cost}")

Properties of Heuristic Functions

Admissibility: h(n) ≤ h*(n)

  • h(n) never overestimates the true cost to the goal

  • guarantees that A* finds an optimal solution

Consistency: h(n) ≤ c(n, a, n′) + h(n′)

  • satisfies the triangle inequality

  • consistency implies admissibility
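
As a sanity check (a small sketch, not part of the original tutorial), admissibility of the Manhattan-distance heuristic on the maze above can be verified empirically by comparing h(n) with the true distances h*(n) computed by BFS:

# Sketch: verify h(n) <= h*(n) for every reachable cell of the maze
goal = (4, 4)
for x in range(5):
    for y in range(5):
        if maze[x][y] == 1:
            continue  # skip walls
        true_path = bfs(MazeProblem(maze, (x, y), goal))
        if true_path is None:
            continue  # cell cannot reach the goal
        assert manhattan_distance((x, y), goal) <= len(true_path)
print("Manhattan distance never overestimates on this maze")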

Hands-On: Learning a Heuristic Function with PyTorch

We can use a neural network to learn a heuristic function:

import torch
import torch.nn as nn
import random

class LearnedHeuristic(nn.Module):
    """学习的启发函数"""

    def __init__(self, state_size):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(state_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.ReLU()  # heuristic values should be non-negative
        )

    def forward(self, state):
        return self.network(state)

def generate_training_data(maze, num_samples=1000):
    """生成训练数据:(状态, 到目标的真实距离)"""
    rows, cols = len(maze), len(maze[0])
    goal = (rows-1, cols-1)

    data = []

    for _ in range(num_samples):
        # Randomly pick a non-wall cell
        while True:
            x = random.randint(0, rows-1)
            y = random.randint(0, cols-1)
            if maze[x][y] != 1:
                break

        # Compute the true distance with BFS
        problem = MazeProblem(maze, (x, y), goal)
        path = bfs(problem)

        if path is not None:
            true_distance = len(path)
            state = torch.tensor([x/rows, y/cols, goal[0]/rows, goal[1]/cols],
                                dtype=torch.float32)
            data.append((state, true_distance))

    return data

def train_heuristic(maze, epochs=100):
    """训练启发函数"""
    model = LearnedHeuristic(state_size=4)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    criterion = nn.MSELoss()

    # Generate training data
    print("Generating training data...")
    data = generate_training_data(maze, num_samples=500)

    print(f"训练数据量: {len(data)}")

    # Training loop
    for epoch in range(epochs):
        total_loss = 0
        random.shuffle(data)

        for state, true_dist in data:
            predicted = model(state)
            loss = criterion(predicted, torch.tensor([true_dist], dtype=torch.float32))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        if (epoch + 1) % 20 == 0:
            print(f"Epoch {epoch+1}, Loss: {total_loss/len(data):.4f}")

    return model

# Train and use the learned heuristic
if __name__ == "__main__":
    maze = [
        [0, 0, 0, 0, 0],
        [0, 1, 1, 1, 0],
        [0, 0, 0, 1, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 1, 0]
    ]

    print("训练启发函数...")
    learned_h = train_heuristic(maze)

    # Test
    print("\nTesting the learned heuristic:")
    test_states = [(0, 0), (2, 2), (3, 3)]
    goal = (4, 4)

    for state in test_states:
        x, y = state
        state_tensor = torch.tensor([x/5, y/5, goal[0]/5, goal[1]/5], dtype=torch.float32)

        learned_h_value = learned_h(state_tensor).item()
        true_h_value = manhattan_distance(state, goal)

        print(f"状态 {state}: 学习的h={learned_h_value:.2f}, 曼哈顿距离={true_h_value}")

Comparison of Search Algorithms

Algorithm             Completeness   Optimality   Time      Space
BFS                   Yes            Yes*         O(b^d)    O(b^d)
DFS                   No             No           O(b^m)    O(bm)
Iterative deepening   Yes            Yes*         O(b^d)    O(bd)
Greedy best-first     No             No           O(b^m)    O(b^m)
A*                    Yes            Yes**        O(b^d)    O(b^d)

Note: * when all step costs are equal; ** with an admissible heuristic
(b: branching factor, d: depth of the shallowest solution, m: maximum depth)

Summary of Key Concepts

Concept              Explanation
Search problem       Reaching a goal state from an initial state through a sequence of actions
State space          The set of all possible states
Search tree          The tree structure generated during the search
Heuristic function   A function that estimates the cost to reach the goal
Admissibility        The heuristic never overestimates the true cost
A* algorithm         Optimal search using f(n) = g(n) + h(n)

Next Steps

In the next tutorial, we will study knowledge representation and reasoning.

Tutorial 4: Knowledge Representation and Reasoning