Tutorial 3: Search Algorithms

What Is a Search Problem?

Many AI problems can be modeled as search problems: starting from an initial state, we take a sequence of actions to reach a goal state.

A search problem consists of:

  • Initial state: where the search starts

  • Actions: the operations available in each state

  • Transition model: how an action changes the state

  • Goal test: checks whether a state is the goal

  • Path cost: the cost of the actions taken

class SearchProblem:
    """Abstract definition of a search problem"""

    def __init__(self, initial_state, goal_state):
        self.initial_state = initial_state
        self.goal_state = goal_state

    def get_actions(self, state):
        """Return the actions available in the given state"""
        raise NotImplementedError

    def get_result(self, state, action):
        """Return the state that results from taking the action"""
        raise NotImplementedError

    def is_goal(self, state):
        """Check whether the state is the goal"""
        return state == self.goal_state

    def get_cost(self, state, action, next_state):
        """Return the cost of the action"""
        return 1  # default cost is 1

A Classic Example: The 8-Puzzle

Initial state:  Goal state:
┌───┬───┬───┐   ┌───┬───┬───┐
│ 1 │ 2 │ 3 │   │ 1 │ 2 │ 3 │
├───┼───┼───┤   ├───┼───┼───┤
│ 4 │   │ 5 │ → │ 4 │ 5 │ 6 │
├───┼───┼───┤   ├───┼───┼───┤
│ 6 │ 7 │ 8 │   │ 7 │ 8 │   │
└───┴───┴───┘   └───┴───┴───┘
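
The diagram above maps directly onto code. Below is a minimal sketch (not part of the original tutorial) of the 8-puzzle modeled with the SearchProblem interface defined earlier, assuming states are stored as 9-element tuples with 0 standing for the blank tile:

class EightPuzzleProblem(SearchProblem):
    """Sketch: the 8-puzzle as a SearchProblem (states are 9-tuples, 0 = blank)"""

    def get_actions(self, state):
        # Moves of the blank tile that stay on the 3x3 board
        row, col = divmod(state.index(0), 3)
        actions = []
        if row > 0: actions.append('up')
        if row < 2: actions.append('down')
        if col > 0: actions.append('left')
        if col < 2: actions.append('right')
        return actions

    def get_result(self, state, action):
        # Swap the blank with the neighbouring tile in the chosen direction
        i = state.index(0)
        offsets = {'up': -3, 'down': 3, 'left': -1, 'right': 1}
        j = i + offsets[action]
        new_state = list(state)
        new_state[i], new_state[j] = new_state[j], new_state[i]
        return tuple(new_state)

# States taken from the diagram above (0 = blank); the resulting problem
# object can be passed to the search functions introduced below
initial = (1, 2, 3, 4, 0, 5, 6, 7, 8)
goal = (1, 2, 3, 4, 5, 6, 7, 8, 0)
puzzle = EightPuzzleProblem(initial, goal)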

Uninformed Search (Blind Search)

1. Breadth-First Search (BFS)

Strategy: expand the shallowest nodes first, searching level by level

from collections import deque

def bfs(problem):
    """Breadth-first search"""
    # Frontier: queue of (state, path) pairs waiting to be explored
    frontier = deque([(problem.initial_state, [])])
    # States that have already been explored
    explored = set()

    while frontier:
        state, path = frontier.popleft()

        # Goal test
        if problem.is_goal(state):
            return path

        # Mark as explored
        explored.add(state)

        # Expand the node
        for action in problem.get_actions(state):
            next_state = problem.get_result(state, action)

            if next_state not in explored:
                frontier.append((next_state, path + [action]))

    return None  # no solution

# Example: a maze problem
class MazeProblem(SearchProblem):
    def __init__(self, maze, start, goal):
        super().__init__(start, goal)
        self.maze = maze
        self.rows = len(maze)
        self.cols = len(maze[0])

    def get_actions(self, state):
        actions = []
        x, y = state
        moves = [('up', -1, 0), ('down', 1, 0), ('left', 0, -1), ('right', 0, 1)]

        for name, dx, dy in moves:
            nx, ny = x + dx, y + dy
            if 0 <= nx < self.rows and 0 <= ny < self.cols:
                if self.maze[nx][ny] != 1:  # 1 marks a wall
                    actions.append(name)
        return actions

    def get_result(self, state, action):
        x, y = state
        moves = {'up': (-1, 0), 'down': (1, 0), 'left': (0, -1), 'right': (0, 1)}
        dx, dy = moves[action]
        return (x + dx, y + dy)

# Test
maze = [
    [0, 0, 0, 0, 0],
    [0, 1, 1, 1, 0],
    [0, 0, 0, 1, 0],
    [0, 1, 0, 0, 0],
    [0, 0, 0, 1, 0]
]

problem = MazeProblem(maze, (0, 0), (4, 4))
path = bfs(problem)
print(f"BFS 找到的路径: {path}")

Properties:

  • ✅ Complete: if a solution exists, it will be found

  • ✅ Optimal: finds the shortest path (when all step costs are equal)

  • ❌ High space complexity: O(b^d), where b is the branching factor and d is the depth of the shallowest solution

2. Depth-First Search (DFS)

Strategy: expand the deepest node first

def dfs(problem, max_depth=float('inf')):
    """Depth-first search (with an optional depth limit)"""

    def recursive_dfs(state, path, visited, depth):
        if problem.is_goal(state):
            return path

        if depth >= max_depth:
            return None

        for action in problem.get_actions(state):
            next_state = problem.get_result(state, action)

            if next_state not in visited:  # avoid revisiting states on the current path
                result = recursive_dfs(
                    next_state,
                    path + [action],
                    visited | {next_state},
                    depth + 1
                )
                if result is not None:
                    return result

        return None

    return recursive_dfs(problem.initial_state, [], {problem.initial_state}, 0)

# Iterative deepening: combines the strengths of BFS and DFS
def iterative_deepening_dfs(problem, max_depth=100):
    """Iterative deepening depth-first search"""
    for depth in range(max_depth):
        result = dfs(problem, depth)
        if result is not None:
            return result
    return None
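
As a quick usage sketch (reusing the maze grid and MazeProblem defined in the BFS example above):

# Sketch: run depth-limited DFS and iterative deepening on the maze from the BFS example
problem = MazeProblem(maze, (0, 0), (4, 4))
print(f"DFS path: {dfs(problem)}")
print(f"IDDFS path: {iterative_deepening_dfs(problem)}")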

Properties:

  • ✅ Low space complexity: O(bm), where m is the maximum depth

  • ❌ Not complete: can get stuck going down an infinite path

  • ❌ Not optimal: may return a longer-than-necessary path

Informed Search (Heuristic Search)

Informed search uses a heuristic function h(n) to estimate the cost from node n to the goal.

1. Greedy Best-First Search

Strategy: always expand the node with the smallest h(n)

import heapq

def greedy_best_first(problem, heuristic):
    """贪婪最佳优先搜索"""
    frontier = [(heuristic(problem.initial_state), problem.initial_state, [])]
    explored = set()

    while frontier:
        _, state, path = heapq.heappop(frontier)

        if problem.is_goal(state):
            return path

        if state in explored:
            continue
        explored.add(state)

        for action in problem.get_actions(state):
            next_state = problem.get_result(state, action)
            if next_state not in explored:
                h = heuristic(next_state)
                heapq.heappush(frontier, (h, next_state, path + [action]))

    return None
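
A short usage sketch (assuming the maze problem from the BFS example), using Manhattan distance to the goal as the heuristic:

# Sketch: greedy best-first search on the maze, guided by distance to the goal (4, 4)
problem = MazeProblem(maze, (0, 0), (4, 4))
h = lambda s: abs(s[0] - 4) + abs(s[1] - 4)
print(f"Greedy best-first path: {greedy_best_first(problem, h)}")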

2. A* Search

Strategy: use f(n) = g(n) + h(n)

  • g(n): the actual cost from the start to n

  • h(n): the estimated cost from n to the goal

  • f(n): the estimated total cost of a solution through n

def astar(problem, heuristic):
    """A* search"""
    # (f value, g value, state, path)
    frontier = [(heuristic(problem.initial_state), 0, problem.initial_state, [])]
    explored = {}  # state -> smallest g value found so far

    while frontier:
        f, g, state, path = heapq.heappop(frontier)

        if problem.is_goal(state):
            return path, g  # return the path and its cost

        # Skip if the state was already reached with a smaller or equal cost
        if state in explored and explored[state] <= g:
            continue
        explored[state] = g

        for action in problem.get_actions(state):
            next_state = problem.get_result(state, action)
            next_g = g + problem.get_cost(state, action, next_state)

            if next_state not in explored or explored[next_state] > next_g:
                h = heuristic(next_state)
                f = next_g + h
                heapq.heappush(frontier, (f, next_g, next_state, path + [action]))

    return None, float('inf')

# Manhattan-distance heuristic
def manhattan_distance(state, goal):
    """Manhattan distance: well suited to grid worlds"""
    return abs(state[0] - goal[0]) + abs(state[1] - goal[1])

# Test A*
problem = MazeProblem(maze, (0, 0), (4, 4))
goal = (4, 4)
heuristic = lambda s: manhattan_distance(s, goal)

path, cost = astar(problem, heuristic)
print(f"A* 找到的路径: {path}")
print(f"路径代价: {cost}")

Properties of Heuristic Functions

Admissibility: h(n) ≤ h*(n)

  • h(n) never overestimates the true cost to the goal

  • guarantees that A* finds an optimal solution

Consistency: h(n) ≤ c(n, a, n′) + h(n′)

  • satisfies the triangle inequality

  • consistency implies admissibility
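
As a sanity check (a small sketch, not part of the original tutorial), admissibility of the Manhattan-distance heuristic on the maze above can be verified empirically by comparing h(n) with the true distances h*(n) computed by BFS:

# Sketch: verify h(n) <= h*(n) for every reachable cell of the maze
goal = (4, 4)
for x in range(5):
    for y in range(5):
        if maze[x][y] == 1:
            continue  # skip walls
        true_path = bfs(MazeProblem(maze, (x, y), goal))
        if true_path is None:
            continue  # cell cannot reach the goal
        assert manhattan_distance((x, y), goal) <= len(true_path)
print("Manhattan distance never overestimates on this maze")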

Hands-On: Learning a Heuristic Function with PyTorch

We can use a neural network to learn a heuristic function:

import torch
import torch.nn as nn
import random

class LearnedHeuristic(nn.Module):
    """学习的启发函数"""

    def __init__(self, state_size):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(state_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.ReLU()  # heuristic values should be non-negative
        )

    def forward(self, state):
        return self.network(state)

def generate_training_data(maze, num_samples=1000):
    """生成训练数据:(状态, 到目标的真实距离)"""
    rows, cols = len(maze), len(maze[0])
    goal = (rows-1, cols-1)

    data = []

    for _ in range(num_samples):
        # Randomly pick a non-wall cell
        while True:
            x = random.randint(0, rows-1)
            y = random.randint(0, cols-1)
            if maze[x][y] != 1:
                break

        # Compute the true distance with BFS
        problem = MazeProblem(maze, (x, y), goal)
        path = bfs(problem)

        if path is not None:
            true_distance = len(path)
            state = torch.tensor([x/rows, y/cols, goal[0]/rows, goal[1]/cols],
                                dtype=torch.float32)
            data.append((state, true_distance))

    return data

def train_heuristic(maze, epochs=100):
    """训练启发函数"""
    model = LearnedHeuristic(state_size=4)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    criterion = nn.MSELoss()

    # Generate training data
    print("Generating training data...")
    data = generate_training_data(maze, num_samples=500)

    print(f"训练数据量: {len(data)}")

    # Training loop
    for epoch in range(epochs):
        total_loss = 0
        random.shuffle(data)

        for state, true_dist in data:
            predicted = model(state)
            loss = criterion(predicted, torch.tensor([true_dist], dtype=torch.float32))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        if (epoch + 1) % 20 == 0:
            print(f"Epoch {epoch+1}, Loss: {total_loss/len(data):.4f}")

    return model

# Train and use the learned heuristic
if __name__ == "__main__":
    maze = [
        [0, 0, 0, 0, 0],
        [0, 1, 1, 1, 0],
        [0, 0, 0, 1, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 1, 0]
    ]

    print("训练启发函数...")
    learned_h = train_heuristic(maze)

    # Test
    print("\nTesting the learned heuristic:")
    test_states = [(0, 0), (2, 2), (3, 3)]
    goal = (4, 4)

    for state in test_states:
        x, y = state
        state_tensor = torch.tensor([x/5, y/5, goal[0]/5, goal[1]/5], dtype=torch.float32)

        learned_h_value = learned_h(state_tensor).item()
        true_h_value = manhattan_distance(state, goal)

        print(f"状态 {state}: 学习的h={learned_h_value:.2f}, 曼哈顿距离={true_h_value}")

Comparison of Search Algorithms

Algorithm             Completeness   Optimality   Time      Space
BFS                   Yes            Yes*         O(b^d)    O(b^d)
DFS                   No             No           O(b^m)    O(bm)
Iterative deepening   Yes            Yes*         O(b^d)    O(bd)
Greedy best-first     No             No           O(b^m)    O(b^m)
A*                    Yes            Yes**        O(b^d)    O(b^d)

Note: * when all step costs are equal; ** with an admissible heuristic
(b: branching factor, d: depth of the shallowest solution, m: maximum depth)

Summary of Key Concepts

Concept              Explanation
Search problem       Reaching a goal state from an initial state through a sequence of actions
State space          The set of all possible states
Search tree          The tree structure generated during the search
Heuristic function   A function that estimates the cost to reach the goal
Admissibility        The heuristic never overestimates the true cost
A* algorithm         Optimal search using f(n) = g(n) + h(n)

Next Steps

In the next tutorial, we will study knowledge representation and reasoning.

Tutorial 4: Knowledge Representation and Reasoning