4.1. 常见错误

本节详细讲解 Python 中最常见的编程错误和陷阱。

4.1.1. 可变对象陷阱

4.1.1.1. 可变默认参数

这是 Python 最著名的陷阱之一。

# ❌ 危险：可变默认参数
def add_item(item, items=[]):
    """Append ``item`` to ``items`` and return the list (anti-pattern demo).

    The ``[]`` default is evaluated once, when the ``def`` statement runs, so
    every call that omits ``items`` appends to the same list object.
    """
    items.append(item)
    return items

print(add_item('a'))  # ['a']
print(add_item('b'))  # ['a', 'b'], not ['b']!
print(add_item('c'))  # ['a', 'b', 'c']

# 为什么？默认参数在函数定义时只创建一次
# 后续调用共享同一个列表对象

# ✅ 正确做法
def add_item(item, items=None):
    """Append ``item`` to ``items`` and return the list.

    A new list is created on every call where ``items`` is omitted (or is
    ``None``), so calls never share state through the default value.
    """
    target = [] if items is None else items
    target.append(item)
    return target

# 同样适用于字典、集合等可变对象
def process(data=None):
    """Return ``data``, or a new empty dict when ``data`` is ``None``."""
    return {} if data is None else data

4.1.1.2. 列表浅复制

# ❌ 浅复制陷阱
original = [[1, 2], [3, 4]]
# Named ``shallow`` rather than ``copy`` so the variable does not shadow the
# standard-library ``copy`` module.
shallow = original[:]  # or list(original)

shallow[0].append(5)
print(original)  # [[1, 2, 5], [3, 4]] -- the original list changed too!

# 为什么？浅复制只复制外层列表，内层仍是引用

# ✅ 深复制
import copy
original = [[1, 2], [3, 4]]
deep = copy.deepcopy(original)

deep[0].append(5)
print(original)  # [[1, 2], [3, 4]] -- unaffected

# For a simple list of lists, a comprehension that copies each row also works.
# Bound to ``row_copies`` so the imported ``copy`` module is not overwritten.
row_copies = [row[:] for row in original]

4.1.1.3. 列表乘法创建二维数组

# ❌ 错误方式
matrix = [[0] * 3] * 3

matrix[0][0] = 1
print(matrix)  # [[1, 0, 0], [1, 0, 0], [1, 0, 0]] 所有行都变了!

# 为什么？外层乘法创建的是同一列表的三个引用

# ✅ 正确方式
matrix = [[0] * 3 for _ in range(3)]

matrix[0][0] = 1
print(matrix)  # [[1, 0, 0], [0, 0, 0], [0, 0, 0]]

4.1.2. 作用域问题

4.1.2.1. UnboundLocalError

# ❌ 常见错误
x = 10

def foo():
    """Demonstrate ``UnboundLocalError`` (intentionally broken).

    The assignment below makes ``x`` local to the whole function body, so the
    ``print`` reads the local name before it has a value.
    """
    print(x)  # UnboundLocalError: local 'x' is read before it is assigned
    x = 20

# 为什么？Python 在编译时确定 x 是局部变量（因为有赋值语句）
# 但运行到 print 时，局部变量 x 还未赋值

# ✅ 解决方案1：使用 global
def foo():
    """Print the module-level ``x``, then rebind it to 20."""
    global x  # every use of x in this function refers to the module-level name
    print(x)
    x = 20

# ✅ 解决方案2：使用不同变量名
def foo():
    """Print the module-level ``x``; the local assignment uses another name."""
    print(x)  # global variable: nothing in this function assigns to x
    y = 20   # local variable

# ✅ 解决方案3：作为参数传递（推荐）
def foo(x):
    """Print ``x`` and return ``x + 10``; the value arrives as an argument."""
    print(x)
    return x + 10

4.1.2.2. nonlocal 与嵌套函数

# ❌ 错误
def outer():
    """Return a counter closure (intentionally broken: no ``nonlocal``)."""
    count = 0
    
    def inner():
        count += 1  # UnboundLocalError: += makes count local to inner()
        return count
    
    return inner

# ✅ 正确
def outer():
    """Build and return a counter function.

    Each call to the returned function adds one to a ``count`` variable that
    lives in this enclosing call, and returns the new value.
    """
    count = 0

    def inner():
        # Rebind the enclosing function's variable instead of creating a local.
        nonlocal count
        count = count + 1
        return count

    return inner

counter = outer()
print(counter())  # 1
print(counter())  # 2

4.1.3. 迭代陷阱

4.1.3.1. 修改正在迭代的容器

# ❌ 危险：迭代时修改列表
# Two adjacent even numbers make the skipped element visible in the output.
items = [1, 2, 4, 5]
for i, item in enumerate(items):
    if item % 2 == 0:
        del items[i]  # later elements shift left, so the next one is skipped

print(items)  # [1, 4, 5] -- 4 was never examined

# ❌ 同样危险
# Start from fresh data so this example does not depend on earlier snippets.
items = [1, 2, 4, 5]
for item in items:
    if item % 2 == 0:
        items.remove(item)  # the element after the removed one is skipped

print(items)  # [1, 4, 5] -- 4 survived

# ✅ 正确：创建新列表
items = [x for x in items if x % 2 != 0]

# ✅ 或者反向迭代
items = [1, 2, 3, 4, 5]
# Walk the indices from last to first: a deletion only shifts elements that
# have already been visited.
for idx in reversed(range(len(items))):
    if not items[idx] % 2:
        del items[idx]

# ✅ 或者使用 filter
items = list(filter(lambda x: x % 2 != 0, items))

4.1.3.2. 迭代器耗尽

# ⚠️ 迭代器只能使用一次
numbers = (x ** 2 for x in range(5))

print(list(numbers))  # [0, 1, 4, 9, 16]
print(list(numbers))  # [] -- already exhausted!

# Materialize the values as a list when they are needed more than once.
numbers = [x ** 2 for x in range(5)]

# 或重新创建生成器
def get_squares():
    """Return a new generator over the squares of 0 through 4."""
    squares = (x ** 2 for x in range(5))
    return squares

4.1.3.3. zip 的最短原则

# ⚠️ zip 以最短的为准
names = ['Alice', 'Bob', 'Charlie']
scores = [90, 85]

for name, score in zip(names, scores):
    print(f"{name}: {score}")
# Charlie 被忽略!

# ✅ 使用 zip_longest
from itertools import zip_longest

for name, score in zip_longest(names, scores, fillvalue=0):
    print(f"{name}: {score}")

# ✅ Python 3.10+ 可以使用 strict
# list(zip(names, scores, strict=True))  # ValueError

4.1.4. 比较陷阱

4.1.4.1. is vs ==

# ⚠️ 小整数缓存
a = 256
b = 256
print(a is b)  # True (因为 -5 到 256 被缓存)

a = 257
b = 257
print(a is b)  # False 或 True (取决于上下文)

# ⚠️ 字符串驻留
s1 = "hello"
s2 = "hello"
print(s1 is s2)  # True (字符串驻留)

s1 = "hello world"
s2 = "hello world"
print(s1 is s2)  # 可能是 False

# ✅ Rules
# Compare values with ==.
# Reserve `is` for identity checks against singletons such as None.
if x is None:
    pass
# Test truthiness directly. `flag is True` is a different check: it is False
# for truthy values other than the True object itself (e.g. 1 or "yes").
if flag:
    pass

4.1.4.2. 浮点数比较

# ❌ 浮点数精度问题
print(0.1 + 0.2)  # 0.30000000000000004
print(0.1 + 0.2 == 0.3)  # False!

# ✅ 使用 math.isclose
import math
print(math.isclose(0.1 + 0.2, 0.3))  # True
# rel_tol is spelled out here for illustration; 1e-09 is also its default.
# When comparing against 0, pass abs_tol as well: rel_tol alone never matches.
print(math.isclose(0.1 + 0.2, 0.3, rel_tol=1e-9))

# ✅ 金融计算使用 Decimal
from decimal import Decimal
result = Decimal('0.1') + Decimal('0.2')
print(result == Decimal('0.3'))  # True

4.1.4.3. 链式比较

# ⚠️ Python 的链式比较可能不符合直觉
x = 5
print(1 < x < 10)  # True (这是正确的)

# 但注意：
print(1 < x > 3)  # 等价于 1 < x and x > 3
print((1 < x) > 3)  # 不同！(1 < x) 是 True，然后 True > 3 是 False

# ⚠️ 特别注意 is 的链式
a = b = []
print(a is b is [])  # False! 等价于 a is b and b is []

4.1.5. 字符串陷阱

4.1.5.1. 字符串不可变

# ❌ 低效的字符串拼接
result = ""
for i in range(10000):
    result += str(i)  # 每次创建新字符串!

# ✅ 使用 join
result = "".join(str(i) for i in range(10000))

# ✅ 或使用 StringIO
from io import StringIO
buffer = StringIO()
for i in range(10000):
    buffer.write(str(i))
result = buffer.getvalue()

4.1.5.2. 编码问题

# ⚠️ 文件编码
# 不指定编码可能导致问题
with open('file.txt', 'r') as f:  # 使用系统默认编码
    content = f.read()

# ✅ 显式指定编码
with open('file.txt', 'r', encoding='utf-8') as f:
    content = f.read()

# ⚠️ bytes 和 str
b = b'hello'
s = 'hello'
# b + s  # TypeError!

# ✅ 显式转换
s = b.decode('utf-8')
b = s.encode('utf-8')

4.1.6. 异常处理陷阱

4.1.6.1. 捕获过于宽泛的异常

# ❌ 捕获所有异常
# A bare ``except:`` catches BaseException, so KeyboardInterrupt and SystemExit
# are swallowed too. ``except Exception:`` leaves those two alone, but is still
# too broad when all it does is ``pass``.
try:
    do_something()
except:
    pass  # the error is silently discarded

# ✅ 捕获具体异常
try:
    do_something()
except (ValueError, KeyError) as e:
    # Pass the value as a logging argument so formatting is deferred until the
    # record is actually emitted.
    logger.error("Expected error: %s", e)
except Exception as e:
    # logger.exception records the traceback along with the message.
    logger.exception("Unexpected error: %s", e)
    raise  # re-raise so the caller still sees the failure

4.1.6.2. finally 中的 return

# ⚠️ finally 中的 return 会覆盖其他 return
def problematic():
    """Always return "finally" (anti-pattern demo).

    The ``return`` in the ``finally`` clause runs last and replaces the value
    already returned from the ``try`` clause.
    """
    try:
        return "try"
    except:
        return "except"
    finally:
        return "finally"

print(problematic())  # "finally"

# 甚至覆盖异常
def also_problematic():
    """Return "finally" even though the ``try`` clause raises.

    Anti-pattern demo: the ``return`` inside ``finally`` discards the
    exception that was propagating.
    """
    try:
        raise ValueError("Error!")
    finally:
        return "finally"  # this return swallows the in-flight ValueError

print(also_problematic())  # "finally"，没有异常!

# ✅ finally 只用于清理，不要 return

4.1.6.3. 异常中的变量作用域

# ⚠️ Python 3 中，except 中的变量在块结束后被删除
try:
    raise ValueError("error")
except ValueError as e:
    error = e  # 保存引用
    print(e)

# print(e)  # NameError: name 'e' is not defined
print(error)  # 这样可以

4.1.7. 最佳实践总结

避免陷阱的原则

不使用可变默认参数：用 None 代替
理解引用语义：复制时注意深浅
不修改迭代中的容器：创建新容器
用 == 比较值：is 只用于 None 等单例对象的身份比较
捕获具体异常：不要裸 except
显式指定编码：打开文件时

调试技巧

# 检查对象身份
print(id(obj))

# 检查可变性
# Python exposes no mutability flag: there is no ``__mutable__`` attribute,
# and reading one raises AttributeError. Whether the type is hashable is a
# rough stand-in, because the built-in mutable containers are unhashable.
print(type(obj).__hash__ is None)  # True for list, dict, set
# Rule of thumb: list, dict, set are mutable; str, tuple, frozenset are not

# 检查引用
import sys
print(sys.getrefcount(obj))