1.1. 数据模型与魔法方法

Python 数据模型是理解 Python 工作方式的核心。通过实现特殊方法(魔法方法),你可以让自定义类支持内置操作。

1.1.1. 对象标识与相等性

1.1.1.1. 易错点:is vs ==

# `is` compares object identity (in CPython, effectively the memory address)
# `==` compares values

a = [1, 2, 3]
b = [1, 2, 3]
c = a

print(a == b)  # True - the values are equal
print(a is b)  # False - two distinct list objects
print(a is c)  # True - c is another name for the same object

# ⚠️ Small-integer cache pitfall
x = 256
y = 256
print(x is y)  # True - CPython caches the integers -5 through 256

x = 257
y = 257
print(x is y)  # False - outside the cached range (in interactive mode)
# Note: may be True when run as a script (compiler optimization)

警告

比较值时始终使用 ==;is 只用于与 None 这类单例对象做标识比较

if x is None:  # correct: identity check against the None singleton
    pass
if x == None:  # works, but not recommended (== can be overridden by __eq__)
    pass

1.1.2. 可变性与哈希

1.1.2.1. 可变对象不可哈希

# Lists are mutable and therefore unhashable
my_list = [1, 2, 3]
# hash(my_list)  # TypeError: unhashable type: 'list'

# A tuple is hashable as long as all of its elements are immutable
my_tuple = (1, 2, 3)
print(hash(my_tuple))  # works

# ⚠️ A tuple that contains a mutable element is not hashable
mixed_tuple = ([1, 2], 3)
# hash(mixed_tuple)  # TypeError

1.1.2.2. 字典键必须可哈希

# ✅ Correct: every key below is hashable
d = {
    "key": "value",
    (1, 2): "tuple key",
    frozenset([1, 2]): "frozenset key",
}

# ❌ Wrong: a list is unhashable and cannot be a dict key
# d = {[1, 2]: "list key"}  # TypeError

1.1.3. 自定义类的魔法方法

1.1.3.1. __repr__ vs __str__

class Point:
    """A 2-D point used to contrast ``__repr__`` with ``__str__``."""

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __repr__(self):
        """Return the developer-facing form: unambiguous, ideally ``eval()``-able."""
        x, y = self.x, self.y
        return f"Point({x!r}, {y!r})"

    def __str__(self):
        """Return the user-facing form, favouring readability."""
        x, y = self.x, self.y
        return f"({x}, {y})"

p = Point(3, 4)
print(repr(p))  # Point(3, 4) - for debugging
print(str(p))   # (3, 4) - for display
print(p)        # (3, 4) - print() uses __str__ by default

# Containers always render their elements with __repr__
points = [Point(1, 2), Point(3, 4)]
print(points)   # [Point(1, 2), Point(3, 4)]

小技巧

如果只实现一个,选择 __repr__。当 __str__ 未定义时,Python 会回退到 __repr__

1.1.3.2. 实现可比较的类

from functools import total_ordering

@total_ordering  # derives the remaining rich comparisons from __eq__ and __lt__
class Version:
    """A (major, minor, patch) version that is orderable and hashable."""

    def __init__(self, major, minor, patch):
        self.major, self.minor, self.patch = major, minor, patch

    def __eq__(self, other):
        """Compare component-wise; defer to ``other`` if it is not a Version."""
        if isinstance(other, Version):
            mine = (self.major, self.minor, self.patch)
            theirs = (other.major, other.minor, other.patch)
            return mine == theirs
        return NotImplemented

    def __lt__(self, other):
        """Order lexicographically by (major, minor, patch)."""
        if isinstance(other, Version):
            mine = (self.major, self.minor, self.patch)
            theirs = (other.major, other.minor, other.patch)
            return mine < theirs
        return NotImplemented

    def __repr__(self):
        major, minor, patch = self.major, self.minor, self.patch
        return f"Version({major}, {minor}, {patch})"

    def __hash__(self):
        """Hash the same tuple that ``__eq__`` compares, keeping the two consistent."""
        components = (self.major, self.minor, self.patch)
        return hash(components)

v1 = Version(1, 0, 0)
v2 = Version(2, 0, 0)
v3 = Version(1, 0, 0)

print(v1 < v2)   # True
print(v1 == v3)  # True
print(v1 >= v3)  # True - generated automatically by @total_ordering

# Instances can be used as dict keys or set members
versions = {v1, v2, v3}
print(len(versions))  # 2

1.1.4. 容器协议

1.1.4.1. 实现自定义序列

class Deck:
    """A deck of playing cards that implements the sequence protocol."""

    ranks = [*map(str, range(2, 11)), *'JQKA']
    suits = '♠♥♦♣'

    def __init__(self):
        # Cards are ordered suit by suit, and by rank within each suit.
        cards = []
        for suit in self.suits:
            for rank in self.ranks:
                cards.append(f"{rank}{suit}")
        self._cards = cards

    def __len__(self):
        """Return the number of cards, enabling ``len(deck)``."""
        return len(self._cards)

    def __getitem__(self, position):
        """Return the card(s) at ``position``; supports indexing and slicing."""
        return self._cards[position]

    def __contains__(self, card):
        """Return whether ``card`` is in the deck, enabling the ``in`` operator."""
        return card in self._cards

    def __iter__(self):
        """Return an iterator over the cards, enabling ``for card in deck``."""
        return iter(self._cards)

deck = Deck()
print(len(deck))        # 52
print(deck[0])          # 2♠
print(deck[-1])         # A♣
print(deck[:3])         # ['2♠', '3♠', '4♠']
print('A♠' in deck)     # True

# A slice of the deck can be iterated directly
for card in deck[:5]:
    print(card, end=' ')  # 2♠ 3♠ 4♠ 5♠ 6♠

1.1.5. 属性访问控制

1.1.5.1. __getattr__ vs __getattribute__

class LazyLoader:
    """Demonstrate the ``__getattr__`` hook by materialising attributes lazily."""

    def __init__(self):
        self._cache = {}

    def __getattr__(self, name):
        """Lazily create the public attribute ``name`` on first access.

        Python calls this hook only after normal attribute lookup has failed,
        which makes it a good fit for lazy loading.

        Raises:
            AttributeError: if ``name`` starts with an underscore. Private and
                dunder names are never fabricated.
        """
        # Without this guard, a lookup of ``_cache`` on an instance that never
        # ran __init__ (copy/pickle build instances via __new__) would re-enter
        # this hook forever, and protocol probes such as
        # getattr(obj, "__deepcopy__", None) would receive a bogus string.
        if name.startswith("_"):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        print(f"Loading {name}...")
        value = f"Loaded: {name}"
        self._cache[name] = value
        # Store on the instance so later lookups succeed normally and never
        # reach __getattr__ again.
        setattr(self, name, value)
        return value

obj = LazyLoader()
print(obj.data)  # Loading data... \n Loaded: data
print(obj.data)  # Loaded: data (the second access does not trigger __getattr__)

1.1.5.2. 使用 __slots__ 优化内存

import sys

class PointWithDict:
    """Ordinary point class; it declares no ``__slots__``."""

    def __init__(self, x, y):
        self.x, self.y = x, y

class PointWithSlots:
    """Point class that declares its two attributes through ``__slots__``."""

    __slots__ = 'x', 'y'

    def __init__(self, x, y):
        self.x, self.y = x, y

# Memory comparison
p1 = PointWithDict(1, 2)
p2 = PointWithSlots(1, 2)

print(sys.getsizeof(p1.__dict__))  # size of the per-instance dict; varies by Python version
# p2 has no __dict__

# The difference becomes visible when many objects are created
import tracemalloc
tracemalloc.start()

# get_traced_memory() returns cumulative (current, peak) byte counts, so each
# list is measured as the growth in "current" around its own allocation.
# Comparing the raw tuples would count points_dict a second time, because it
# is still alive when points_slots is built.
baseline, _ = tracemalloc.get_traced_memory()
points_dict = [PointWithDict(i, i) for i in range(10000)]
dict_bytes = tracemalloc.get_traced_memory()[0] - baseline
print(dict_bytes)  # the larger of the two

baseline, _ = tracemalloc.get_traced_memory()
points_slots = [PointWithSlots(i, i) for i in range(10000)]
slots_bytes = tracemalloc.get_traced_memory()[0] - baseline
print(slots_bytes)  # smaller than dict_bytes; the exact ratio depends on the Python version

tracemalloc.stop()

警告

使用 __slots__ 的限制:

  • 不能动态添加属性

  • 不能使用多重继承(如果多个父类都定义了非空 __slots__)

  • 子类必须也定义 __slots__(否则失去优化效果)

1.1.6. 运算符重载

1.1.6.1. 数值运算

class Vector:
    """A 2-D vector demonstrating numeric operator overloading."""

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __add__(self, other):
        """Return ``self + other`` for two vectors, else ``NotImplemented``."""
        if isinstance(other, Vector):
            return Vector(self.x + other.x, self.y + other.y)
        return NotImplemented

    def __radd__(self, other):
        """Return ``other + self``; called when ``other`` cannot handle ``+``."""
        # Call __add__ directly instead of evaluating ``self + other``: for an
        # unsupported operand this returns NotImplemented, so the interpreter
        # raises the TypeError itself with the operands in the right order.
        return self.__add__(other)

    def __mul__(self, scalar):
        """Return ``self * scalar`` for an int or float, else ``NotImplemented``."""
        if isinstance(scalar, (int, float)):
            return Vector(self.x * scalar, self.y * scalar)
        return NotImplemented

    def __rmul__(self, scalar):
        """Return ``scalar * self``."""
        # Same reasoning as __radd__: propagate NotImplemented rather than
        # raising from inside the reflected method.
        return self.__mul__(scalar)

    def __abs__(self):
        """Return the Euclidean length of the vector as a float."""
        import math

        # hypot avoids the overflow/underflow of squaring the components by
        # hand (e.g. 1e200 ** 2 raises OverflowError).
        return math.hypot(self.x, self.y)

    def __bool__(self):
        """Return True for any non-zero vector."""
        # Test the components directly: going through abs() would report a
        # tiny non-zero vector as False once its squares underflow to 0.0.
        return bool(self.x or self.y)

    def __repr__(self):
        return f"Vector({self.x}, {self.y})"

# Exercise each overloaded operator; the expected output follows each call.
v1 = Vector(3, 4)
v2 = Vector(1, 2)

print(v1 + v2)     # Vector(4, 6)
print(v1 * 2)      # Vector(6, 8)
print(3 * v1)      # Vector(9, 12) - int * Vector falls back to Vector.__rmul__
print(abs(v1))     # 5.0
print(bool(v1))    # True
print(bool(Vector(0, 0)))  # False

1.1.7. 最佳实践总结

命名约定
  • _name: 内部使用,不应该从外部访问

  • __name: 名称改编,防止子类覆盖

  • __name__: 系统定义的特殊方法

返回 NotImplemented

当操作不支持时,返回 NotImplemented 而不是抛出异常。这允许 Python 尝试反向操作。

def __eq__(self, other):
    # For an unsupported type, hand the decision back to Python (which then
    # tries the reflected operation) instead of raising.
    if not isinstance(other, MyClass):
        return NotImplemented  # ✅
        # raise TypeError(...)  # ❌
遵循协议

实现相关方法时,确保行为一致:

  • __eq__ 和 __hash__ 应该一起实现

  • __lt__ 和 __eq__ 可用 @total_ordering 补全