# 数据模型与魔法方法

Python 数据模型是理解 Python 工作方式的核心。通过实现特殊方法(魔法方法),你可以让自定义类支持内置操作。

## 对象标识与相等性

### 易错点:`is` vs `==`

```python
# is 比较的是对象标识(内存地址)
# == 比较的是值

a = [1, 2, 3]
b = [1, 2, 3]
c = a

print(a == b)  # True - 值相等
print(a is b)  # False - 不是同一个对象
print(a is c)  # True - 是同一个对象

# ⚠️ 小整数缓存陷阱
x = 256
y = 256
print(x is y)  # True - CPython 缓存 -5 到 256 的小整数

x = 257
y = 257
print(x is y)  # False - 超出缓存范围(在交互模式下)
# 注意:在脚本中可能为 True(编译器优化)
```

:::{warning}
比较值时始终使用 `==`;`is` 只用于与 `None` 这样的单例做身份比较:

```python
if x is None:  # 正确
    pass

if x == None:  # 可以工作,但不推荐
    pass
```
:::

## 可变性与哈希

### 可变对象不可哈希

```python
# 列表是可变的,不可哈希
my_list = [1, 2, 3]
# hash(my_list)  # TypeError: unhashable type: 'list'

# 元组(如果所有元素都可哈希)是可哈希的
my_tuple = (1, 2, 3)
print(hash(my_tuple))  # 有效

# ⚠️ 包含不可哈希元素(如列表)的元组不可哈希
mixed_tuple = ([1, 2], 3)
# hash(mixed_tuple)  # TypeError
```

### 字典键必须可哈希

```python
# ✅ 正确
d = {
    "key": "value",
    (1, 2): "tuple key",
    frozenset([1, 2]): "frozenset key",
}

# ❌ 错误
# d = {[1, 2]: "list key"}  # TypeError
```

## 自定义类的魔法方法

### `__repr__` vs `__str__`

```python
class Point:
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __repr__(self):
        """开发者看的,应该是无歧义的表示,最好能 eval() 回来"""
        return f"Point({self.x!r}, {self.y!r})"

    def __str__(self):
        """用户看的,可读性优先"""
        return f"({self.x}, {self.y})"

p = Point(3, 4)
print(repr(p))  # Point(3, 4) - 用于调试
print(str(p))   # (3, 4) - 用于显示
print(p)        # (3, 4) - print 默认使用 __str__

# 在容器中总是使用 __repr__
points = [Point(1, 2), Point(3, 4)]
print(points)  # [Point(1, 2), Point(3, 4)]
```

:::{tip}
如果只实现一个,选择 `__repr__`。当 `__str__` 未定义时,Python 会回退到 `__repr__`。
:::

### 实现可比较的类

```python
from functools import total_ordering

@total_ordering  # 自动生成其他比较方法
class Version:
    def __init__(self, major, minor, patch):
        self.major = major
        self.minor = minor
        self.patch = patch

    def __eq__(self, other):
        if not isinstance(other, Version):
            return NotImplemented
        return (self.major, self.minor, self.patch) == \
               (other.major, other.minor, other.patch)

    def __lt__(self, other):
        if not isinstance(other, Version):
            return NotImplemented
        return (self.major, self.minor, self.patch) < \
               (other.major, other.minor, other.patch)

    def __repr__(self):
        return f"Version({self.major}, {self.minor}, {self.patch})"

    def __hash__(self):
        return hash((self.major, self.minor, self.patch))

v1 = Version(1, 0, 0)
v2 = Version(2, 0, 0)
v3 = Version(1, 0, 0)

print(v1 < v2)   # True
print(v1 == v3)  # True
print(v1 >= v3)  # True - 由 @total_ordering 自动生成

# 可以用作字典键或集合元素
versions = {v1, v2, v3}
print(len(versions))  # 2
```

## 容器协议

### 实现自定义序列

```python
class Deck:
    """实现序列协议的扑克牌组"""

    ranks = [str(n) for n in range(2, 11)] + list('JQKA')
    suits = '♠♥♦♣'

    def __init__(self):
        self._cards = [f"{rank}{suit}"
                       for suit in self.suits
                       for rank in self.ranks]

    def __len__(self):
        """支持 len()"""
        return len(self._cards)

    def __getitem__(self, position):
        """支持索引和切片"""
        return self._cards[position]

    def __contains__(self, card):
        """支持 in 操作符"""
        return card in self._cards

    def __iter__(self):
        """支持迭代"""
        return iter(self._cards)

deck = Deck()
print(len(deck))     # 52
print(deck[0])       # 2♠
print(deck[-1])      # A♣
print(deck[:3])      # ['2♠', '3♠', '4♠']
print('A♠' in deck)  # True

# 可以直接迭代
for card in deck[:5]:
    print(card, end=' ')  # 2♠ 3♠ 4♠ 5♠ 6♠
```

## 属性访问控制

### `__getattr__` vs `__getattribute__`

`__getattr__` 只在常规属性查找失败时才被调用;`__getattribute__` 则拦截每一次属性访问,覆盖它时必须通过 `super().__getattribute__()` 读取属性,否则会无限递归。

```python
class LazyLoader:
    """演示属性访问钩子"""

    def __init__(self):
        self._cache = {}

    def __getattr__(self, name):
        """
        只在属性未找到时调用
        适合实现懒加载
        """
        print(f"Loading {name}...")
        value = f"Loaded: {name}"
        self._cache[name] = value
        setattr(self, name, value)  # 缓存到实例
        return value

obj = LazyLoader()
print(obj.data)  # Loading data... \n Loaded: data
print(obj.data)  # Loaded: data (第二次不触发 __getattr__)
```

### 使用 `__slots__` 优化内存

```python
import sys

class PointWithDict:
    def __init__(self, x, y):
        self.x = x
        self.y = y

class PointWithSlots:
    __slots__ = ('x', 'y')

    def __init__(self, x, y):
        self.x = x
        self.y = y

# 内存对比
p1 = PointWithDict(1, 2)
p2 = PointWithSlots(1, 2)

print(sys.getsizeof(p1.__dict__))  # 约 104 字节(具体数值因 Python 版本而异)
# p2 没有 __dict__

# 创建大量对象时差异明显
import tracemalloc

tracemalloc.start()
points_dict = [PointWithDict(i, i) for i in range(10000)]
print(tracemalloc.get_traced_memory())  # (current, peak) - 较大

tracemalloc.reset_peak()
points_slots = [PointWithSlots(i, i) for i in range(10000)]
print(tracemalloc.get_traced_memory())  # current 仍包含 points_dict;减去上一次的 current 才是 slots 版本的占用,明显更小
```

:::{warning}
使用 `__slots__` 的限制:

- 不能动态添加属性
- 不能使用多重继承(如果多个父类都定义了非空 `__slots__`)
- 子类必须也定义 `__slots__`(否则失去优化效果)
:::

## 运算符重载

### 数值运算

```python
class Vector:
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __add__(self, other):
        """v1 + v2"""
        if isinstance(other, Vector):
            return Vector(self.x + other.x, self.y + other.y)
        return NotImplemented

    def __radd__(self, other):
        """other + v(当 other 不支持 + 操作时)"""
        return self + other

    def __mul__(self, scalar):
        """v * scalar"""
        if isinstance(scalar, (int, float)):
            return Vector(self.x * scalar, self.y * scalar)
        return NotImplemented

    def __rmul__(self, scalar):
        """scalar * v"""
        return self * scalar

    def __abs__(self):
        """abs(v) - 向量长度"""
        return (self.x ** 2 + self.y ** 2) ** 0.5

    def __bool__(self):
        """bool(v) - 非零向量为 True"""
        return bool(abs(self))

    def __repr__(self):
        return f"Vector({self.x}, {self.y})"

v1 = Vector(3, 4)
v2 = Vector(1, 2)

print(v1 + v2)   # Vector(4, 6)
print(v1 * 2)    # Vector(6, 8)
print(3 * v1)    # Vector(9, 12)
print(abs(v1))   # 5.0
print(bool(v1))  # True
print(bool(Vector(0, 0)))  # False
```

## 最佳实践总结

::::{grid} 1
:gutter: 2

:::{grid-item-card} 命名约定

- `_name`: 内部使用,不应该从外部访问
- `__name`: 触发名称改编(name mangling),避免与子类中的同名属性冲突
- `__name__`: 系统定义的特殊名称(方法或属性)
:::

:::{grid-item-card} 返回 NotImplemented
当操作不支持时,返回 `NotImplemented` 而不是抛出异常。这允许 Python 尝试反向操作。

```python
def __eq__(self, other):
    if not isinstance(other, MyClass):
        return NotImplemented  # ✅
        # raise TypeError(...)  # ❌
```
:::

:::{grid-item-card} 遵循协议
实现相关方法时,确保行为一致:

- `__eq__` 和 `__hash__` 应该一起实现(只定义 `__eq__` 会使 `__hash__` 被置为 `None`,实例变为不可哈希)
- `__lt__` 和 `__eq__` 可用 `@total_ordering` 补全
:::

::::