生成器与迭代器

迭代器协议

实现了 __iter__ 和 __next__ 两个方法的对象即为迭代器。

class Counter:
    """Single-pass iterator that produces 1, 2, ..., stop."""

    def __init__(self, stop):
        # `current` holds the last value handed out; 0 means nothing yet.
        self.stop = stop
        self.current = 0

    def __iter__(self):
        """Return the object itself: it is its own iterator."""
        return self

    def __next__(self):
        """Advance by one and return the new value, or raise StopIteration."""
        if self.current < self.stop:
            self.current += 1
            return self.current
        raise StopIteration
 
# Counter.__iter__ returns the object itself, so a for loop can consume it directly.
for n in Counter(3):
    print(n)   # 1 2 3

iter() 和 next() 是内置函数,分别调用对象的 __iter__ 和 __next__ 方法。


生成器函数

包含 yield 的函数返回生成器对象,惰性求值,节省内存。

def countdown(n):
    """Yield n, n - 1, n - 2, ... for as long as the value is greater than 0.

    Nothing is yielded when ``n`` is not greater than 0 to begin with.
    """
    remaining = n
    while remaining > 0:
        yield remaining
        remaining -= 1
 
gen = countdown(3)
next(gen)   # 3
next(gen)   # 2
next(gen)   # 1
next(gen)   # raises StopIteration: the generator is exhausted

# Iterate directly with a for loop
for n in countdown(5):
    print(n)

send() 与双向通信

def accumulator():
    """Running-sum generator driven through ``send()``.

    Every ``yield`` hands the current total to the caller and receives the
    next value to add. Receiving ``None`` (for example from a plain
    ``next()`` after the priming call) ends the generator.
    """
    running = 0
    received = yield running   # the first yield reports the initial total
    while received is not None:
        running += received
        received = yield running   # yield both emits and receives
 
gen = accumulator()
next(gen)          # prime the generator (advance to the first yield); returns 0
gen.send(10)       # 10
gen.send(20)       # 30
gen.send(5)        # 35

生成器表达式

类似列表推导式,但用圆括号,惰性求值。

# List comprehension: evaluated immediately, the whole list is held in memory
squares_list = [x * x for x in range(1_000_000)]

# Generator expression: evaluated lazily, values are produced on demand
squares_gen = (x * x for x in range(1_000_000))

# The parentheses can be omitted when it is passed as a function argument
total = sum(x * x for x in range(100))

yield from

委托子生成器,简化嵌套生成器。

def flatten(nested):
    """Yield the leaf items of arbitrarily nested lists, depth first.

    Only ``list`` instances are descended into; every other item, including
    tuples and strings, is yielded unchanged.
    """
    for element in nested:
        if not isinstance(element, list):
            yield element
            continue
        # Recursive delegation: the sub-generator's items pass straight through.
        yield from flatten(element)
 
# Lists nested at any depth are expanded in order into one flat sequence.
list(flatten([1, [2, [3, 4]], 5]))   # [1, 2, 3, 4, 5]
 
# Merge several iterables into one stream
def chain(*iterables):
    """Yield every item of each iterable in turn, in argument order."""
    for iterable in iterables:
        yield from iterable
 
# Each argument is exhausted in turn, so the three lists come out as one sequence.
list(chain([1, 2], [3, 4], [5]))   # [1, 2, 3, 4, 5]

itertools 常用函数

import itertools
 
# Infinite iterators
itertools.count(10, 2)          # 10, 12, 14, ...
itertools.cycle([1, 2, 3])      # 1, 2, 3, 1, 2, 3, ...
itertools.repeat("x", 3)        # 'x', 'x', 'x'  (finite here because a count of 3 is given)

# Finite iterators
list(itertools.islice(range(100), 5))            # [0, 1, 2, 3, 4]
list(itertools.chain([1,2], [3,4]))              # [1, 2, 3, 4]
list(itertools.compress("ABCD", [1,0,1,0]))     # ['A', 'C']
list(itertools.takewhile(lambda x: x<5, [1,3,5,7]))  # [1, 3]
list(itertools.dropwhile(lambda x: x<5, [1,3,5,7]))  # [5, 7]

# Combinatorics
list(itertools.combinations("ABC", 2))          # [('A','B'), ('A','C'), ('B','C')]
list(itertools.permutations("AB", 2))           # [('A','B'), ('B','A')]
list(itertools.product([1,2], [3,4]))           # [(1,3),(1,4),(2,3),(2,4)]

# Grouping: groupby only merges consecutive items with equal keys, so the
# input must be sorted by the key first (this sample is already in key order)
data = [("a",1),("a",2),("b",3)]
for key, group in itertools.groupby(data, key=lambda x: x[0]):
    print(key, list(group))

生成器 vs 列表

| 特性 | 列表 | 生成器 |
| --- | --- | --- |
| 求值时机 | 立即 | 惰性 |
| 内存占用 | O(n) | O(1) |
| 可多次迭代 | ✅ | ❌(单次消费) |
| 支持索引 | ✅ | ❌ |
| 适合场景 | 需多次访问 | 大数据流、管道 |

# Process a large file line by line (without loading it all into memory)
def read_large_file(path):
    """Lazily yield the lines of the UTF-8 text file at ``path``.

    The file is opened when iteration starts and stays open while lines are
    being consumed; the ``with`` block closes it when the generator finishes.
    """
    with open(path, encoding="utf-8") as handle:
        yield from handle
 
# Lines are pulled from the generator one at a time as the loop advances.
# NOTE(review): `process` is not defined in this snippet; presumably a
# caller-supplied per-line handler. Confirm before running.
for line in read_large_file("huge.log"):
    process(line)

相关链接