标准库

os / sys

import os, sys
 
# Independent illustrations of os / sys calls. Several of them modify the
# real filesystem or environment, and sys.exit() ends the interpreter.

# Current directory and path handling
os.getcwd()
os.path.join("/tmp", "a", "b.txt")
os.path.exists("file.txt")
os.path.abspath(".")
os.path.dirname(__file__)

# Directory and file operations
os.makedirs("a/b/c", exist_ok=True)
os.listdir(".")
os.rename("old.txt", "new.txt")
os.remove("file.txt")

# Environment variables
os.environ.get("HOME", "/tmp")
os.environ["MY_VAR"] = "value"

# sys
sys.argv           # list of command-line arguments
sys.exit(0)        # exit; 0 means success
sys.path           # module search path
sys.version        # Python version string
print("err", file=sys.stderr)

pathlib

from pathlib import Path
 
p = Path("data") / "file.txt"
# Create the parent directory and write before reading, so the read below
# cannot fail with FileNotFoundError when data/file.txt does not exist yet.
p.parent.mkdir(parents=True, exist_ok=True)
p.write_text("hello", encoding="utf-8")
p.read_text(encoding="utf-8")   # "hello"
p.exists()
p.is_file()
p.parent          # Path("data")
p.stem            # "file"
p.suffix          # ".txt"
p.resolve()       # absolute path

# Recursively walk the current directory for Python files
for f in Path(".").rglob("*.py"):
    print(f)

datetime

from datetime import datetime, date, timedelta, timezone
 
now = datetime.now()      # called without tz, so the result is a naive local time
today = date.today()

# Formatting and parsing
now.strftime("%Y-%m-%d %H:%M:%S")
datetime.strptime("2024-01-15", "%Y-%m-%d")

# Arithmetic
deadline = now + timedelta(days=7)
delta = deadline - now
delta.days        # 7
delta.total_seconds()

# Time zone (UTC): a timezone-aware "now"
utc_now = datetime.now(tz=timezone.utc)

collections

from collections import defaultdict, Counter, deque, namedtuple, OrderedDict
 
# defaultdict: a missing key is created on first access with the factory's default
word_count = defaultdict(int)
for w in ["a", "b", "a"]:
    word_count[w] += 1    # {'a': 2, 'b': 1}

# Counter: tallies occurrences of each element
c = Counter("mississippi")
c.most_common(3)           # [('i',4),('s',4),('p',2)]
c["i"]                     # 4

# deque: double-ended queue (O(1) operations at both ends)
dq = deque([1, 2, 3], maxlen=5)
dq.appendleft(0)
dq.rotate(1)               # [3, 0, 1, 2]

# namedtuple: tuple with named fields
Point = namedtuple("Point", ["x", "y"])
p = Point(1.0, 2.0)
p.x, p.y                   # 1.0, 2.0

itertools

import itertools
 
list(itertools.chain([1,2], [3,4]))           # [1,2,3,4]
list(itertools.islice(range(100), 5))         # [0,1,2,3,4]
list(itertools.combinations("ABC", 2))        # [('A','B'),('A','C'),('B','C')]
list(itertools.product([0,1], repeat=3))      # 8 triples
list(itertools.accumulate([1,2,3,4]))         # [1,3,6,10]

# Grouping: groupby only merges consecutive items with equal keys,
# so the data must already be sorted by that key.
for key, grp in itertools.groupby([1,1,2,3,3], key=lambda x: x):
    print(key, list(grp))

functools

from functools import lru_cache, partial, reduce, wraps, cached_property
 
# LRU cache: memoize results for up to 256 distinct arguments
@lru_cache(maxsize=256)
def fib(n):
    """Return the n-th Fibonacci number; any n below 2 is returned unchanged."""
    if n < 2:
        return n
    # Evaluate fib(n - 1) first so the fib(n - 2) lookup is served from the cache.
    previous = fib(n - 1)
    return previous + fib(n - 2)

fib.cache_info()    # CacheInfo(hits=..., misses=..., ...)
 
# Partial application: pre-bind some arguments of an existing callable
def power(base, exp):
    """Return ``base`` raised to the power ``exp``."""
    result = base ** exp
    return result

square = partial(power, exp=2)
square(5)           # 25

# reduce: fold a sequence into a single value, left to right
reduce(lambda acc, item: acc * item, [1, 2, 3, 4, 5])   # 120
 
# cached_property (3.8+): computed on first access, then stored on the instance
class Circle:
    """Circle whose area is computed lazily and cached per instance."""

    def __init__(self, r):
        self.r = r  # radius

    @cached_property
    def area(self):
        """Return the area of the circle.

        The value is cached after the first access, so later changes to
        ``r`` do not refresh it.
        """
        # Local import: the surrounding snippet only imports from functools.
        import math

        # math.pi instead of a truncated literal, for full float precision.
        return math.pi * self.r ** 2

re(正则表达式)

import re
 
# Searching and matching
re.match(r"\d+", "123abc")         # matches only at the start of the string
re.search(r"\d+", "abc123")        # matches at any position
re.findall(r"\d+", "a1 b2 c3")    # ['1','2','3']

# Substitution and splitting
re.sub(r"\s+", " ", "a  b   c")   # 'a b c'
re.split(r"[,;]+", "a,b;;c")      # ['a','b','c']

# Precompiled pattern (worth it when the pattern is reused)
pattern = re.compile(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})")
m = pattern.match("2024-01-15")
m.group("year")    # '2024'
m.groupdict()      # {'year':'2024','month':'01','day':'15'}

# Common flags
re.IGNORECASE      # case-insensitive matching
re.MULTILINE       # ^ and $ match at the start/end of every line
re.DOTALL          # . also matches newline

json / pickle

import json, pickle
 
# JSON: language-independent, but limited to basic types
data = {"name": "Alice", "scores": [95, 87]}
json.dumps(data, ensure_ascii=False, indent=2)   # ensure_ascii=False keeps non-ASCII text unescaped
json.loads('{"x": 1}')

# Pickle: Python-only; handles most (not all) Python objects.
# NOTE(review): unpickling can execute arbitrary code - only load data you trust.
with open("data.pkl", "wb") as f:
    pickle.dump(data, f)

with open("data.pkl", "rb") as f:
    loaded = pickle.load(f)

threading / concurrent.futures

import threading
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
 
# Worker functions live at module level so that worker processes can import them.
def task(n):
    """Print a marker line for job ``n`` (stands in for I/O-bound work)."""
    print(f"Task {n}")


def heavy_compute(n):
    """Return the sum of squares of 0..n-1 (stands in for CPU-bound work)."""
    return sum(i * i for i in range(n))


# The guard is required for ProcessPoolExecutor: under the "spawn" start
# method each worker re-imports this module and must not re-run the code below.
if __name__ == "__main__":
    # Basic thread
    t = threading.Thread(target=task, args=(1,))
    t.start()
    t.join()

    # Thread pool (I/O-bound work); task returns None, so results is a list of None
    with ThreadPoolExecutor(max_workers=4) as pool:
        results = list(pool.map(task, range(10)))

    # Process pool (CPU-bound work)
    numbers = [10_000, 20_000, 30_000]
    with ProcessPoolExecutor() as pool:
        results = list(pool.map(heavy_compute, numbers))

subprocess

import subprocess
 
# Run a command and capture its output
result = subprocess.run(
    ["git", "log", "--oneline", "-5"],
    capture_output=True,
    text=True,
    check=True           # a non-zero exit code raises CalledProcessError
)
print(result.stdout)
 
# Pipeline: equivalent of the shell command `ls -la | grep py`
p = subprocess.Popen(["ls", "-la"], stdout=subprocess.PIPE)
grep = subprocess.Popen(["grep", "py"], stdin=p.stdout, stdout=subprocess.PIPE)
p.stdout.close()       # drop our copy so `ls` receives SIGPIPE if grep exits first
out, _ = grep.communicate()
p.wait()               # reap the first process

logging

import logging
 
# Configure the root logger once: everything from DEBUG up goes to both
# the console and app.log.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    handlers=[
        logging.StreamHandler(),                             # console
        # Explicit encoding: the messages are non-ASCII and the platform
        # default encoding may not be able to represent them.
        logging.FileHandler("app.log", encoding="utf-8"),    # file
    ]
)

logger = logging.getLogger(__name__)
logger.debug("调试信息")
logger.info("正常信息")
logger.warning("警告")
logger.error("错误")

# logger.exception() attaches the active traceback, so it belongs inside an
# except block; outside one there is no exception to report.
try:
    1 / 0
except ZeroDivisionError:
    logger.exception("含 traceback 的错误")

argparse

import argparse
 
# One required positional argument plus three optional flags.
parser = argparse.ArgumentParser(description="示例 CLI 工具")
parser.add_argument("input",              help="输入文件")
parser.add_argument("-o", "--output",     default="out.txt", help="输出文件")
parser.add_argument("-v", "--verbose",    action="store_true")   # False unless the flag is given
parser.add_argument("-n", "--count",      type=int, default=10)  # value is converted to int

# With no explicit argument list, parse_args() reads the command line.
args = parser.parse_args()
print(args.input, args.output, args.verbose, args.count)

相关链接