- Item16:使用get而不是in和KeyError来处理缺失的字典键
# 比如有三明治店,需要添加新的面包:
counters = {
'pumpernickel': 2,
'sourdough': 1,
}
# 用if来处理,需要访问三次key(一次是in,一次是取数,一次是赋值)。
key = 'wheat'
if key in counters:
count = counters[key]
else:
count = 0
counters[key] = count + 1
# 利用try-except,可以减少一次in的访问。
try:
count = counters[key]
except KeyError:
count = 0
counters[key] = count + 1
# 或者使用dict内置的get方法,同样可以达到效果:
count = counters.get(key, 0)
counters[key] = count + 1
再次体会从in到try-except的过程:
if key not in counters:
counters[key] = 0
counters[key] += 1
if key in counters:
counters[key] += 1
else:
counters[key] = 1
try:
counters[key] += 1
except KeyError:
counters[key] = 1
如果字典的value是列表,处理方式类似:
votes = {
    'baguette': ['Bob', 'Alice'],
    'ciabatta': ['Coco', 'Deb'],
}
key = 'brioche'
who = 'Elmer'
# "in" approach
if key in votes:
    names = votes[key]
else:
    # Chained assignment (not a ternary): the same new list object is bound
    # to both votes[key] and names, so appending through names updates the
    # dict entry without looking votes[key] up again.
    votes[key] = names = []
names.append(who)
print(votes)
>>>
{'baguette': ['Bob', 'Alice'],
'ciabatta': ['Coco', 'Deb'],
'brioche': ['Elmer']}
# try-except方式
try:
names = votes[key]
except KeyError:
votes[key] = names = []
names.append(who)
# get方式
names = votes.get(key)
if names is None:
votes[key] = names = []
names.append(who)
# get+walrus方式(item10)
if (names := votes.get(key)) is None:
votes[key] = names = []
names.append(who)
也可以用dict的setdefault方法达到目的。但是setdefault的命名不够直观:当key不存在时,它把第二个参数设为该key的value并返回;当key已存在时,它直接返回当前key对应的value(不会覆盖)。
names = votes.setdefault(key, [])
names.append(who)
不仅如此,传入的默认value不会被复制:字典里直接存的就是传入的那个对象本身(同一个引用),之后修改该对象,字典里的值也会跟着改变。
data = {}
key = 'foo'
value = []
data.setdefault(key, value)
print('Before:', data)
value.append('hello')
print('After: ', data)
>>>
Before: {'foo': []}
After: {'foo': ['hello']}
如果回到最开始的例子,用setdefault来实现。其中,setdefault对缺失的key,需要做两次赋值操作,比较多余。
count = counters.setdefault(key, 0)
counters[key] = count + 1
替代的方式,可以使用下面的item17。
- Item17:用defaultdict而不是setdefault来处理内部状态的缺失项
比如现在有一个字典,记录去过的国家对应的城市。
visits = {
'Mexico': {'Tulum', 'Puerto Vallarta'},
'Japan': {'Hakone'},
}
visits.setdefault('France', set()).add('Arles') # Short
if (japan := visits.get('Japan')) is None: # Long
visits['Japan'] = japan = set()
japan.add('Kyoto')
print(visits)
>>>
{'Mexico': {'Tulum', 'Puerto Vallarta'},
'Japan': {'Kyoto', 'Hakone'},
'France': {'Arles'}}
可以尝试用类来封装这个复杂的过程。但是dict.setdefault方法的命名依然让人迷惑。
class Visits:
    """Record the cities visited in each country, backed by a plain dict."""

    def __init__(self):
        # Maps a country name to the set of cities visited there.
        self.data = {}

    def add(self, country, city):
        """Add ``city`` to the set stored for ``country``.

        ``dict.setdefault`` inserts the given empty set the first time a
        country is seen and otherwise returns the set already stored.  The
        ``set()`` argument is constructed on every call, even when the
        country already exists.
        """
        self.data.setdefault(country, set()).add(city)
这时用collections的defaultdict来指定默认的value,可以使代码更加清晰可读。
from collections import defaultdict
class Visits:
    """Record the cities visited in each country, backed by a defaultdict."""

    def __init__(self):
        # A missing country is created on first access with an empty set.
        self.data = defaultdict(set)

    def add(self, country, city):
        """Add ``city`` to the set stored for ``country``."""
        self.data[country].add(city)
visits = Visits()
visits.add('England', 'Bath')
visits.add('England', 'London')
print(visits.data)
>>>
defaultdict(<class 'set'>, {'England': {'London', 'Bath'}})
- Item18:知道如何用missing构建基于键的默认值
尽管get和defaultdict可以减少函数调用,有时候setdefault和defaultdict都无法起效。
比如,现在有一个文件及句柄的一个字典。如果对应句柄有的话,直接读文件。没有的话,尝试打开文件,并且存句柄:
pictures = {}
path = 'profile_1234.png'
# 用walrus来做(open调用少)
if (handle := pictures.get(path)) is None:
try:
handle = open(path, 'a+b')
except OSError:
print(f'Failed to open path {path}')
raise
else:
pictures[path] = handle
handle.seek(0)
image_data = handle.read()
# 或者用setdefault来做。(每次都会调用open,开销大。)
try:
handle = pictures.setdefault(path, open(path, 'a+b'))
except OSError:
print(f'Failed to open path {path}')
raise
else:
handle.seek(0)
image_data = handle.read()
from collections import defaultdict
def open_picture(profile_path):
    """Open ``profile_path`` in binary append/read mode (``'a+b'``).

    Args:
        profile_path: Path of the picture file to open.

    Returns:
        The open file object.

    Raises:
        OSError: Re-raised, after printing a failure message, when the
            file cannot be opened.
    """
    try:
        return open(profile_path, 'a+b')
    except OSError:
        print(f'Failed to open path {profile_path}')
        raise
pictures = defaultdict(open_picture)
handle = pictures[path]
handle.seek(0)
image_data = handle.read()
>>>
Traceback ...
TypeError: open_picture() missing 1 required positional
argument: 'profile_path'
# 问题就在defaultdict默认接收的函数,是无参的。幸运的是,可以继承dict,然后实现__missing__方法达到效果。
class Pictures(dict):
    """Dict of path -> open file handle that opens missing paths on demand."""

    def __missing__(self, key):
        """Open the picture for ``key``, store the handle, and return it.

        ``dict`` calls this hook from ``pictures[key]`` when ``key`` is
        absent and passes the key itself, which a ``defaultdict`` factory
        (called with no arguments) cannot receive.
        """
        handle = open_picture(key)
        self[key] = handle
        return handle
pictures = Pictures()
handle = pictures[path]
handle.seek(0)
image_data = handle.read()
函数篇:
- Item19:当拆包多个返回值时,不要赋值给超过三个变量(返回一个class或者namedtuple实例)
def get_stats(numbers):
    """Return the smallest and largest values of ``numbers``.

    Args:
        numbers: Values to inspect; ``min``/``max`` raise ``ValueError``
            when it is empty.

    Returns:
        A ``(minimum, maximum)`` tuple.
    """
    return min(numbers), max(numbers)
lengths = [63, 73, 72, 60, 67, 66, 71, 61, 72, 70]
minimum, maximum = get_stats(lengths) # Two return values
print(f'Min: {minimum}, Max: {maximum}')
>>>
Min: 60, Max: 73
类似的赋值行为:
first, second = 1, 2
assert first == 1
assert second == 2
def my_function():
    """Return the tuple ``(1, 2)``, which callers can unpack directly."""
    return 1, 2
first, second = my_function()
assert first == 1
assert second == 2
def get_avg_ratio(numbers):
    """Return each value as a ratio of the mean, largest ratio first.

    Args:
        numbers: Non-empty sequence of numbers (its ``len`` is used, and
            an empty input raises ``ZeroDivisionError``).

    Returns:
        A new list of ``x / average`` values sorted in descending order.
    """
    average = sum(numbers) / len(numbers)
    return sorted((x / average for x in numbers), reverse=True)
longest, *middle, shortest = get_avg_ratio(lengths)
print(f'Longest: {longest:>4.0%}')
print(f'Shortest: {shortest:>4.0%}')
>>>
Longest: 108%
Shortest: 89%
语句过长,且容易产生问题,比如变量命名颠倒等。(尽量减少返回的变量)
def get_stats(numbers):
    """Return summary statistics for ``numbers``.

    Args:
        numbers: Non-empty sequence of numbers.

    Returns:
        A ``(minimum, maximum, average, median, count)`` tuple.  Five
        positional results are easy to unpack in the wrong order, which is
        the hazard this example illustrates.
    """
    minimum = min(numbers)
    maximum = max(numbers)
    count = len(numbers)
    average = sum(numbers) / count

    ordered = sorted(numbers)
    middle = count // 2
    if count % 2 == 0:
        # Even length: the median is the mean of the two central values.
        median = (ordered[middle - 1] + ordered[middle]) / 2
    else:
        median = ordered[middle]

    return minimum, maximum, average, median, count
minimum, maximum, average, median, count = get_stats(lengths)
print(f'Min: {minimum}, Max: {maximum}')
print(f'Average: {average}, Median: {median}, Count {count}')
>>>
Min: 60, Max: 73
Average: 67.5, Median: 68.5, Count 10
- Item20:抛出异常比返回None要好
假如现在要一个数除以另一个数:
def careful_divide(a, b):
    """Return ``a / b``, or ``None`` when ``b`` is zero.

    This is the None-returning variant the notes warn about: a caller that
    tests ``if not result`` cannot tell a failed division from a valid
    result of 0.
    """
    try:
        return a / b
    except ZeroDivisionError:
        return None
那么实际是根据是否为None来处理异常:
x, y = 1, 0
result = careful_divide(x, y)
if result is None:
print('Invalid inputs')
编程者此时可能会错误地以为是返回False来处理,那么此时当结果为0的时候,运行结果错误:
x, y = 0, 5
result = careful_divide(x, y)
if not result:
print('Invalid inputs') # This runs! But shouldn't
>>>
Invalid inputs
此时,可以拆成两部分返回,一部分是是否正常,另一部分是返回值:
def careful_divide(a, b):
    """Divide ``a`` by ``b`` and report success alongside the result.

    Returns:
        ``(True, a / b)`` on success, or ``(False, None)`` when ``b`` is
        zero.
    """
    try:
        return True, a / b
    except ZeroDivisionError:
        return False, None
那么这样就可以用拆包的方式来进行:
success, result = careful_divide(x, y)
if not success:
print('Invalid inputs')
_, result = careful_divide(x, y)
if not result:
print('Invalid inputs')
但是用户容易又被None和0的问题困惑。
此时不要返回None,而是抛出异常:
def careful_divide(a, b):
    """Return ``a / b``.

    Raises:
        ValueError: When ``b`` is zero.  The original
            ``ZeroDivisionError`` is attached as ``__cause__``.
    """
    try:
        return a / b
    except ZeroDivisionError as e:
        # Chain explicitly so the traceback shows the root cause.
        raise ValueError('Invalid inputs') from e
x, y = 5, 2
try:
result = careful_divide(x, y)
except ValueError:
print('Invalid inputs')
else:
print('Result is %.1f' % result)
>>>
Result is 2.5
最好加上返回的类别,然后在文档中表明抛出什么异常。这样,用户就可以处理异常。整理如下:
def careful_divide(a: float, b: float) -> float:
    """Divides a by b.

    Raises:
        ValueError: When the inputs cannot be divided.  The original
            ZeroDivisionError is attached as ``__cause__``.
    """
    try:
        return a / b
    except ZeroDivisionError as e:
        # Chain explicitly so the traceback shows the root cause.
        raise ValueError('Invalid inputs') from e
- Item21:知道闭包是如何跟变量作用域交互的
当需要某个信息(2,3,5,7)的最优先级排序时,可以通过以下方式实现:
def sort_priority(values, group):
    """Sort ``values`` in place so that members of ``group`` come first.

    Args:
        values: List to sort in place.
        group: Container of priority items (tested with ``in``).
    """
    def helper(x):
        # Tuples compare element by element: group membership is decided
        # first (0 sorts before 1), then the value itself breaks ties.
        if x in group:
            return (0, x)
        return (1, x)

    values.sort(key=helper)
numbers = [8, 3, 1, 2, 5, 4, 7, 6]
group = {5, 3, 2, 7}
sort_priority(numbers, group)
print(numbers)
>>>
[2, 3, 5, 7, 1, 4, 6, 8]
def sort_priority2(numbers, group):
    """Sort ``numbers`` with ``group`` first; meant to report whether any hit.

    This version intentionally shows a scoping bug: the assignment inside
    ``helper`` creates a new local ``found`` instead of updating the
    enclosing one, so the function always returns ``False``.
    """
    found = False

    def helper(x):
        if x in group:
            found = True  # Seems simple, but binds a local of helper
            return (0, x)
        return (1, x)

    numbers.sort(key=helper)
    return found
found = sort_priority2(numbers, group)
print('Found:', found)
print(numbers)
>>>
Found: False
[2, 3, 5, 7, 1, 4, 6, 8]
主要利用了闭包和作用域。此处的found其实是helper里面的found,而不是sort_priority2的found。
def sort_priority2(numbers, group):
    """Annotated copy of the buggy version, marking each ``found`` scope.

    The assignment in ``helper`` defines a separate local variable, so the
    outer ``found`` is never changed and ``False`` is always returned.
    """
    found = False  # Scope: 'sort_priority2'

    def helper(x):
        if x in group:
            found = True  # Scope: 'helper' -- Bad!
            return (0, x)
        return (1, x)

    numbers.sort(key=helper)
    return found
可以利用nonlocal关键字,注意:nonlocal关键字不会向上遍历到模块级的作用域。(global可以)
def sort_priority3(numbers, group):
    """Sort ``numbers`` in place with ``group`` members first.

    Returns:
        ``True`` if at least one element of ``numbers`` is in ``group``,
        otherwise ``False``.
    """
    found = False  # Scope: 'sort_priority3'

    def helper(x):
        # Rebind the enclosing function's variable rather than creating a
        # new local one inside helper.
        nonlocal found
        if x in group:
            found = True
            return (0, x)
        return (1, x)

    numbers.sort(key=helper)
    return found
但是nonlocal可能会由于嵌套导致代码变得复杂和难以读懂,此时可以用类来包裹。
class Sorter:
    """Callable sort key that puts ``group`` members first and records hits.

    Holding the state on an instance replaces the ``nonlocal`` closure.
    """

    def __init__(self, group):
        self.group = group
        # Set to True once any keyed value is found in the group.
        self.found = False

    def __call__(self, x):
        """Return the sort key for ``x``: ``(0, x)`` if in group else ``(1, x)``."""
        if x in self.group:
            self.found = True
            return (0, x)
        return (1, x)
sorter = Sorter(group)
numbers.sort(key=sorter)
assert sorter.found is True
- Item22:用变量位置参数减少视觉噪声
假设我想打印一段message还有一段数值(有则打印,没有就不打印)。
def log(message, values):
    """Print ``message``, followed by ``values`` when there are any.

    Args:
        message: Text to print.
        values: Iterable of values to append after ``': '``, joined by
            ``', '``.  A falsy value (such as an empty list) prints the
            message alone.
    """
    if not values:
        print(message)
        return
    values_str = ', '.join(str(x) for x in values)
    print(f'{message}: {values_str}')
log('My numbers are', [1, 2])
log('Hi there', [])
>>>
My numbers are: 1, 2
Hi there
当没有数值的时候,需要传递空列表,比较繁琐。最好应该不传递数值。
此时可以使用*来处理:
def log(message, *values):  # The only difference
    """Print ``message``, followed by any extra positional ``values``.

    Args:
        message: Text to print.
        *values: Optional values appended after ``': '``, joined by
            ``', '``.  With none given, only the message is printed.
    """
    if not values:
        print(message)
        return
    values_str = ', '.join(str(x) for x in values)
    print(f'{message}: {values_str}')
log('My numbers are', 1, 2)
log('Hi there') # Much better
>>>
My numbers are: 1, 2
Hi there
还记得*表达式吗(Item-13)?它会转换成元组然后再传递给函数。
favorites = [7, 33, 99]
log('Favorite colors', *favorites)
>>>
Favorite colors: 7, 33, 99
所以对于*args这种情况,应该传递少的参数量。如果传递太大的generator会内存溢出。
def my_generator():
    """Yield the integers 0 through 9."""
    yield from range(10)
def my_func(*args):
    """Print the tuple of positional arguments received.

    Unpacking an iterator with ``*`` at the call site consumes it fully
    into this tuple before the function body runs.
    """
    print(args)
it = my_generator()
my_func(*it)
>>>
(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
使用*args还有另一个问题是:尽量应该对应上参数和位置,如果对不上就会出错。(比较难以检查出来的bug)
def log(sequence, message, *values):
    """Print ``sequence - message``, followed by any extra ``values``.

    Adding the leading ``sequence`` parameter silently shifts the
    arguments of callers written for the older ``log(message, *values)``
    signature, which is the hazard this example illustrates.
    """
    if not values:
        print(f'{sequence} - {message}')
        return
    values_str = ', '.join(str(x) for x in values)
    print(f'{sequence} - {message}: {values_str}')
log(1, 'Favorites', 7, 33) # New with *args OK
log(1, 'Hi there') # New message only OK
log('Favorite numbers', 7, 33) # Old usage breaks
>>>
1 - Favorites: 7, 33
1 - Hi there
Favorite numbers - 7: 33
- Item23:用Keyword参数来提供可选的操作
可以按顺序传递函数的参数。
def remainder(number, divisor):
    """Return ``number % divisor``."""
    return number % divisor
assert remainder(20, 7) == 6
也可以用keyword的方式、以(部分)乱序的顺序来指定参数,达到相同的功能:
remainder(20, 7)
remainder(20, divisor=7)
remainder(number=20, divisor=7)
remainder(divisor=7, number=20)
位置参数必须写在keyword参数之前(此外,每个参数也只能指定一次)。下面是位置参数跟在keyword参数之后导致的error:
remainder(number=20, 7)
>>>
Traceback ...
SyntaxError: positional argument follows keyword argument
使用**操作符可以以字典的形式传递keywords和对应的values
my_kwargs = {
'number': 20,
'divisor': 7,
}
assert remainder(**my_kwargs) == 6
当然是可以混用的:
my_kwargs = {
'divisor': 7,
}
assert remainder(number=20, **my_kwargs) == 6
my_kwargs = {
'number': 20,
}
other_kwargs = {
'divisor': 7,
}
assert remainder(**my_kwargs, **other_kwargs) == 6
keywords的灵活性,可以比较明显地在参数赋值的时候,增加可读性。
def print_parameters(**kwargs):
    """Print every keyword argument as a ``name = value`` line."""
    for key, value in kwargs.items():
        print(f'{key} = {value}')
print_parameters(alpha=1.5, beta=9, gamma=4)
>>>
alpha = 1.5
beta = 9
gamma = 4
假如以水桶流入水作为例子:
def flow_rate(weight_diff, time_diff):
    """Return the weight change per unit of time."""
    return weight_diff / time_diff
weight_diff = 0.5
time_diff = 3
flow = flow_rate(weight_diff, time_diff)
print(f'{flow:.3} kg per second')
>>>
0.167 kg per second
此时需要知道某时间长度能有多重的水流,如下:
def flow_rate(weight_diff, time_diff, period):
    """Return the weight change per unit of time, scaled by ``period``."""
    return (weight_diff / time_diff) * period
引入了period,最简单的是一秒的输入:
def flow_rate(weight_diff, time_diff, period=1):
    """Return the weight change per unit of time, scaled by ``period``.

    Args:
        weight_diff: Change in weight.
        time_diff: Elapsed time over which the change occurred.
        period: Multiplier applied to the per-time-unit rate; defaults
            to 1.
    """
    return (weight_diff / time_diff) * period
此时,period变得可选了。
flow_per_second = flow_rate(weight_diff, time_diff)
flow_per_hour = flow_rate(weight_diff, time_diff,
period=3600)
如果想控制重量的单位,不是用kg作为单位的时候,可以在后面扩展新的参数名:
def flow_rate(weight_diff, time_diff, period=1, units_per_kg=1):
    """Return the flow rate, optionally rescaled in time and weight unit.

    Args:
        weight_diff: Change in weight.
        time_diff: Elapsed time over which the change occurred.
        period: Multiplier applied to the per-time-unit rate; defaults
            to 1.
        units_per_kg: Multiplier applied to ``weight_diff`` to convert it
            to another weight unit; defaults to 1 (no conversion).
    """
    return ((weight_diff * units_per_kg) / time_diff) * period
这样做,是在不影响原有函数的操作下进行的扩展(因为默认的参数已经指定了相同的行为。)
pounds_per_hour = flow_rate(weight_diff, time_diff, period=3600, units_per_kg=2.2)
最好还是指定以参数名的方式来调用函数,这样显得比较可读和清晰。
- Item24:用None和Docstrings来指定动态的默认参数
当我们想要默认打印当前时间的日志时,可能会以如下程序操作:
from time import sleep
from datetime import datetime
# NOTE: intentional pitfall.  The default ``datetime.now()`` is evaluated
# once, when the ``def`` statement runs, so every call that omits ``when``
# prints that same timestamp.
def log(message, when=datetime.now()):
    """Print ``message`` prefixed with ``when``."""
    print(f'{when}: {message}')
log('Hi there!')
sleep(0.1)
log('Hello again!')
>>>
2019-07-06 14:06:15.120124: Hi there!
2019-07-06 14:06:15.120124: Hello again!
但是,并不像预期一样,每次调用都调用一次now(),而是最开始加载模块的时候只调用了一次。
而是应该,默认参数设置为None,然后加一段注释,并且在程序中加以控制。
def log(message, when=None):
    """Log a message with a timestamp.

    Args:
        message: Message to print.
        when: datetime of when the message occurred.
            Defaults to the present time.
    """
    # Resolve the default at call time so each call gets a fresh timestamp.
    if when is None:
        when = datetime.now()
    print(f'{when}: {message}')
log('Hi there!')
sleep(0.1)
log('Hello again!')
>>>
2019-07-06 14:06:15.222419: Hi there!
2019-07-06 14:06:15.322555: Hello again!
同样地,下面的例子是类似的:
import json
# NOTE: intentional pitfall.  The ``{}`` default is created once, when the
# ``def`` statement runs, so every failed decode returns the same dict
# object and mutations are shared between callers.
def decode(data, default={}):
    """Parse ``data`` as JSON, returning ``default`` when parsing fails.

    Args:
        data: JSON text to decode.
        default: Value returned when ``json.loads`` raises ``ValueError``.
    """
    try:
        return json.loads(data)
    except ValueError:
        return default
foo = decode('bad data')
foo['stuff'] = 5
bar = decode('also bad')
bar['meep'] = 1
print('Foo:', foo)
print('Bar:', bar)
>>>
Foo: {'stuff': 5, 'meep': 1}
Bar: {'stuff': 5, 'meep': 1}
default每次都是指定的那个{},所以在返回之后的操作时,都是同一个实例。
可以套一个Optional来指定类型注解可能为None或者datetime。
from typing import Optional
def log_typed(message: str,
              when: Optional[datetime] = None) -> None:
    """Log a message with a timestamp.

    Args:
        message: Message to print.
        when: datetime of when the message occurred.
            Defaults to the present time.
    """
    # Resolve the default at call time so each call gets a fresh timestamp.
    if when is None:
        when = datetime.now()
    print(f'{when}: {message}')
- Item25:用Keyword-Only和Positional-Only来加强表述清晰。
如果现在有一个实现安全除法的操作:
def safe_division(number, divisor,
                  ignore_overflow,
                  ignore_zero_division):
    """Divide ``number`` by ``divisor`` with optional error suppression.

    Args:
        number: Dividend.
        divisor: Divisor.
        ignore_overflow: If true, return 0 instead of raising
            ``OverflowError``.
        ignore_zero_division: If true, return ``float('inf')`` instead of
            raising ``ZeroDivisionError``.

    Raises:
        OverflowError: On overflow when ``ignore_overflow`` is false.
        ZeroDivisionError: On a zero divisor when
            ``ignore_zero_division`` is false.
    """
    try:
        return number / divisor
    except OverflowError:
        if ignore_overflow:
            return 0
        raise
    except ZeroDivisionError:
        if ignore_zero_division:
            return float('inf')
        raise
每次调用需要指定参数对应的数值,显得可读性较差。
result = safe_division(1.0, 10**500, True, False)
print(result)
>>>
0
result = safe_division(1.0, 0, False, True)
print(result)
>>>
inf
使用默认的参数,默认不开启选项,然后在使用到哪一个特性的时候,就开启哪个特性。
# Named safe_division_b because every call in the examples that follow
# refers to this variant by that name.
def safe_division_b(number, divisor,
                    ignore_overflow=False,        # changed
                    ignore_zero_division=False):  # changed
    """Divide ``number`` by ``divisor``; both error switches default to off.

    Args:
        number: Dividend.
        divisor: Divisor.
        ignore_overflow: If true, return 0 instead of raising
            ``OverflowError``.
        ignore_zero_division: If true, return ``float('inf')`` instead of
            raising ``ZeroDivisionError``.

    Raises:
        OverflowError: On overflow when ``ignore_overflow`` is false.
        ZeroDivisionError: On a zero divisor when
            ``ignore_zero_division`` is false.
    """
    try:
        return number / divisor
    except OverflowError:
        if ignore_overflow:
            return 0
        raise
    except ZeroDivisionError:
        if ignore_zero_division:
            return float('inf')
        raise
分别开启忽略溢出、忽略除数为0的情况,可读性有所提升。
result = safe_division_b(1.0, 10**500, ignore_overflow=True)
print(result)
result = safe_division_b(1.0, 0, ignore_zero_division=True)
print(result)
>>>
0
inf
但是,依然可以用这种顺序的参数进行传递:
assert safe_division_b(1.0, 10**500, True, False) == 0
可以在参数列表中多加一个“*”号,来强制“*”后的参数必须使用keyword参数名进行调用。
def safe_division_c(number, divisor, *,  # Changed
                    ignore_overflow=False,
                    ignore_zero_division=False):
    """Divide ``number`` by ``divisor``; the switches are keyword-only.

    The bare ``*`` ends the positional parameters, so the two flags can
    only be passed by name.

    Raises:
        OverflowError: On overflow when ``ignore_overflow`` is false.
        ZeroDivisionError: On a zero divisor when
            ``ignore_zero_division`` is false.
    """
    try:
        return number / divisor
    except OverflowError:
        if ignore_overflow:
            return 0
        raise
    except ZeroDivisionError:
        if ignore_zero_division:
            return float('inf')
        raise
safe_division_c(1.0, 10**500, True, False)
>>>
Traceback ...
TypeError: safe_division_c() takes 2 positional arguments but
4 were given
可以看到,使用keyword来进行参数的命名依然是成功的。
result = safe_division_c(1.0, 0, ignore_zero_division=True)
assert result == float('inf')
try:
result = safe_division_c(1.0, 0)
except ZeroDivisionError:
pass # Expected
然而,乱序填入参数的问题还是没有解决:
assert safe_division_c(number=2, divisor=5) == 0.4
assert safe_division_c(divisor=5, number=2) == 0.4
assert safe_division_c(2, divisor=5) == 0.4
同时,当用户依赖于变量名的时候,如果修改了函数的变量名,而不是依赖于位置的时候,
将会出错:
def safe_division_c(numerator, denominator, *,  # Changed
                    ignore_overflow=False,
                    ignore_zero_division=False):
    """Divide ``numerator`` by ``denominator``; switches are keyword-only.

    The first two parameters were renamed, so callers that passed them by
    their old keyword names now fail with ``TypeError``.

    Raises:
        OverflowError: On overflow when ``ignore_overflow`` is false.
        ZeroDivisionError: On a zero divisor when
            ``ignore_zero_division`` is false.
    """
    try:
        return numerator / denominator
    except OverflowError:
        if ignore_overflow:
            return 0
        raise
    except ZeroDivisionError:
        if ignore_zero_division:
            return float('inf')
        raise
safe_division_c(number=2, divisor=5)
>>>
Traceback ...
TypeError: safe_division_c() got an unexpected keyword
argument 'number'
python3.8引入了“/”号特性。可以只指定“/”前的参数为使用位置进行填写的参数。
def safe_division_d(numerator, denominator, /, *,  # Changed
                    ignore_overflow=False,
                    ignore_zero_division=False):
    """Divide ``numerator`` by ``denominator``.

    Parameters before ``/`` are positional-only and parameters after ``*``
    are keyword-only, so the first two names are not part of the calling
    interface.

    Raises:
        OverflowError: On overflow when ``ignore_overflow`` is false.
        ZeroDivisionError: On a zero divisor when
            ``ignore_zero_division`` is false.
    """
    try:
        return numerator / denominator
    except OverflowError:
        if ignore_overflow:
            return 0
        raise
    except ZeroDivisionError:
        if ignore_zero_division:
            return float('inf')
        raise
assert safe_division_d(2, 5) == 0.4 # 正常
safe_division_d(numerator=2, denominator=5) # 异常
>>>
Traceback ...
TypeError: safe_division_d() got some positional-only
arguments passed as keyword arguments: 'numerator, denominator'
"/"和"*"之间的参数,既可以直接按位置传递,也可以按keyword传递。
def safe_division_e(numerator, denominator, /,
                    ndigits=10, *,  # Changed
                    ignore_overflow=False,
                    ignore_zero_division=False):
    """Divide ``numerator`` by ``denominator`` and round the result.

    Args:
        numerator: Dividend (positional-only).
        denominator: Divisor (positional-only).
        ndigits: Digits passed to ``round``; sits between ``/`` and ``*``
            so it may be given by position or by keyword.
        ignore_overflow: Keyword-only.  If true, return 0 instead of
            raising ``OverflowError``.
        ignore_zero_division: Keyword-only.  If true, return
            ``float('inf')`` instead of raising ``ZeroDivisionError``.

    Raises:
        OverflowError: On overflow when ``ignore_overflow`` is false.
        ZeroDivisionError: On a zero divisor when
            ``ignore_zero_division`` is false.
    """
    try:
        fraction = numerator / denominator  # Changed
        return round(fraction, ndigits)     # Changed
    except OverflowError:
        if ignore_overflow:
            return 0
        raise
    except ZeroDivisionError:
        if ignore_zero_division:
            return float('inf')
        raise
result = safe_division_e(22, 7)
print(result)
result = safe_division_e(22, 7, 5)
print(result)
result = safe_division_e(22, 7, ndigits=2)
print(result)
>>>
3.1428571429
3.14286
3.14
- Item26:用functools.wraps来定义函数的Decorators
装饰器很有用,比如打log,debug,register方法等都可以使用。
比如,现在想要打印调用函数的参数和结果:
def trace(func):
    """Decorator that prints each call's arguments and return value.

    ``functools.wraps`` is deliberately omitted here: the returned
    function is the inner ``wrapper``, so the decorated function's
    ``__name__`` and docstring are those of ``wrapper``.
    """
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        print(f'{func.__name__}({args!r}, {kwargs!r}) '
              f'-> {result!r}')
        return result

    return wrapper
可以来trace斐波那契函数:
@trace
def fibonacci(n):
    """Return the n-th Fibonacci number"""
    if n in (0, 1):
        return n
    # The name ``fibonacci`` now refers to the decorated function, so the
    # recursive calls are traced as well.
    return fibonacci(n - 2) + fibonacci(n - 1)
调用fibonacci(4)的trace结果如下:
fibonacci(4)
>>>
fibonacci((0,), {}) -> 0
fibonacci((1,), {}) -> 1
fibonacci((2,), {}) -> 1
fibonacci((1,), {}) -> 1
fibonacci((0,), {}) -> 0
fibonacci((1,), {}) -> 1
fibonacci((2,), {}) -> 1
fibonacci((3,), {}) -> 2
fibonacci((4,), {}) -> 3
但是被装饰之后,函数的名字不再是fibonacci:
print(fibonacci)
>>>
<function trace.<locals>.wrapper at 0x108955dc0>
trace函数返回在其主体中定义的wrapper,而不是原本的func。因此,使用help的时候,出来的不是fibonacci的注释,而是wrapper的注释:
help(fibonacci)
>>>
Help on function wrapper in module __main__:
wrapper(*args, **kwargs)
由于无法确定原始函数的位置,序列化也无法进行:
import pickle
pickle.dumps(fibonacci)
>>>
Traceback ...
AttributeError: Can't pickle local object 'trace.<locals>.wrapper'
解决方案是:在wrapper前面加上@wraps(func)装饰器。
from functools import wraps
def trace(func):
    """Decorator that prints each call's arguments and return value.

    ``functools.wraps`` copies the wrapped function's metadata (such as
    ``__name__`` and ``__doc__``) onto the wrapper, so ``help()`` and
    ``pickle`` see the decorated function under its original name.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        print(f'{func.__name__}({args!r}, {kwargs!r}) '
              f'-> {result!r}')
        return result

    return wrapper
@trace
def fibonacci(n):
    """Return the n-th Fibonacci number"""
    if n in (0, 1):
        return n
    return fibonacci(n - 2) + fibonacci(n - 1)
这样,这两个功能都能正常运行:
help(fibonacci)
>>>
Help on function fibonacci in module __main__:
fibonacci(n)
Return the n-th Fibonacci number
print(pickle.dumps(fibonacci))
>>>
b'\x80\x04\x95\x1a\x00\x00\x00\x00\x00\x00\x00\x8c\x08__main_
_\x94\x8c\tfibonacci\x94\x93\x94.'
使用wraps可以保持一些标准属性,如:(name, module, annotations)。可以确保功能正确性。