1 数据结构和算法
1.1 Unpacking a sequence into separate variables (解包,赋值)
>>> data = [ 'ACME', 50, 91.1, (2012, 12, 21) ] >>> name, shares, price, (year, mon, day) = data
#可以用下划线(_)作为占位变量名来丢弃不需要的值,节省变量名
>>> data = [ 'ACME', 50, 91.1, (2012, 12, 21) ]
>>> _, shares, price, _ = data
1.2 Unpacking elements from iterables of arbitrary length (从任意长度解包)
*middle可以表示中间的所有域,middle是一个list
def drop_first_last(grades):
    """Return the average of ``grades`` with the first and last entries dropped.

    Args:
        grades: An iterable of numbers with at least three entries.

    Returns:
        The arithmetic mean of every grade between the first and the last.

    Raises:
        ValueError: If ``grades`` has fewer than three entries, so that no
            middle grades remain to average.
    """
    # Star-unpacking collects everything between the two ends into a list;
    # the first and last values are intentionally discarded.
    _, *middle, _ = grades
    if not middle:
        raise ValueError("need at least three grades to average the middle ones")
    return sum(middle) / len(middle)
>>> record = ('Dave', 'dave@example.com', '773-555-1212', '847-555-1212')
>>> name, email, *phone_numbers = record
>>> phone_numbers
['773-555-1212', '847-555-1212']
处理变长的数据结构和类型:
# Tagged records: the first field is a tag, and the number of remaining
# fields depends on that tag.
records = [
    ('foo', 1, 2),
    ('bar', 'hello'),
    ('foo', 3, 4),
]


def do_foo(x, y):
    """Handle a 'foo' record by printing its two fields."""
    print('foo', x, y)


def do_bar(s):
    """Handle a 'bar' record by printing its single field."""
    print('bar', s)


# Star-unpacking in the loop target splits each record into its tag and a
# list of the remaining fields, which is then spread into the handler call.
for tag, *args in records:
    if tag == 'foo':
        do_foo(*args)
    elif tag == 'bar':
        do_bar(*args)
1.3 Keeping the last N items
deque 是双端队列(double-ended queue),可以指定最大长度(maxlen)。
>>> from collections import deque
>>> q = deque(maxlen=3)
>>> q.append(1)
>>> q.append(2)
>>> q.append(3)
>>> q
deque([1, 2, 3], maxlen=3)
>>> q.append(4)
>>> q
deque([2, 3, 4], maxlen=3)
#队列已满时从右端追加,最左边(最早加入)的元素 1 被丢弃
查找匹配的行,并同时给出该行之前的最多 N 行(默认 5 行)记录:
from collections import deque


def search(lines, pattern, history=5):
    """Yield each line containing ``pattern`` together with the lines before it.

    Args:
        lines: An iterable of strings (for example an open text file).
        pattern: Substring to look for in each line.
        history: Maximum number of preceding lines to keep for each match.

    Yields:
        ``(line, previous_lines)`` tuples, where ``previous_lines`` is a deque
        holding up to ``history`` lines that came immediately before ``line``.
    """
    previous_lines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            # Because of this ``yield`` the function is a generator. Hand out
            # a snapshot rather than the live deque: the live one keeps
            # changing as the scan continues, so callers that collect the
            # results would otherwise see every match share the same window.
            yield line, deque(previous_lines, maxlen=history)
        previous_lines.append(line)


# Example use on a file
if __name__ == '__main__':
    with open('somefile.txt') as f:
        # Iterate over the generator; each match arrives with its context.
        for line, prevlines in search(f, 'python', 5):
            for pline in prevlines:
                print(pline, end='')
            print(line, end='')
            print('-' * 20)
1.4 Finding the largest or smallest N items (找到最大或最小的N个值)
heapq 模块含有nlargest(),nsmallest()
import heapq

# nlargest()/nsmallest() return the N largest/smallest items as a new list,
# ordered from most to least extreme.
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums))   # Prints [42, 37, 23]
print(heapq.nsmallest(3, nums))  # Prints [-4, 1, 2]
这两个函数可以接受参数key,用来指定复杂结构的排序方法
import heapq

portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65},
]

# key= tells nsmallest() which value of each record to compare on.
cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price'])
print(cheap)
# Prints the three cheapest records, cheapest first:
# [{'name': 'YHOO', 'shares': 45, 'price': 16.35},
#  {'name': 'FB', 'shares': 200, 'price': 21.09},
#  {'name': 'HPQ', 'shares': 35, 'price': 31.75}]
1.6 defaultdict()
from collections import defaultdict

# Sample (key, value) input; several values may share the same key.
pairs = [('a', 1), ('a', 2), ('b', 4)]

# defaultdict(list) creates an empty list the first time a key is accessed,
# so values can be appended without checking whether the key exists yet.
d = defaultdict(list)
for key, value in pairs:
    d[key].append(value)
1.7 Keeping dictionaries in order
OrderedDict()
from collections import OrderedDict

# An OrderedDict remembers the order in which keys were first inserted.
d = OrderedDict()
d['foo'] = 1
d['bar'] = 2
d['spam'] = 3
d['grok'] = 4

# Iteration follows insertion order.
# Outputs "foo 1", "bar 2", "spam 3", "grok 4"
for key, value in d.items():
    print(key, value)
1.8 Calculating with dictionaries
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}

# Pair each value with its key, value first, so min()/max() compare on the
# price while still reporting which key it belongs to.
min_price = min(zip(prices.values(), prices.keys()))
# min_price is (10.75, 'FB')

max_price = max(zip(prices.values(), prices.keys()))
# max_price is (612.78, 'AAPL')

# zip() produces an iterator that can be consumed only once; build a new one
# for every additional pass over the data.
1.9 Finding commonalities in two dictionaries (找到两个字典的相同点)
a = {'x': 1, 'y': 2, 'z': 3}
b = {'w': 10, 'x': 11, 'y': 2}

# Dictionary key and item views support set operations directly.

# Find keys in common
common_keys = a.keys() & b.keys()     # {'x', 'y'}

# Find keys in a that are not in b
only_in_a = a.keys() - b.keys()       # {'z'}

# Find (key, value) pairs in common
common_items = a.items() & b.items()  # {('y', 2)}

# Filter or remove some keys:
# make a new dictionary with certain keys removed
c = {key: a[key] for key in a.keys() - {'z', 'w'}}
# c is {'x': 1, 'y': 2}
1.10 Removing duplicates and maintaining order (去重且保持原来的顺序)
#前提是序列中的元素是可哈希的(hashable),例如数字、字符串或元组
def dedupe(items, key=None):
    """Yield the items of ``items`` in order, skipping repeats.

    Args:
        items: Any iterable.
        key: Optional one-argument function that maps an item to the hashable
            value used to decide whether it is a repeat. When omitted, the
            items themselves are compared and must therefore be hashable.

    Yields:
        The first occurrence of each distinct item, in original order.
    """
    seen = set()
    for item in items:
        marker = item if key is None else key(item)
        if marker not in seen:
            yield item
            seen.add(marker)
>>> a = [1, 5, 2, 1, 9, 1, 5, 10] >>> list(dedupe(a)) [1, 5, 2, 9, 10]
#如果只是为了去重,则可以用set
>>> a [1, 5, 2, 1, 9, 1, 5, 10] >>> set(a) {1, 2, 10, 5, 9}
#一个有用的场景:读文件的时候过滤掉重复的行
with open(somefile,'r') as f: for line in dedupe(f):