Python cookbook-读书笔记01

1 数据结构和算法

1.1 Unpacking a sequence into separate variable(解包,赋值)

>>> data = [ 'ACME', 50, 91.1, (2012, 12, 21) ]

>>> name, shares, price, (year, mon, day) = data
#可以用下划线作为占位变量名,丢弃不需要的值,节省变量名

>>> data = [ 'ACME', 50, 91.1, (2012, 12, 21) ]

>>> _, shares, price, _ = data

1.2 Unpacking elements from iterables of arbitrary length (从任意长度解包)

*middle可以表示中间的所有域,middle是一个list

def drop_first_last(grades):
    """Return ``avg`` of the grades between the first and the last one.

    Star-unpacking gathers every element between the two ends into a list;
    ``grades`` therefore needs at least two elements.
    """
    # NOTE(review): ``avg`` is not defined in this snippet; presumably a
    # helper that averages a list -- confirm where it comes from.
    _first, *between, _last = grades
    return avg(between)
>>> user_record = ('Dave', 'dave@example.com', '773-555-1212', '847-555-1212')

>>> name, email, *phone_numbers = user_record

>>> phone_numbers

['773-555-1212', '847-555-1212']

 处理变长的数据结构和类型:

# Records of varying length: the first field is a tag, the remaining fields
# are the arguments that belong to that tag.
records = [('foo', 1, 2), ('bar', 'hello'), ('foo', 3, 4)]


def do_foo(x, y):
    """Print a 'foo' record together with its two values."""
    print('foo', x, y)


def do_bar(s):
    """Print a 'bar' record together with its single value."""
    print('bar', s)


# Map each known tag to its handler; records with any other tag are skipped.
_handlers = {'foo': do_foo, 'bar': do_bar}

# Star-unpacking splits every record into its tag and a list of arguments.
for tag, *args in records:
    handler = _handlers.get(tag)
    if handler is not None:
        handler(*args)

1.3 Keeping the last N items 

  deque 双端队列(double-ended queue),可以指定最大长度。

>>> from collections import deque

>>> q = deque(maxlen=3)

>>> q.append(1)

>>> q.append(2)

>>> q.append(3)

>>> q

deque([1, 2, 3], maxlen=3)

>>> q.append(4)

>>> q

deque([2, 3, 4], maxlen=3)

#队列已满时,最左边(最早加入)的元素被丢弃

在文件中查找匹配行,并保留匹配行之前的最后5行记录:

from collections import deque



def search(lines, pattern, history=5):
    """Yield each line containing ``pattern`` along with the lines before it.

    This is a generator function: calling it returns an iterator that
    produces results lazily as ``lines`` is consumed.

    Args:
        lines: Iterable of strings, for example an open text file.
        pattern: Substring to look for in each line.
        history: Maximum number of preceding lines to remember.

    Yields:
        ``(line, previous_lines)`` tuples, where ``previous_lines`` is a deque
        holding up to ``history`` lines seen before ``line``. The same deque
        object is reused for every match and keeps changing as iteration
        continues, so copy it if it has to be kept.
    """
    previous_lines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            yield line, previous_lines
        # Record every line inside the loop, after the match check, so the
        # history holds the lines before the current one but not the line
        # itself.
        previous_lines.append(line)

# Example use on a file

if __name__ == '__main__':
    with open('somefile.txt') as f:
        # Iterate over the generator: each match arrives together with the
        # lines that preceded it.
        for line, prevlines in search(f, 'python', 5):
            # sep='' and end='' mean nothing is added between or after the
            # context lines.
            print(*prevlines, sep='', end='')
            print(line, end='')
            print('-' * 20)

1.4 Finding the largest or smallest N items (找到最大或最小的N个值)

  heapq 模块含有nlargest(),nsmallest()

import heapq

nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]

# The three largest values.
three_largest = heapq.nlargest(3, nums)
print(three_largest)  # Prints [42, 37, 23]

# The three smallest values.
three_smallest = heapq.nsmallest(3, nums)
print(three_smallest)  # Prints [-4, 1, 2]

这两个函数可以接受参数key,用来指定复杂结构的排序方法

import heapq



portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65},
]


def _price_of(stock):
    """Return the price field used to rank a portfolio entry."""
    return stock['price']


# The three entries with the lowest price, cheapest first.
cheap = heapq.nsmallest(3, portfolio, key=_price_of)
print(cheap)

#return

[{'price': 16.35, 'shares': 45, 'name': 'YHOO'}, {'price': 21.09, 'shares': 200, 'name': 'FB'}, {'price': 31.75, 'shares': 35, 'name': 'HPQ'}]

 1.6 defaultdict()

# Group values by key: looking up a missing key creates an empty list for it,
# so append() can be called without checking whether the key exists.
from collections import defaultdict

d = defaultdict(list)

# NOTE(review): `pairs` is not defined in this snippet; it is assumed to be an
# iterable of (key, value) tuples -- confirm against the surrounding code.
for key, value in pairs:
    d[key].append(value)

1.7 Keeping dictionaries in order

 OrderedDict()

from collections import OrderedDict

# An OrderedDict iterates over its keys in the order they were inserted.
d = OrderedDict()
d['foo'] = 1
d['bar'] = 2
d['spam'] = 3
d['grok'] = 4

for key in d:
    # The loop body has to be indented under the `for` statement.
    print(key, d[key])

# Outputs "foo 1", "bar 2", "spam 3", "grok 4"

1.8 Calculating with dictionaries

prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}

# Pair each price with its name so the tuples compare by price first.
price_name_pairs = zip(prices.values(), prices.keys())
min_price = min(price_name_pairs)
# min_price is (10.75, 'FB')

# A zip() iterator can only be consumed once, so build a fresh one.
price_name_pairs = zip(prices.values(), prices.keys())
max_price = max(price_name_pairs)
# max_price is (612.78, 'AAPL')



#zip()返回的迭代器只能被消费一次,再次使用需要重新构建

1.9 Finding commonalities in two dictionaries (找到两个字典的相同点)

a = {'x': 1, 'y': 2, 'z': 3}
b = {'w': 10, 'x': 11, 'y': 2}

# Key and item views support set operations directly.

# Keys found in both dictionaries
keys_in_both = a.keys() & b.keys()  # { 'x', 'y' }

# Keys of a that b does not have
keys_only_in_a = a.keys() - b.keys()  # { 'z' }

# (key, value) pairs found in both dictionaries
pairs_in_both = a.items() & b.items()  # { ('y', 2) }

# Filter or remove some keys:
# build a new dictionary from a, leaving out the unwanted keys.
unwanted = {'z', 'w'}
kept_keys = a.keys() - unwanted
c = {key: a[key] for key in kept_keys}
# c is {'x': 1, 'y': 2}

# c is {'x': 1, 'y': 2}

1.10 Removing duplicates and maintaining order (去重且保持原来的顺序)

 #如果序列中的元素是可哈希(hashable)的,例如数字、字符串、元组

def dedupe(items, key=None):
    """Yield the elements of ``items`` in order, skipping ones already seen.

    Args:
        items: Iterable to de-duplicate.
        key: Optional function mapping an element to a hashable value that
            decides whether two elements count as duplicates. When omitted,
            the elements themselves are used and must be hashable.

    Yields:
        Each element whose (keyed) value has not been produced before.
    """
    seen = set()
    for item in items:
        # Track the keyed value, but always hand back the original element.
        marker = item if key is None else key(item)
        if marker not in seen:
            yield item
            seen.add(marker)
>>> a = [1, 5, 2, 1, 9, 1, 5, 10]

>>> list(dedupe(a))

[1, 5, 2, 9, 10]

#如果只是为了去重,则可以用set

>>> a

[1, 5, 2, 1, 9, 1, 5, 10]

>>> set(a)

{1, 2, 10, 5, 9}

#一个有用的场景:读文件的时候过滤掉重复的行

# NOTE(review): `somefile` is not defined in this snippet; it is assumed to
# hold the path of the file to read -- confirm.
with open(somefile,'r') as f:
    # dedupe() passes each distinct line through once, in file order.
    for line in dedupe(f):
        ...  # placeholder body: handle each unique line here

 

你可能感兴趣的:(python)