Collections:collections 模块包含了内建类型之外的一些有用的工具,例如 Counter、defaultdict、OrderedDict、deque 以及 namedtuple,其中 Counter、deque 以及 defaultdict 是最常用的类。
1:计数器(counter)
如果你想统计一个单词在给定的序列中一共出现了多少次,诸如此类的操作就可以用到 Counter。来看看如何统计一个 list 中各个 item 出现的次数:
from collections import Counter
l = ['cat','is','this','is','cat','is']
c = Counter(l)
print(c)
### 输出结果
Counter({'is': 3, 'cat': 2, 'this': 1})
若要统计这其中不同单词的个数,我们可以这么做
from collections import Counter

words = ['cat', 'is', 'this', 'is', 'cat', 'is']
c = Counter(words)
print(c)
# A Counter has one key per distinct item, so its length is the number
# of different words.
print(len(c))
# Output:
# Counter({'is': 3, 'cat': 2, 'this': 1})
# 3
Counter 是对字典类型的补充:它是 dict 的子类,具备字典的所有功能,并在此基础上增加了计数相关的功能。
1:Counter 类方法,里面给出了详细的例子
class Counter(dict):
    '''Dict subclass for counting hashable items.  Sometimes called a bag
    or multiset.  Elements are stored as dictionary keys and their counts
    are stored as dictionary values.

    >>> c = Counter('abcdeabcdabcaba')  # count elements from a string

    >>> c.most_common(3)                # three most common elements
    [('a', 5), ('b', 4), ('c', 3)]
    >>> sorted(c)                       # list all unique elements
    ['a', 'b', 'c', 'd', 'e']
    >>> ''.join(sorted(c.elements()))   # list elements with repetitions
    'aaaaabbbbcccdde'
    >>> sum(c.values())                 # total of all counts
    15

    >>> c['a']                          # count of letter 'a'
    5
    >>> for elem in 'shazam':           # update counts from an iterable
    ...     c[elem] += 1                # by adding 1 to each element's count
    >>> c['a']                          # now there are seven 'a'
    7
    >>> del c['b']                      # remove all 'b'
    >>> c['b']                          # now there are zero 'b'
    0

    >>> d = Counter('simsalabim')       # make another counter
    >>> c.update(d)                     # add in the second counter
    >>> c['a']                          # now there are nine 'a'
    9

    >>> c.clear()                       # empty the counter
    >>> c
    Counter()

    Note:  If a count is set to zero or reduced to zero, it will remain
    in the counter until the entry is deleted or the counter is cleared:

    >>> c = Counter('aaabbc')
    >>> c['b'] -= 2                     # reduce the count of 'b' by two
    >>> c.most_common()                 # 'b' is still in, but its count is zero
    [('a', 3), ('c', 1), ('b', 0)]

    '''
    # References:
    #   http://en.wikipedia.org/wiki/Multiset
    #   http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
    #   http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
    #   http://code.activestate.com/recipes/259174/
    #   Knuth, TAOCP Vol. II section 4.6.3
2:most_common ---: 返回出现次数最多的前 n 个元素及其计数;n 为 None 时返回全部元素
def most_common(self, n=None):
    '''List the n most common elements and their counts from the most
    common to the least.  If n is None, then list all element counts.

    >>> Counter('abcdeabcdabcaba').most_common(3)
    [('a', 5), ('b', 4), ('c', 3)]

    '''
输出结果
from collections import Counter

l = ['cat', 'is', 'this', 'is', 'cat', 'is', 'name']
print(Counter(l).most_common(4))
print(Counter(l).most_common(3))
print(Counter(l).most_common(2))
print(Counter(l).most_common(1))
# Output:
# [('is', 3), ('cat', 2), ('this', 1), ('name', 1)]
# [('is', 3), ('cat', 2), ('this', 1)]
# [('is', 3), ('cat', 2)]
# [('is', 3)]
3:elements ---: 返回一个迭代器,每个元素按其计数重复出现;计数小于 1(为 0 或负数)的元素会被忽略
def elements(self):
    '''Iterator over elements repeating each as many times as its count.

    >>> c = Counter('ABCABC')
    >>> sorted(c.elements())
    ['A', 'A', 'B', 'B', 'C', 'C']

    # Knuth's example for prime factors of 1836:  2**2 * 3**3 * 17**1
    >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
    >>> product = 1
    >>> for factor in prime_factors.elements():     # loop over factors
    ...     product *= factor                       # and multiply them
    >>> product
    1836

    Note, if an element's count has been set to zero or is a negative
    number, elements() will ignore it.

    '''
    # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
    # Each (element, count) pair is expanded by _repeat(element, count) and
    # the resulting runs are chained into a single lazy iterator.
    return _chain.from_iterable(_starmap(_repeat, self.items()))

# Override dict methods where necessary
4:未实现的方法
@classmethod
def fromkeys(cls, iterable, v=None):
    """Unsupported for counters; always raises NotImplementedError."""
    # There is no equivalent method for counters because setting v=1
    # means that no element can have a count greater than one.
    raise NotImplementedError(
        'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
5: update ---: 更新,添加新的内容
def update(*args, **kwds):
    '''Like dict.update() but add counts instead of replacing them.

    Source can be an iterable, a dictionary, or another Counter instance.

    >>> c = Counter('which')
    >>> c.update('witch')           # add elements from another iterable
    >>> d = Counter('watch')
    >>> c.update(d)                 # add elements from another counter
    >>> c['h']                      # four 'h' in which, witch, and watch
    4

    '''
    # The regular dict.update() operation makes no sense here because the
    # replace behavior results in the some of original untouched counts
    # being mixed-in with all of the other counts for a mismash that
    # doesn't have a straight-forward interpretation in most counting
    # contexts.  Instead, we implement straight-addition.  Both the inputs
    # and outputs are allowed to contain zero and negative counts.

    if not args:
        raise TypeError("descriptor 'update' of 'Counter' object "
                        "needs an argument")
    # `self` is taken from *args so that "self" stays usable as a keyword
    # key in **kwds.
    self, *args = args
    if len(args) > 1:
        raise TypeError('expected at most 1 arguments, got %d' % len(args))
    iterable = args[0] if args else None
    if iterable is not None:
        if isinstance(iterable, Mapping):
            if self:
                self_get = self.get
                for elem, count in iterable.items():
                    self[elem] = count + self_get(elem, 0)
            else:
                super(Counter, self).update(iterable)  # fast path when counter is empty
        else:
            _count_elements(self, iterable)
    if kwds:
        self.update(kwds)
6:subtract -----: 减去计数(与 update 相反),计数可以减为 0 或负数
def subtract(*args, **kwds):
    '''Like dict.update() but subtracts counts instead of replacing them.
    Counts can be reduced below zero.  Both the inputs and outputs are
    allowed to contain zero and negative counts.

    Source can be an iterable, a dictionary, or another Counter instance.

    Usage:

    >>> c = Counter('which')
    >>> c.subtract('witch')             # subtract elements from another iterable
    >>> c.subtract(Counter('watch'))    # subtract elements from another counter
    >>> c['h']                          # 2 in which, minus 1 in witch, minus 1 in watch
    0
    >>> c['w']                          # 1 in which, minus 1 in witch, minus 1 in watch
    -1

    '''
    if not args:
        raise TypeError("descriptor 'subtract' of 'Counter' object "
                        "needs an argument")
    # `self` is taken from *args so that "self" stays usable as a keyword
    # key in **kwds.
    self, *args = args
    if len(args) > 1:
        raise TypeError('expected at most 1 arguments, got %d' % len(args))
    iterable = args[0] if args else None
    if iterable is not None:
        self_get = self.get
        if isinstance(iterable, Mapping):
            # Mapping source: subtract each stored count.
            for elem, count in iterable.items():
                self[elem] = self_get(elem, 0) - count
        else:
            # Plain iterable: every occurrence subtracts one.
            for elem in iterable:
                self[elem] = self_get(elem, 0) - 1
    if kwds:
        self.subtract(kwds)
7: copy ---: 复制
def copy(self):
    'Return a shallow copy.'
    # Build the copy through the instance's own class so subclasses
    # get a copy of their own type.
    return self.__class__(self)
使用方法:
l= Counter(['cat','is',])
l1 = l.copy()
print(l1)
print(l)
### 输出结果
Counter({'cat': 1, 'is': 1})
Counter({'cat': 1, 'is': 1})
二:
有序字典(OrderedDict)
对字典类型的补充,它会记住字典中元素(键)插入的先后顺序。
1:OrderedDict 类方法
class OrderedDict(dict):
    'Dictionary that remembers insertion order'
    # An inherited dict maps keys to values.
    # The inherited dict provides __getitem__, __len__, __contains__, and get.
    # The remaining methods are order-aware.
    # Big-O running times for all methods are the same as regular dictionaries.

    # The internal self.__map dict maps keys to links in a doubly linked list.
    # The circular doubly linked list starts and ends with a sentinel element.
    # The sentinel element never gets deleted (this simplifies the algorithm).
    # The sentinel is in self.__hardroot with a weakref proxy in self.__root.
    # The prev links are weakref proxies (to prevent circular references).
    # Individual links are kept alive by the hard reference in self.__map.
    # Those hard references disappear when a key is deleted from an OrderedDict.
2:clear ----: 清空字典,返回 None
def clear(self):
    'od.clear() -> None.  Remove all items from od.'
    root = self.__root
    # Make root link to itself in both directions, i.e. an empty list.
    root.prev = root.next = root
    self.__map.clear()
    dict.clear(self)
使用方法:
d= {'name':'zhang','age':24}
print(OrderedDict(d).clear())
### 输出结果
None
3: popitem ----: 删除并返回一组键值对。last 为 True(默认值)时删除并返回最后一组,为 False 时删除并返回第一组
def popitem(self, last=True):
    '''od.popitem() -> (k, v), return and remove a (key, value) pair.
    Pairs are returned in LIFO order if last is true or FIFO order if false.

    Raises KeyError if the dictionary is empty.
    '''
    if not self:
        raise KeyError('dictionary is empty')
    root = self.__root
    if last:
        # Unlink the node just before root (the newest entry).
        link = root.prev
        link_prev = link.prev
        link_prev.next = root
        root.prev = link_prev
    else:
        # Unlink the node just after root (the oldest entry).
        link = root.next
        link_next = link.next
        root.next = link_next
        link_next.prev = root
    key = link.key
    del self.__map[key]
    value = dict.pop(self, key)
    return key, value
使用方法:
d= OrderedDict((('name','zhang'),('age',24),('job','IT'),('天天学习','好好向上')))
print("default:",d.popitem())
print("------------------------")
print(d)
print("False:",d.popitem(last=False))
print("------------------------")
print(d)
print("True:",d.popitem(last=True))
print("------------------------")
print(d)
### 输出结果
default: ('天天学习', '好好向上')
------------------------
OrderedDict([('name', 'zhang'), ('age', 24), ('job', 'IT')])
False: ('name', 'zhang')
------------------------
OrderedDict([('age', 24), ('job', 'IT')])
True: ('job', 'IT')
------------------------
OrderedDict([('age', 24)])
4:move_to_end ---: 将指定的 key 移动到字典的末尾;last 默认值为 True,表示移动到末尾,为 False 时则移动到开头;key 不存在时抛出 KeyError
def move_to_end(self, key, last=True):
    '''Move an existing element to the end (or beginning if last==False).

    Raises KeyError if the element does not exist.
    When last=True, acts like a fast version of self[key]=self.pop(key).

    '''
    link = self.__map[key]
    # Unlink the node from its current position.
    link_prev = link.prev
    link_next = link.next
    link_prev.next = link_next
    link_next.prev = link_prev
    root = self.__root
    if last:
        # Re-insert between the current tail and root.  A separate local
        # is used so the `last` parameter is not overwritten.
        tail = root.prev
        link.prev = tail
        link.next = root
        tail.next = root.prev = link
    else:
        # Re-insert between root and the current head.
        first = root.next
        link.prev = root
        link.next = first
        root.next = first.prev = link
使用方法:
d= OrderedDict((('name','zhang'),('age',24),('job','IT'),('天天学习','好好向上')))
d.move_to_end('job',last=False)
print("False_job:",d)
d = OrderedDict((('name','zhang'),('age',24),('job','IT'),('天天学习','好好向上')))
d.move_to_end('job')
print("======================================================================")
print("True_job:",d)
### 输出结果
False_job: OrderedDict([('job', 'IT'), ('name', 'zhang'), ('age', 24), ('天天学习', '好好向上')])
======================================================================
True_job: OrderedDict([('name', 'zhang'), ('age', 24), ('天天学习', '好好向上'), ('job', 'IT')])
5: pop ----: 删除指定的 key 并返回对应的值;如果 key 不存在且未提供 default,则抛出 KeyError
# NOTE(review): __marker is defined outside this excerpt; it appears to be a
# class-level sentinel meaning "no default supplied" -- confirm.
def pop(self, key, default=__marker):
    '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
    value.  If key is not found, d is returned if given, otherwise KeyError
    is raised.

    '''
    if key in self:
        result = self[key]
        del self[key]
        return result
    if default is self.__marker:
        raise KeyError(key)
    return default
使用方法:
d= OrderedDict((('name','zhang'),('age',24),('job','IT'),('天天学习','好好向上')))
d1 = d.pop('name')
print(d1)
### 输出结果
zhang
6:setdefault --: 返回 key 对应的值;key 不存在时先将其值设置为 default(可选,默认为 None)再返回
def setdefault(self, key, default=None):
    'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
    if key in self:
        return self[key]
    # Missing key: store the default and hand the same object back.
    self[key] = default
    return default
使用方法:
d = OrderedDict((('name','zhang'),('age',24),('job','IT'),('天天学习','好好向上')))
d.setdefault('phone','133')
d.setdefault('addr')
print(d)
### 输出结果
OrderedDict([('name', 'zhang'), ('age', 24), ('job', 'IT'), ('天天学习', '好好向上'), ('phone', '133'), ('addr', None)])
7: copy ---: 复制
def copy(self):
    'od.copy() -> a shallow copy of od'
    # Build the copy through the instance's own class so subclasses
    # get a copy of their own type.
    return self.__class__(self)
使用方法:
d= OrderedDict((('name','zhang'),('age',24),('job','IT'),('天天学习','好好向上')))
d1 = d.copy()
print(d1)
### 输出结果
OrderedDict([('name', 'zhang'), ('age', 24), ('job', 'IT'), ('天天学习', '好好向上')])
8:fromkeys ---: 生成新数据,如果未指定value,将默认为None(感觉为什么这个功能要加在OrderedDict里)
@classmethod
def fromkeys(cls, iterable, value=None):
    '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
    If not specified, the value defaults to None.

    '''
    # Start from a fresh instance of cls and give every key the same value.
    self = cls()
    for key in iterable:
        self[key] = value
    return self
测试的没头没脑的(和d 没有关系了都)
d= OrderedDict((('name','zhang'),('age',24),('job','IT'),('天天学习','好好向上')))
d1 = d.fromkeys(('name','SH')) ### 未指定 value值
print(d1)
print("==========================")
d1 = d.fromkeys(('name','li'),value='123') ### 指定 value值
print(d1)
### 输出结果
OrderedDict([('name', None), ('SH', None)])
==========================
OrderedDict([('name', '123'), ('li', '123')])
三:
默认字典(defaultdict)
对字典的类型的补充,默认给字典的值设置了一个类型,dict的setdefault()方法在key不存在时会建立一个默认值。与此相反,defaultdict会在初始化时指定默认值。
1:类方法:接收一个类型做为参数而不是一个元素
class defaultdict(dict):
    """
    defaultdict(default_factory[, ...]) --> dict with default factory

    The default factory is called without arguments to produce
    a new value when a key is not present, in __getitem__ only.
    A defaultdict compares equal to a dict with the same items.
    All remaining arguments are treated the same as if they were
    passed to the dict constructor, including keyword arguments.
    """
使用方法:
from collections import defaultdict


def test1():
    """Default factory: supplies the value for any missing key."""
    return "namezhang"


# The first argument is the factory; the keyword arguments are initial items.
l = defaultdict(test1, job="it")
print(l)
### 输出结果
defaultdict(<function test1 at 0x029C31E0>, {'job': 'it'})
2: copy ---: 复制
def copy(self):  # real signature unknown; restored from __doc__
    """ D.copy() -> a shallow copy of D. """
    pass
示例
有如下值集合 [11,22,33,44,55,66,77,88,99,90...],将所有大于 66 的值保存至字典的第一个 key 中,将小于等于 66 的值保存至第二个 key 的值中。
即: {'k1': 大于66 , 'k2': 小于等于66}
原生字典的实现方式
values = [11, 22, 33, 44, 55, 66, 77, 88, 99, 90]
my_dict = {}
for value in values:
    # dict.has_key() was removed in Python 3; use the `in` operator.
    if value > 66:
        if 'k1' in my_dict:
            my_dict['k1'].append(value)
        else:
            my_dict['k1'] = [value]
    else:
        if 'k2' in my_dict:
            my_dict['k2'].append(value)
        else:
            my_dict['k2'] = [value]
默认字典的实现方式
from collections import defaultdict

values = [11, 22, 33, 44, 55, 66, 77, 88, 99, 90]
# Missing keys start out as an empty list, so no existence check is needed.
my_dict = defaultdict(list)

for value in values:
    if value > 66:
        my_dict['k1'].append(value)
    else:
        my_dict['k2'].append(value)
四:可命名元组(namedtuple)
生成可以使用名字来访问元素内容的tuple子类,根据namedtuple可以创建一个包含tuple所有功能以及其他功能的类型
使用方法:
import collections

# Field names are separated by whitespace: name, age, gender.
persion = collections.namedtuple('persion', 'name age gender')
print(type(persion))
bob = persion(name='bob', age=30, gender='male')
print(bob)
zhang = persion(name='zhang', age=25, gender='male')
# Use a separate loop variable so the `persion` class is not overwritten.
for p in [bob, zhang]:
    print("%s is %d years old %s" % p)
### 输出 结果
<class 'type'>
persion(name='bob', age=30, gender='male')
bob is 30 years old male
zhang is 25 years old male
可调用的方法
class persion(builtins.tuple)
| persion(name, age, gender)
|
| Method resolution order:
| persion
| builtins.tuple
| builtins.object
|
| Methods defined here:
|
| __getnewargs__(self)
| Return self as a plain tuple. Used by copy and pickle.
|
| __getstate__(self)
| Exclude the OrderedDict from pickling
|
| __repr__(self)
| Return a nicely formatted representation string
|
| _asdict(self)
| Return a new OrderedDict which maps field names to their values.
|
| _replace(_self, **kwds)
| Return a new persion object replacing specified fields with new values
|
| ----------------------------------------------------------------------
| Classmethods defined here:
|
| _make(iterable, new=<built-in method __new__ of type object>, len=<built-in function len>) from builtins.type
| Make a new persion object from a sequence or iterable
|
| ----------------------------------------------------------------------
| Static methods defined here:
|
| __new__(_cls,name, age, gender)
| Create new instance of persion(name, age, gender)
|
| ----------------------------------------------------------------------
| Data descriptors defined here:
|
| __dict__
| A new OrderedDict mapping field names to their values
|
| age
| Alias for field number 1
|
| gender
| Alias for field number 2
|
| name
| Alias for field number 0
|
| ----------------------------------------------------------------------
| Data and other attributes defined here:
|
| _fields = ('name', 'age', 'gender')
|
| _source = "from builtins import property as _property,tupl..._itemget...
|
| ----------------------------------------------------------------------
| Methods inherited from builtins.tuple:
|
| __add__(self, value, /)
| Return self+value.
|
| __contains__(self, key, /)
| Return key in self.
|
| __eq__(self, value, /)
| Return self==value.
|
| __ge__(self, value, /)
| Return self>=value.
|
| __getattribute__(self, name, /)
| Return getattr(self, name).
|
| __getitem__(self, key, /)
| Return self[key].
|
| __gt__(self, value, /)
| Return self>value.
|
| __hash__(self, /)
| Return hash(self).
|
| __iter__(self, /)
| Implement iter(self).
|
| __le__(self, value, /)
| Return self<=value.
|
| __len__(self, /)
| Return len(self).
|
| __lt__(self, value, /)
| Return self<value.
|
| count(...)
| T.count(value) -> integer -- return number of occurrences of value
|
| index(...)
| T.index(value, [start, [stop]]) -> integer -- return first index of value.
| Raises ValueError if the value is not present.
五:双向队列(deque)
一个线程安全的双向队列,可以从两端添加删除元素,支持序列的常用操作。
1: 类方法
class deque(object):
    """
    deque([iterable[, maxlen]]) --> deque object

    Build an ordered collection with optimized access from its endpoints.
    """
2: append ---: 添加:从右侧添加
def append(self, *args, **kwargs):  # real signature unknown
    """ Add an element to the right side of the deque. """
    pass
使用方法:
from collections import deque
l = deque(['name','age'])
l.append('job')
print(l)
### 输出结果
deque(['name', 'age', 'job'])
3: appendleft: ----: 左侧添加
def appendleft(self, *args, **kwargs):  # real signature unknown
    """ Add an element to the left side of the deque. """
    pass
使用方法:
from collections import deque
l = deque(['name','age'])
l.appendleft('job')
print(l)
### 输出结果
deque(['job', 'name', 'age'])
4: clear ---: 清空
def clear(self, *args, **kwargs):  # real signature unknown
    """ Remove all elements from the deque. """
    pass
使用方法:
from collections import deque
l = deque(['name','age'])
l.clear()
print(l)
### 输出结果
deque([])
5: count ---:计算出现次数
def count(self, value):  # real signature unknown; restored from __doc__
    """ D.count(value) -> integer -- return number of occurrences of value """
    # Stub only: the placeholder result is always 0.
    return 0
使用方法:
from collections import deque
l = deque(['1','2','1','2','3'])
print(l.count('1'))
### 输出结果
2
6: extend ----:扩展,从右侧添加
def extend(self, *args, **kwargs):  # real signature unknown
    """ Extend the right side of the deque with elements from the iterable """
    pass
使用方法:
from collections import deque
l = deque(['1','2','1','2','3'])
l.extend('5')
print(l)
### 输出结果
deque(['1', '2', '1', '2', '3', '5'])
7: extendleft ---:左侧扩展添加,与extend相反方向
def extendleft(self, *args, **kwargs):  # real signature unknown
    """ Extend the left side of the deque with elements from the iterable """
    pass
8: pop --- :删除并返回最右侧的数据
def pop(self, *args, **kwargs):  # real signature unknown
    """ Remove and return the rightmost element. """
    pass
使用方法:
l = deque(['1','2','1','2','3'])
print(l.pop())
### 输出结果
3
9: popleft ---: 删除并返回最左侧的数据,与pop相反
def popleft(self, *args, **kwargs):  # real signature unknown
    """ Remove and return the leftmost element. """
    pass
10: remove ---: 删除第一次出现的值
def remove(self, value):  # real signature unknown; restored from __doc__
    """ D.remove(value) -- remove first occurrence of value. """
    pass
使用方法:
from collections import deque

l = deque(['1', '2', '1', '2', '3'])
l.remove('2')  # only the first '2' is removed
print(l)
# Output:
# deque(['1', '1', '2', '3'])
11: reverse ---:取自己的反转值
def reverse(self):  # real signature unknown; restored from __doc__
    """ D.reverse() -- reverse *IN PLACE* """
    pass
使用方法:
from collections import deque

l = deque(['1', '2', '1', '2', '3'])
l.reverse()  # reverses in place and returns None
print(l)
# Output:
# deque(['3', '2', '1', '2', '1'])
12: rotate ---: 旋转:将队列向右循环移动 n 步(默认 n=1),也就是从队列的最右侧拿数据放到最左边;n 为负数时向左旋转
def rotate(self, *args, **kwargs):  # real signature unknown
    """ Rotate the deque n steps to the right (default n=1).  If n is negative, rotates left. """
    pass
使用方法:
from collections import deque

l = deque(['1', '2', '3', '4', '5'])
l.rotate(3)  # move the last three items to the front
print(l)
rotated_by_three = list(l)

l = deque(['1', '2', '3', '4', '5'])
l.rotate()  # default is one step to the right
print(l)
# Output:
# deque(['3', '4', '5', '1', '2'])
# deque(['5', '1', '2', '3', '4'])