# Union: every element that appears in a or in b (duplicates collapsed).
list(set(a) | set(b))
# Intersection: only the elements that appear in both a and b.
list(set(a) & set(b))
# Difference a - b: the elements of a that do not appear in b.
list(set(a) - set(b))
# Build a 3-D Panel of random values: 3 items ('a'..'c') x 4 major_axis
# labels ('h'..'k') x 5 minor_axis labels ('v'..'z'), then print its summary.
# NOTE(review): pd.Panel was removed in pandas 0.25 -- this snippet needs an
# older pandas; confirm the version these notes target.
panel = pd.Panel(np.random.rand(3,4,5), items=list('abc'), major_axis=list('hijk'), minor_axis=list('vwxyz'))
print(panel)
Dimensions: 3 (items) x 4 (major_axis) x 5 (minor_axis)
Items axis: a to c
Major_axis axis: h to k
Minor_axis axis: v to z
# Select item 'a' from the panel; the result prints as a 2-D table with
# major_axis labels as rows and minor_axis labels as columns.
print(panel['a'])
v w x y z
h 0.404568 0.693127 0.880035 0.853612 0.719653
i 0.926706 0.899753 0.655383 0.383503 0.811610
j 0.230682 0.867128 0.899255 0.474246 0.857168
k 0.617902 0.529676 0.024016 0.383864 0.518112
# Take the cross-section at major_axis label 'i'; the result has minor_axis
# labels as rows and items as columns.
print(panel.major_xs('i'))
a b c
v 0.926706 0.171814 0.557603
w 0.899753 0.175296 0.788721
x 0.655383 0.934858 0.975449
y 0.383503 0.797520 0.324475
z 0.811610 0.689040 0.465611
# If the major_axis label is not known, select by position instead:
# panel.major_axis[index] yields the label (index 1 here is 'i'), which is
# then passed to major_xs.
print(panel.major_xs(panel.major_axis[1]))
a b c
v 0.926706 0.171814 0.557603
w 0.899753 0.175296 0.788721
x 0.655383 0.934858 0.975449
y 0.383503 0.797520 0.324475
z 0.811610 0.689040 0.465611
#minor_axis切片同上
Pandas 去重
# Six-row example frame indexed 'h'..'m'; column b holds the value 1 three
# times (rows h, j and l).
df = pd.DataFrame(
    data={'a': list(range(1, 7)), 'b': [1, 2, 1, 6, 1, 8]},
    index=['h', 'i', 'j', 'k', 'l', 'm'],
)
df  # bare expression so an interactive session echoes the frame
Out[239]:
a b
h 1 1
i 2 2
j 3 1
k 4 6
l 5 1
m 6 8
# Column b of df contains three 1s. keep='first' retains the first occurrence
# (row h); use keep='last' to retain the last occurrence (row l) instead.
# To de-duplicate on several columns jointly, pass e.g. subset=['a', 'b'];
# a row is then dropped only when both a and b match an earlier row.
df.drop_duplicates(subset='b', keep='first')
Out[240]:
a b
h 1 1
i 2 2
k 4 6
m 6 8
给成绩分段打标签
In [50]: scores
Out[50]: array([61, 96, 69, 48, 0, 2, 17, 71, 54, 10])
In [51]: levels = [0, 60, 75, 85, 100]
In [52]: bs = np.digitize(scores, bins = levels)
In [53]: bs
Out[53]: array([2, 4, 2, 1, 1, 1, 1, 2, 1, 1])
In [54]: marks = np.array(['', '不及格', '及格', '良好', '优秀'])
In [55]: marks[bs]
Out[55]:
array(['及格', '优秀', '及格', '不及格', '不及格', '不及格', '不及格', '及格', '不及格', '不及格'],
dtype='<U3')
from collections import Counter #引入Counter
# Sample data: 24 integers, a few of which occur more than once.
a = [
    29, 36, 57, 12, 79, 43, 23, 56, 28, 11, 14, 15,
    16, 37, 24, 35, 17, 24, 33, 15, 39, 46, 52, 13,
]
# Occurrence count per distinct value, kept as a plain dict.
b = dict(Counter(a))
# Show only the values that occur more than once.
print([num for num in b if b[num] > 1])
# Show those values together with how many times each one occurs.
print({num: b[num] for num in b if b[num] > 1})