.sort_index()方法在指定轴上根据索引(标签)进行排序,默认升序;axis=0(默认)按行索引排序,axis=1按列索引排序
.sort_index(axis=0, ascending=True)
import pandas as pd
import numpy as np
b = pd.DataFrame(np.arange(20).reshape(4,5),index=['c','a','d','b'])
b
Out[4]:
0 1 2 3 4
c 0 1 2 3 4
a 5 6 7 8 9
d 10 11 12 13 14
b 15 16 17 18 19
b.sort_index()
Out[5]:
0 1 2 3 4
a 5 6 7 8 9
b 15 16 17 18 19
c 0 1 2 3 4
d 10 11 12 13 14
b.sort_index(ascending=False)
Out[6]:
0 1 2 3 4
d 10 11 12 13 14
c 0 1 2 3 4
b 15 16 17 18 19
a 5 6 7 8 9
c =b.sort_index(axis=1,ascending=False)
c
Out[8]:
4 3 2 1 0
c 4 3 2 1 0
a 9 8 7 6 5
d 14 13 12 11 10
b 19 18 17 16 15
c = c.sort_index()
c
Out[10]:
4 3 2 1 0
a 9 8 7 6 5
b 19 18 17 16 15
c 4 3 2 1 0
d 14 13 12 11 10
.sort_values()方法在指定轴上根据数值进行排序,默认升序
Series.sort_values(axis=0,ascending=True)
DataFrame.sort_values(by,axis=0,ascending=True)
by:排序所依据的标签或标签列表;axis=0时by为列标签(按该列的数值对各行排序),axis=1时by为行标签(按该行的数值对各列排序)
import pandas as pd
import numpy as np
b = pd.DataFrame(np.arange(20).reshape(4,5),index=['c','a','d','b'])
b
Out[4]:
0 1 2 3 4
c 0 1 2 3 4
a 5 6 7 8 9
d 10 11 12 13 14
b 15 16 17 18 19
c = b.sort_values(2,ascending=False)
c
Out[6]:
0 1 2 3 4
b 15 16 17 18 19
d 10 11 12 13 14
a 5 6 7 8 9
c 0 1 2 3 4
c = c.sort_values('a',axis=1,ascending=False)
c
Out[8]:
4 3 2 1 0
b 19 18 17 16 15
d 14 13 12 11 10
a 9 8 7 6 5
c 4 3 2 1 0
无论升序还是降序,NaN统一放到排序末尾(na_position参数默认为'last')
import pandas as pd
import numpy as np
a = pd.DataFrame(np.arange(12).reshape(3,4),index=['a','b','c'])
a
Out[4]:
0 1 2 3
a 0 1 2 3
b 4 5 6 7
c 8 9 10 11
b = pd.DataFrame(np.arange(20).reshape(4,5),index=['c','a','b','d'])
b
Out[6]:
0 1 2 3 4
c 0 1 2 3 4
a 5 6 7 8 9
b 10 11 12 13 14
d 15 16 17 18 19
c =a + b
c
Out[8]:
0 1 2 3 4
a 5.0 7.0 9.0 11.0 NaN
b 14.0 16.0 18.0 20.0 NaN
c 8.0 10.0 12.0 14.0 NaN
d NaN NaN NaN NaN NaN
c.sort_values(2,ascending = False)
Out[9]:
0 1 2 3 4
b 14.0 16.0 18.0 20.0 NaN
c 8.0 10.0 12.0 14.0 NaN
a 5.0 7.0 9.0 11.0 NaN
d NaN NaN NaN NaN NaN
c.sort_values(2,ascending(2,ascending = True)) #错误示范:ascending应作为关键字参数写成ascending=True,误写成函数调用会触发NameError
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-10-7fe7e363a39a> in <module>
----> 1 c.sort_values(2,ascending(2,ascending = True))
NameError: name 'ascending' is not defined
c.sort_values(2,ascending = True)
Out[11]:
0 1 2 3 4
a 5.0 7.0 9.0 11.0 NaN
c 8.0 10.0 12.0 14.0 NaN
b 14.0 16.0 18.0 20.0 NaN
d NaN NaN NaN NaN NaN
.describe()方法给出基本统计汇总(count、mean、std、min、25%/50%/75%分位数、max);对Series调用时返回值仍是Series,可用标签取出单项
import pandas as pd
a = pd.Series([9,8,7,6],index=['a','b','c','d'])
a
Out[3]:
a 9
b 8
c 7
d 6
dtype: int64
a.describe()
Out[4]:
count 4.000000
mean 7.500000
std 1.290994
min 6.000000
25% 6.750000
50% 7.500000
75% 8.250000
max 9.000000
dtype: float64
type(a.describe())
Out[5]: pandas.core.series.Series
a.describa()['count'] #一定要注意规范书写代码
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-6-30160ee76872> in <module>
----> 1 a.describa()['count']
~\anaconda3\lib\site-packages\pandas\core\generic.py in __getattr__(self, name)
5272 if self._info_axis._can_hold_identifiers_and_holds_name(name):
5273 return self[name]
-> 5274 return object.__getattribute__(self, name)
5275
5276 def __setattr__(self, name: str, value) -> None:
AttributeError: 'Series' object has no attribute 'describa'
a.describe()['count']
Out[7]: 4.0
a.describe()['max']
Out[8]: 9.0
累计统计:.cumsum()、.cumprod()、.cummin()、.cummax()默认沿0轴(逐行向下)依次计算前1、2、…、n个数的累计和、累计积、累计最小值、累计最大值
import pandas as pd
import numpy as np
b = pd.DataFrame(np.arange(20).reshape(4,5),index=['c','a','d','b'])
b
Out[4]:
0 1 2 3 4
c 0 1 2 3 4
a 5 6 7 8 9
d 10 11 12 13 14
b 15 16 17 18 19
b.cumsum()
Out[5]:
0 1 2 3 4
c 0 1 2 3 4
a 5 7 9 11 13
d 15 18 21 24 27
b 30 34 38 42 46
b.cumprod()
Out[6]:
0 1 2 3 4
c 0 1 2 3 4
a 0 6 14 24 36
d 0 66 168 312 504
b 0 1056 2856 5616 9576
b.cummin()
Out[7]:
0 1 2 3 4
c 0 1 2 3 4
a 0 1 2 3 4
d 0 1 2 3 4
b 0 1 2 3 4
b.cummax()
Out[8]:
0 1 2 3 4
c 0 1 2 3 4
a 5 6 7 8 9
d 10 11 12 13 14
b 15 16 17 18 19
滚动计算:.rolling(w).sum()沿0轴依次计算相邻w个元素的和;前w-1行不足一个窗口,结果为NaN
import pandas as pd
import numpy as np
b = pd.DataFrame(np.arange(20).reshape(4,5),index=['c','a','b','d'])
b
Out[4]:
0 1 2 3 4
c 0 1 2 3 4
a 5 6 7 8 9
b 10 11 12 13 14
d 15 16 17 18 19
b.rolling(2).sum()
Out[5]:
0 1 2 3 4
c NaN NaN NaN NaN NaN
a 5.0 7.0 9.0 11.0 13.0
b 15.0 17.0 19.0 21.0 23.0
d 25.0 27.0 29.0 31.0 33.0
b.rolling(3).sum()
Out[6]:
0 1 2 3 4
c NaN NaN NaN NaN NaN
a NaN NaN NaN NaN NaN
b 15.0 18.0 21.0 24.0 27.0
d 30.0 33.0 36.0 39.0 42.0
(注:以下滚动计算示例与上文的b.rolling(2).sum()、b.rolling(3).sum()示例内容重复)
import pandas as pd
import numpy as np
b = pd.DataFrame(np.arange(20).reshape(4,5),index=['c','a','b','d'])
b
Out[4]:
0 1 2 3 4
c 0 1 2 3 4
a 5 6 7 8 9
b 10 11 12 13 14
d 15 16 17 18 19
b.rolling(2).sum()
Out[5]:
0 1 2 3 4
c NaN NaN NaN NaN NaN
a 5.0 7.0 9.0 11.0 13.0
b 15.0 17.0 19.0 21.0 23.0
d 25.0 27.0 29.0 31.0 33.0
b.rolling(3).sum()
Out[6]:
0 1 2 3 4
c NaN NaN NaN NaN NaN
a NaN NaN NaN NaN NaN
b 15.0 18.0 21.0 24.0 27.0
d 30.0 33.0 36.0 39.0 42.0
相关分析:Series.corr()计算两个Series之间的相关系数(默认为Pearson相关系数),下例为房价增幅hprice与M2增幅m2的相关性
import pandas as pd
hprice = pd.Series([3.04,22.93,12.75,22.6,12.33],index=['2008','2009','2010','2011','2012'])
m2 = pd.Series([8.18,18.38,9.13,7.82,6.69],index=['2008','2009','2010','2011','2012'])
hprice.corr(m2)
Out[10]: 0.5239439145220387