选择单独一列,通过这种方式获取一个Series,等价于df.A
# Example: select a single column with df['A'].
import numpy as np
import pandas as pd
# Build a DatetimeIndex starting at 2013-01-01; periods=6 is the number of
# entries (the printed output shows six consecutive days, not months).
dates = pd.date_range('20130101',periods=6)
# Optional debugging aid: preview a random array with 6 rows and 4 columns.
# print(np.random.randn(6,4))
# Use dates as the row index, 'A'..'D' as the column labels, and
# np.random.randn(6, 4) as the cell values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(df)
print("------------获取一列数据,包括index的值------------")
# Column selection: prints column A together with its index values.
print(df['A'])
输出结果:
A B C D
2013-01-01 -1.254715 0.208110 -0.961949 -0.370242
2013-01-02 0.080537 0.262347 1.113138 -2.134468
2013-01-03 1.021890 -0.701764 1.200650 2.187048
2013-01-04 -1.061324 -2.099654 -0.036560 1.200454
2013-01-05 -1.517845 0.198959 1.660779 -0.749684
2013-01-06 0.789730 -0.066244 0.622702 -0.876385
------------获取一列数据,包括index的值------------
2013-01-01 -1.254715
2013-01-02 0.080537
2013-01-03 1.021890
2013-01-04 -1.061324
2013-01-05 -1.517845
2013-01-06 0.789730
Freq: D, Name: A, dtype: float64
# Example: slice rows with df[start:stop] using integer positions.
import numpy as np
import pandas as pd
# Build a DatetimeIndex starting at 2013-01-01; periods=6 is the number of
# entries (the printed output shows six consecutive days, not months).
dates = pd.date_range('20130101',periods=6)
# Optional debugging aid: preview a random array with 6 rows and 4 columns.
# print(np.random.randn(6,4))
# Use dates as the row index, 'A'..'D' as the column labels, and
# np.random.randn(6, 4) as the cell values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(df)
print("------------通过[]获取一些行的数据------------")
# An integer slice inside [] selects rows; 0:3 yields the first three rows
# (the stop position is excluded, as the printed output shows).
print(df[0:3])
输出结果:
A B C D
2013-01-01 -0.472598 0.634512 -1.154252 0.650860
2013-01-02 -1.020034 2.215081 0.152721 -1.132280
2013-01-03 -0.976544 -0.292899 0.022719 -0.468406
2013-01-04 -1.367970 -0.087343 -0.590131 -0.279336
2013-01-05 -1.295620 -1.630914 -1.304561 -1.334444
2013-01-06 -0.790731 1.043622 -0.772808 0.564674
------------通过[]获取一些行的数据------------
A B C D
2013-01-01 -0.472598 0.634512 -1.154252 0.650860
2013-01-02 -1.020034 2.215081 0.152721 -1.132280
2013-01-03 -0.976544 -0.292899 0.022719 -0.468406
再如:
# Example: slice rows with df[start:stop] using date-string labels.
import numpy as np
import pandas as pd
# Build a DatetimeIndex starting at 2013-01-01; periods=6 is the number of
# entries (the printed output shows six consecutive days, not months).
dates = pd.date_range('20130101',periods=6)
# Optional debugging aid: preview a random array with 6 rows and 4 columns.
# print(np.random.randn(6,4))
# Use dates as the row index, 'A'..'D' as the column labels, and
# np.random.randn(6, 4) as the cell values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(df)
print("------------通过[]获取一些行的数据------------")
# A slice of date strings selects rows by label; unlike the integer slice,
# both endpoints (2013-01-02 and 2013-01-04) appear in the printed result.
print(df['20130102':'20130104'] )
输出结果为:
A B C D
2013-01-01 -1.253099 0.378643 -0.119608 1.971239
2013-01-02 -0.425589 -0.689410 -1.373875 -1.027356
2013-01-03 -1.302570 -0.182313 1.564062 0.059378
2013-01-04 0.637397 0.330284 0.607045 -0.928848
2013-01-05 0.106358 1.431572 0.789533 1.483123
2013-01-06 2.089771 0.823074 0.362509 -0.206203
------------通过[]获取一些行的数据------------
A B C D
2013-01-02 -0.425589 -0.689410 -1.373875 -1.027356
2013-01-03 -1.302570 -0.182313 1.564062 0.059378
2013-01-04 0.637397 0.330284 0.607045 -0.928848
# Example: select one row by its index label with df.loc.
import numpy as np
import pandas as pd
# Build a DatetimeIndex starting at 2013-01-01; periods=6 is the number of
# entries (the printed output shows six consecutive days, not months).
dates = pd.date_range('20130101',periods=6)
# Optional debugging aid: preview a random array with 6 rows and 4 columns.
# print(np.random.randn(6,4))
# Use dates as the row index, 'A'..'D' as the column labels, and
# np.random.randn(6, 4) as the cell values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(df)
print("------------Selected by label------------")
# dates[0] is the first index label (2013-01-01); the row is printed as a
# Series whose entries are labelled by the column names A..D.
print(df.loc[dates[0]])
输出结果为:
A B C D
2013-01-01 1.280903 0.512179 -0.231190 0.036669
2013-01-02 -0.540552 -0.878910 0.918104 1.878132
2013-01-03 -1.106968 -0.176439 -1.247349 0.994883
2013-01-04 0.260098 -0.885501 0.346970 2.435448
2013-01-05 1.110520 1.756852 -2.025549 0.566382
2013-01-06 1.405610 -1.663422 -0.071328 -0.778265
------------Selected by label------------
A 1.280903
B 0.512179
C -0.231190
D 0.036669
Name: 2013-01-01 00:00:00, dtype: float64
选择多行多列的值:
# Example: select every row but only some columns with df.loc.
import numpy as np
import pandas as pd
# Build a DatetimeIndex starting at 2013-01-01; periods=6 is the number of
# entries (the printed output shows six consecutive days, not months).
dates = pd.date_range('20130101',periods=6)
# Optional debugging aid: preview a random array with 6 rows and 4 columns.
# print(np.random.randn(6,4))
# Use dates as the row index, 'A'..'D' as the column labels, and
# np.random.randn(6, 4) as the cell values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(df)
print("----------Selecting on a multi-axis by label:--------------")
# ':' keeps all rows; the list ['A', 'B'] keeps only those two columns.
print(df.loc[:,['A','B']])
输出结果为:
A B C D
2013-01-01 1.595626 -0.564944 -0.668520 0.809201
2013-01-02 0.151399 0.647508 -1.653127 -1.161560
2013-01-03 -0.530367 0.024272 0.799212 -1.079254
2013-01-04 1.060353 0.047422 1.886798 0.254401
2013-01-05 0.064728 -1.073669 -1.503000 -1.328167
2013-01-06 2.126969 -0.535444 0.078469 1.392002
----------Selecting on a multi-axis by label:--------------
A B
2013-01-01 1.595626 -0.564944
2013-01-02 0.151399 0.647508
2013-01-03 -0.530367 0.024272
2013-01-04 1.060353 0.047422
2013-01-05 0.064728 -1.073669
2013-01-06 2.126969 -0.535444
再如:
# Example: label-based selection with df.loc and df.at, going from a
# sub-frame down to a single scalar.
import numpy as np
import pandas as pd
# Build a DatetimeIndex starting at 2013-01-01; periods=6 is the number of
# entries (the printed output shows six consecutive days, not months).
dates = pd.date_range('20130101',periods=6)
# Optional debugging aid: preview a random array with 6 rows and 4 columns.
# print(np.random.randn(6,4))
# Use dates as the row index, 'A'..'D' as the column labels, and
# np.random.randn(6, 4) as the cell values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(df)
print("----------Showing label slicing, both endpoints are included:--------------")
# Row label slice (2013-01-02 through 2013-01-04 inclusive) combined with a
# list of column labels.
print(df.loc['20130102':'20130104', ['A', 'B']] )
print("----------Reduction in the dimensions of the returned object:--------------")
# A single row label instead of a slice prints a Series rather than a frame.
print(df.loc['20130102', ['A', 'B']])
print("----------For getting a scalar value:--------------")
# One row label plus one column label yields a single value.
print(df.loc[dates[0],'A'])
print("----------For getting fast access to a scalar (equivalent to the prior method):--------------")
# df.at prints the same value as the df.loc lookup above.
print(df.at[dates[0],'A'])
输出结果为:
A B C D
2013-01-01 0.417995 -0.026418 -1.027092 -0.042631
2013-01-02 0.184714 1.289753 2.064584 -2.566320
2013-01-03 0.155585 -0.043835 -0.204965 -0.502541
2013-01-04 -1.698213 0.914601 -0.576331 -0.722310
2013-01-05 0.023068 2.280261 2.014296 -1.111354
2013-01-06 0.456681 0.202428 0.612209 -0.525896
----------Showing label slicing, both endpoints are included:--------------
A B
2013-01-02 0.184714 1.289753
2013-01-03 0.155585 -0.043835
2013-01-04 -1.698213 0.914601
----------Reduction in the dimensions of the returned object:--------------
A 0.184714
B 1.289753
Name: 2013-01-02 00:00:00, dtype: float64
----------For getting a scalar value:--------------
0.4179945574941799
----------For getting fast access to a scalar (equivalent to the prior method):--------------
0.4179945574941799
# Example: position-based selection with df.iloc and df.iat.
import numpy as np
import pandas as pd
# Build a DatetimeIndex starting at 2013-01-01; periods=6 is the number of
# entries (the printed output shows six consecutive days, not months).
dates = pd.date_range('20130101',periods=6)
# Optional debugging aid: preview a random array with 6 rows and 4 columns.
# print(np.random.randn(6,4))
# Use dates as the row index, 'A'..'D' as the column labels, and
# np.random.randn(6, 4) as the cell values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(df)
print("----------Select via the position of the passed integers:----------")
# Position 3 is the fourth row (2013-01-04); it is printed as a Series.
print(df.iloc[3])
print("----------By integer slices, acting similar to numpy/python:----------")
# Rows at positions 3-4 and columns at positions 0-1; stop positions are excluded.
print(df.iloc[3:5,0:2])
print("----------By lists of integer position locations, similar to the numpy/python style:----------")
# Explicit position lists: rows 1, 2, 4 and columns 0, 2 (A and C).
print(df.iloc[[1,2,4],[0,2]])
print("----------For slicing rows explicitly:----------")
# Rows at positions 1-2 with every column kept.
print(df.iloc[1:3,:])
print("----------For getting a value explicitly:----------")
# Single cell at row position 1, column position 1.
print(df.iloc[1,1])
print("----------For getting fast access to a scalar (equivalent to the prior method)::----------")
# df.iat prints the same value as the df.iloc lookup above.
print(df.iat[1, 1])
输出结果为:
A B C D
2013-01-01 0.156959 -0.026162 -0.687972 -0.631711
2013-01-02 -0.331658 0.069760 0.820701 1.330417
2013-01-03 -0.842929 0.240588 1.073343 1.231400
2013-01-04 -0.433373 1.680165 -1.624269 -0.568833
2013-01-05 -0.023643 0.766268 0.210271 -0.107263
2013-01-06 0.856357 0.932945 -1.367324 0.766313
----------Select via the position of the passed integers:----------
A -0.433373
B 1.680165
C -1.624269
D -0.568833
Name: 2013-01-04 00:00:00, dtype: float64
----------By integer slices, acting similar to numpy/python:----------
A B
2013-01-04 -0.433373 1.680165
2013-01-05 -0.023643 0.766268
----------By lists of integer position locations, similar to the numpy/python style:----------
A C
2013-01-02 -0.331658 0.820701
2013-01-03 -0.842929 1.073343
2013-01-05 -0.023643 0.210271
----------For slicing rows explicitly:----------
A B C D
2013-01-02 -0.331658 0.069760 0.820701 1.330417
2013-01-03 -0.842929 0.240588 1.073343 1.231400
----------For getting a value explicitly:----------
0.06976023059625595
----------For getting fast access to a scalar (equivalent to the prior method)::----------
0.06976023059625595