Pandas df.loc查询数据
本系列教程采用的数据为上证01号股票数据,感谢Tushare金融社区的数据分享
import pandas as pd
读取数据
df=pd.read_excel('000001.SZ.xlsx',encoding='gbk')
df.head()
|
ts_code |
trade_date |
open |
high |
low |
close |
change |
pct_chg |
vol |
amount |
pre_close |
0 |
000001.SZ |
20160104 |
12.00 |
12.03 |
11.23 |
11.33 |
-0.66 |
-5.50 |
563497.87 |
660376.1531 |
11.99 |
1 |
000001.SZ |
20160105 |
11.27 |
11.57 |
11.15 |
11.40 |
0.07 |
0.62 |
663269.95 |
755531.3537 |
11.33 |
2 |
000001.SZ |
20160106 |
11.42 |
11.56 |
11.39 |
11.53 |
0.13 |
1.14 |
515706.44 |
591698.5204 |
11.40 |
3 |
000001.SZ |
20160107 |
11.41 |
11.41 |
10.91 |
10.94 |
-0.59 |
-5.12 |
174761.10 |
194869.4935 |
11.53 |
4 |
000001.SZ |
20160108 |
11.21 |
11.29 |
10.90 |
11.12 |
0.18 |
1.65 |
747527.58 |
831334.5462 |
10.94 |
df.set_index('trade_date',inplace=True)
df.loc[:,'ts_code']=df['ts_code'].str.replace('.SZ','').astype('int32')
df.head()
|
ts_code |
open |
high |
low |
close |
change |
pct_chg |
vol |
amount |
pre_close |
trade_date |
|
|
|
|
|
|
|
|
|
|
20160104 |
1 |
12.00 |
12.03 |
11.23 |
11.33 |
-0.66 |
-5.50 |
563497.87 |
660376.1531 |
11.99 |
20160105 |
1 |
11.27 |
11.57 |
11.15 |
11.40 |
0.07 |
0.62 |
663269.95 |
755531.3537 |
11.33 |
20160106 |
1 |
11.42 |
11.56 |
11.39 |
11.53 |
0.13 |
1.14 |
515706.44 |
591698.5204 |
11.40 |
20160107 |
1 |
11.41 |
11.41 |
10.91 |
10.94 |
-0.59 |
-5.12 |
174761.10 |
194869.4935 |
11.53 |
20160108 |
1 |
11.21 |
11.29 |
10.90 |
11.12 |
0.18 |
1.65 |
747527.58 |
831334.5462 |
10.94 |
使用label值查询数据
df.loc[20160108,'open']
11.21
df.loc[[20160107,20160108],'open']
trade_date
20160107 11.41
20160108 11.21
Name: open, dtype: float64
df.loc[[20160107,20160108],['open','low']]
|
open |
low |
trade_date |
|
|
20160107 |
11.41 |
10.91 |
20160108 |
11.21 |
10.90 |
使用数值区间进行范围查询
df.loc[20160106:20160108,'open']
trade_date
20160106 11.42
20160107 11.41
20160108 11.21
Name: open, dtype: float64
df.loc[20160107,'open':'low']
open 11.41
high 11.41
low 10.91
Name: 20160107, dtype: float64
df.loc[20160106:20160108,'open':'low']
|
open |
high |
low |
trade_date |
|
|
|
20160106 |
11.42 |
11.56 |
11.39 |
20160107 |
11.41 |
11.41 |
10.91 |
20160108 |
11.21 |
11.29 |
10.90 |
使用条件表达式查询
df['open']<11.5
trade_date
20160104 False
20160105 True
20160106 True
20160107 True
20160108 True
...
20201225 False
20201228 False
20201229 False
20201230 False
20201231 False
Name: open, Length: 1218, dtype: bool
df.loc[(df['open']<11.3) & (df['high']>11.5),:]
|
ts_code |
open |
high |
low |
close |
change |
pct_chg |
vol |
amount |
pre_close |
trade_date |
|
|
|
|
|
|
|
|
|
|
20160105 |
1 |
11.27 |
11.57 |
11.15 |
11.40 |
0.07 |
0.62 |
663269.95 |
7.555314e+05 |
11.33 |
20170828 |
1 |
11.10 |
11.54 |
11.10 |
11.35 |
0.24 |
2.16 |
1603938.32 |
1.828380e+06 |
11.11 |
20170904 |
1 |
11.18 |
11.72 |
11.17 |
11.72 |
0.51 |
4.55 |
1352325.57 |
1.551875e+06 |
11.21 |
20170921 |
1 |
11.26 |
11.51 |
11.20 |
11.46 |
0.17 |
1.51 |
692407.93 |
7.886053e+05 |
11.29 |
20171027 |
1 |
11.19 |
11.56 |
11.18 |
11.56 |
0.38 |
3.40 |
1360086.42 |
1.548544e+06 |
11.18 |
20171107 |
1 |
11.27 |
12.09 |
11.25 |
11.92 |
0.64 |
5.67 |
2477163.26 |
2.914269e+06 |
11.28 |
自定义函数或lambda函数查询
df.loc[lambda df: (df['open']<11.3) & (df['high']>11.5),:]
|
ts_code |
open |
high |
low |
close |
change |
pct_chg |
vol |
amount |
pre_close |
trade_date |
|
|
|
|
|
|
|
|
|
|
20160105 |
1 |
11.27 |
11.57 |
11.15 |
11.40 |
0.07 |
0.62 |
663269.95 |
7.555314e+05 |
11.33 |
20170828 |
1 |
11.10 |
11.54 |
11.10 |
11.35 |
0.24 |
2.16 |
1603938.32 |
1.828380e+06 |
11.11 |
20170904 |
1 |
11.18 |
11.72 |
11.17 |
11.72 |
0.51 |
4.55 |
1352325.57 |
1.551875e+06 |
11.21 |
20170921 |
1 |
11.26 |
11.51 |
11.20 |
11.46 |
0.17 |
1.51 |
692407.93 |
7.886053e+05 |
11.29 |
20171027 |
1 |
11.19 |
11.56 |
11.18 |
11.56 |
0.38 |
3.40 |
1360086.42 |
1.548544e+06 |
11.18 |
20171107 |
1 |
11.27 |
12.09 |
11.25 |
11.92 |
0.64 |
5.67 |
2477163.26 |
2.914269e+06 |
11.28 |
def query_my_data(df):
return (df.index>20160105) & (df['close']==11.54)
df.loc[query_my_data,:]
|
ts_code |
open |
high |
low |
close |
change |
pct_chg |
vol |
amount |
pre_close |
trade_date |
|
|
|
|
|
|
|
|
|
|
20170912 |
1 |
11.38 |
11.54 |
11.27 |
11.54 |
0.16 |
1.4100 |
846183.42 |
962032.172 |
11.38 |
20171031 |
1 |
11.55 |
11.58 |
11.39 |
11.54 |
-0.02 |
-0.1700 |
627491.38 |
721237.474 |
11.56 |
20171102 |
1 |
11.36 |
11.58 |
11.26 |
11.54 |
0.14 |
1.2300 |
604308.48 |
689229.873 |
11.40 |
20190222 |
1 |
11.35 |
11.57 |
11.15 |
11.54 |
0.18 |
1.5845 |
1184047.79 |
1343020.336 |
11.36 |
Pnada新增数据列
直接赋值的方法
df.loc[:,"today_change"]=df['close']-df['open']
df.head()
|
ts_code |
open |
high |
low |
close |
change |
pct_chg |
vol |
amount |
pre_close |
today_change |
trade_date |
|
|
|
|
|
|
|
|
|
|
|
20160104 |
1 |
12.00 |
12.03 |
11.23 |
11.33 |
-0.66 |
-5.50 |
563497.87 |
660376.1531 |
11.99 |
-0.67 |
20160105 |
1 |
11.27 |
11.57 |
11.15 |
11.40 |
0.07 |
0.62 |
663269.95 |
755531.3537 |
11.33 |
0.13 |
20160106 |
1 |
11.42 |
11.56 |
11.39 |
11.53 |
0.13 |
1.14 |
515706.44 |
591698.5204 |
11.40 |
0.11 |
20160107 |
1 |
11.41 |
11.41 |
10.91 |
10.94 |
-0.59 |
-5.12 |
174761.10 |
194869.4935 |
11.53 |
-0.47 |
20160108 |
1 |
11.21 |
11.29 |
10.90 |
11.12 |
0.18 |
1.65 |
747527.58 |
831334.5462 |
10.94 |
-0.09 |
df.apply方法
对series进行操作,需指定axis
def get_change(x):
if x['today_change']>0:
return "up"
else:
return "down"
df.loc[:,"当日变动"]=df.apply(get_change,axis=1)
df['当日变动'].value_counts()
down 621
up 597
Name: 当日变动, dtype: int64
df.head()
|
ts_code |
open |
high |
low |
close |
change |
pct_chg |
vol |
amount |
pre_close |
today_change |
当日变动 |
trade_date |
|
|
|
|
|
|
|
|
|
|
|
|
20160104 |
1 |
12.00 |
12.03 |
11.23 |
11.33 |
-0.66 |
-5.50 |
563497.87 |
660376.1531 |
11.99 |
-0.67 |
down |
20160105 |
1 |
11.27 |
11.57 |
11.15 |
11.40 |
0.07 |
0.62 |
663269.95 |
755531.3537 |
11.33 |
0.13 |
up |
20160106 |
1 |
11.42 |
11.56 |
11.39 |
11.53 |
0.13 |
1.14 |
515706.44 |
591698.5204 |
11.40 |
0.11 |
up |
20160107 |
1 |
11.41 |
11.41 |
10.91 |
10.94 |
-0.59 |
-5.12 |
174761.10 |
194869.4935 |
11.53 |
-0.47 |
down |
20160108 |
1 |
11.21 |
11.29 |
10.90 |
11.12 |
0.18 |
1.65 |
747527.58 |
831334.5462 |
10.94 |
-0.09 |
down |
条件选择赋值
df['pct_observe']=''
df.loc[df['pct_chg']>0,'pct_observe']='+'
df.loc[df['pct_chg']<=0,'pct_observe']='-'
df.head()
|
ts_code |
open |
high |
low |
close |
change |
pct_chg |
vol |
amount |
pre_close |
today_change |
当日变动 |
pct_observe |
trade_date |
|
|
|
|
|
|
|
|
|
|
|
|
|
20160104 |
1 |
12.00 |
12.03 |
11.23 |
11.33 |
-0.66 |
-5.50 |
563497.87 |
660376.1531 |
11.99 |
-0.67 |
down |
- |
20160105 |
1 |
11.27 |
11.57 |
11.15 |
11.40 |
0.07 |
0.62 |
663269.95 |
755531.3537 |
11.33 |
0.13 |
up |
+ |
20160106 |
1 |
11.42 |
11.56 |
11.39 |
11.53 |
0.13 |
1.14 |
515706.44 |
591698.5204 |
11.40 |
0.11 |
up |
+ |
20160107 |
1 |
11.41 |
11.41 |
10.91 |
10.94 |
-0.59 |
-5.12 |
174761.10 |
194869.4935 |
11.53 |
-0.47 |
down |
- |
20160108 |
1 |
11.21 |
11.29 |
10.90 |
11.12 |
0.18 |
1.65 |
747527.58 |
831334.5462 |
10.94 |
-0.09 |
down |
+ |
源代码和相应数据下载请点击 https://github.com/MiaoChenglin125/Data-Acquisition/tree/master/pandas%E6%93%8D%E4%BD%9C