# • 找到所有使用次数超过800的猫的名字
# • 获取使用次数最高的名字
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#@Time : 2020/9/4 14:55
#@Author : GodSpeed
#@File : Pandas第二次作业01.py
#@Software : PyCharm
import pandas as pd
import numpy as np
# Exercise 1
# Read catNames2.csv and:
#   * list every cat name that was used more than 800 times
#   * find the most frequently used name

# Load the data set; the columns used below are ``Row_Labels`` (the name)
# and ``Count_AnimalName`` (how many times that name was used).
df_catNames2 = pd.read_csv('catNames2.csv')
print(df_catNames2)
# Sample output:
# Row_Labels Count_AnimalName
# 0 1 1
# 1 2 2
# 2 40804 1
# 3 90201 1
# 4 90203 1
# ... ... ...
# 16215 37916 1
# 16216 38282 1
# 16217 38583 1
# 16218 38948 1
# 16219 39743 1
# [16220 rows x 2 columns]

# --- Names used more than 800 times ---------------------------------------
# A boolean mask keeps only the rows whose count exceeds 800.  Columns are
# accessed with brackets so a column name can never be shadowed by a
# DataFrame attribute of the same name.
Row_Labels_b_800 = df_catNames2[df_catNames2['Count_AnimalName'] > 800]
print(Row_Labels_b_800['Row_Labels'].values)
# ['BELLA' 'CHARLIE' 'COCO' 'MAX' 'ROCKY']

# --- Most frequently used name --------------------------------------------
# Method 1: idxmax() returns the index *label* of the largest count, which
# is exactly what the label-based .loc accessor expects.  (np.argmax yields
# a *position*; feeding that to .loc is only correct while the frame still
# has its default 0..n-1 index.)
index = df_catNames2['Count_AnimalName'].idxmax()
print(df_catNames2.loc[index, 'Row_Labels'])
# BELLA

# Method 2: sort by count in descending order and take the first row.
# The frame is sorted once and the result reused for both prints.
sorted_by_count = df_catNames2.sort_values(by='Count_AnimalName', ascending=False)
print(sorted_by_count)
# Sample output:
# Row_Labels Count_AnimalName
# 1156 BELLA 1195
# 9140 MAX 1153
# 2660 CHARLIE 856
# 3251 COCO 852
# 12368 ROCKY 823
# ... ... ...
# 6884 J-LO 1
# 6888 JOANN 1
# 6890 JOAO 1
# 6891 JOAQUIN 1
# 16219 39743 1
# [16220 rows x 2 columns]
print(sorted_by_count['Row_Labels'].iloc[0])
# BELLA
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#@Time : 2020/9/4 17:16
#@Author : GodSpeed
#@File : Pandas第二次作业02.py
#@Software : PyCharm
import pandas as pd
import numpy as np
# 2.1 Read 五粮液2020.xlsx, using column 0 (ts_code) as the row index.
df_wly = pd.read_excel('五粮液2020.xlsx', index_col=0)
# Sample of the sheet as it looks when read WITHOUT index_col (default
# integer index, ts_code still an ordinary column):
# ts_code trade_date open high low close pre_close
# 0 000858.SZ 20200903 235.40 243.00 235.19 238.64 235.00
# 1 000858.SZ 20200902 235.20 239.78 233.80 235.00 236.24
# 2 000858.SZ 20200901 237.48 239.80 233.10 236.24 240.00
# 3 000858.SZ 20200831 242.00 246.70 240.00 240.00 240.50
# 4 000858.SZ 20200828 235.98 244.86 233.20 240.50 238.08
# .. ... ... ... ... ... ... ...
# 159 000858.SZ 20200108 128.99 129.76 128.05 128.89 129.37
# 160 000858.SZ 20200107 129.50 131.07 129.00 129.37 129.20
# 161 000858.SZ 20200106 130.00 130.25 128.52 129.20 130.55
# 162 000858.SZ 20200103 131.60 132.07 129.61 130.55 132.08
# 163 000858.SZ 20200102 132.00 133.50 129.59 132.08 133.01
# [164 rows x 7 columns]

# 2.2 Inspect basic information about the data.
print(df_wly.shape)  # (rows, columns) as a tuple
# (164, 6)
print(df_wly.dtypes)  # dtype of every column
# trade_date int64
# open float64
# high float64
# low float64
# close float64
# pre_close float64
# dtype: object
print(df_wly.ndim)  # number of dimensions, as an int
# 2
print(df_wly.index)  # row index
# Index(['000858.SZ', '000858.SZ', '000858.SZ', '000858.SZ', '000858.SZ',
# '000858.SZ', '000858.SZ', '000858.SZ', '000858.SZ', '000858.SZ',
# ...
# '000858.SZ', '000858.SZ', '000858.SZ', '000858.SZ', '000858.SZ',
# '000858.SZ', '000858.SZ', '000858.SZ', '000858.SZ', '000858.SZ'],
# dtype='object', name='ts_code', length=164)
print(df_wly.columns)  # column index
# Index(['trade_date', 'open', 'high', 'low', 'close', 'pre_close'], dtype='object')

# info() writes its summary to stdout itself and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
df_wly.info()
# Index: 164 entries, 000858.SZ to 000858.SZ
# Data columns (total 6 columns):
# # Column Non-Null Count Dtype
# --- ------ -------------- -----
# 0 trade_date 164 non-null int64
# 1 open 164 non-null float64
# 2 high 164 non-null float64
# 3 low 164 non-null float64
# 4 close 164 non-null float64
# 5 pre_close 164 non-null float64
# dtypes: float64(5), int64(1)
# memory usage: 8.3+ KB

# First-order difference of every column.
# df.diff() is equivalent to df - df.shift(1), where shift(1) moves every
# row down by one position.  Full signature: DataFrame.diff(periods=1, axis=0);
# axis chooses rows vs. columns, periods is the shift distance.
# trade_date is still int64 at this point, so it is subtracted as a plain
# number (note the -70.0 between 20200901 and 20200831 below).
print(df_wly.diff(periods=1))
# trade_date open high low close pre_close
# ts_code
# 000858.SZ NaN NaN NaN NaN NaN NaN
# 000858.SZ -1.0 -0.20 -3.22 -1.39 -3.64 1.24
# 000858.SZ -1.0 2.28 0.02 -0.70 1.24 3.76
# 000858.SZ -70.0 4.52 6.90 6.90 3.76 0.50
# 000858.SZ -3.0 -6.02 -1.84 -6.80 0.50 -2.42
# ... ... ... ... ... ... ...
# 000858.SZ -1.0 -0.01 -1.44 -0.20 -1.88 0.48
# 000858.SZ -1.0 0.51 1.31 0.95 0.48 -0.17
# 000858.SZ -1.0 0.50 -0.82 -0.48 -0.17 1.35
# 000858.SZ -3.0 1.60 1.82 1.09 1.35 1.53
# 000858.SZ -1.0 0.40 1.43 -0.02 1.53 0.93
# [164 rows x 6 columns]
# 问题:
# trade_date 是以整数(int64)保存的 YYYYMMDD 日期,diff 会把它当作普通数值相减(例如跨月时得到 -70),而不是按日期计算间隔。
# 解决方案:
# 先用函数映射把 trade_date 通过 pd.to_datetime 转换为日期类型,再计算 diff,代码如下:
# Convert trade_date (YYYYMMDD) into real datetimes so that diff() reports
# calendar-day gaps instead of subtracting the raw numbers.  The whole
# column is converted in one vectorized call; astype(str) makes the
# "%Y%m%d" parsing explicit regardless of how the column was stored.
df_wly['trade_date'] = pd.to_datetime(df_wly['trade_date'].astype(str), format="%Y%m%d")
print(df_wly.diff())
# trade_date open high low close pre_close
# ts_code
# 000858.SZ NaT NaN NaN NaN NaN NaN
# 000858.SZ -1 days -0.20 -3.22 -1.39 -3.64 1.24
# 000858.SZ -1 days 2.28 0.02 -0.70 1.24 3.76
# 000858.SZ -1 days 4.52 6.90 6.90 3.76 0.50
# 000858.SZ -3 days -6.02 -1.84 -6.80 0.50 -2.42
# ... ... ... ... ... ... ...
# 000858.SZ -1 days -0.01 -1.44 -0.20 -1.88 0.48
# 000858.SZ -1 days 0.51 1.31 0.95 0.48 -0.17
# 000858.SZ -1 days 0.50 -0.82 -0.48 -0.17 1.35
# 000858.SZ -3 days 1.60 1.82 1.09 1.35 1.53
# 000858.SZ -1 days 0.40 1.43 -0.02 1.53 0.93
# [164 rows x 6 columns]

# 2.4 Compute the growth rate of pre_close.
# NOTE(review): the rows are ordered newest-first (see the sample output),
# so pct_change() compares each row with the row above it, i.e. with the
# *following* trading day.  If day-over-day growth is what is wanted,
# pct_change(periods=-1) would be needed instead -- confirm the intent.
pre_close_growth = df_wly['pre_close'].pct_change()
print(pre_close_growth)

# 2.5 Add the growth rate to the data as a new column.
df_wly['pct_change'] = pre_close_growth
print(df_wly)
# trade_date open high low close pre_close pct_change
# ts_code
# 000858.SZ 2020-09-03 235.40 243.00 235.19 238.64 235.00 NaN
# 000858.SZ 2020-09-02 235.20 239.78 233.80 235.00 236.24 0.005277
# 000858.SZ 2020-09-01 237.48 239.80 233.10 236.24 240.00 0.015916
# 000858.SZ 2020-08-31 242.00 246.70 240.00 240.00 240.50 0.002083
# 000858.SZ 2020-08-28 235.98 244.86 233.20 240.50 238.08 -0.010062
# ... ... ... ... ... ... ... ...
# 000858.SZ 2020-01-08 128.99 129.76 128.05 128.89 129.37 0.003724
# 000858.SZ 2020-01-07 129.50 131.07 129.00 129.37 129.20 -0.001314
# 000858.SZ 2020-01-06 130.00 130.25 128.52 129.20 130.55 0.010449
# 000858.SZ 2020-01-03 131.60 132.07 129.61 130.55 132.08 0.011720
# 000858.SZ 2020-01-02 132.00 133.50 129.59 132.08 133.01 0.007041
# [164 rows x 7 columns]

# 2.6 Replace the NaN in the pct_change column with 0.
# The result is assigned back explicitly: calling fillna(inplace=True) on
# df_wly['pct_change'] operates on an intermediate Series, which triggers a
# chained-assignment warning on recent pandas and leaves df_wly untouched
# once Copy-on-Write is enabled.
df_wly['pct_change'] = df_wly['pct_change'].fillna(0)
print(df_wly)
# trade_date open high low close pre_close pct_change
# ts_code
# 000858.SZ 2020-09-03 235.40 243.00 235.19 238.64 235.00 0.000000
# 000858.SZ 2020-09-02 235.20 239.78 233.80 235.00 236.24 0.005277
# 000858.SZ 2020-09-01 237.48 239.80 233.10 236.24 240.00 0.015916
# 000858.SZ 2020-08-31 242.00 246.70 240.00 240.00 240.50 0.002083
# 000858.SZ 2020-08-28 235.98 244.86 233.20 240.50 238.08 -0.010062
# ... ... ... ... ... ... ... ...
# 000858.SZ 2020-01-08 128.99 129.76 128.05 128.89 129.37 0.003724
# 000858.SZ 2020-01-07 129.50 131.07 129.00 129.37 129.20 -0.001314
# 000858.SZ 2020-01-06 130.00 130.25 128.52 129.20 130.55 0.010449
# 000858.SZ 2020-01-03 131.60 132.07 129.61 130.55 132.08 0.011720
# 000858.SZ 2020-01-02 132.00 133.50 129.59 132.08 133.01 0.007041
# [164 rows x 7 columns]

# 2.7 Correlation between pre_close and pct_change.
# This has to run before step 2.8, which turns pct_change into strings.
print(df_wly['pre_close'].corr(df_wly['pct_change']))
# -0.02569885399397465

# 2.8 Multiply pct_change by 100, keep two decimals and append "%".
# The "%" presentation type performs the multiply-by-100 and adds the sign,
# so "{:.2%}" yields e.g. 0.005277 -> "0.53%".
df_wly['pct_change'] = df_wly['pct_change'].map("{:.2%}".format)
print(df_wly)
# trade_date open high low close pre_close pct_change
# ts_code
# 000858.SZ 2020-09-03 235.40 243.00 235.19 238.64 235.00 0.00%
# 000858.SZ 2020-09-02 235.20 239.78 233.80 235.00 236.24 0.53%
# 000858.SZ 2020-09-01 237.48 239.80 233.10 236.24 240.00 1.59%
# 000858.SZ 2020-08-31 242.00 246.70 240.00 240.00 240.50 0.21%
# 000858.SZ 2020-08-28 235.98 244.86 233.20 240.50 238.08 -1.01%
# ... ... ... ... ... ... ... ...
# 000858.SZ 2020-01-08 128.99 129.76 128.05 128.89 129.37 0.37%
# 000858.SZ 2020-01-07 129.50 131.07 129.00 129.37 129.20 -0.13%
# 000858.SZ 2020-01-06 130.00 130.25 128.52 129.20 130.55 1.04%
# 000858.SZ 2020-01-03 131.60 132.07 129.61 130.55 132.08 1.17%
# 000858.SZ 2020-01-02 132.00 133.50 129.59 132.08 133.01 0.70%
# [164 rows x 7 columns]