阿里巴巴股票数据集 提取码: spyv
import numpy as np
from dateutil.parser import parse
# Load the daily quotes with np.loadtxt:
#   - the first argument is the file to open;
#   - skiprows drops the header line, which is not numeric;
#   - there is no default field separator for CSV, so delimiter is required;
#   - usecols restricts loading to the listed columns;
#   - unpack=True transposes the result so that each column becomes one row.
stock_info = np.loadtxt(
    './BABA_stock.csv',
    delimiter=',',
    skiprows=1,
    usecols=(1, 2, 3, 4, 5),
    unpack=True,
)
# Reverse the order of the trading days along the second axis.
stock_info = stock_info[:, ::-1]
print(stock_info.shape)
print(stock_info)

close_info = stock_info[0]
open_info = stock_info[2]
total_days = stock_info.shape[1]

# Close-minus-open change per day, and the masks derived from it.
day_change = close_info - open_info
rise_mask = day_change > 0
fall_mask = day_change < 0

# Number of days that closed above the open.
rise_count = np.count_nonzero(rise_mask)
print('上涨的天数: {}'.format(rise_count))

# Number of days that closed below the open.
fail_count = np.count_nonzero(fall_mask)
print('下跌的天数: {}'.format(fail_count))

# Share of all loaded days that rose.
rise_percent = rise_count / total_days
print('上涨的概率是: {:.2%}'.format(rise_percent))

# Share of all loaded days that fell.
fail_percent = fail_count / total_days
print('下跌的概率是: {:.2%}'.format(fail_percent))
(5, 755)
[[7.897000e+01 7.901000e+01 7.946000e+01 ... 1.849800e+02 1.889100e+02
1.830700e+02]
[1.169603e+07 7.761672e+06 1.472746e+07 ... 8.848863e+06 1.256109e+07
1.461019e+07]
[7.945000e+01 7.852000e+01 7.920000e+01 ... 1.851500e+02 1.877100e+02
1.880600e+02]
[7.975000e+01 7.912000e+01 8.048000e+01 ... 1.860600e+02 1.897900e+02
1.881700e+02]
[7.846000e+01 7.771000e+01 7.847000e+01 ... 1.837500e+02 1.871400e+02
1.825600e+02]]
上涨的天数: 358
下跌的天数: 395
上涨的概率是: 47.42%
下跌的概率是: 52.32%
2016年4月15日到2019年4月15日这三年一共有755个交易日
上涨的天数是358天
下跌的天数是395天
上涨的概率是47.42%
下跌的概率是52.32%
# Convert the daily bars into weekly bars.
# What is a weekly bar?
# Rows 3 and 4 of the array loaded so far are kept as the high and low series.
high_info, low_info = stock_info[3], stock_info[4]
# np.loadtxt takes a `converters` argument: a mapping from column index to a
# user-defined function that turns the raw text of that column into a value.
from dateutil.parser import parse
def convert_date(d):
    """Return the weekday number of the date text ``d``.

    The text is parsed with ``dateutil.parser.parse``; the result is the
    parsed datetime's ``weekday()`` value (Monday is 0, Sunday is 6).
    """
    parsed_day = parse(d)
    return parsed_day.weekday()
# Reload the file, this time including column 0, which convert_date turns into
# a weekday number. Everything is read as byte strings first (dtype='S').
stock_info = np.loadtxt(
    './BABA_stock.csv',
    dtype='S',
    delimiter=',',
    skiprows=1,
    usecols=(0, 1, 3, 4, 5),
    converters={0: convert_date},
)
print(stock_info.shape)

# Reverse the row order and cast every field to float64.
stock_info = stock_info[::-1].astype(np.float64)

# Group the rows by week: locate the rows whose weekday column is 0 (Monday)...
week_split = np.flatnonzero(stock_info[:, 0] == 0)
# ...and cut the table at those row indices. np.split accepts arbitrary cut
# points, so the pieces may differ in length and come back as a list.
week_infos_temp = np.split(stock_info, week_split)

# For simplicity, keep only the pieces that hold exactly five rows.
week_info = list(filter(lambda chunk: len(chunk) == 5, week_infos_temp))

# All remaining pieces have the same shape, so they stack into one ndarray.
w = np.array(week_info)
print(w.shape)
print(w[:3])

# Weekly bar: column 1 of the last row, column 2 of the first row,
# maximum of column 3 and minimum of column 4 within the week.
week_close = w[:, -1, 1]
week_open = w[:, 0, 2]
week_high = np.max(w[..., 3], axis=1)
week_low = np.min(w[..., 4], axis=1)
w_info = np.array([week_close, week_open, week_high, week_low])

# Transpose so that each week occupies one row.
weekly_table = w_info.T
print(weekly_table[:5])

# Save the weekly table to a file.
np.savetxt(
    './week_info_baba.csv',
    weekly_table,
    fmt='%.2f',
    delimiter=',',
    header='close, open, high, low',
)
(755, 5)
(114, 5, 5)
[[[ 0. 79.01 78.52 79.12 77.71 ]
[ 1. 79.46 79.2 80.48 78.47 ]
[ 2. 81.21 79. 81.735 78.99 ]
[ 3. 80.78 81.08 81.78 80.2 ]
[ 4. 79.89 80.12 85.89 79.155 ]]
[[ 0. 78.84 79.8 79.84 78.68 ]
[ 1. 78.61 78.85 79.56 77.8001]
[ 2. 77.65 78.3 78.62 76.57 ]
[ 3. 76.4 77.65 77.9 76.09 ]
[ 4. 76.94 76.81 77.28 75.66 ]]
[[ 0. 76.61 76.89 77. 75.94 ]
[ 1. 75.91 75.96 76.27 75.425 ]
[ 2. 75.82 75.64 75.91 75.01 ]
[ 3. 78.83 79.52 79.94 78.1 ]
[ 4. 79.2 78.35 79.72 78.25 ]]]
[[79.89 78.52 85.89 77.71 ]
[76.94 79.8 79.84 75.66 ]
[79.2 76.89 79.94 75.01 ]
[77.16 78.94 80.485 76.97 ]
[78.79 77.86 80.2 77.59 ]]
# 加载数据,把date这一列设置为索引,简单起见,只用收盘价进行分析
import numpy as np
import pandas as pd
# Read only the first two columns and use the 'date' column as the index.
df = pd.read_csv('./BABA_stock.csv', usecols=[0, 1], index_col='date')
# Take a first look at the data.
print(df.head())

# The index labels are wrapped in single quotes; strip them and convert the
# index to datetimes.
date_labels = df.index.str.strip("'")
df.index = pd.DatetimeIndex(date_labels)
print(df.index)

# The file lists the most recent date first; reorder by date.
df = df.sort_index()
print(df.head())
print(df.describe())
close
date
'2019/04/15' 183.07
'2019/04/12' 188.91
'2019/04/11' 184.98
'2019/04/10' 186.19
'2019/04/09' 187.19
DatetimeIndex(['2019-04-15', '2019-04-12', '2019-04-11', '2019-04-10',
'2019-04-09', '2019-04-08', '2019-04-05', '2019-04-04',
'2019-04-03', '2019-04-02',
...
'2016-04-28', '2016-04-27', '2016-04-26', '2016-04-25',
'2016-04-22', '2016-04-21', '2016-04-20', '2016-04-19',
'2016-04-18', '2016-04-15'],
dtype='datetime64[ns]', name='date', length=755, freq=None)
close
date
2016-04-15 78.97
2016-04-18 79.01
2016-04-19 79.46
2016-04-20 81.21
2016-04-21 80.78
close
count 755.000000
mean 143.366954
std 40.345464
min 74.230000
25% 102.925000
50% 152.110000
75% 179.155000
max 210.860000
2016年4月15日到2019年4月15日这三年一共有755个交易日
收盘价最高210.86
收盘价最低是74.23
平均收盘股价是143.366954
# Strategy 1: buy when the price rises above its 10-day moving average,
# sell when it falls back below it.
# Start with the 10-day moving average; rows without a full window are dropped.
ma10 = df.rolling(window=10).mean().dropna()

# Signal: True on the days whose close is above the moving average. The
# subtraction aligns on the index, so days missing from ma10 compare as False.
close_minus_ma10 = df['close'].sub(ma10['close'])
ma10_model = close_minus_ma10.gt(0)

# A buy happens where the signal goes from False to True, which needs a custom
# rolling-window function. Selling needs the mirrored check, so both cases are
# handled by one function.
# Printing values such as the window inside that function is a simple way to
# debug it.
def get_deal_date(w, is_buy=True):
    """Tell whether a two-value signal window marks a trade.

    Args:
        w: Window holding the previous and the current signal value, in that
            order. Any iterable works (list, ndarray, pandas Series); values
            may be booleans or the 0.0 / 1.0 floats produced by a rolling
            window over a boolean Series.
        is_buy: When true, detect a buy (signal switches off -> on);
            otherwise detect a sale (signal switches on -> off).

    Returns:
        bool: True when the window shows the requested switch.

    Raises:
        IndexError: If the window holds fewer than two values.
    """
    # Read the values by iteration rather than w[0] / w[1]: on a Series with a
    # non-integer index, integer keys are label lookups first, and the
    # positional fallback is deprecated in pandas 2.x.
    signals = list(w)
    previous, current = signals[0], signals[1]
    if is_buy:
        return bool(previous == 0 and current == 1)
    return bool(previous == 1 and current == 0)
# Mark the crossover days with a 2-row rolling window over the boolean signal.
# raw is set explicitly (some pandas versions warn when it is omitted), and to
# True so that the callback receives a plain ndarray: its w[0] / w[1] lookups
# are then positional. With raw=False it would receive a Series indexed by
# date, where integer-key positional lookup is deprecated in pandas 2.x.
# The first window is incomplete and yields NaN; dropping it would shorten the
# mask, so it is filled with 0 ("no signal") before the cast to bool.
se_buy = ma10_model.rolling(2).apply(get_deal_date, raw=True).fillna(0).astype('bool')
# `args` forwards extra positional arguments to the callback (is_buy=False).
se_sale = ma10_model.rolling(2).apply(get_deal_date, raw=True, args=(False,)).fillna(0).astype('bool')

# The actual buy and sale rows.
buy_info = df[se_buy.values]
sale_info = df[se_sale.values]

# Buys and sales happen on different dates, so they are matched by position
# instead of by index. If the series ends while a position is still open there
# is one more buy than sales; keep only the completed round trips so that the
# subtraction below never pairs a buy with a missing sale.
trade_count = min(len(buy_info), len(sale_info))
no_index_buy_info = buy_info.iloc[:trade_count].reset_index(drop=True)
no_index_sale_info = sale_info.iloc[:trade_count].reset_index(drop=True)

# Profit of every round trip (sale price minus buy price).
profit = no_index_sale_info - no_index_buy_info
print(profit.sum())
print(profit.describe())
close 36.07
dtype: float64
close
count 63.000000
mean 0.572540
std 6.419356
min -9.190000
25% -3.380000
50% -1.120000
75% 3.435000
max 17.840000
def _compound_trades(buy_prices, sale_prices, capital, fee=0.0):
    """Reinvest the whole balance in every round trip and return the result.

    Args:
        buy_prices: DataFrame of buy prices, one row per trade.
        sale_prices: DataFrame of sale prices; row i closes the trade opened
            by row i of ``buy_prices``.
        capital: Starting balance.
        fee: Fraction of the sale proceeds charged on each round trip.

    Returns:
        The balance after the last paired trade: a Series with one value per
        price column, or ``capital`` unchanged when there are no trades.
    """
    balance = capital
    # zip stops at the shorter table, so a trailing buy without a matching
    # sale is ignored instead of raising IndexError.
    paired_rows = zip(buy_prices.iterrows(), sale_prices.iterrows())
    for (_, buy_row), (_, sale_row) in paired_rows:
        # Fractional share counts are allowed: the full balance is invested.
        shares = balance / buy_row
        balance = shares * sale_row * (1 - fee)
    return balance


# Starting from 10,000 dollars, how much is earned in the end?
all_money = 10000
remain = _compound_trades(no_index_buy_info, no_index_sale_info, all_money)
profit_money = remain - all_money
print(profit_money)
print(remain)

# The same simulation with a fee of 0.03% deducted from each sale's proceeds.
# NOTE(review): the fee is charged once per round trip (sale side only), as in
# the original loop - confirm whether the buy side should be charged as well.
fee = 0.0003
remain = _compound_trades(no_index_buy_info, no_index_sale_info, all_money, fee=fee)
profit_money = remain - all_money
print(profit_money)
print(remain)
close 3799.294014
dtype: float64
close 13799.294014
dtype: float64
close 3540.898129
dtype: float64
close 13540.898129
dtype: float64