python进阶—numpy案例(一)

首先总结一下numpy如何读取txt,excel,csv等格式的文档。

import numpy as np

arr2 = np.arange(12).reshape((3, 4))

# Write the array to a text file
# (fmt: per-value format string, delimiter: column separator).
np.savetxt(
    r"C:\Users\67505\Desktop\numpyData.txt",
    arr2,
    fmt="%d",
    delimiter=" ",
    newline="\r\n",
)

# Read the text file back as integers (dtype: element type of the result).
# unpack=False keeps the data as one 2-D array; unpack=True would transpose
# the result so that each column can be unpacked into its own variable.
values1 = np.loadtxt(
    r"C:\Users\67505\Desktop\numpyData.txt",
    dtype=int,
    delimiter=" ",
    unpack=False,
)

# Read from a CSV file (usecols selects the columns to load).
values2 = np.loadtxt(
    r"C:\Users\67505\Desktop\numpyData.csv",
    delimiter=",",
    usecols=(6, 7),
    unpack=True,
)

案例:股票价格

股票价格数据(列索引从0开始,对应下文代码中的 usecols):第0列股票代码、第1列时间、第3列开盘价、第4列最高价、第5列最低价、第6列收盘价、第7列交易数(第2列在下文代码中未使用)

# Stock prices: column 6 is loaded as c (closing price), column 7 as v (volume).
c, v = np.loadtxt(
    r"C:\Users\67505\Desktop\表1.csv",
    delimiter=",",
    usecols=(6, 7),
    unpack=True,
)

# Volume-weighted average price.
vwap = np.average(c, weights=v)

# Time-weighted average price: the weight of each observation is its position.
t = np.arange(len(c))
twap = np.average(c, weights=t)

# Arithmetic mean of the closing prices.
mean = c.mean()

# Highest and lowest closing price.
c_max = c.max()
c_min = c.min()

# Range (peak-to-peak) of the closing prices.
c_ptp = np.ptp(c)

# Median closing price.
c_median = np.median(c)

# Variance of the closing prices.
c_var = c.var()

# Simple returns: day-over-day change divided by the earlier day's price.
price_change = np.diff(c)
returns = price_change / c[:-1]

# Logarithmic returns: differences of the log prices.
log_price = np.log(c)
logReturn = np.diff(log_price)

# Indices of the trading days with a positive return.
is_positive = returns > 0
posretindices = np.where(is_positive)

# Date conversion
def date_str_to_week(s):
    """Convert a ``DD-MM-YYYY`` date string to its weekday number.

    Intended as a ``converters`` callback for ``np.loadtxt``. Depending on the
    NumPy version and the ``encoding`` argument, ``loadtxt`` hands the
    converter either ``bytes`` or ``str``, so both are accepted here.

    :param s: date text, e.g. ``b"28-01-2011"`` or ``"28-01-2011"``
    :return: weekday as an int, Monday == 0 ... Sunday == 6
    :raises ValueError: if the text does not match ``%d-%m-%Y``
    """
    import datetime

    # Only bytes need decoding; str input is used as-is.
    if isinstance(s, (bytes, bytearray)):
        s = s.decode("utf-8")
    # strip() tolerates padding around the field, which strptime would reject.
    return datetime.datetime.strptime(s.strip(), "%d-%m-%Y").date().weekday()

# Weekday analysis (converters: dict mapping a column index to the function
# that parses that column).
dates, close = np.loadtxt(
    r"C:\Users\67505\Desktop\表1.csv",
    delimiter=",",
    converters={1: date_str_to_week},
    usecols=(1, 6),
    unpack=True,
)

# Mean closing price for each weekday number 0..4: for every weekday, look up
# the rows that fall on it, take their closing prices and average them.
weekday_average = np.array(
    [np.mean(np.take(close, np.where(dates == day))) for day in range(5)]
)

# Weekday number with the highest / lowest mean closing price.
top_weekday = weekday_average.argmax()
low_weekday = weekday_average.argmin()

按照周汇总,计算每周的开盘价(monday_start)、最高价(week_high)、最低价(week_low)和收盘价(week_close)

dates, start, high, low, close = np.loadtxt(
    r"C:\Users\67505\Desktop\表1.csv",
    delimiter=",",
    converters={1: date_str_to_week},
    usecols=(1, 3, 4, 5, 6),
    unpack=True,
)
# Weekly summary: only the first 16 trading days are searched for week
# boundaries. Only `dates` is sliced; the price arrays are read through the
# row indices computed below, so they stay at full length. Slicing `close`
# here would leave it shorter than `high`/`low` and break later code that
# combines the price arrays element-wise.
dates = dates[:16]
# Index of the first Monday (weekday 0).
first_monday = np.ravel(np.where(dates == 0))[0]
# Index of the last Friday (weekday 4).
last_friday = np.ravel(np.where(dates == 4))[-1]
# Row indices of every day from the first Monday through the last Friday.
weeks_indices = np.arange(first_monday, last_friday + 1)
# Split into sub-arrays of 5 days each (one per week). np.split raises
# ValueError when the number of days is not a multiple of 5.
weeks_indices = np.split(weeks_indices, len(weeks_indices) // 5)


def summarize(a, s, h, l, c):
    """
    Collapse one week of daily quotes into open / high / low / close.

    :param a: row indices of the days in the week; ``a[0]`` is used as the
        first day and ``a[-1]`` as the last day
    :param s: daily opening prices
    :param h: daily highest prices
    :param l: daily lowest prices
    :param c: daily closing prices
    :return: tuple ``(monday_start, week_high, week_low, week_close)``:
        opening price of the first day, maximum of the week's highs,
        minimum of the week's lows, closing price of the last day
    """
    first_day, last_day = a[0], a[-1]
    highs_in_week = np.take(h, a)
    lows_in_week = np.take(l, a)
    return s[first_day], highs_in_week.max(), lows_in_week.min(), c[last_day]


# Call summarize once per row of weeks_indices (axis 1 = along each week's
# row indices); start, high, low and close are forwarded to summarize as
# extra positional arguments. Each row of the result holds the four values
# returned by summarize for that week.
week_summary = np.apply_along_axis(summarize, 1, weeks_indices, start, high, low, close)

计算真实波动幅度均值(ATR)

# Number of trading days in the ATR window.
N = 20
# Highs and lows of the last N trading days.
period_high = high[-N:]
period_low = low[-N:]
# Previous day's close for each of those N days (window shifted back by one).
# Assumes `close` has the same length as `high`/`low`; otherwise the
# subtractions below do not line up.
previous_close = close[-N - 1:-1]
# True range = the largest of
#   - the day's range:                      period_high - period_low
#   - the day's high minus previous close:  period_high - previous_close
#   - previous close minus the day's low:   previous_close - period_low
# np.maximum is a binary ufunc whose third positional argument is `out`, so
# passing three arrays would ignore (and overwrite) the third candidate.
# Two nested calls compare all three.
true_range = np.maximum(
    np.maximum(period_high - period_low, period_high - previous_close),
    previous_close - period_low,
)
# ATR series, seeded with the plain mean of the true range.
ATR = np.zeros(N)
ATR[0] = np.mean(true_range)
# Smoothing recurrence: ATR[i] = ((N - 1) * ATR[i - 1] + TR[i]) / N.
# Weighting the previous value by N instead of N - 1 would make the series
# grow with every step instead of averaging.
for i in range(1, N):
    ATR[i] = ((N - 1) * ATR[i - 1] + true_range[i]) / N

线性模型预测股价

假设:当前股价可以用之前股价的线性组合表示,也就是说当前股价等于之前的股价与各自的系数相乘再做加和的结果。

# Linear model for the stock price: the price is modelled as a linear
# combination of earlier closing prices.
# Number of closing prices per window (also the number of coefficients).
n_lags = 5
# The last n_lags closing prices, newest first.
stock_price_vec = close[-n_lags:][::-1]
# Row i of A is a window of n_lags consecutive closes that ends i + 1 days
# before the latest close.
A = np.zeros((n_lags, n_lags))
for i in range(n_lags):
    A[i, :] = close[-n_lags - i - 1:-1 - i]
# Least-squares solution of A @ x = stock_price_vec. rcond=None selects the
# documented machine-precision cutoff explicitly, which avoids the
# FutureWarning and the version-dependent default of older NumPy releases.
x, residuals, rank, s = np.linalg.lstsq(A, stock_price_vec, rcond=None)
# Predicted next price: dot product of the price vector and the coefficients.
next_stock_price = np.dot(stock_price_vec, x)

案例:计算股票的相关系数(corr)和净额成交量(OBV)

# Load column 6 of each stock's CSV file, BHP first, then VALE.
bhp, vale = (
    np.loadtxt(csv_path, delimiter=",", usecols=(6,), unpack=True)
    for csv_path in (
        r"C:\Users\67505\Desktop\numpy教程数据集\ch4code\ch4code\BHP.csv",
        r"C:\Users\67505\Desktop\numpy教程数据集\ch4code\ch4code\VALE.csv",
    )
)

# Returns: np.diff yields the consecutive differences p[i + 1] - p[i], which
# are divided by the earlier price p[i].
bhp_change = np.diff(bhp)
vale_change = np.diff(vale)
bhp_returns = bhp_change / bhp[:-1]
vale_returns = vale_change / vale[:-1]

# Covariance matrix of the two return series,
# cov(x, y) = E[(x - E[x]) * (y - E[y])].
covariance = np.cov(bhp_returns, vale_returns)

# Trace of the covariance matrix (sum of the main-diagonal elements).
return_trace = covariance.trace()

# Correlation coefficient matrix, corr(x, y) = cov(x, y) / (std(x) * std(y)).
corr = np.corrcoef(bhp_returns, vale_returns)

# Plot both return series against the observation index.
# NOTE(review): `plot` and `show` are not defined in the visible part of this
# file; presumably `from matplotlib.pyplot import plot, show` is expected
# to be in scope - confirm.
t = np.arange(len(bhp_returns))
plot(t, bhp_returns, lw=2)
plot(t, vale_returns, lw=2)
show()
# On-balance volume:
#   OBV(t) = sign(closePrice(t) - closePrice(t-1)) * volume(t) + OBV(t-1)
close_price, volumn = np.loadtxt(
    r"C:\Users\67505\Desktop\numpy教程数据集\ch4code\ch4code\BHP.csv",
    delimiter=",",
    unpack=True,
    usecols=(6, 7),
)
# change[i] is the price move from day i to day i + 1.
change = np.diff(close_price)
# The move into day i + 1 must be paired with that day's volume, hence
# volumn[1:] (pairing it with volumn[i] uses the previous day's volume).
# The running total implements the "+ OBV(t-1)" term; the result has
# len(volumn) - 1 entries, one per price change.
OBV = np.cumsum(np.sign(change) * volumn[1:])
print(OBV)

 

你可能感兴趣的:(python数据分析)