注:这是我参加招行Fintech精英训练营金融工程课程时跟着做的笔记,代码是在PyCharm上编写的。
里面用到的股票数据均来自雅虎财经(https://finance.yahoo.com/),数据下载方法我在(一)中有介绍。
量化交易基础:使用Python处理金融数据
01-04 时间序列统计分析
#4. 计算全局统计值
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def symbol_to_path(symbol, base_dir="data"):
    """Return the CSV file path for the given ticker symbol.

    Args:
        symbol: Ticker symbol; it is converted with ``str()`` before use.
        base_dir: Directory that holds the per-symbol CSV files.

    Returns:
        ``base_dir`` joined with ``"<symbol>.csv"``.
    """
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))
def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: Iterable of ticker symbols; each one is resolved to a CSV
            file through ``symbol_to_path``.
        dates: Index (e.g. a ``pd.date_range``) used for the resulting frame.

    Returns:
        A DataFrame indexed by ``dates`` with one column per symbol that
        holds that symbol's ``Adj Close`` values.
    """
    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(
            symbol_to_path(symbol),
            index_col="Date",
            parse_dates=True,
            usecols=["Date", "Adj Close"],
            na_values=["nan"],
        )
        # Name the price column after the ticker so columns stay distinct.
        df_temp = df_temp.rename(columns={"Adj Close": symbol})
        df = df.join(df_temp)
        if symbol == "SPY":
            # Drop dates SPY did not trade: remove every row whose SPY
            # value is missing.
            df = df.dropna(subset=["SPY"])
    return df
def plot_data(df, title="Stock prices"):
    """Plot every column of ``df`` on one chart and display it.

    Args:
        df: DataFrame (or Series) to plot.
        title: Title shown above the chart.
    """
    # A font size of 2 renders the tick labels unreadable; 12 is legible.
    ax = df.plot(title=title, fontsize=12)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    plt.show()  # must be called to show plots in some environments
def get_rolling_mean(values, window):
    """Return rolling mean of given values, using specified window size.

    Args:
        values: pandas Series or DataFrame to smooth.
        window: Window size passed to ``rolling()``.

    Returns:
        An object of the same shape as ``values``; positions that do not yet
        have a full window are NaN.
    """
    return values.rolling(window).mean()
def get_rolling_std(values, window):
    """Return rolling standard deviation of given values, using specified window size.

    Args:
        values: pandas Series or DataFrame.
        window: Window size passed to ``rolling()``.

    Returns:
        An object of the same shape as ``values``; positions that do not yet
        have a full window are NaN.
    """
    return values.rolling(window).std()
def get_bollinger_bands(rm, rstd, num_std=2):
    """Return upper and lower Bollinger Bands.

    Args:
        rm: Rolling mean.
        rstd: Rolling standard deviation.
        num_std: How many standard deviations the bands sit away from the
            mean. Defaults to 2.

    Returns:
        A ``(upper_band, lower_band)`` tuple.
    """
    offset = num_std * rstd
    upper_band = rm + offset
    lower_band = rm - offset
    return upper_band, lower_band
def compute_daily_returns(df):
    """Compute and return the daily return values.

    Each value is ``price[t] / price[t - 1] - 1``. The first row has no
    previous row, so it is set to 0 instead of being left as NaN.

    Args:
        df: pandas DataFrame (or Series) of prices, one row per day.

    Returns:
        A new object of the same shape as ``df`` holding the daily returns;
        ``df`` itself is not modified.
    """
    # shift(1) aligns every row with the previous one, so no manual slicing
    # is needed.
    daily_returns = (df / df.shift(1)) - 1
    # Guard against empty input, where there is no row 0 to overwrite.
    if len(daily_returns) > 0:
        # iloc[0] (without a column indexer) works for Series as well.
        daily_returns.iloc[0] = 0
    return daily_returns
def test_run():
    """Demo driver: plot July 2010 prices and daily returns for SPY and XOM.

    The earlier exercises from the same lesson (global statistics, rolling
    mean, Bollinger Bands) are kept below as commented-out snippets; enable
    one section at a time to reproduce it.
    """
    # --- Global statistics ---------------------------------------------
    # dates = pd.date_range('2010-01-01', '2010-12-31')
    # symbols = ['SPY', 'XOM', 'GOOG', 'GLD']
    # df = get_data(symbols, dates)
    # plot_data(df)
    # print(df.mean())  # compute the mean of each column
    # print(df.median())
    # print(df.std())

    # --- Rolling statistics --------------------------------------------
    # dates = pd.date_range('2010-01-01', '2010-12-31')
    # symbols = ['SPY']
    # df = get_data(symbols, dates)
    # ax = df['SPY'].plot(title="SPY rolling mean", label='SPY')
    # rm_SPY = df['SPY'].rolling(20).mean()
    # rm_SPY.plot(label='Rolling mean', ax=ax)
    # plt.show()

    # --- Bollinger Bands -----------------------------------------------
    # symbols = ['SPY']
    # df = get_data(symbols, dates)
    # rm_SPY = get_rolling_mean(df['SPY'], window=20)
    # rstd_SPY = get_rolling_std(df['SPY'], window=20)
    # upper_band, lower_band = get_bollinger_bands(rm_SPY, rstd_SPY)
    # # Plot raw SPY values, rolling mean and Bollinger Bands
    # ax = df['SPY'].plot(title="Bollinger Bands", label='SPY')
    # rm_SPY.plot(label='Rolling mean', ax=ax)
    # upper_band.plot(label='upper band', ax=ax)
    # lower_band.plot(label='lower band', ax=ax)
    # # Add axis labels and legend
    # ax.set_xlabel("Date")
    # ax.set_ylabel("Price")
    # ax.legend(loc='upper left')
    # plt.show()

    # --- Daily returns (active) ----------------------------------------
    dates = pd.date_range('2010-07-01', '2010-07-31')  # one month only
    symbols = ['SPY', 'XOM']
    df = get_data(symbols, dates)
    plot_data(df)

    daily_returns = compute_daily_returns(df)
    plot_data(daily_returns, title="Daily returns")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_run()