注:这是我参加招行 FinTech 精英训练营金融工程课程时跟着做的笔记,代码是在 PyCharm 上编写的。
里面用到的股票数据均来自雅虎财经。
雅虎财经股票数据下载方法
点击进入雅虎财经首页:https://finance.yahoo.com/
在搜索框中输入关注的股票或公司缩写,以苹果(AAPL)为例,输入“AAPL”。
得到如下界面。点击 Historical Data,即可选择时间范围、数据类型和频率;点击 Apply 之后,再点击下方的 Download Data 即可下载数据。
量化交易基础:使用python处理金融数据
01-02:处理多支股票
##主要是pandas
#8.创建空的dataframe
import pandas as pd


def test_run():
    """Build and print a DataFrame of adjusted closes for several tickers.

    Starts from an empty frame indexed by every calendar day from
    2010-01-22 to 2010-01-26, inner-joins the SPY prices so that only dates
    present in ``SPY.csv`` remain, then left-joins GOOG, IBM and GLD.
    Each ``<symbol>.csv`` is read from the current working directory and
    must contain ``Date`` and ``Adj Close`` columns.
    """
    start_date = '2010-01-22'
    end_date = '2010-01-26'
    dates = pd.date_range(start_date, end_date)

    # Create an empty DataFrame indexed by the requested dates.
    df1 = pd.DataFrame(index=dates)

    # Read SPY data into a temporary DataFrame.
    dfSPY = pd.read_csv("SPY.csv", index_col="Date",
                        parse_dates=True, usecols=['Date', 'Adj Close'],
                        na_values=['nan'])

    # Rename the 'Adj Close' column to 'SPY' to prevent a name clash when
    # further symbols are joined in.
    dfSPY = dfSPY.rename(columns={'Adj Close': 'SPY'})

    # Two equivalent ways to keep only the dates SPY has data for:
    #   1. default (left) join, then drop the NaN rows:
    #          df1 = df1.join(dfSPY); df1 = df1.dropna()
    #   2. pass `how` to join() directly. `how` accepts 'left' (default,
    #      keep the left frame's index), 'right' (keep the right frame's
    #      index), 'outer' (union) and 'inner' (intersection).
    # The second form is used here, so no separate dropna() is needed.
    df1 = df1.join(dfSPY, how='inner')

    # Read in more stocks; the default left join keeps df1's (SPY) dates.
    symbols = ['GOOG', 'IBM', 'GLD']
    for symbol in symbols:
        df_temp = pd.read_csv("{}.csv".format(symbol), index_col="Date",
                              parse_dates=True, usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df1 = df1.join(df_temp)

    # Show the combined frame; without this the result was silently discarded.
    print(df1)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_run()
12. 练习:读取数据的实用函数
##utility functions
import os

import pandas as pd


# os.path.join(path1, path2, ...) combines path components; if one of them
# is an absolute path, every component before it is discarded.
def symbol_to_path(symbol, base_dir="data"):
    """Return the CSV file path for a ticker symbol.

    Args:
        symbol: Ticker symbol; converted with ``str()`` before formatting.
        base_dir: Directory that holds the ``<symbol>.csv`` files.

    Returns:
        ``base_dir`` joined with ``"<symbol>.csv"``.
    """
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))
def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: Ticker symbols to load. ``'SPY'`` is put first as a
            reference if it is absent; the caller's sequence is left
            untouched.
        dates: Index for the resulting frame (e.g. from ``pd.date_range``).

    Returns:
        A DataFrame with one column per symbol holding that symbol's
        ``Adj Close`` values, restricted to the dates for which SPY has a
        value.
    """
    # Work on a copy so that adding SPY does not mutate the caller's list.
    symbols = list(symbols)
    if 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'SPY')

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol),
                              index_col="Date", parse_dates=True,
                              usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':
            # Drop the rows whose SPY value is missing, i.e. the dates on
            # which SPY did not trade.
            df = df.dropna(subset=["SPY"])
    return df
def test_run():
    """Load 2010 prices for a few tickers and demonstrate DataFrame slicing.

    Prints the full frame returned by ``get_data`` and then several
    label-based slices of it (by date range, by column, and by both).
    """
    dates = pd.date_range('2010-01-22', '2010-12-31')
    symbols = ['GOOG', 'IBM', 'GLD']  # SPY will be added in get_data()
    df = get_data(symbols, dates)
    print(df)

    # Slice by row range (dates). DataFrame.ix was removed from pandas;
    # .loc is the label-based selector that replaces it here.
    print(df.loc['2010-01-01':'2010-01-31'])

    # Slice by column (symbols).
    print(df['GOOG'])
    print(df[['IBM', 'GLD']])

    # Slice by row and column.
    print(df.loc['2010-03-10':'2010-03-15', ['SPY', 'IBM']])
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_run()
##17. 绘制多只股票图形
import matplotlib.pyplot as plt


def plot_data(df, title="Stock prices"):
    """Plot the columns of ``df`` with a title and axis labels.

    Args:
        df: DataFrame to plot via ``df.plot``.
        title: Title shown above the plot.
    """
    # fontsize=12 and the "Date" label match the later plot_data definition
    # in this file (the earlier values were fontsize=2 and "Data").
    ax = df.plot(title=title, fontsize=12)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    plt.show()  # must be called to show plots in some environments
# 18. slice and plot
import os

import pandas as pd
import matplotlib.pyplot as plt


def plot_selected(df, columns, start_index, end_index):
    """Plot the desired columns over index values in the given range.

    Args:
        df: DataFrame to slice.
        columns: Column labels to include in the plot.
        start_index: First index label of the slice.
        end_index: Last index label of the slice.
    """
    # DataFrame.ix was removed from pandas; .loc performs the same
    # label-based row/column selection.
    plot_data(df.loc[start_index:end_index, columns], title="Selected data")
def symbol_to_path(symbol, base_dir="data"):
    """Return CSV file path given ticker symbol.

    Args:
        symbol: Ticker symbol; converted with ``str()`` before formatting.
        base_dir: Directory that holds the ``<symbol>.csv`` files.

    Returns:
        ``base_dir`` joined with ``"<symbol>.csv"``.
    """
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))
def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: Ticker symbols to load. ``'SPY'`` is put first as a
            reference if it is absent; the caller's sequence is left
            untouched.
        dates: Index for the resulting frame (e.g. from ``pd.date_range``).

    Returns:
        A DataFrame with one column per symbol, restricted to the dates for
        which SPY has a value. Note that the values are normalised: every
        column is divided by its value in the first remaining row. An empty
        frame is returned as is.
    """
    # Work on a copy so that adding SPY does not mutate the caller's list.
    symbols = list(symbols)
    if 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'SPY')

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                              parse_dates=True, usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
            df = df.dropna(subset=["SPY"])

    if df.empty:
        # No first row to divide by.
        return df
    # DataFrame.ix was removed from pandas; .iloc[0] selects the first row
    # by position, which is what .ix[0, :] did on this date-indexed frame.
    return df / df.iloc[0]
# 19. normalization
def normalize_data(df):
    """Normalise each column of ``df`` by its value in the first row.

    Args:
        df: DataFrame of numeric values.

    Returns:
        A new DataFrame in which every column has been divided by that
        column's first-row value, so the first row becomes 1.0. An empty
        frame is returned unchanged because it has no first row.
    """
    if df.empty:
        return df
    return df / df.iloc[0]
def plot_data(df, title="Stock prices"):
    """Plot stock prices with a custom title and meaningful axis labels.

    Args:
        df: DataFrame to plot via ``df.plot``.
        title: Title shown above the plot.
    """
    ax = df.plot(title=title, fontsize=12)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    plt.show()  # display the figure
def test_run():
    """Load 2010 data for a few tickers and plot a selected slice of it."""
    # Define a date range.
    dates = pd.date_range('2010-01-01', '2010-12-31')

    # Choose stock symbols to read.
    symbols = ['GOOG', 'IBM', 'GLD']  # SPY will be added in get_data()

    # Get stock data.
    df = get_data(symbols, dates)

    # Slice and plot.
    plot_selected(df, ['SPY', 'IBM'], '2010-03-01', '2010-04-01')
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_run()