Fintech金融工程课程笔记(一:使用Python处理多支股票)

注:这是我参加招行Fintech精英训练营金融工程课程跟着做的笔记,代码是在Pycharm上写的。

里面用到的股票数据均来自雅虎财经。

雅虎财经股票数据下载方法

点击进入雅虎财经首页:https://finance.yahoo.com/

在搜索框中输入关注的股票或公司缩写,以苹果(AAPL)为例,输入“AAPL”。

得到如下界面。点击Historical Data,即可选择时间、变量和频率,Apply之后,再点击下方Download Data即可。

雅虎财经股票数据下载界面

量化交易基础:使用python处理金融数据

01-02:处理多支股票


##主要是pandas

#8.创建空的dataframe

import pandas as pd

def test_run():
    """Join the adjusted closes of SPY, GOOG, IBM and GLD into one DataFrame.

    Reads ``SPY.csv``, ``GOOG.csv``, ``IBM.csv`` and ``GLD.csv`` from the
    current working directory, keeps only the dates of the requested range
    that are present in the SPY data, and prints the combined table.
    """
    start_date = '2010-01-22'
    end_date = '2010-01-26'
    dates = pd.date_range(start_date, end_date)

    # Create an empty DataFrame indexed by the requested dates.
    df1 = pd.DataFrame(index=dates)

    # Read SPY data into a temporary DataFrame.
    dfSPY = pd.read_csv("SPY.csv", index_col="Date",
                        parse_dates=True, usecols=['Date', 'Adj Close'],
                        na_values=['nan'])

    # Rename the 'Adj Close' column to 'SPY' to prevent a name clash when
    # several stocks are joined into the same frame.
    dfSPY = dfSPY.rename(columns={'Adj Close': 'SPY'})

    # One way to combine the frames is a default (left) join followed by
    # dropna() to remove the dates that have no SPY row.
    # Passing how='inner' instead keeps only the dates present in both
    # frames, so no dropna() is needed afterwards. The accepted values are
    # 'left' (default, keep the left index), 'right' (keep the right index),
    # 'outer' (union of both) and 'inner' (intersection of both).
    df1 = df1.join(dfSPY, how='inner')

    # Read in more stocks, one column per symbol.
    symbols = ['GOOG', 'IBM', 'GLD']
    for symbol in symbols:
        df_temp = pd.read_csv("{}.csv".format(symbol), index_col="Date",
                              parse_dates=True, usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        # Left join: df1's index (already restricted by SPY) is preserved.
        df1 = df1.join(df_temp)

    # Show the result; without this the joined frame is silently discarded.
    print(df1)

# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    test_run()

12. 练习:读取数据的实用函数

##utility functions

import os

import pandas as pd

#os.path.join(path1,path2,...):将path进行组合,若其中有绝对路径,则之前的path将被删除。

def symbol_to_path(symbol, base_dir="data"):
    """Return the CSV file path for a ticker symbol.

    Args:
        symbol: Ticker symbol; it is converted with ``str()`` before use.
        base_dir: Directory that holds the CSV files (default ``"data"``).

    Returns:
        ``base_dir`` joined with ``"<symbol>.csv"``.
    """
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))

def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: Iterable of ticker symbols. 'SPY' is added at the front as
            a reference if it is absent; the caller's object is not modified.
        dates: Index for the resulting DataFrame (e.g. from
            ``pd.date_range``).

    Returns:
        A DataFrame indexed by ``dates`` with one column per symbol, limited
        to the rows where the SPY column has a value.
    """
    df = pd.DataFrame(index=dates)

    # Work on a copy so that adding SPY does not mutate the caller's list.
    symbols = list(symbols)
    if 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'SPY')

    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol),
                              index_col="Date", parse_dates=True,
                              usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
            # Remove only the rows whose SPY value is missing; gaps in the
            # other columns are kept.
            df = df.dropna(subset=["SPY"])

    return df

def test_run():
    """Load several stocks with get_data() and print a few example slices."""
    dates = pd.date_range('2010-01-22', '2010-12-31')
    symbols = ['GOOG', 'IBM', 'GLD']  # SPY will be added in get_data()
    df = get_data(symbols, dates)
    print(df)

    # Slice by row range (dates). DataFrame.ix was removed from pandas, so
    # the label-based .loc selector is used instead.
    print(df.loc['2010-01-01':'2010-01-31'])

    # Slice by column (symbols): a single label, then a list of labels.
    print(df['GOOG'])
    print(df[['IBM', 'GLD']])

    # Slice by row and column.
    print(df.loc['2010-03-10':'2010-03-15', ['SPY', 'IBM']])

# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    test_run()

##17. 绘制多只股票图形

import matplotlib.pyplot as plt

def plot_data(df, title="Stock prices"):
    """Plot stock prices with a title and labelled axes.

    Args:
        df: DataFrame to plot; its ``plot`` method is used.
        title: Title shown above the chart.
    """
    # fontsize=12 keeps the tick labels legible (2 is too small to read).
    ax = df.plot(title=title, fontsize=12)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    plt.show()  # must be called to show plots in some environments

# 18. slice and plot

import os

import pandas as pd

import matplotlib.pyplot as plt

def plot_selected(df, columns, start_index, end_index):
    """Plot the desired columns over index values in the given range.

    Args:
        df: DataFrame holding the data to plot.
        columns: Column labels to include in the plot.
        start_index: First index label of the row slice.
        end_index: Last index label of the row slice.
    """
    # DataFrame.ix was removed from pandas; .loc does the same label-based
    # row/column selection.
    plot_data(df.loc[start_index:end_index, columns], title="Selected data")

def symbol_to_path(symbol, base_dir="data"):
    """Return CSV file path given ticker symbol.

    Args:
        symbol: Ticker symbol; it is converted with ``str()`` before use.
        base_dir: Directory that holds the CSV files (default ``"data"``).

    Returns:
        ``base_dir`` joined with ``"<symbol>.csv"``.
    """
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))

def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: Iterable of ticker symbols. 'SPY' is added at the front as
            a reference if it is absent; the caller's object is not modified.
        dates: Index for the resulting DataFrame (e.g. from
            ``pd.date_range``).

    Returns:
        A DataFrame indexed by ``dates`` with one column of adjusted closes
        per symbol, limited to the rows where the SPY column has a value.
        Prices are returned as read from the files, not rescaled.
    """
    df = pd.DataFrame(index=dates)

    # Work on a copy so that adding SPY does not mutate the caller's list.
    symbols = list(symbols)
    if 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'SPY')

    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                              parse_dates=True, usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
            df = df.dropna(subset=["SPY"])

    # Return the raw adjusted closes, as the docstring promises; dividing by
    # the first row here would hand callers rescaled values instead.
    return df

# 19. normalization

def normalize_data(df):
    """Normalize stock prices using the first row of the DataFrame.

    Every column is divided by its own value in the first row, so each
    column starts at 1.0 and the columns become directly comparable.

    Args:
        df: DataFrame of prices, one column per symbol.

    Returns:
        A new DataFrame of the same shape; ``df`` itself is left unchanged.
        An empty frame is returned as an empty copy.
    """
    if df.empty:
        # There is no first row to divide by.
        return df.copy()
    return df / df.iloc[0, :]

def plot_data(df, title="Stock prices"):
    """Plot stock prices with a custom title and meaningful axis labels.

    Args:
        df: DataFrame to plot; its ``plot`` method is used.
        title: Title shown above the chart.
    """
    ax = df.plot(title=title, fontsize=12)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    plt.show()  # must be called to show plots in some environments

def test_run():
    """Load a year of stock data and plot a selected slice of it."""
    # Define a date range.
    dates = pd.date_range('2010-01-01', '2010-12-31')

    # Choose stock symbols to read.
    symbols = ['GOOG', 'IBM', 'GLD']  # SPY will be added in get_data()

    # Get stock data.
    df = get_data(symbols, dates)

    # Slice and plot.
    plot_selected(df, ['SPY', 'IBM'], '2010-03-01', '2010-04-01')

# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    test_run()

你可能感兴趣的:(Fintech金融工程课程笔记(一:使用Python处理多支股票))