01-04 Statistical analysis of time series

  • 计算全局统计值
#Global statistics
def test_run():
    """Load prices for four tickers, plot them, and print global statistics.

    Prints the mean, median and standard deviation of the loaded
    DataFrame (pandas computes each per column by default).
    """
    # Read data
    dates = pd.date_range('2010-01-01', '2012-12-31')
    symbols = ['SPY', 'XOM', 'GOOG', 'GLD']
    df = get_data(symbols, dates)
    plot_data(df)
    # Compute global statistics for each stock.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(df.mean())    # mean
    print(df.median())  # median (DataFrame has no `medium` method)
    print(df.std())     # standard deviation
  • 计算滚动统计值
#Rolling statistics
def test_run():
    """Plot SPY prices together with their 20-day rolling mean."""
    # Read data
    dates = pd.date_range('2010-01-01', '2012-12-31')
    symbols = ['SPY']
    df = get_data(symbols, dates)

    # Plot SPY data and keep the matplotlib axis object so the rolling
    # mean can be drawn on the same chart.
    ax = df['SPY'].plot(title="SPY rolling mean", label='SPY')

    # Compute rolling mean using a 20-day window.
    # The module-level pd.rolling_mean() was removed from pandas; the
    # .rolling(...) accessor is its replacement.
    rm_SPY = df['SPY'].rolling(window=20).mean()

    # Add rolling mean to the same plot
    rm_SPY.plot(label='Rolling mean', ax=ax)

    # Add axis labels and legend
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()
  • 计算布林带
#每日回报(日收益率)daily returns
#daily_ret[t] = price[t] / price[t-1] - 1
#累计回报 cumulative returns
#cumret[t] = price[t] / price[0] - 1
#图像与 t - price 一样,只是归一化了

"""Bollinger Bands."""

import os
import pandas as pd
import matplotlib.pyplot as plt

def symbol_to_path(symbol, base_dir="data"):
    """Build the path of the CSV file for a ticker symbol.

    Args:
        symbol: Ticker symbol; converted with ``str()`` before use.
        base_dir: Directory that contains the CSV files.

    Returns:
        ``base_dir`` joined with ``"<symbol>.csv"``.
    """
    filename = str(symbol) + ".csv"
    return os.path.join(base_dir, filename)


def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: Iterable of ticker symbols. It is not modified; 'SPY' is
            added to a local copy when absent.
        dates: Index used for the resulting DataFrame.

    Returns:
        A DataFrame indexed by ``dates`` with one column per symbol (named
        after the symbol) holding that symbol's 'Adj Close' values. Rows
        where SPY has no value are dropped.
    """
    df = pd.DataFrame(index=dates)

    # Copy first so the caller's list is never mutated (and so tuples or
    # other iterables are accepted as well).
    symbols = list(symbols)
    if 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'SPY')

    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                parse_dates=True, usecols=['Date', 'Adj Close'], na_values=['nan'])
        # Name the price column after the ticker so columns do not collide.
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
            df = df.dropna(subset=["SPY"])

    return df


def plot_data(df, title="Stock prices"):
    """Draw the DataFrame as a chart with a title and labelled axes, then show it.

    Args:
        df: Object whose ``plot`` method is called to draw the chart.
        title: Title passed to ``df.plot``.
    """
    axes = df.plot(title=title, fontsize=12)
    # Label x first, then y, via the axis object returned by plot().
    for set_label, text in ((axes.set_xlabel, "Date"), (axes.set_ylabel, "Price")):
        set_label(text)
    plt.show()


def get_rolling_mean(values, window):
    """Return rolling mean of given values, using specified window size.

    Args:
        values: pandas Series or DataFrame to average.
        window: Number of observations in each window.

    Returns:
        An object of the same shape as ``values``; the first
        ``window - 1`` entries are NaN because their window is incomplete.
    """
    # pd.rolling_mean() was removed from pandas; the .rolling() accessor
    # is the replacement.
    return values.rolling(window=window).mean()


def get_rolling_std(values, window):
    """Return rolling standard deviation of given values, using specified window size.

    Args:
        values: pandas Series or DataFrame.
        window: Number of observations in each window.

    Returns:
        An object of the same shape as ``values``; the first
        ``window - 1`` entries are NaN because their window is incomplete.
    """
    # pd.rolling_std() was removed from pandas; the .rolling() accessor
    # is the replacement.
    return values.rolling(window=window).std()


def get_bollinger_bands(rm, rstd, num_std=2):
    """Return upper and lower Bollinger Bands.

    Args:
        rm: Rolling mean (the centre line of the bands).
        rstd: Rolling standard deviation.
        num_std: Number of standard deviations between the mean and each
            band. Defaults to 2, the value that was previously hard-coded.

    Returns:
        A ``(upper_band, lower_band)`` tuple, i.e.
        ``(rm + num_std * rstd, rm - num_std * rstd)``.
    """
    offset = rstd * num_std
    upper_band = rm + offset
    lower_band = rm - offset
    return upper_band, lower_band


def test_run():
    """Plot SPY prices for 2012 with a 20-day rolling mean and Bollinger Bands."""
    # Load the SPY price series for the requested date range.
    df = get_data(['SPY'], pd.date_range('2012-01-01', '2012-12-31'))
    prices = df['SPY']

    # Rolling mean and rolling standard deviation over 20 observations,
    # then the bands derived from them.
    rolling_mean = get_rolling_mean(prices, window=20)
    rolling_std = get_rolling_std(prices, window=20)
    upper, lower = get_bollinger_bands(rolling_mean, rolling_std)

    # Draw the raw prices first, then overlay the derived series on the
    # same axis in a fixed order.
    ax = prices.plot(title="Bollinger Bands", label='SPY')
    overlays = (
        (rolling_mean, 'Rolling mean'),
        (upper, 'upper band'),
        (lower, 'lower band'),
    )
    for series, label in overlays:
        series.plot(label=label, ax=ax)

    # Axis labels and legend.
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()


# Call test_run() only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    test_run()

你可能感兴趣的:(01-04 Statistical analysis of time series)