《利用Python进行数据分析》笔记---第11章金融和经济数据应用

写在前面的话:

实例中的所有数据都是在GitHub上下载的,打包下载即可。
地址是:http://github.com/pydata/pydata-book

还有一定要说明的:

我使用的是 Python 2.7。书中有一些代码存在错误,下面的代码均已在我自己的 2.7 环境下调试通过。

# coding: utf-8
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
from datetime import time

# Load the price and volume tables; the first CSV column becomes a parsed
# date index.  Raw strings keep every Windows backslash literal, so the path
# no longer depends on which sequences Python treats as escapes (the original
# had to double the backslash before "volume" because "\v" is a vertical tab).
prices = pd.read_csv(r'D:\Source Code\pydata-book-master\ch11\stock_px.csv',parse_dates = True,index_col = 0)
volume = pd.read_csv(r'D:\Source Code\pydata-book-master\ch11\volume.csv',parse_dates = True,index_col = 0)
# The two slices cover different date ranges and different columns (volume
# has no 'SPX'), so the arithmetic below has to align them by label.
prices = prices.ix['2011-09-06':'2011-09-14',['AAPL','JNJ','SPX','XOM']]
volume = volume.ix['2011-09-06':'2011-09-12',['AAPL','JNJ','XOM']]
prices
volume
# Element-wise product aligned on both index and columns; labels present in
# only one operand produce NaN.
prices * volume
# Volume-weighted average price per column.
vwap = (prices * volume).sum() / volume.sum()
vwap
# Drop columns that had no matching volume data (here 'SPX').
vwap.dropna()
# Explicit alignment: returns both frames reindexed to their common labels.
prices.align(volume,join='inner')

# Three Series whose labels only partially overlap.
s1 = Series(range(3),index=['a','b','c'])
s2 = Series(range(4),index=['d','b','c','e'])
s3 = Series(range(3),index=['f','a','c'])
# Building a DataFrame from a dict of Series aligns them on their labels;
# positions where a Series has no value become NaN.
DataFrame({'one':s1,'two':s2,'three':s3})
# Passing an explicit index restricts the result to exactly these labels.
DataFrame({'one':s1,'two':s2,'three':s3},index=list('face'))

# Three random values on a weekly (Wednesday-anchored) date index.
ts1 = Series(np.random.randn(3),index=pd.date_range('2012-6-13',periods=3,freq='W-WED'))
ts1
# Convert to business-day frequency.
# NOTE(review): this and the fill_method= keyword below are the legacy
# resample signature — confirm against the installed pandas version.
ts1.resample('B')
# Same conversion, forward-filling the days between the weekly observations.
ts1.resample('B',fill_method='ffill')

# A second series on six irregularly spaced dates.
dates = pd.DatetimeIndex(['2012-6-12','2012-6-17','2012-6-18','2012-6-21','2012-6-22','2012-6-29'])
ts2 = Series(np.random.randn(6),index=dates)
ts2
# Project ts1 onto ts2's dates, carrying the last known ts1 value forward.
ts1.reindex(ts2.index,method='ffill')
# With both operands on ts2's index the addition lines up row by row.
ts2 + ts1.reindex(ts2.index,method='ffill')

# Quarterly series (fiscal year ending in September) and an annual series
# (year ending in December), both indexed by periods rather than timestamps.
gdp = Series([1.78,1.94,2.08,2.01,2.15,2.31,2.46],index=pd.period_range('1984Q2',periods=7,freq='Q-SEP'))
infl = Series([0.025,0.045,0.037,0.04],index=pd.period_range('1982',periods=4,freq='A-DEC'))
gdp
infl
# Re-express each annual period as the Q-SEP quarter containing its end.
infl_q = infl.asfreq('Q-SEP',how='end')
infl_q
# Put the converted values on gdp's quarterly index, forward-filling gaps.
infl_q.reindex(gdp.index,method='ffill')

# Minute-frequency timestamps from 09:30 through 15:59 on 2012-06-01.
rng = pd.date_range('2012-06-01 09:30','2012-06-01 15:59',freq = 'T')
type(rng)
# Append the same intraday range shifted by 1, 2 and 3 business days,
# giving four sessions in total.
rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1,4)])
rng
# Values are simply 0.0, 1.0, 2.0, ... so positions are easy to recognise.
ts = Series(np.arange(len(rng),dtype = float),index = rng)
ts

# Indexing with a datetime.time selects that time of day on every date.
ts[time(10,0)]
# at_time is the explicit method form of the same selection.
ts.at_time(time(10,0))
# Select every observation whose time of day lies between 10:00 and 10:01.
ts.between_time(time(10,0),time(10,1))

# Take a random permutation of all positions, skip its first 700 entries and
# sort the rest: these are the positions that will be blanked out.
indexer = np.sort(np.random.permutation(len(ts))[700:])
# Work on a copy so the complete series `ts` stays intact.
irr_ts = ts.copy()
irr_ts[indexer] = np.nan
# Inspect a ten-minute window of the now-gappy series.
irr_ts['2012-06-01 09:50':'2012-06-01 10:00']

# 10:00 on four consecutive business days.
selection = pd.date_range('2012-06-01 10:00',periods=4,freq='B')
# For each timestamp, look up the most recent non-NaN value at or before it.
irr_ts.asof(selection)

# Two daily frames with the same columns; data1 is all 1.0 starting 6/12,
# data2 is all 2.0 starting one day later.
data1 = DataFrame(np.ones((6,3),dtype = float),columns = ['a','b','c'],index = pd.date_range('6/12/2012',periods = 6))
data2 = DataFrame(np.ones((6,3),dtype = float)*2,columns = ['a','b','c'],index = pd.date_range('6/13/2012',periods = 6))
# Splice: data1 up to and including 2012-06-14, data2 from 2012-06-15 on.
spliced = pd.concat([data1.ix[:'2012-06-14'],data2.ix['2012-06-15':]])
spliced
# Rebuild data2 with an extra column 'd' that data1 does not have.
data2 = DataFrame(np.ones((6,4),dtype = float)*2,columns = ['a','b','c','d'],index = pd.date_range('6/13/2012',periods = 6))
# Rows coming from data1 now have NaN in column 'd'.
spliced = pd.concat([data1.ix[:'2012-06-14'],data2.ix['2012-06-15':]])
spliced
# Fill the missing entries from data2 where data2 has a value; returns a new
# frame and leaves `spliced` unchanged.
spliced_filled =spliced.combine_first(data2)
spliced_filled
# In-place variant: overwrite=False only fills entries that are NaN.
spliced.update(data2,overwrite=False)
spliced
# Replace columns 'a' and 'c' with data1's values, aligned on the index, on a
# copy so `spliced` is not modified.
cp_spliced =spliced.copy()
cp_spliced[['a','c']] = data1[['a','c']]
cp_spliced

import pandas.io.data as web
# Download AAPL history from 2011-01-01 onward and keep the adjusted close.
price = web.get_data_yahoo('AAPL','2011-01-01')['Adj Close']
price[-5:]
# Simple return between two specific dates.
price['2011-10-03'] / price['2011-3-01'] - 1
# Period-over-period percentage changes; the first element is NaN.
returns = price.pct_change()
# Cumulative return index built by compounding the returns.
ret_index = (1 + returns).cumprod()
# The first value is NaN (no prior price), so start the index at 1.
ret_index[0] = 1
ret_index
# Returns over business-month ends, from the last index value of each month.
# NOTE(review): how= / kind= are legacy resample keywords — confirm against
# the installed pandas version.
m_returns = ret_index.resample('BM',how = 'last').pct_change()
m_returns['2012']
# Alternative: compound (1 + return) within each calendar month directly.
m_rets = (1 + returns).resample('M',how = 'prod',kind = 'period') - 1
m_rets['2012']

# Seed the stdlib RNG so the ticker names generated by rands() are repeatable.
import random;random.seed(0)
import string
# Number of synthetic ticker symbols to generate.
N = 1000
def rands(n):
    """Return a random string of ``n`` uppercase ASCII letters.

    Characters are drawn with ``random.choice``, so the result is
    reproducible after ``random.seed``.  ``n == 0`` yields ``''``.
    """
    choices = string.ascii_uppercase
    # ``range`` (not the Python-2-only ``xrange``) keeps this runnable on
    # both Python 2 and Python 3; the sequence of draws is unchanged.
    return ''.join(random.choice(choices) for _ in range(n))
# N random five-letter ticker symbols.  ``range`` replaces the Python-2-only
# ``xrange`` so this line also runs on Python 3.
tickers = np.array([rands(5) for _ in range(N)])
tickers
# Size of the synthetic portfolio: only the first M tickers get factor data.
M = 500
# Three random factor columns, each standard-normal noise scaled by 1/200
# around a different centre.
df = DataFrame({'Momentum':np.random.randn(M) / 200 + 0.03,
    'Value':np.random.randn(M) / 200 + 0.08,
    'ShortInterest':np.random.randn(M) / 200 - 0.02},
    index = tickers[:M])
df
# Randomly assign each of the N tickers to one of two industries.
ind_names = np.array(['FINANCIAL','TECH'])
sampler = np.random.randint(0,len(ind_names),N)
industries = Series(ind_names[sampler],index = tickers,name = 'industry')
industries
# Group the factor rows by industry; `industries` covers all N tickers while
# df holds only the first M, so the grouping key is matched by ticker label.
by_industry = df.groupby(industries)
by_industry.mean()
by_industry.describe()
def zscore(group):
    """Standardize ``group``: subtract its mean, then divide by its std.

    Works on anything exposing ``mean()`` and ``std()`` with element-wise
    arithmetic (e.g. a pandas Series or DataFrame).
    """
    centered = group - group.mean()
    spread = group.std()
    return centered / spread
# Standardize every factor within its industry group.
df_stand = by_industry.apply(zscore)
# Sanity check: per-industry mean and std of the standardized values.
df_stand.groupby(industries).agg(['mean','std'])
# Rank within each industry, largest value first.
ind_rank = by_industry.rank(ascending = False)
# Smallest and largest rank seen in each industry.
ind_rank.groupby(industries).agg(['min','max'])
# Rank within industry first, then standardize the ranks.
by_industry.apply(lambda x : zscore(x.rank())).head()

from numpy.random import rand
# Three uniform-random factor vectors of length 1000.
fac1,fac2,fac3 = np.random.rand(3,1000)
# Tickers in random order (the first 1000 entries of a permutation of N).
ticker_subset = tickers.take(np.random.permutation(N)[:1000])
# Synthetic portfolio: a fixed linear mix of the factors plus uniform noise.
port = Series(0.7 * fac1 - 1.2 * fac2 + 0.3 * fac3 + rand(1000),index=ticker_subset)
factors = DataFrame({'f1':fac1,'f2':fac2,'f3':fac3},index=ticker_subset)
# Correlation of each factor column with the portfolio.
factors.corrwith(port)
# Regress the portfolio on the factors and read off the coefficients.
# NOTE(review): pd.ols exists only in older pandas releases — confirm the
# installed version provides it.
pd.ols(y=port,x=factors).beta
def beta_exposure(chunk,factors=None):
    """Regress ``chunk`` on ``factors`` and return the fitted coefficients.

    ``chunk`` is passed as ``y`` and ``factors`` as ``x`` to ``pd.ols``; the
    ``beta`` attribute of the fitted model is returned.
    NOTE(review): pd.ols exists only in older pandas releases — confirm the
    installed version provides it.
    """
    fitted = pd.ols(y=chunk, x=factors)
    return fitted.beta
# Split the portfolio values by industry.
by_ind = port.groupby(industries)
# Run the factor regression separately on each industry's values.
exposures = by_ind.apply(beta_exposure,factors=factors)
# Pivot the inner index level into columns for a table-shaped view.
exposures.unstack()

import matplotlib.pyplot as plt
# Download SPY history for a fixed window.
data = web.get_data_yahoo('SPY','2006-01-01','2012-07-27')  # pass an explicit end date
data
# Adjusted close prices and their period-over-period percentage changes.
px = data['Adj Close']
returns = px.pct_change()
def to_index(rets):
    """Turn a return series into a cumulative return index.

    The returns are compounded with ``(1 + rets).cumprod()``.  The element
    just before the first non-null value is then set to 1 so the index has
    a starting point.  When the very first value is already non-null (or
    every value is null), position 0 itself is set to 1.

    Parameters
    ----------
    rets : pandas Series of simple returns; leading NaNs are allowed.

    Returns
    -------
    A new Series of the same length; an empty input is returned as an
    empty index unchanged.
    """
    index = (1 + rets).cumprod()
    if len(index) == 0:
        # argmax on an empty array raises; there is nothing to anchor.
        return index
    # Use the underlying array so argmax is always a *position*.  On some
    # pandas versions Series.argmax returns an index label instead, which
    # breaks the "- 1" arithmetic on a date-indexed series.
    first_valid = index.notnull().values.argmax()
    first_loc = max(first_valid - 1, 0)
    # Positional assignment on the Series itself; writing through `.values`
    # is not guaranteed to reach the Series' data on every pandas version.
    index.iloc[first_loc] = 1
    return index
def trend_signal(rets,lookback,lag):
    """Return a lagged rolling sum of ``rets``.

    Parameters
    ----------
    rets : return series (or frame) to sum.
    lookback : rolling window length, in observations.
    lag : number of periods to shift the resulting signal forward.

    A window yields a value once it holds at least ``lookback - 5``
    observations.
    """
    min_periods = lookback - 5
    if hasattr(rets, 'rolling'):
        # pandas versions that provide the .rolling() accessor; the
        # module-level pd.rolling_sum has been removed from newer releases.
        signal = rets.rolling(lookback, min_periods=min_periods).sum()
    else:
        # Legacy pandas without .rolling(): keep the original call.
        signal = pd.rolling_sum(rets, lookback, min_periods=min_periods)
    return signal.shift(lag)
# Sum of returns over a 100-observation window, lagged by 3 periods.
signal = trend_signal(returns,100,3)
signal
# Reduce the signal to weekly (Friday) frequency, then spread it back onto
# business days by forward-filling.
# NOTE(review): resample without how= and with fill_method= is the legacy
# API — confirm the default aggregation on the installed pandas version.
trade_friday = signal.resample('W-FRI').resample('B',fill_method = 'ffill')
# Shift the signal one more period before scaling that period's return by it.
trade_rets = trade_friday.shift(1) * returns
# Plot the cumulative return index of the strategy.
to_index(trade_rets).plot()
plt.show()
# 250-observation rolling standard deviation of returns, scaled by sqrt(250).
vol = pd.rolling_std(returns,250,min_periods = 200) * np.sqrt(250)
def sharpe(rets, ann=250):
    """Return mean over standard deviation of ``rets``, scaled by sqrt(ann).

    ``ann`` is the number of periods used for the scaling (default 250).
    """
    average = rets.mean()
    dispersion = rets.std()
    return average / dispersion * np.sqrt(ann)
# Bucket the strategy returns by quartile of `vol` and compute the Sharpe
# ratio within each bucket.
trade_rets.groupby(pd.qcut(vol,4)).agg(sharpe)

你可能感兴趣的:(Python)