这节课主要讲了numpy,pandas,tushare,talib,matlabplot用法,如果有基础的化不用听了
能看懂如下样例代码就行了
####numpy数据结构####
v = [0.5, 0.75, 1.0, 1.5, 2.0]
m = [v, v, v] # matrix of numbers
m
m[1]
m[1][0]
v1 = [0.5, 1.5]
v2 = [1, 2]
m = [v1, v2]
c = [m, m] # cube of numbers
c
c[1][1][0]
v = [0.5, 0.75, 1.0, 1.5, 2.0]
m = [v, v, v]
m
v[0] = 'Python'
m
from copy import deepcopy
v = [0.5, 0.75, 1.0, 1.5, 2.0]
m = 3 * [deepcopy(v), ]
m
v[0] = 'Python'
m
import numpy as np
a = np.array([0, 0.5, 1.0, 1.5, 2.0])
type(a)
a[:2]
a.sum()
a.std()
a.cumsum()
a * 2
a ** 2
np.sqrt(a)
b = np.array([a, a * 2])
b
b[0]
b[0, 2] # third element of first row
b.sum()
b.sum(axis=0)
b.sum(axis=1)
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
df = pd.DataFrame([10,20,30,40],columns=['number'],index = ['a','b','c','d'])
df['floats'] = (1.5,2.5,3.5,4.5)
df.append(pd.DataFrame({'number':100,'floats':5.7},index=['z']))
df['squares'] = (1,4,9,16)
df[['number','squares']].mean()
df[['number','squares']].std()
a = np.random.standard_normal((9,4))
df = pd.DataFrame(a)
#print df
df.columns = [['No1','No2','No3','No4']]
#print df
#
#print df['No2'][3]
dates = pd.date_range('2015-1-1',periods = 9,freq = 'D')
#print dates
df.index = dates
print df
#MA
import pandas as pd
import tushare as ts
import talib as ta
#从tushare获取数据
sh = ts.get_hist_data('sh')
sh = sh.sort_index()
#print sh['close']
#使用TALIB计算20日均线
ma = ta.MA(sh['close'].values,20)
sh.loc[:,'tama'] = ma
sh[['close','tama']].plot(figsize = (15,5),grid = True)
#bollinger
import numpy as np
import pandas as pd
import tushare as ts
import talib as ta
sh = ts.get_hist_data('sh')
sh = sh.sort_index()
close = sh[['close','volume']]
#函数用的price必须为narray
upper,middle,lower = ta.BBANDS(
sh['close'].values,
timeperiod=20,
nbdevup = 2,
nbdevdn = 2,
matype = 0
)
close.loc[:, 'upper'] = upper
close.loc[:,'middle']=middle
close.loc[:,'lower']=lower
close[['close','middle','upper','lower']].plot(figsize=(15,8),grid = True)
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import warnings; warnings.simplefilter('ignore')
# import seaborn as sns; sns.set()
get_ipython().magic(u'matplotlib inline')
####二维绘图####
##折线图
#一维数据集
np.random.seed(1000)
y = np.random.standard_normal(20)
x = range(len(y))
plt.plot(x, y)
plt.plot(y)
plt.plot(y.cumsum())
plt.plot(y.cumsum())
plt.grid(True) # adds a grid
plt.axis('tight')
plt.plot(y.cumsum())
plt.grid(True)
plt.xlim(-1, 20)
plt.ylim(np.min(y.cumsum()) - 1,
np.max(y.cumsum()) + 1)
plt.figure(figsize=(7, 4))
# the figsize parameter defines the
# size of the figure in (width, height)
plt.plot(y.cumsum(), 'b', lw=1.5)
plt.plot(y.cumsum(), 'ro')
plt.grid(True)
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value')
plt.title('A Simple Plot')
#二维数据集
np.random.seed(2000)
y = np.random.standard_normal((20, 2)).cumsum(axis=0)
plt.figure(figsize=(7, 4))
plt.plot(y, lw=1.5)
# plots two lines
plt.plot(y, 'ro')
# plots two dotted lines
plt.grid(True)
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value')
plt.title('A Simple Plot')
plt.figure(figsize=(7, 4))
plt.plot(y[:, 0], lw=1.5, label='1st')
plt.plot(y[:, 1], lw=1.5, label='2nd')
plt.plot(y, 'ro')
plt.grid(True)
plt.legend(loc=0)
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value')
plt.title('A Simple Plot')
y[:, 0] = y[:, 0] * 100
plt.figure(figsize=(7, 4))
plt.plot(y[:, 0], lw=1.5, label='1st')
plt.plot(y[:, 1], lw=1.5, label='2nd')
plt.plot(y, 'ro')
plt.grid(True)
plt.legend(loc=0)
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value')
plt.title('A Simple Plot')
fig, ax1 = plt.subplots()
plt.plot(y[:, 0], 'b', lw=1.5, label='1st')
plt.plot(y[:, 0], 'ro')
plt.grid(True)
plt.legend(loc=8)
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value 1st')
plt.title('A Simple Plot')
ax2 = ax1.twinx()
plt.plot(y[:, 1], 'g', lw=1.5, label='2nd')
plt.plot(y[:, 1], 'ro')
plt.legend(loc=0)
plt.ylabel('value 2nd')
plt.figure(figsize=(7, 5))
plt.subplot(211)
plt.plot(y[:, 0], lw=1.5, label='1st')
plt.plot(y[:, 0], 'ro')
plt.grid(True)
plt.legend(loc=0)
plt.axis('tight')
plt.ylabel('value')
plt.title('A Simple Plot')
plt.subplot(212)
plt.plot(y[:, 1], 'g', lw=1.5, label='2nd')
plt.plot(y[:, 1], 'ro')
plt.grid(True)
plt.legend(loc=0)
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value')
plt.figure(figsize=(9, 4))
plt.subplot(121)
plt.plot(y[:, 0], lw=1.5, label='1st')
plt.plot(y[:, 0], 'ro')
plt.grid(True)
plt.legend(loc=0)
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value')
plt.title('1st Data Set')
plt.subplot(122)
plt.bar(np.arange(len(y)), y[:, 1], width=0.5,
color='g', label='2nd')
plt.grid(True)
plt.legend(loc=0)
plt.axis('tight')
plt.xlabel('index')
plt.title('2nd Data Set')
##散点图
y = np.random.standard_normal((1000, 2))
plt.figure(figsize=(7, 5))
plt.plot(y[:, 0], y[:, 1], 'ro')
plt.grid(True)
plt.xlabel('1st')
plt.ylabel('2nd')
plt.title('Scatter Plot')
plt.figure(figsize=(7, 5))
plt.scatter(y[:, 0], y[:, 1], marker='o')
plt.grid(True)
plt.xlabel('1st')
plt.ylabel('2nd')
plt.title('Scatter Plot')
c = np.random.randint(0, 10, len(y))
plt.figure(figsize=(7, 5))
plt.scatter(y[:, 0], y[:, 1], c=c, marker='o')
plt.colorbar()
plt.grid(True)
plt.xlabel('1st')
plt.ylabel('2nd')
plt.title('Scatter Plot')
##直方图
plt.figure(figsize=(7, 4))
plt.hist(y, label=['1st', '2nd'], bins=25)
plt.grid(True)
plt.legend(loc=0)
plt.xlabel('value')
plt.ylabel('frequency')
plt.title('Histogram')
plt.figure(figsize=(7, 4))
plt.hist(y, label=['1st', '2nd'], color=['b', 'g'],
stacked=True, bins=20)
plt.grid(True)
plt.legend(loc=0)
plt.xlabel('value')
plt.ylabel('frequency')
plt.title('Histogram')
##箱线图
fig, ax = plt.subplots(figsize=(7, 4))
plt.boxplot(y)
plt.grid(True)
plt.setp(ax, xticklabels=['1st', '2nd'])
plt.xlabel('data set')
plt.ylabel('value')
plt.title('Boxplot')
#面积图
from matplotlib.patches import Polygon
def func(x):
return 0.5 * np.exp(x) + 1
a, b = 0.5, 1.5 # integral limits
x = np.linspace(0, 2)
y = func(x)
fig, ax = plt.subplots(figsize=(7, 5))
plt.plot(x, y, 'b', linewidth=2)
plt.ylim(ymin=0)
Ix = np.linspace(a, b)
Iy = func(Ix)
verts = [(a, 0)] + list(zip(Ix, Iy)) + [(b, 0)]
poly = Polygon(verts, facecolor='0.7', edgecolor='0.5')
ax.add_patch(poly)
plt.text(0.5 * (a + b), 1, r"$\int_a^b f(x)\mathrm{d}x$",
horizontalalignment='center', fontsize=20)
plt.figtext(0.9, 0.075, '$x$')
plt.figtext(0.075, 0.9, '$f(x)$')
ax.set_xticks((a, b))
ax.set_xticklabels(('$a$', '$b$'))
ax.set_yticks([func(a), func(b)])
ax.set_yticklabels(('$f(a)$', '$f(b)$'))
plt.grid(True)
##蜡烛图
import matplotlib.finance as mpf
start = (2014, 5, 1)
end = (2014, 6, 30)
quotes = mpf.quotes_historical_yahoo_ohlc('^GDAXI', start, end)
quotes[:2]
fig, ax = plt.subplots(figsize=(8, 5))
fig.subplots_adjust(bottom=0.2)
mpf.candlestick_ohlc(ax, quotes, width=0.6, colorup='b', colordown='r')
plt.grid(True)
ax.xaxis_date()
# dates on the x axis
ax.autoscale_view()
plt.setp(plt.gca().get_xticklabels(), rotation=30)
fig, ax = plt.subplots(figsize=(8, 5))
mpf.plot_day_summary_ohlc(ax, quotes, colorup='b', colordown='r')
plt.grid(True)
ax.xaxis_date()
plt.title('DAX Index')
plt.ylabel('index level')
plt.setp(plt.gca().get_xticklabels(), rotation=30)
quotes = np.array(mpf.quotes_historical_yahoo_ochl('YHOO', start, end))
fig, (ax1, ax2) = plt.subplots(2, sharex=True, figsize=(8, 6))
mpf.candlestick_ohlc(ax1, quotes, width=0.6, colorup='b', colordown='r')
ax1.set_title('Yahoo Inc.')
ax1.set_ylabel('index level')
ax1.grid(True)
ax1.xaxis_date()
plt.bar(quotes[:, 0] - 0.25, quotes[:, 5], width=0.5)
ax2.set_ylabel('volume')
ax2.grid(True)
ax2.autoscale_view()
plt.setp(plt.gca().get_xticklabels(), rotation=30)
####3d绘图####
strike = np.linspace(50, 150, 24)
ttm = np.linspace(0.5, 2.5, 24)
strike, ttm = np.meshgrid(strike, ttm)
strike[:2]
iv = (strike - 100) ** 2 / (100 * strike) / ttm
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(figsize=(9, 6))
ax = fig.gca(projection='3d')
surf = ax.plot_surface(strike, ttm, iv, rstride=2, cstride=2,
cmap=plt.cm.coolwarm, linewidth=0.5,
antialiased=True)
ax.set_xlabel('strike')
ax.set_ylabel('time-to-maturity')
ax.set_zlabel('implied volatility')
fig.colorbar(surf, shrink=0.5, aspect=5)
fig = plt.figure(figsize=(8, 5))
ax = fig.add_subplot(111, projection='3d')
ax.view_init(30, 60)
ax.scatter(strike, ttm, iv, zdir='z', s=25,
c='b', marker='^')
ax.set_xlabel('strike')
ax.set_ylabel('time-to-maturity')
ax.set_zlabel('implied volatility')
#####figure与subplot
#figure对象
fig = plt.figure()
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
plt.show()
from numpy.random import randn
plt.plot(randn(50).cumsum(), 'k--')
fig.show()
_ = ax1.hist(randn(100), bins=20, color='k', alpha=0.3)
ax2.scatter(np.arange(30), np.arange(30) + 3 * randn(30))
plt.close('all')
fig, axes = plt.subplots(2, 3)
axes
#调整subplot周围的间距
plt.subplots_adjust(left=None, bottom=None, right=None, top=None,
wspace=None, hspace=None)
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for i in range(2):
for j in range(2):
axes[i, j].hist(randn(500), bins=50, color='k', alpha=0.5)
plt.subplots_adjust(wspace=0, hspace=0)
#####matplotlib基本设置
#颜色、标记和线型
plt.figure()
plt.plot(x,y,linestyle='--',color='g')
plt.plot(randn(30).cumsum(), 'ko--')
plt.plot(randn(30).cumsum(),color='k',linestyle='dashed',marker='o')
plt.close('all')
data = randn(30).cumsum()
plt.plot(data, 'k--', label='Default')
plt.plot(data, 'k-', drawstyle='steps-post', label='steps-post')
plt.legend(loc='best')
#设置标题、轴标签、刻度以及刻度标签
fig = plt.figure(); ax = fig.add_subplot(1, 1, 1)
ax.plot(randn(1000).cumsum())
ticks = ax.set_xticks([0, 250, 500, 750, 1000])
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'],
rotation=30, fontsize='small')
ax.set_title('My first matplotlib plot')
ax.set_xlabel('Stages')
#添加图例
fig = plt.figure(); ax = fig.add_subplot(1, 1, 1)
ax.plot(randn(1000).cumsum(), 'k', label='one')
ax.plot(randn(1000).cumsum(), 'k--', label='two')
ax.plot(randn(1000).cumsum(), 'k.', label='three')
ax.legend(loc='best')
#注释以及在subplot上绘图
from datetime import datetime
import pandas as pd
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
data = pd.read_csv('d:/data/spx.csv', index_col=0, parse_dates=True)
spx = data['SPX']
spx.plot(ax=ax, style='k-')
crisis_data = [
(datetime(2007, 10, 11), 'Peak of bull market'),
(datetime(2008, 3, 12), 'Bear Stearns Fails'),
(datetime(2008, 9, 15), 'Lehman Bankruptcy')
]
for date, label in crisis_data:
ax.annotate(label, xy=(date, spx.asof(date) + 50),
xytext=(date, spx.asof(date) + 200),
arrowprops=dict(facecolor='black'),
horizontalalignment='left', verticalalignment='top')
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 1800])
ax.set_title('Important dates in 2008-2009 financial crisis')
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
color='g', alpha=0.5)
ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)
#图表的保存
fig
fig.savefig('figpath.svg')
fig.savefig('figpath.png', dpi=400, bbox_inches='tight')
from io import BytesIO
buffer = BytesIO()
plt.savefig(buffer)
plot_data = buffer.getvalue()
#matplotlib配置
plt.rc('figure', figsize=(10, 10))
font_options={'family':'monospace',
'weight':'bold','size':'small'}
plt.rc('font',**font_options)