Numpy学习笔记——常用函数

3.1 文件读写

3.2 动手实践:读写文件

savetxt():将数组保存到文件中

# Build a 2x2 identity matrix, print it, and write it to eye.txt as text.
i2 = np.eye(2)
print i2
np.savetxt("eye.txt", i2)

3.3 CSV 文件

3.4 动手实践:读入CSV 文件

NumPy中的loadtxt函数可以方便地读取CSV文件,自动切分字段,并将数据载入NumPy数组。
usecols的参数为一个元组,以获取第7字段至第8字段的数据;
unpack参数设置为True,意思是分拆存储不同列的数据,即分别将收盘价和成交量的数组赋值给变量c和v;
调用average函数,并将v作为权重参数使用;

c,v=np.loadtxt('data.csv', delimiter=',', usecols=(6,7), unpack=True)

vwap = np.average(c, weights=v)
print "VWAP =", vwap
The output is
VWAP = 350.589549353
print "mean =", np.mean(c)
mean = 351.037666667

t = np.arange(len(c))
print "twap =", np.average(c, weights=t)

3.8 动手实践:找到最大值和最小值

ptp函数可以计算数组的取值范围,即数组中最大值与最小值之差(peak to peak)

# Read columns 4 and 5 of data.csv into h and l (one array per column).
h,l=np.loadtxt('data.csv',delimiter=',', usecols=(4,5), unpack=True)
print "highest =", np.max(h)
print "lowest =", np.min(l)

# np.ptp ("peak to peak") is the maximum minus the minimum of each array.
print "Spread high price", np.ptp(h)
print "Spread low price", np.ptp(l)

3.9 统计分析

中位数函数、排序函数

//中位数
print "median =", np.median(c)
//排序
sorted_close = np.msort(c)
N = len(c)
//方差
print "variance =", np.var(c)
//取对数
np.log(c)

NumPy中的diff函数可以返回一个由相邻数组元素的差值构成的数组。

//计算股票收益率
returns = np.diff( arr ) / arr[ : -1]
//标准差
print "Standard deviation =", np.std(returns)
//获取满足指定条件的元素 where
posretindices = np.where(returns > 0)

3.13 日期分析

获取28-01-2011类型字符串的日期对应的星期几

def datestr2num(s):
    """Return the weekday number of a ``dd-mm-yyyy`` date string.

    The result follows ``date.weekday()``: Monday is 0 and Sunday is 6.

    ``s`` may be ``str`` or ``bytes``. Depending on the NumPy version and the
    ``encoding`` argument, ``loadtxt`` may hand raw bytes to its converters,
    which ``strptime`` would otherwise reject.
    """
    # Imported locally so the function does not rely on a module-level import.
    import datetime

    if isinstance(s, bytes):
        s = s.decode("ascii")
    return datetime.datetime.strptime(s, "%d-%m-%Y").date().weekday()
# Load columns 1 and 6; the converters mapping sends every value of column 1
# (the date string) through datestr2num, so dates holds weekday numbers.
dates, close = np.loadtxt('data.csv', delimiter=',', usecols=(1, 6),
                          converters={1: datestr2num}, unpack=True)
# Array of 5 zeros, filled with one average per weekday number 0..4 below.
averages = np.zeros(5)

take函数可以按照这些索引值从数组中取出相应的元素

for i in range(5):
    indices = np.where(dates == i)
    prices = np.take(close, indices)
    avg = np.mean(prices)
    print "Day", i, "prices", prices, "Average", avg
    averages[i] = avg
top = np.max(averages)
print "Highest average", top
//返回最大值的索引值
print "Top day of the week", np.argmax(averages)
bottom = np.min(averages)
print "Lowest average", bottom
//返回最小值的索引值
print "Bottom day of the week", np.argmin(averages)
def datestr2num(s):
    """Return the weekday number of a ``dd-mm-yyyy`` date string.

    The result follows ``date.weekday()``: Monday is 0 and Sunday is 6.

    ``s`` may be ``str`` or ``bytes``. Depending on the NumPy version and the
    ``encoding`` argument, ``loadtxt`` may hand raw bytes to its converters,
    which ``strptime`` would otherwise reject.
    """
    # Imported locally so the function does not rely on a module-level import.
    from datetime import datetime

    if isinstance(s, bytes):
        s = s.decode("ascii")
    return datetime.strptime(s, "%d-%m-%Y").date().weekday()
# Load columns 1, 3, 4, 5 and 6; column 1 is converted to a weekday number
# by datestr2num.
# NOTE(review): the name `open` shadows the Python builtin of the same name.
dates, open, high, low, close=np.loadtxt('data.csv', delimiter=',', usecols=(1, 3, 4,5, 6), converters={1: datestr2num}, unpack=True)
# Keep only the first 16 entries of close and dates.
close = close[:16]
dates = dates[:16]

# Index of the first entry whose weekday number is 0 (Monday).
first_monday = np.ravel(np.where(dates == 0))[0]
print "The first Monday index is", first_monday

# Index of the last entry whose weekday number is 4 (Friday).
last_friday = np.ravel(np.where(dates == 4))[-1]
print "The last Friday index is", last_friday

# Consecutive indices from the first Monday through the last Friday.
weeks_indices = np.arange(first_monday, last_friday + 1)
print "Weeks indices initial", weeks_indices
# Split into 3 equal-length sub-arrays; np.split raises an error if the
# length is not divisible by 3.
weeks_indices = np.split(weeks_indices, 3)
print "Weeks indices after split", weeks_indices

def summarize(a, o, h, l, c):
    """Summarize the prices selected by the index sequence ``a``.

    Returns the tuple ``("APPL", o[a[0]], max of h over a, min of l over a,
    c[a[-1]])``: the open at the first index, the highest high and lowest
    low over all indices, and the close at the last index.
    """
    first, last = a[0], a[-1]
    highs = np.take(h, a)
    lows = np.take(l, a)
    return ("APPL", o[first], highs.max(), lows.min(), c[last])
# Call summarize on every row (axis 1) of weeks_indices; open, high, low and
# close are forwarded to summarize as its remaining arguments.
weeksummary = np.apply_along_axis(summarize, 1, weeks_indices, open, high, low, close)
print "Week summary", weeksummary
# Write the rows to weeksummary.csv, comma-separated, each field formatted
# with %s.
np.savetxt("weeksummary.csv", weeksummary, delimiter=",", fmt="%s")

np.exp()函数

# e raised to each of 0, 1, 2, 3, 4.
x = np.arange(5)
print "Exp", np.exp(x)

np.linspace()函数:返回在-1到0之间(包含两个端点)均匀分布的5个点

# Five evenly spaced values from -1 to 0, both endpoints included.
print "Linspace", np.linspace(-1, 0, 5)

weights.sum():返回这个数组的和
weights/weights.sum():会将weights中的所有元素除以分母

# N weights growing from exp(-1) to exp(0), so the last weight is largest.
weights = np.exp(np.linspace(-1. , 0. , N))
# Normalize in place so the weights sum to 1.
weights /= weights.sum()
print "Weights", weights

convolve函数:

# Load column 6 of data.csv.
c = np.loadtxt('data.csv', delimiter=',', usecols=(6,), unpack=True)
# Full convolution of weights with c, trimmed by N - 1 samples at each end.
ema = np.convolve(weights, c)[N-1:-N+1]
# x axis for the plot, starting at index N - 1 to match c[N-1:].
t = np.arange(N - 1, len(c))
# NOTE(review): plot and show are not imported in this snippet; presumably
# they come from matplotlib.pyplot, as in the later snippets -- confirm.
plot(t, c[N-1:], lw=1.0)
plot(t, ema, lw=2.0)
show()

绘制布林带

import numpy as np
import sys
from matplotlib.pyplot import plot
from matplotlib.pyplot import show

# Window length N is taken from the first command-line argument.
N = int(sys.argv[1])
# N equal weights of 1/N each, i.e. a simple (unweighted) moving average.
weights = np.ones(N) / N
print "Weights", weights

# Load column 6 of data.csv.
c = np.loadtxt('data.csv', delimiter=',', usecols=(6,), unpack=True)
# Full convolution of weights with c, trimmed by N - 1 samples at each end.
sma = np.convolve(weights, c)[N-1:-N+1]
# One deviation value per moving-average sample, collected in order.
deviation = []
# Number of price samples; bounds the loop and the window slices below.
C = len(c)

for i in range(N - 1, C):
    # Take N prices starting at i, or the last N prices when fewer than N
    # remain after i.
    if i + N < C:
        dev = c[i: i + N]
    else:
        dev = c[-N:]

    # Root-mean-square distance of the window from one moving-average value.
    # NOTE(review): the index i - N - 1 is negative for the first iterations
    # and therefore wraps to the end of sma; i - N + 1 may have been
    # intended -- confirm before relying on the band values.
    averages = np.zeros(N)
    averages.fill(sma[i - N - 1])
    dev = dev - averages
    dev = dev ** 2
    dev = np.sqrt(np.mean(dev))
    deviation.append(dev)

deviation = 2 * np.array(deviation)
print len(deviation), len(sma)
upperBB = sma + deviation
lowerBB = sma - deviation

c_slice = c[N-1:]
between_bands = np.where((c_slice < upperBB) & (c_slice > lowerBB))

print lowerBB[between_bands]
print c[between_bands]
print upperBB[between_bands]
between_bands = len(np.ravel(between_bands))
print "Ratio between bands", float(between_bands)/len(c_slice)

t = np.arange(N - 1, C)
plot(t, c_slice, lw=1.0)
plot(t, sma, lw=2.0)
plot(t, upperBB, lw=3.0)
plot(t, lowerBB, lw=4.0)
show()

NumPy的linalg包专门用于线性代数计算。

import numpy as np
import sys

# Number of samples N is taken from the first command-line argument.
N = int(sys.argv[1])
# Load column 6 of data.csv.
c = np.loadtxt('data.csv', delimiter=',', usecols=(6,), unpack=True)
# b holds the last N values of c in reversed order (newest first).
b = c[-N:]
b = b[::-1]
print "b", b
A = np.zeros((N, N), float)
print "Zeros N by N", A
# Row i of A is the run of N values of c that ends i + 1 samples before the
# end of c.
for i in range(N):
    A[i, ] = c[-N - 1 - i: - 1 - i]
print "A", A
# Least-squares solution x of A x = b, plus residuals, rank and singular
# values.
(x, residuals, rank, s) = np.linalg.lstsq(A, b)
print x, residuals, rank, s
# Dot product of b with the fitted coefficients.
print np.dot(b, x)

3.28 动手实践:绘制趋势线

import numpy as np
from matplotlib.pyplot import plot
from matplotlib.pyplot import show
def fit_line(t, y):
    """Fit ``y ~ a * t + b`` by least squares and return the array ``[a, b]``."""
    # Design matrix: first column is t, second column is ones (intercept).
    stacked = np.vstack([t, np.ones_like(t)])
    design = stacked.T
    solution = np.linalg.lstsq(design, y)
    return solution[0]
# Load columns 4, 5 and 6 of data.csv into h, l and c.
h, l, c = np.loadtxt('data.csv', delimiter=',' , usecols=(4, 5, 6), unpack=True)
# Pivot point: arithmetic mean of h, l and c for each sample.
pivots = (h + l + c ) / 3
print "Pivots", pivots
t = np.arange(len(c))
# Fit one line through pivots - (h - l) and one through pivots + (h - l).
sa, sb = fit_line(t, pivots - (h - l))
ra, rb = fit_line(t, pivots + (h - l))
support = sa * t + sb
resistance = ra * t + rb
# True where c lies strictly between the two fitted lines.
condition = (c > support) & (c < resistance)
print "Condition", condition
between_bands = np.where(condition)
print support[between_bands]
print c[between_bands]
print resistance[between_bands]
# From here on between_bands is a count, no longer an index tuple.
between_bands = len(np.ravel(between_bands))
print "Number points between bands", between_bands
print "Ratio between bands", float(between_bands)/len(c)
# Evaluate both lines one step past the last value of t.
print "Tomorrows support", sa * (t[-1] + 1) + sb
print "Tomorrows resistance", ra * (t[-1] + 1) + rb
# Second approach: intersect the values above support with those below
# resistance. np.intersect1d returns unique values, so repeated prices are
# counted once here.
a1 = c[c > support]
a2 = c[c < resistance]
print "Number of points between bands 2nd approach" ,len(np. intersect1d(a1, a2))
plot(t, c)
plot(t, support)
plot(t, resistance)
show()

3.30 动手实践:数组的修剪和压缩

clip方法返回一个修剪过的数组,也就是将所有比给定最大值还大的元素全部设为给定的最大值,而所有比给定最小值还小的元素全部设为给定的最小值。

# clip(1, 2) raises values below 1 to 1 and lowers values above 2 to 2.
a = np.arange(5)
print "a =", a
print "Clipped", a.clip(1, 2)

compress方法返回一个根据给定条件筛选后的数组

# compress keeps only the elements where the boolean condition is True.
a = np.arange(4)
print a
print "Compressed", a.compress(a > 2)

prod方法,可以计算数组中所有元素的乘积

# Product of the integers 1 through 8, i.e. 8 factorial.
b = np.arange(1, 9)
print "b =", b
print "Factorial", b.prod()

调用cumprod方法,计算数组元素的累积乘积

# Running product of b: element k is the product of b[0] through b[k].
print "Factorials", b.cumprod()

你可能感兴趣的:(Python,Numpy,numpy,函数,csv,数据)