numpy的学习
- 4.1 notebook的使用
- 4.2 numpy基础知识
- 4.3 numpy的索引和切片
- 4.4 numpy的基本运算
- 4.5 numpy的数据处理
- 4.6 numpy线性代数
- 4.7 伪随机数的生成
- 4.8 随机漫步实例
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels as sm
4.1 notebook的使用
a = [1, 2, 3]


def add_sum(list, sum=0):
    """Return ``sum`` plus the total of every item in ``list``.

    Args:
        list: Iterable of numbers to add up.
        sum: Starting value of the running total (defaults to 0).

    Returns:
        The starting value plus all items; the starting value itself when
        ``list`` is empty.

    The parameter names shadow the ``list`` and ``sum`` builtins inside this
    function only; they are left unchanged so keyword callers keep working.
    """
    for num in list:
        sum += num
    return sum
4.2 numpy基础知识
# Same one million integers held two ways: as an ndarray and as a Python list.
my_arr = np.arange(1000000)
my_list = list(range(1000000))

# Double every element 100 times using one array expression per pass.
for _ in range(100):
    my_arr2 = my_arr * 2

# The same workload written as a list comprehension, for comparison.
for _ in range(100):
    mylist2 = [x * 2 for x in my_list]
# Draw a 2x3 array of samples with np.random.randn.
data = np.random.randn(2, 3)

# Show the array, two element-wise arithmetic results, then its shape and
# element type, in that order.
for _shown in (data, data * 10, data + data, data.shape, data.dtype):
    print(_shown)

# Notebook-style bare expression: the array is built and then discarded.
np.arange(15)
# A flat Python list becomes a one-dimensional array.
series1 = [1, 2, 3, 5, 6, 7]
arr1 = np.array(series1)
print(arr1)

# A list of equal-length lists becomes a two-dimensional array.
series2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(series2)
print(arr2)
for _attribute in (arr2.ndim, arr2.shape, arr2.dtype):
    print(_attribute)

# Constructors that take only a shape: zeros, ones, and empty.
arr3 = np.zeros(10)
arr4 = np.zeros((2, 3))
arr5 = np.ones((3, 5))
arr6 = np.empty((2, 3, 3))  # np.empty does not initialise the contents
for _created in (arr3, arr4, arr5, arr6):
    print(_created)
# astype() produces an array with the requested element type.
arr = np.array([1, 2, 3, 4, 5])
print(arr.dtype)
arr1 = arr.astype(np.float64)
print(arr1.dtype)
arr2 = arr1.astype(np.int32)
print(arr2.dtype)

# Strings that hold numbers can be cast to floats as well.
arr = np.array(['1', '2', '3', '4', '5'])
_as_float = arr.astype('float64')
print(arr.dtype)
print(_as_float.dtype)
print(arr)
print(_as_float)

# The dtype of another array is accepted as the cast target.
arr1 = np.arange(10)
print(arr1.dtype)
arr2 = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
print(arr2.dtype)
arr3 = arr1.astype(arr2.dtype)
print(arr3.dtype)

# 'u4' is the short type code for an unsigned 32-bit integer.
arr4 = arr3.astype('u4')
print(arr4)
arr = np.array([[1, 2, 3], [4, 5, 6]])
print(arr)

# Operators work element by element; a scalar operand is applied to each entry.
arr1 = 1 / arr
arr2 = arr * arr
arrsqrt = arr ** 0.5
for _computed in (arr1, arr2, arrsqrt):
    print(_computed)

# Notebook-style bare expression: element-wise comparison, result discarded.
arr2 > arr
4.3 numpy的索引和切片
arr = np.arange(10)
print(arr)

# Notebook-style bare expressions: one element, then a three-element slice.
arr[4]
arr[5:8]

# A scalar assigned to a slice is written to every selected position.
arr[5:8] = 12
print(arr)

# A slice is a view on the same data, so writes through it change ``arr``.
slice_arr = arr[5:8]
slice_arr[1] = 10
print(arr)
slice_arr.fill(10)
print(arr)

# An explicit copy is independent: this write leaves ``arr`` untouched.
slice_arr = arr[5:8].copy()
slice_arr.fill(111)
print(arr)
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr)
print(arr[0][1])  # row 0, column 1

# Indexing the first axis of a 3-D array yields a 2-D block.
arr3d = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
])
print(arr3d)
for _block in (arr3d[0], arr3d[1]):
    print(_block)

# Keep a copy of the first block, overwrite it with a scalar, then restore it.
slice_arr3d = arr3d[0].copy()
arr3d[0] = 23
print(arr3d)
arr3d[0] = slice_arr3d
print(arr3d)

# One index per axis selects a single element.
print(arr3d[0][1][2])
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# Four slices: first two rows; those rows without column 0; row 1 limited to
# its first two columns; every row limited to its first two columns.
for _selection in (arr2d[:2], arr2d[:2, 1:], arr2d[1, :2], arr2d[:, :2]):
    print(_selection)

# Assigning to a slice expression writes into the selected region.
arr2d[:2, 1:] = 0
print(arr2d)
# Eight names, paired by position with the eight rows of ``data``.
names = np.array(['zhao', 'qian', 'sun', 'li', 'fen', 'chen', 'chu', 'wei'])
data = np.random.randn(8, 4)
print(names)
print(data)

# Notebook-style bare expression: comparing against a string gives a boolean
# array; the result is discarded here.
names == 'sun'
print('\n')

# A boolean array selects rows and can be combined with a column slice.
print(data[names == 'sun'])
print(data[names == 'sun', :2])
print(data[names != 'sun', :2])
print('\n')

# ``~`` inverts a mask; two masks are combined element-wise with OR.
cond = names == 'sun'
print(data[~cond, :2])
print(data[np.logical_or(names == 'sun', names == 'zhao'), :2])

# Boolean masks also work as assignment targets.
data[data < 0] = 0
print(data)
data[names != 'sun'] = 100
print(data)
# Fill an 8x4 array so that every entry of row i equals i.
arr = np.empty((8, 4))
for i in range(8):
    arr[i] = i

# A list of row indices picks those rows in the listed order; negative
# indices count from the end.
print(arr[[2, 3, 1, 5]])
print(arr[[-1, -2, -3]])

# Two index lists are paired up: elements (1,0), (2,1), (3,2), (4,3).
print(arr[[1, 2, 3, 4], [0, 1, 2, 3]])

# Selecting rows first and columns second gives the full 4x4 sub-array.
print(arr[[1, 2, 3, 4]][:, [0, 1, 2, 3]])
4.4 numpy的基本运算
- 转置:.T transpose() swapaxes()
- 元素级别的运算:根号、指数、maximum
# Transpose of a 3x5 array, and the product of that transpose with the array.
arr = np.arange(15).reshape((3, 5))
print(arr)
print(arr.T)
print(arr.T.dot(arr))

# For a 3-D array, transpose takes an axis order; swapaxes exchanges two axes.
arr = np.arange(16).reshape((2, 2, 4))
print(arr)
print(np.transpose(arr, (1, 0, 2)))
print(np.swapaxes(arr, 1, 2))
# Element-wise square root and exponential of 0..9.
arr = np.arange(10)
for _ufunc in (np.sqrt, np.exp):
    print(_ufunc(arr))

# Notebook-style bare expression: element-wise maximum of two random arrays.
x = np.random.randn(8)
y = np.random.randn(8)
np.maximum(x, y)

# np.modf returns two arrays: fractional parts first, integral parts second.
arr = 5 * np.random.randn(7)
remainder, whole_part = np.modf(arr)
print(remainder)
print(whole_part)
4.5 numpy的数据处理
- 随机数组的生成
- 可视化
- 条件逻辑对数组进行处理
- 描述性统计
- 排序
- 集合、唯一化
- 存储和加载
# 1000 points from -5 up to (not including) 5, expanded into two 2-D
# coordinate grids by meshgrid.
points = np.arange(-5, 5, 0.01)
xs, ys = np.meshgrid(points, points)

# sqrt(x^2 + y^2) evaluated at every grid point.
z = np.sqrt(xs ** 2 + ys ** 2)
print(z)

# Show the values as a grey-scale image with a colour bar.
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# Raw string: '\s' is not a valid escape sequence, so a plain literal raises
# an invalid-escape warning; the text passed to title() is unchanged.
plt.title(r'Image plot of $\sqrt{x^2+y^2}$ for a grid of values')
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])

# Pure-Python selection: take the xarr value where cond is true, else yarr.
result = [
    x_val if flag else y_val
    for x_val, y_val, flag in zip(xarr, yarr, cond)
]
print(result)

# The same selection as a single np.where call.
result = np.where(cond, xarr, yarr)
print(result)

# np.where also accepts scalars for either branch.
arr = np.random.randn(3, 4)
print(arr > 0)
result = np.where(arr > 0, 2, -2)
print(result)
# Positive entries become 2; all others keep their value from ``arr``.
result = np.where(arr > 0, 2, arr)
print(result)
# Mean over everything (method and function form), then per axis, then a
# sum along axis 1.
arr = np.random.randn(5, 4)
print(arr)
print(arr.mean())
print(np.mean(arr))
print(arr.mean(axis=1))
print(arr.mean(axis=0))
print(arr.sum(axis=1))

# Running sum of a 1-D array.
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
print(np.cumsum(arr))

# Running sum along axis 0 and running product along axis 1.
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(np.cumsum(arr, axis=0))
print(np.cumprod(arr, axis=1))

# Summing a boolean array counts its True entries.
arr = np.random.randn(100)
print((arr > 0).sum())

# any() tests for at least one True, all() for every entry being True.
bools = np.array([False, False, True, False])
print(bools.any())
print(bools.all())
# ndarray.sort() sorts in place.
arr = np.random.randn(6)
print(arr)
arr.sort()
print(arr)

# With an axis argument, each 1-D section along that axis is sorted.
arr = np.random.randn(3, 4)
print(arr)
arr.sort(axis=1)
print(arr)

# Notebook-style bare expression: the element 5% of the way into the sorted
# sample; the result is discarded.
large_arr = np.random.randn(1000)
large_arr.sort()
large_arr[int(0.05 * len(large_arr))]
# np.unique returns the sorted distinct values; the pure-Python equivalent
# is shown on the next line.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
print(np.unique(names))
print(sorted(set(names)))

values = np.array([1, 2, 3, 4, 5, 6, 7, 2])
# Element-wise membership test. np.isin replaces np.in1d, which is
# deprecated since NumPy 2.0.
print(np.isin(values, [2, 3, 6]))

# Set operations on 1-D arrays: intersection, union, difference, and
# symmetric difference.
print(np.intersect1d(values, [2, 3, 5]))
print(np.union1d(values, [2, 3, 5, 8]))
print(np.setdiff1d(values, [2, 3, 5, 8]))
print(np.setxor1d(values, [2, 3, 9, 0]))
# Save one array; np.save appends '.npy' when the name has no extension.
arr = np.random.randn(3, 4)
np.save('some_array', arr)
arr_load = np.load('some_array.npy')
print(arr_load)

# Save several arrays in one archive, each under its keyword name.
np.savez('arr_group.npz', a=arr, b=arr_load)

# Loading an .npz returns an object that holds the file open; the with-block
# closes it once the array has been read.
with np.load('arr_group.npz') as group:
    print(group['a'])
4.6 numpy线性代数
- 矩阵乘法
- 矩阵分解QR SVD
- 逆、行列式、特征值
- 方程组
# Matrix product of a 2x3 and a 3x3 array, as a function and as a method.
x = np.array([[1, 2, 3], [4, 5, 6]])
y = np.array([[2, 3, 4], [5, 6, 7], [6, 7, 8]])
print(np.dot(x, y))
print(x.dot(y))

from numpy.linalg import inv, qr, det

# X^T X for a random 4x4 matrix, its inverse, and two products with it.
X = np.random.randn(4, 4)
mat = np.dot(X.T, X)
A = inv(mat)
Q = np.dot(A, mat)
P = np.dot(A, X.T)
print(P)
print(Q)

# QR decomposition of the same matrix.
q, r = qr(mat)
print(q)
print(r)

# Trace and determinant.
print(np.trace(mat))
print(det(mat))
4.7 伪随机数的生成
# 4x4 array drawn from the normal distribution; location 0.0 and scale 1.0
# are np.random.normal's defaults, written out explicitly here.
data = np.random.normal(0.0, 1.0, (4, 4))
print(data)
from random import normalvariate

N = 1000000

# One million draws made one call at a time with the standard library ...
samples = [normalvariate(0, 1) for _ in range(N)]

# ... then replaced by the same number of draws from a single NumPy call.
samples = np.random.normal(size=N)
# Seed the global generator, draw ten values and print them; repeat with a
# second seed. ``arr`` ends up holding the draw made under seed 1233.
for _seed in (1234, 1233):
    np.random.seed(_seed)
    arr = np.random.randn(10)
    print(arr)
4.8 随机漫步实例
- 纯python语法
- numpy累计求和
- 模拟随机游走的多次实现
import random

# Random walk in plain Python: start at 0 and move +1 or -1 on each step.
position = 0
walk = [position]
steps = 1000
for i in range(steps):
    # randint(0, 1) is 0 or 1; 1 means step up, 0 means step down.
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)

# Plot the first 100 recorded positions.
plt.plot(walk[:100])
# The same walk built from arrays: draw 0/1 values, map them to -1/+1, and
# take the cumulative sum to get the position after each step.
nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)
steps = np.where(draws > 0, 1, -1)
walk = np.cumsum(steps)
print(walk[:100])
plt.plot(walk[:100])

# Lowest and highest position reached.
minwalk, maxwalk = walk.min(), walk.max()
print(minwalk, maxwalk)

# argmax on a boolean array gives the index of its first True, i.e. the
# first step at least 10 away from the origin.
# NOTE(review): it also gives 0 when no entry is True - confirm that case
# cannot matter here.
t = np.argmax(np.abs(walk) >= 10)
print(t)
# Simulate many walks at once: one row per walk, one column per step.
nwalks = 5000
nsteps = 1000
steps = np.random.randint(0, 2, size=(nwalks, nsteps))
steps = np.where(steps > 0, 1, -1)
walk = np.cumsum(steps, axis=1)
print(walk[:100])

# Extremes over all walks and all steps.
min_walk = walk.min()
max_walk = walk.max()
print(min_walk, max_walk)

# True for each walk that is ever at least 30 away from the origin.
hits30 = (np.abs(walk) >= 30).any(1)
print(hits30)
# Notebook-style bare expression: number of walks that reached 30.
# (The original referenced an undefined name ``hist30`` here.)
hits30.sum()

# For the walks that got there, the index of the first step at distance >= 30.
crossing_times = (np.abs(walk[hits30]) >= 30).argmax(1)
print(crossing_times)
print(crossing_times.mean())