import numpy as np
np.__version__
'1.15.4'
为什么使用NumPy
Python中的List
优点:不对存储的数据类型做约束
缺点:性能低,要检查每个元素的类型
L = [i for i in range(10)]
L
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
L[5]
5
L[5] = 100
L
[0, 1, 2, 3, 4, 100, 6, 7, 8, 9]
L[5] = 'welcome to dataAnalysis'
L
[0, 1, 2, 3, 4, 'welcome to dataAnalysis', 6, 7, 8, 9]
Python中的array模块
优点:效率高
缺点:只能存储一种数据类型;并且array.array只是一维数组,无法直接表示二维数组,也没有提供把它看作向量或矩阵的相应运算
import array
arr = array.array('i', [i for i in range(10)])
arr
array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
print(arr[5])
type(arr)
5
array.array
NumPy
NumPy 是一个运行速度非常快的数学库,主要用于数组计算
ndarray 对象是用于存放同类型元素的多维数组。
ndarray 中的每个元素在内存中都有相同存储大小的区域。
nparr =np.array([i for i in range(10)])
nparr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
type(nparr)
numpy.ndarray
nparr[5] = 100
nparr
array([ 0, 1, 2, 3, 4, 100, 6, 7, 8, 9])
nparr[5] = 'abc'
nparr
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
in
1 # numpy.array也不支持不同类型
----> 2 nparr[5] = 'abc'
3 nparr
ValueError: invalid literal for int() with base 10: 'abc'
nparr.dtype
dtype('int32')
nparr[5] = 3.78
nparr
array([0, 1, 2, 3, 4, 3, 6, 7, 8, 9])
机器学习和数据分析需要进行浮点数运算
nparr2 = np.array([1, 2, 3.1])
nparr2.dtype
dtype('float64')
npz = np.zeros(10, dtype=int)
npz
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
npz.astype(float)
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
传入一个元组作为shape,可以创建矩阵(二维数组)
np.zeros((3, 5))
array([[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.]])
np.zeros(shape=(3,5),dtype=int)
array([[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]])
np.ones(shape=(3,5),dtype=int)
array([[1, 1, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 1, 1]])
np.full(shape=(3,5),fill_value=666)
array([[666, 666, 666, 666, 666],
[666, 666, 666, 666, 666],
[666, 666, 666, 666, 666]])
np.eye(3)
array([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])
np.arange(30)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])
[i for i in range(0, 20, 2)]
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
np.arange(0, 20, 2)
array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
np.arange(0, 1, 0.2)
array([0. , 0.2, 0.4, 0.6, 0.8])
np.linspace(0, 20, 10)
array([ 0. , 2.22222222, 4.44444444, 6.66666667, 8.88888889,
11.11111111, 13.33333333, 15.55555556, 17.77777778, 20. ])
np.linspace(0, 20 ,11)
array([ 0., 2., 4., 6., 8., 10., 12., 14., 16., 18., 20.])
随机数
import random
random.randint(0,1)
0
np.random.randint(0,2)
0
np.random.randint(0,1,size=10)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
np.random.randint(0, 10, size=(3, 5))
array([[1, 6, 2, 4, 8],
[6, 2, 8, 0, 5],
[4, 3, 3, 6, 4]])
np.random.random()
0.2649064457443836
np.random.random(10)
array([0.42376454, 0.41423952, 0.31159601, 0.9507154 , 0.78481039,
0.04563831, 0.2837555 , 0.79517423, 0.07130973, 0.36333672])
np.random.random(size=(3,5))
array([[0.74372819, 0.19361539, 0.55049249, 0.61318823, 0.59572234],
[0.52924052, 0.48043406, 0.3182225 , 0.23505616, 0.80289083],
[0.38699344, 0.73664022, 0.41708906, 0.77655059, 0.16897559]])
np.random.normal()
0.17708003261260866
np.random.normal(loc=10,scale=100)
142.79254007123254
np.random.normal(loc=10,scale=100,size=(3,5))
array([[ 141.99356156, 129.65905839, -161.21423339, -138.90031389,
-64.04098652],
[ -48.89692419, -30.48151914, -95.22605902, -131.9976962 ,
-123.3116925 ],
[ -8.08422052, -9.32540604, 34.75224011, -138.02281502,
-63.92428487]])