import numpy as np
# Build a 1-D array from a Python list.
data = np.array([1, 2, 3])
print(f"data:{data}") # data:[1 2 3]
print(type(data)) # <class 'numpy.ndarray'>
# np.arange(10) and np.array(range(10)) both yield the integers 0..9.
data1 = np.arange(10)
print(f"data1:{data1}") # data1:[0 1 2 3 4 5 6 7 8 9]
data2 = np.array(range(10))
print(f"data2:{data2}") # data2:[0 1 2 3 4 5 6 7 8 9]
# dtype='i4' requests 4-byte signed integers, i.e. int32.
data3 = np.array([1, 2, 3], dtype='i4')
print(data3) # [1 2 3]
print(type(data3)) # <class 'numpy.ndarray'>
print(data3.dtype) # int32
data4 = data3.astype('i1') # convert to 1-byte integers; astype returns a new array, data3 keeps its dtype
print(data4.dtype) # int8
详情见 numpy数据类型
int8 'i1' | int16 'i2' | int32 'i4' | int64 'i8' |
---|---|---|---|
uint8 | uint16 | uint32 | uint64 |
(NumPy 没有 float8) | float16 'f2' | float32 'f4' | float64 'f8' |
下面虽然不是numpy数据类型,但可以直接填在dtype=右边
int | 与平台和版本有关:NumPy 1.x 在 Windows 上默认为 int32(即 'i4'),在 64 位 Linux/macOS 以及 NumPy 2.x 的 64 位平台上默认为 int64(即 'i8') |
---|---|
float | 默认为float64,即’f8’ |
bool | 列表中元素0为False,其它为True |
# The standard-library `random` module is required by random.random() below;
# without this import the snippet fails with NameError.
import random

# Nine pseudo-random floats drawn from [0.0, 1.0); the values change on every run.
data5 = np.array([random.random() for _ in range(9)])
print(data5)
# Example values from one run:
# [0.6853715, 0.40404027, 0.7960919, 0.1866534, 0.59791729, 0.64194195, 0.57661904, 0.41688956, 0.02173108]
print(data5.dtype)  # float64

# 1. Round with the array's own method: ndarray.round(decimals).
data6 = data5.round(2)
print(data6)  # e.g. [0.69, 0.4, 0.8, 0.19, 0.6, 0.64, 0.58, 0.42, 0.02]

# 2. Round with the module-level function: np.round(array, decimals).
#    Both forms return a new array and give the same result.
data7 = np.round(data5, 2)
print(data7)  # e.g. [0.69, 0.4, 0.8, 0.19, 0.6, 0.64, 0.58, 0.42, 0.02]
import numpy as np
# ---- Broadcasting against a 2-D array ------------------------------------
t1 = np.arange(24).reshape(4, 6)
# t1 is:
# [[ 0  1  2  3  4  5]
#  [ 6  7  8  9 10 11]
#  [12 13 14 15 16 17]
#  [18 19 20 21 22 23]]
print(np.shape(t1))  # (4, 6)

# A 1-D array whose length equals the number of columns is added to every row.
t2 = np.array(range(6))  # [0 1 2 3 4 5]
print(t1 + t2)
# [[ 0  2  4  6  8 10]
#  [ 6  8 10 12 14 16]    column 0 gets +0, column 1 gets +1, column 2 gets +2, ...
#  [12 14 16 18 20 22]
#  [18 20 22 24 26 28]]

# A (1, 6) array behaves exactly like the 1-D case above.
t3 = np.arange(6).reshape(1, 6)  # [[0 1 2 3 4 5]]
print(t1 + t3)
# [[ 0  2  4  6  8 10]
#  [ 6  8 10 12 14 16]    same result as t1 + t2
#  [12 14 16 18 20 22]
#  [18 20 22 24 26 28]]

# A (4, 1) column is stretched across the columns instead.
t4 = np.arange(4).reshape(4, 1)
# t4 is:
# [[0]
#  [1]
#  [2]
#  [3]]
print(t1 + t4)
# [[ 0  1  2  3  4  5]
#  [ 7  8  9 10 11 12]    row 0 gets +0, row 1 gets +1, row 2 gets +2, ...
#  [14 15 16 17 18 19]
#  [21 22 23 24 25 26]]

# ---- Broadcasting against a 3-D array ------------------------------------
t5 = np.arange(18).reshape(3, 3, 2)
t6 = np.arange(9).reshape(3, 3, 1)
t7 = np.arange(3).reshape(3, 1, 1)
# Both t5 + t6 and t5 + t7 are valid.
t8 = np.arange(6).reshape(1, 3, 2)
t9 = np.arange(2).reshape(1, 1, 2)
# Both t5 + t8 and t5 + t9 are valid.
t10 = np.arange(6).reshape(3, 2)
# t5 + t10 is valid as well: the trailing dimensions (3, 2) match.
- 数组行和列相同,相同行列元素计算
- [1, 2, 3] + [4, 5, 6] = [5, 7, 9]
- 数组与一个数字计算,数组每行每列都与该数字计算
- [1, 2, 3] + 1 = [2, 3, 4]
- 二维数组可以与行相同,列为1的二维数组计算,结果是行计算
- (4, 6) + (4, 1) = (4, 6) 详见 t1 + t4
- 二维数组可以与列相同,行为1的二维数组计算,结果是列计算
- (4, 6) + (1, 6) = (4, 6) 详见 t1 + t3
- 二维数组可以与一维数组计算(一维数组的长度必须等于二维数组的列数),结果是列计算
- (4, 6) + (6,) = (4, 6),例如 (4, 6) + [0, 1, 2, 3, 4, 5],详见 t1 + t2
- 三维数组情况见上面
import numpy as np
# Load a comma-separated text file of integers into a 2-D int32 array.
us_file_address = "D:\\python_class_project\\dataAnalysis\\file\\us"
data = np.loadtxt(us_file_address, delimiter=",", dtype="i4")
print(data)
print('*' * 40)
# Each statement below overwrites newData; they illustrate indexing forms only.
# A single row
newData = data[0]
# Several consecutive rows
newData = data[1:3] # rows 1 and 2 (half-open range [1, 3))
# Several non-adjacent rows
newData = data[[0, 2]]
# A single column
newData = data[:, 0]
# Several consecutive columns
newData = data[:, 2:]
# Several non-adjacent columns
newData = data[:, [0, 2, 3]]
# A single element (row 2, column 3)
newData = data[2, 3]
# A rectangular block (rows 2.., columns 0..2)
newData = data[2:, 0:3]
# Several individual elements: index lists are paired up, giving (0, 0) and (3, 3)
newData = data[[0, 3], [0, 3]]
import numpy as np
# Load a comma-separated text file of integers into a 2-D int32 array.
us_file_address = "D:\\python_class_project\\dataAnalysis\\file\\us"
data = np.loadtxt(us_file_address, delimiter=",", dtype="i4")
print(data)
# Example output for the author's data file:
# [[ 145   64   78   20]
#  [ 241  111   45   20]
#  [1111   54   10   36]
#  [  45    2    3    0]
#  [  77   45   23   12]
#  [ 359   72   26   44]]

# ---- Assign a scalar to whole columns -------------------------------------
newData = np.copy(data)  # work on a copy so `data` stays intact
newData[:, [0, 2]] = 0
print(newData)
# [[  0  64   0  20]
#  [  0 111   0  20]
#  [  0  54   0  36]
#  [  0   2   0   0]
#  [  0  45   0  12]
#  [  0  72   0  44]]

# ---- Boolean mask assignment ----------------------------------------------
newData = np.copy(data)
print(newData > 10)
# [[ True  True  True  True]
#  [ True  True  True  True]
#  [ True  True False  True]
#  [ True False False False]
#  [ True  True  True  True]
#  [ True  True  True  True]]
newData[newData > 10] = 1
print(newData)
# [[ 1  1  1  1]
#  [ 1  1  1  1]
#  [ 1  1 10  1]
#  [ 1  2  3  0]
#  [ 1  1  1  1]
#  [ 1  1  1  1]]

# ---- np.where: choose between two values element-wise ---------------------
newData = np.copy(data)
newData = np.where(newData > 5, 100, 0)
print(newData)
# [[100 100 100 100]
#  [100 100 100 100]
#  [100 100 100 100]
#  [100   0   0   0]
#  [100 100 100 100]
#  [100 100 100 100]]

# The np.where call above is equivalent to the two mask assignments below.
# Start again from a fresh copy so the equivalence is demonstrated on the
# original values rather than on the array np.where already converted.
newData = np.copy(data)
newData[newData > 5] = 100
newData[newData <= 5] = 0
print(newData)
# [[100 100 100 100]
#  [100 100 100 100]
#  [100 100 100 100]
#  [100   0   0   0]
#  [100 100 100 100]
#  [100 100 100 100]]

# ---- clip: clamp values into a range --------------------------------------
newData = np.copy(data)
newData = newData.clip(10, 100)
# Values <= 10 become 10, values >= 100 become 100, everything between is kept.
print(newData)
# [[100  64  78  20]
#  [100 100  45  20]
#  [100  54  10  36]
#  [ 45  10  10  10]
#  [ 77  45  23  12]
#  [100  72  26  44]]
import numpy as np
us_file_address = "D:\\python_class_project\\dataAnalysis\\file\\us"
# Load as int32, then cast to float: NaN can only be stored in a floating-point array.
data = np.loadtxt(us_file_address, delimiter=",", dtype="i4").astype(float)
data[2, 2:] = np.nan
print(data)
# Example output for the author's data file:
# [[ 145.,   64.,   78.,   20.],
#  [ 241.,  111.,   45.,   20.],
#  [1111.,   54.,   nan,   nan],
#  [  45.,    2.,    3.,    0.],
#  [  77.,   45.,   23.,   12.],
#  [ 359.,   72.,   26.,   44.]]

# NaN never compares equal to itself, so `data != data` is True exactly at the NaNs.
print(data != data)
# [[False, False, False, False],
#  [False, False, False, False],
#  [False, False,  True,  True],
#  [False, False, False, False],
#  [False, False, False, False],
#  [False, False, False, False]]

# Counting NaNs, approach 1: count the True entries of the self-inequality mask.
# (The result is not printed here.)
np.count_nonzero(data != data)

# Counting NaNs, approach 2: build the mask with np.isnan instead.
np.isnan(data)
print(np.isnan(data))
# [[False, False, False, False],
#  [False, False, False, False],
#  [False, False,  True,  True],
#  [False, False, False, False],
#  [False, False, False, False],
#  [False, False, False, False]]
np.count_nonzero(np.isnan(data))
import numpy as np
us_file_address = "D:\\python_class_project\\dataAnalysis\\file\\us"
# Load as int32, then cast to float so that NaN can be stored.
data = np.loadtxt(us_file_address, delimiter=",", dtype="i4").astype(float)
data[2, 2:] = np.nan
# For the author's data file the array now looks like this:
# [[ 145.   64.   78.   20.]
#  [ 241.  111.   45.   20.]
#  [1111.   54.   nan   nan]
#  [  45.    2.    3.    0.]
#  [  77.   45.   23.   12.]
#  [ 359.   72.   26.   44.]]

# Any reduction that touches a NaN yields NaN.
print(data.sum())  # nan
print(data.sum(axis=0))  # [1978.  348.   nan   nan]  sum of each column
print(data.sum(axis=1))  # [307. 417.  nan  50. 157. 501.]  sum of each row

print(data.mean())  # nan
print(data.mean(axis=0))  # [329.66666667  58.  nan  nan]  mean of each column
print(data.mean(axis=1))  # [76.75 104.25 nan 12.5 39.25 125.25]  mean of each row

# The remaining column-wise statistics are evaluated but not printed;
# the trailing comments show the values they produce.
np.median(data, axis=0)  # [193.  59.  nan  nan]
data.max(axis=0)  # [1111., 111., nan, nan]
data.min(axis=0)  # [45., 2., nan, nan]
np.ptp(data, axis=0)  # [1066., 109., nan, nan]  peak-to-peak (max - min)
data.std(axis=0)  # [364.73308353, 32.56275992, nan, nan]
def fun(data):
    """Replace each NaN in a 2-D array with the mean of its column, in place.

    For every column that contains NaN, the mean of the column's non-NaN
    entries is computed and written into the NaN positions. Columns without
    NaN are left untouched. A column made up entirely of NaN has no valid
    values to average, so it is left as NaN instead of dividing by zero.

    Args:
        data: 2-D NumPy array; it is modified in place.

    Returns:
        None.
    """
    for col_idx in range(data.shape[1]):
        # Basic slicing returns a view, so writes to `column` land in `data`.
        column = data[:, col_idx]
        nan_mask = np.isnan(column)
        if not nan_mask.any():
            continue  # nothing to fill in this column
        if nan_mask.all():
            continue  # no valid values: keep NaN rather than compute 0 / 0
        column[nan_mask] = column[~nan_mask].mean()
# State before filling (example output for the author's data file):
print(data)
# [[ 145.   64.   78.   20.]
#  [ 241.  111.   45.   20.]
#  [1111.   54.   nan   nan]
#  [  45.    2.    3.    0.]
#  [  77.   45.   23.   12.]
#  [ 359.   72.   26.   44.]]

# fun() works in place: each NaN is replaced by the mean of the other
# values in its column (175 / 5 = 35 and 96 / 5 = 19.2 here).
fun(data)
print(data)
# [[ 145.    64.    78.    20. ]
#  [ 241.   111.    45.    20. ]
#  [1111.    54.    35.    19.2]
#  [  45.     2.     3.     0. ]
#  [  77.    45.    23.    12. ]
#  [ 359.    72.    26.    44. ]]
import numpy as np
# Two 2x2 arrays to stack together.
t1 = np.arange(1, 5).reshape(2, 2)
t2 = np.array(range(10, 18, 2)).reshape(2, 2)

# Vertical stack: the rows of t2 are appended below the rows of t1.
t3 = np.vstack([t1, t2])
print(t3)
# [[ 1  2]
#  [ 3  4]
#  [10 12]
#  [14 16]]

# Argument order decides which block ends up on top.
t4 = np.vstack([t2, t1])
print(t4)
# [[10 12]
#  [14 16]
#  [ 1  2]
#  [ 3  4]]

# Horizontal stack: the columns of t2 are appended to the right of t1.
t5 = np.hstack([t1, t2])
print(t5)
# [[ 1  2 10 12]
#  [ 3  4 14 16]]
# ---- Swap two rows ---------------------------------------------------------
data = np.arange(24).reshape(4, 6)
print(data)
# [[ 0  1  2  3  4  5]
#  [ 6  7  8  9 10 11]
#  [12 13 14 15 16 17]
#  [18 19 20 21 22 23]]

# Indexing with a list on the right-hand side produces a copy, so rows 1 and 2
# can be exchanged in a single assignment.
data[[1, 2]] = data[[2, 1]]
print(data)
# [[ 0  1  2  3  4  5]
#  [12 13 14 15 16 17]
#  [ 6  7  8  9 10 11]
#  [18 19 20 21 22 23]]

# ---- Swap two columns ------------------------------------------------------
np.random.seed(10)  # fixed seed so the random matrix is reproducible
newData = np.random.randint(1, 10, size=(4, 4))
print(newData)
# Output recorded by the author:
# [[5 1 2 1]
#  [2 9 1 9]
#  [7 5 4 1]
#  [5 7 9 2]]

# Same trick along the column axis: exchange columns 0 and 3.
newData[:, [0, 3]] = newData[:, [3, 0]]
print(newData)
# [[1 1 2 5]
#  [9 9 1 2]
#  [1 5 4 7]
#  [2 7 9 5]]