NumPy学习笔记

产生随机多维数组(小数,取值范围[0,1)):a = np.random.random((3,3))
产生随机多维数组(整数,取值范围[0,10)):a = np.random.randint(0,10,size=[3,3])
求某列的最大值:max_value = a[:,1].max() 或者 max_value = np.max(a[:,1])
求某列最大值的索引:max_index = a[:,1].argmax() 或者 max_index = np.argmax(a[:,1])
判断两个数组是否完全相同:np.array_equal(a, b)(参考:https://blog.csdn.net/tintinetmilou/article/details/78555486)
numpy数组A按某列逆序排序:

# Row order that sorts A by column index 3, largest value first:
# argsort gives the ascending order, which is then reversed.
ind_sort = A[:, 3].argsort()
ind_sort = ind_sort[::-1]
# Pick the rows of A in that order; A itself is left unchanged.
A_sort = A[ind_sort, :]
# Read the CSV file.
# dtype="U75" loads every field as a Unicode string of up to 75 characters;
# skip_header=1 drops the header row.
import numpy as np
world_alcohol = np.genfromtxt("world_alcohol.csv", dtype="U75", skip_header=1, delimiter=",")
# world_alcohol is a 2-D NumPy array of strings (not a list of lists), which is
# why the [rows, column] indexing used below works.
country_is_algeria = world_alcohol[:, 2] == "Algeria"  # compare column index 2 with "Algeria"; yields a boolean array, one entry per row
country_algeria = world_alcohol[country_is_algeria, :] # keep only the rows where country_is_algeria is True

# Combine two conditions element-wise with &; each comparison needs its own parentheses.
is_algeria_and_1986 = (world_alcohol[:, 0]=="1986") & (world_alcohol[:, 2]=="Algeria")
rows_with_algeria_and_1986 = world_alcohol[is_algeria_and_1986, :]  # the rows where the mask is True, as a 2-D array

# Replace the string "1986" in the first column with "2014"; two equivalent ways.
# Method 1: world_alcohol[:,0] is a view, so assigning through the mask updates world_alcohol itself.
world_alcohol[:,0][world_alcohol[:,0] == '1986'] = '2014'
# Method 2: build the mask first, then assign with (mask, column) indexing.
# NOTE: run right after method 1, this mask is all False because no "1986" is left.
first_column_1986 = world_alcohol[:, 0] == "1986"
world_alcohol[first_column_1986, 0] = "2014"
# Set missing values (empty strings) in column index 4 to the string "0".
is_value_empty = world_alcohol[:, 4] == ""
world_alcohol[is_value_empty, 4] = "0"
# Convert column index 4 from U75 strings to floats; the empty strings were
# replaced with "0" above, so every entry should now parse as a number.
alcohol_consumption = world_alcohol[:, 4]
alcohol_consumption = alcohol_consumption.astype(float)
total_alcohol = alcohol_consumption.sum()  # sum of the whole column
# Build a dictionary mapping each country to its total alcohol consumption
# for the year "1989".
totals = {}
is_year = world_alcohol[:, 0] == "1989"
year = world_alcohol[is_year, :]
# The loop below needs the list of countries (column index 2). np.unique
# yields each country exactly once, so no total is computed twice.
# Countries with no 1989 rows end up with a total of 0.0.
countries = np.unique(world_alcohol[:, 2])

for country in countries:
    is_country = year[:, 2] == country
    country_consumption = year[is_country, :]
    alcohol_column = country_consumption[:, 4]
    # Missing readings are empty strings; treat them as zero before converting.
    is_empty = alcohol_column == ''
    alcohol_column[is_empty] = "0"
    alcohol_column = alcohol_column.astype(float)
    totals[country] = alcohol_column.sum()
# Scan the dictionary for the key whose value is largest.
# The running maximum starts at 0, so highest_key stays None unless some
# value is strictly greater than 0; on ties the first key seen is kept.
highest_key, highest_value = None, 0
for country, consumption in totals.items():
    if consumption > highest_value:
        highest_key, highest_value = country, consumption

nan and na values both represent missing data:
nan: stands for “not a number”; it is a special floating-point value used to represent missing or undefined numeric results.
na: stands for “not available”; it marks a value that does not exist.
nan和inf值判断

# Element-wise tests: each returns a boolean array with one entry per
# element of dataset (assumes dataset is a numeric array).
np.isnan(dataset)
np.isinf(dataset)
np.isfinite(dataset)

# Indices of the NaN / infinite entries, one row of indices per match.
np.argwhere(np.isnan(dataset))
np.argwhere(np.isinf(dataset))

# Whole-array checks: is any entry NaN, is any entry infinite,
# and is every entry finite?
np.any(np.isnan(dataset))
np.any(np.isinf(dataset))
np.all(np.isfinite(dataset))

你可能感兴趣的:(Python,NumPy,Python)