import numpy as np
import random
# Build ndarrays from ordinary Python sequences; each result is a numpy.ndarray.
t1 = np.array([1, 2, 3])
print(t1)  # [1 2 3]
print(type(t1))  # <class 'numpy.ndarray'>

t2 = np.array(range(10))
print(t2)  # [0 1 2 3 4 5 6 7 8 9]
print(type(t2))  # <class 'numpy.ndarray'>

# np.arange is the array-returning counterpart of range().
t3 = np.arange(10)
print(t3)  # [0 1 2 3 4 5 6 7 8 9]
# Default integer dtype; the original notes recorded int32, other platforms
# may report int64.
print(t3.dtype)

t4 = np.arange(4, 10, 2)
print(t4)  # [4 6 8]
print(t4.dtype)  # same default integer dtype as t3

print("*" * 100)

# Choosing an explicit dtype: "i1" is a one-byte signed integer.
t5 = np.array(range(1, 4), dtype="i1")
print(t5)  # [1 2 3]
print(t5.dtype)  # int8

# Boolean arrays: non-zero values become True, zero becomes False.
t6 = np.array([1, 1, 0, 1, 0, 0], dtype=bool)
print(t6)  # [ True  True False  True False False]
print(t6.dtype)  # bool

# astype() returns a converted copy with the requested dtype.
t7 = t6.astype("int8")
print(t7)  # [1 1 0 1 0 0]
print(t7.dtype)  # int8

# Floating point data: ten random floats in [0, 1), so the output differs
# from run to run.
t8 = np.array([random.random() for _ in range(10)])
print(t8)
print(t8.dtype)  # float64

# Keep two decimals, the array equivalent of round(random.random(), 2).
t9 = t8.round(2)
print(t9)
# coding=utf-8
import numpy as np
# Locations of the comma-separated YouTube statistics files.
us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

# Load the US table as integers. Adding unpack=True would return the
# transposed table instead:
#   t1 = np.loadtxt(us_file_path, delimiter=",", dtype="int", unpack=True)
t2 = np.loadtxt(fname=us_file_path, dtype="int", delimiter=",")
print(t2)
print("*" * 100)

# Indexing reference (examples only, none of these lines is executed):
#   rows
#     t2[2]                 a single row
#     t2[2:]                consecutive rows
#     t2[[2, 8, 10]]        non-adjacent rows; note the nested brackets
#     t2[1, :]              the same selections written with an explicit
#     t2[2:, :]             column slice
#     t2[[2, 10, 3], :]
#   columns
#     t2[:, 0]              a single column
#     t2[:, 2:]             consecutive columns
#     t2[:, [0, 2]]         non-adjacent columns
#   one element
#     a = t2[2, 3]          3rd row, 4th column; the original notes recorded
#                           170708 for this data set

# Two slices select the rectangle where the rows and the columns cross:
# rows 3 to 5 and columns 2 to 4 (counting from 1).
b = t2[2:5, 1:4]

# Two index lists are paired element by element instead, so this picks the
# three separate points (0, 0), (2, 1) and (2, 3).
c = t2[[0, 2, 2], [0, 1, 3]]
print(c)
import numpy as np
def fill_ndarray(t1):
    """Replace the NaN entries of every column with that column's mean.

    The mean is computed from the non-NaN values of the same column. The
    array is modified in place and is also returned.

    Args:
        t1: 2-D NumPy array. Columns are walked through ``t1.shape[1]``, so
            a 1-D array is not supported.

    Returns:
        The same array object that was passed in.

    Note:
        A column that contains only NaN has nothing to average and is left
        unchanged.
    """
    for i in range(t1.shape[1]):  # visit every column
        col = t1[:, i]  # a view, so the assignment below writes into t1
        # NaN is the only value that compares unequal to itself.
        nan_mask = col != col
        if not nan_mask.any():
            continue
        valid = col[~nan_mask]
        if valid.size == 0:
            # Skipping avoids NumPy's "Mean of empty slice" warning; the
            # column simply stays NaN.
            continue
        col[nan_mask] = valid.mean()
    return t1
if __name__ == '__main__':
    # Demo input: the values 0..11 as floats in a 3x4 grid, with the last
    # two cells of the middle row set to NaN.
    t1 = np.arange(12).astype("float").reshape((3, 4))
    t1[1, -2:] = np.nan
    print(t1)

    # Print the grid again after fill_ndarray has processed it.
    t1 = fill_ndarray(t1)
    print(t1)
import numpy as np
from matplotlib import pyplot as plt
# Locations of the comma-separated YouTube statistics files.
us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

# Load the US table as integers (unpack=True would give the transpose).
t_us = np.loadtxt(fname=us_file_path, dtype="int", delimiter=",")

# The last column is treated as the comment count (per the original notes);
# keep only the values that are at most 5000.
t_us_comments = t_us[t_us[:, -1] <= 5000, -1]
print(t_us_comments.max(), t_us_comments.min())

# Derive the number of bins from the value range and a bin width of d.
d = 250
bin_nums = (t_us_comments.max() - t_us_comments.min()) // d

# Draw the histogram.
plt.figure(figsize=(20, 8), dpi=80)
plt.hist(t_us_comments, bins=bin_nums)
plt.show()
# Exercise 2:
import numpy as np
from matplotlib import pyplot as plt
# Locations of the comma-separated YouTube statistics files.
us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

# Load the UK table as integers (unpack=True would give the transpose).
t_uk = np.loadtxt(fname=uk_file_path, dtype="int", delimiter=",")

# Keep only the rows whose like count (column 1, per the original notes)
# is at most 500000.
t_uk = t_uk[t_uk[:, 1] <= 500000]
t_uk_like, t_uk_comment = t_uk[:, 1], t_uk[:, -1]

# Scatter plot: likes on the x axis, comments on the y axis.
plt.figure(figsize=(20, 8), dpi=80)
plt.scatter(x=t_uk_like, y=t_uk_comment)
plt.show()
import numpy as np
# Paths of the two per-country tables.
us_data = "./youtube_video_data/US_video_data_numbers.csv"
uk_data = "./youtube_video_data/GB_video_data_numbers.csv"

# Load both tables; each name is rebound from the path to the integer array
# read from it.
us_data = np.loadtxt(fname=us_data, dtype=int, delimiter=",")
uk_data = np.loadtxt(fname=uk_data, dtype=int, delimiter=",")

# Country marker columns, one entry per row: 0 for US rows, 1 for UK rows.
zeros_data = np.zeros((len(us_data), 1), dtype=int)
ones_data = np.ones((len(uk_data), 1), dtype=int)

# Append the marker as an extra last column of each table.
us_data = np.hstack([us_data, zeros_data])
uk_data = np.hstack([uk_data, ones_data])

# Stack the US rows on top of the UK rows.
final_data = np.vstack([us_data, uk_data])
print(final_data)