#numpy 对数组的操作
import numpy as np
# Locations of the two CSV files read by this section.
us_file_path = "./US_video_data_numbers.csv"
uk_file_path = "./GB_video_data_numbers.csv"

# Read the US file as integers, comma separated.
t1 = np.loadtxt(us_file_path, delimiter=",", dtype="int")
# Read the GB file the same way; unpack=True hands back the transposed
# array, so the rows of the file become columns.
t2 = np.loadtxt(uk_file_path, delimiter=",", dtype="int", unpack=True)

print(t1)
print(t2)

# Indexing reference (examples are intentionally left disabled):
#   one row ........................... t2[2]
#   consecutive rows .................. t2[2:]
#   selected rows ..................... t2[[2, 8, 10]]
print("*" * 100)
#   one column ........................ t2[:, 0]
#   consecutive columns ............... t2[:, 2:]
#   selected columns .................. t2[:, [0, 2]]
#   single value (row 3, column 4) .... a = t1[2, 3]; print(a); print(type(a))
#   block of rows 3-5 and columns 2-4
#   (the intersection is returned) .... t2[2:5, 1:4]

# Pick several non-adjacent points at once: the two index lists are paired
# element by element, so this selects (0, 0), (2, 1) and (2, 3).
c = t2[[0, 2, 2], [0, 1, 3]]
print(c)
# Replace the NaN values of an array with the mean of their column
import numpy as np
def fill_ndarry(t1):
    """Replace the NaN entries of each column of ``t1`` with that column's mean.

    The mean is computed from the non-NaN entries of the column only.
    The array is modified in place and the same object is returned.

    A column that has no NaN is left untouched. A column made up entirely
    of NaN has no valid values to average, so it is left as NaN instead of
    taking the mean of an empty selection (which raises a RuntimeWarning).

    Args:
        t1: Two-dimensional NumPy array (``t1.shape[1]`` is read as the
            number of columns).

    Returns:
        The same array ``t1`` with the NaN entries filled.
    """
    for i in range(t1.shape[1]):
        # Basic slicing: assigning into ``temp_col`` writes through to ``t1``.
        temp_col = t1[:, i]

        # NaN is the only value that does not compare equal to itself.
        nan_mask = temp_col != temp_col
        if not nan_mask.any():
            continue

        temp_not_nan_col = temp_col[~nan_mask]
        if temp_not_nan_col.size == 0:
            # Nothing to average; keep the column as NaN.
            continue

        temp_col[nan_mask] = temp_not_nan_col.mean()
    return t1
if __name__ == "__main__":
    # Small demo: a 3x4 float array holding 0..11 row by row.
    t1 = np.arange(12, dtype="float").reshape(3, 4)
    # Blank out the last two entries of the middle row.
    t1[1, 2:] = np.nan
    print(t1)

    # Fill the NaN entries and show the result.
    t2 = fill_ndarry(t1)
    print(t2)
# 绘制评论直方统计图
import numpy as np
from matplotlib import pyplot as plt
us_file_path = "./US_video_data_numbers.csv"
uk_file_path = "./GB_video_data_numbers.csv"

# Read the US file as integers, comma separated.
t_us = np.loadtxt(us_file_path, delimiter=",", dtype="int")

# The last column is taken as the comment count; only values of at most
# 5000 are kept for the plot.
t_us_comments = t_us[t_us[:, -1] <= 5000, -1]

# Number of bins = value range divided (floor) by the step d.
d = 50
bin_mins = (t_us_comments.max() - t_us_comments.min()) // d

# Draw the histogram.
plt.figure(figsize=(16, 8), dpi=80)
plt.hist(t_us_comments, bins=bin_mins)
plt.show()
#绘制评论喜欢散点图
import numpy as np
from matplotlib import pyplot as plt
us_file_path = "./US_video_data_numbers.csv"
uk_file_path = "./GB_video_data_numbers.csv"

# Read the GB file as integers, comma separated.
t_uk = np.loadtxt(uk_file_path, delimiter=",", dtype="int")

# Keep only the rows whose column 1 value is at most 500000.
keep_rows = t_uk[:, 1] <= 500000
t_uk = t_uk[keep_rows]

# Column 1 is plotted as "like", the last column as "comment".
t_uk_like, t_uk_comment = t_uk[:, 1], t_uk[:, -1]

# Draw the scatter plot: likes on x, comments on y.
plt.figure(figsize=(16, 8), dpi=80)
plt.scatter(x=t_uk_like, y=t_uk_comment)
plt.show()
#分别数组添加列
import numpy as np
us_file_path = "./US_video_data_numbers.csv"
uk_file_path = "./GB_video_data_numbers.csv"

# Read both files as integers, comma separated.
t1 = np.loadtxt(us_file_path, delimiter=",", dtype="int")
t2 = np.loadtxt(uk_file_path, delimiter=",", dtype="int")

# One-column integer markers, one entry per data row:
# all zeros for the US rows, all ones for the GB rows.
zero_data = np.zeros((t1.shape[0], 1), dtype=int)
ones_zero = np.ones((t2.shape[0], 1), dtype=int)

# Append the marker as an extra last column of each array.
t1 = np.hstack([t1, zero_data])
t2 = np.hstack([t2, ones_zero])

# Stack the two labelled arrays on top of each other.
final_data = np.vstack([t1, t2])
print(final_data)