代码:
# coding=utf-8
import numpy as np
# Paths to the per-country YouTube statistics CSV files.
us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

# Read the same US file twice to compare the two orientations:
# with unpack=True the returned array is transposed, without it the
# array keeps the file layout (one CSV line per array row).
t1 = np.loadtxt(us_file_path, dtype="int", delimiter=",", unpack=True)
t2 = np.loadtxt(us_file_path, dtype="int", delimiter=",")

# Show both arrays, separated by a 100-character divider line.
print(t1, "c" * 100, t2, sep="\n")
效果:
每行的4个数字依次代表点击量、喜欢数、不喜欢数和评论数。
代码:
# coding=utf-8
import numpy as np
# Paths to the per-country YouTube statistics CSV files.
us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

# Load each country's statistics from its own file.
# (The UK data was previously read from us_file_path by mistake, so the
# US rows were duplicated and half of them were labelled as UK.)
us_data = np.loadtxt(us_file_path, delimiter=",", dtype="int")
uk_data = np.loadtxt(uk_file_path, delimiter=",", dtype="int")

# One-column country flag per row: 0 marks US rows, 1 marks UK rows.
zeros_data = np.zeros((us_data.shape[0], 1), dtype=int)
ones_data = np.ones((uk_data.shape[0], 1), dtype=int)

# Append the flag as an extra last column of each data set.
us_data = np.hstack((us_data, zeros_data))
uk_data = np.hstack((uk_data, ones_data))

# Stack both labelled data sets vertically into a single array.
final_data = np.vstack((us_data, uk_data))
print(final_data)
代码:
# coding=utf-8
import numpy as np
# Sample data: the values 0..11 as floats in a 3x4 grid, with the last two
# entries of the middle row replaced by NaN.
t1 = np.arange(12, dtype="float").reshape(3, 4)
t1[1, 2:] = np.nan
def fill_ndaarray(t1):
    """Replace the NaN entries of every column with that column's mean.

    The mean of a column is computed over its non-NaN entries only.
    The array is modified in place and also returned, so both
    ``fill_ndaarray(a)`` and ``a = fill_ndaarray(a)`` work.

    Args:
        t1: 2-D NumPy array (indexed as ``t1[:, i]``).

    Returns:
        The same array object ``t1`` with NaNs filled in.

    A column that consists only of NaN has no mean to fill with; it is
    left untouched instead of taking the mean of an empty slice (which
    would emit RuntimeWarnings and write NaN back anyway).
    """
    for col_idx in range(t1.shape[1]):
        # Basic slicing returns a view, so writes below land in t1 itself.
        column = t1[:, col_idx]

        # NaN is the only value that is not equal to itself.
        nan_mask = column != column
        if not nan_mask.any():
            continue

        valid_values = column[~nan_mask]
        if valid_values.size == 0:
            # All-NaN column: nothing to average, leave it as is.
            continue

        column[nan_mask] = valid_values.mean()
    return t1
if __name__ == '__main__':
    # Demo: build a 3x4 float array whose middle row ends in two NaNs,
    # then print it before and after the NaNs are filled.
    t1 = np.arange(12, dtype="float").reshape(3, 4)
    t1[1, 2:] = np.nan
    print(t1)

    t1 = fill_ndaarray(t1)
    print(t1)
初始代码:
# coding=utf-8
import numpy as np
from matplotlib import pyplot as plt
# Paths to the per-country YouTube statistics CSV files.
us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

t_us = np.loadtxt(us_file_path, dtype="int", delimiter=",")

# The last column is taken as the comment count of each video.
t_us_comment = t_us[:, -1]

# Print the value range (max first, then min).
comment_max = t_us_comment.max()
comment_min = t_us_comment.min()
print(comment_max, comment_min)

# Target bin width; the number of bins is the value range divided by it.
d = 10000
bin_nums = (comment_max - comment_min) // d

# Draw the histogram on a 20x8 inch figure at 80 dpi.
plt.figure(figsize=(20, 8), dpi=80)
plt.hist(t_us_comment, bins=bin_nums)
plt.show()
更改后的代码:
# coding=utf-8
import numpy as np
from matplotlib import pyplot as plt
# Paths to the per-country YouTube statistics CSV files.
us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

t_us = np.loadtxt(us_file_path, dtype="int", delimiter=",")

# The last column is taken as the comment count of each video.
t_us_comment = t_us[:, -1]

# Keep only the entries with at most 5000 comments (presumably so that a
# few very large values do not stretch the histogram's x-axis).
keep_mask = t_us_comment <= 5000
t_us_comment = t_us_comment[keep_mask]

# Print the value range of the filtered data (max first, then min).
comment_max = t_us_comment.max()
comment_min = t_us_comment.min()
print(comment_max, comment_min)

# Target bin width; the number of bins is the value range divided by it.
d = 250
bin_nums = (comment_max - comment_min) // d

# Draw the histogram on a 20x8 inch figure at 80 dpi.
plt.figure(figsize=(20, 8), dpi=80)
plt.hist(t_us_comment, bins=bin_nums)
plt.show()
代码:
# coding=utf-8
import numpy as np
from matplotlib import pyplot as plt
# Paths to the per-country YouTube statistics CSV files.
us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

t_uk = np.loadtxt(uk_file_path, dtype="int", delimiter=",")

# Keep only the rows whose second column (taken as the like count) is at
# most 500000.
like_within_limit = t_uk[:, 1] <= 500000
t_uk = t_uk[like_within_limit]

# x values: like counts (column 1); y values: comment counts (last column).
t_uk_like = t_uk[:, 1]
t_uk_comment = t_uk[:, -1]

# Scatter plot of likes against comments on a 20x8 inch figure at 80 dpi.
plt.figure(figsize=(20, 8), dpi=80)
plt.scatter(x=t_uk_like, y=t_uk_comment)
plt.show()