数据 day2

Number of dim(ndim):数组的维度数
Shape(shape):数组的形状;对二维数组,第一个数字是行数,第二个数字是列数
Size(size):数组中元素的总个数
import numpy as np

# 3x4 array counting down from 14 to 3; the calls below operate on it.
A = np.arange(14, 2, -1).reshape((3, 4))
# Further things to try on A:
# print(A)
# print(np.sort(A))        # sorts along the last axis, i.e. within each row
# print(np.clip(A, 5, 9))  # values below 5 become 5, values above 9 become 9

# With no axis given, argmin returns the index into the flattened array.
print(np.argmin(A))
# Split into 2 equal parts along axis=1 (columns); the result is a list of arrays.
print(np.split(A, 2, axis=1))

a = np.array([[1, 1], [1, 0]])
b = np.arange(4).reshape((2, 2))

# `*` multiplies element by element; it is not a matrix product.
c = a * b
print(c)

# Matrix product, spelled two equivalent ways.
c_dot = np.dot(a, b)
c_dot_2 = a.dot(b)
print(c_dot)
print(c_dot_2)

# Variant 1: hand the bound ``str.format`` method to applymap as the
# per-element callable.
formater = "{0:.03f}".format
df.applymap(formater)


# Variant 2: the same three-decimal formatting with the % operator.
def formater(x):
    return "%.03f" % x


df.applymap(formater)  # every cell rendered with three decimal places

MovieLens 数据集

import pandas as pd

# The .dat files are read with header=None, so column names are supplied here.
# engine="python" is passed explicitly because a separator longer than one
# character is treated as a regular expression and is only supported by the
# Python parsing engine; without it pandas emits a ParserWarning.
# NOTE(review): if a read raises UnicodeDecodeError the file is not UTF-8;
# pass an explicit encoding= — TODO confirm against the actual data files.
unanme = ("user_id", "gender", "age", "occupation", "zipcode")
users = pd.read_table(
    "users.dat", sep="::", header=None, names=unanme, engine="python"
)

# Column name corrected from the misspelled "timsstamp".
rating_names = ("user_id", "movie_id", "rating", "timestamp")
ratings = pd.read_table(
    "ratings.dat", sep="::", header=None, names=rating_names, engine="python"
)

movie_names = ("movie_id", "title", "genres")
movies = pd.read_table(
    "movies.dat", sep="::", header=None, names=movie_names, engine="python"
)

# With no `on=` given, merge joins on the columns the two frames share:
# users+ratings share user_id, and that result shares movie_id with movies.
movies_data = pd.merge(pd.merge(users, ratings), movies)

# Keep only the rows whose user_id equals 1 (boolean-mask row selection).
movies_data.loc[movies_data["user_id"] == 1]
找出 ID 为 1 的用户评价过的电影

# NOTE: bare expressions below (no assignment) only display a result in a
# notebook / REPL cell; in a plain script wrap them in print().

# Mean rating per title, one column per gender value.
ratings_by_gender = movies_data.pivot_table(
    values="rating", index="title", columns="gender", aggfunc="mean"
)
# F minus M: negative where the M mean is higher, positive where F is higher.
ratings_by_gender["diff"] = ratings_by_gender.F - ratings_by_gender.M
ratings_by_gender.sort_values(by="diff", ascending=True)

# Number of rows (ratings) per title, then the 20 most-rated titles.
ratings_by_title = movies_data.groupby("title").size()
ratings_by_title.sort_values(ascending=False).head(20)

# Mean rating per title, computed via pivot_table.
mean_ratings = movies_data.pivot_table(
    values="rating", index="title", aggfunc="mean"
)

# Per-title mean via groupby. numeric_only=True skips the string columns,
# which otherwise raise TypeError in pandas >= 2.0.
mean_ratings1 = movies_data.groupby("title").mean(numeric_only=True)

mean_ratings.sort_values(by="rating", ascending=False).head(10)

# The 10 titles with the most ratings.
top_10_hot = ratings_by_title.sort_values(ascending=False).head(10)
top_10_hot

# Row lookup by title must go through .loc; plain [] on a DataFrame looks the
# labels up among the columns. Also fixes the misspelled name `mean_rating1`.
mean_ratings1.loc[top_10_hot.index]

# The 20 titles with the highest mean rating.
top_20_score = mean_ratings.sort_values(by="rating", ascending=False).head(20)

你可能感兴趣的:(数据 day2)