# pandas 120 道试题:代码答案及解析(机器学习)

#1.DataFrame基本操作
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Worked answers for the basic DataFrame exercises.
# Bare expressions (e.g. `df.columns`) only display their value in a
# notebook/REPL; when run as a script they are evaluated and discarded.

# Build a DataFrame from the dictionary below.
data = {
    "grammer": ["Python", "C", "Java", "GO", np.nan, "SQL", "PHP", "Python"],
    "score": [1, 2, np.nan, 4, 5, 6, 7, 10],
}
df = pd.DataFrame(data)

# Select the rows whose 'grammer' value is exactly "Python".
df['grammer']  # select a single column
is_python = df['grammer'] == 'Python'  # boolean mask; NaN compares as False
df[is_python]

# Show the column labels.
df.columns

# Rename the second column to 'popularity'.
# `columns` takes an {old_name: new_name} mapping; rebinding `df` avoids
# the in-place mutation of `inplace=True` while giving the same result.
df = df.rename(columns={'score': 'popularity'})

# Count how often each language appears in 'grammer' (NaN is excluded).
df['grammer'].value_counts()

# Count the number of distinct languages (NaN is excluded).
df['grammer'].nunique()

# Fill missing values with the mean of the neighbouring values.
# Linear interpolation already leaves non-missing entries untouched, so no
# surrounding fillna() is needed.
df['popularity'] = df['popularity'].interpolate()
df

# Select the rows where 'popularity' is greater than 3.
df[df['popularity'] > 3]

# De-duplicate on the 'grammer' column (the first occurrence is kept).
df.drop_duplicates(['grammer'])

# Mean of the 'popularity' column.
df['popularity'].mean()

# Convert the 'grammer' column to a Python list.
df['grammer'].to_list()

# Save the DataFrame to Excel (left disabled so the script writes no files).
# df.to_excel('f1.xlsx')

# Show the (rows, columns) shape.
df.shape

# Select the rows where 'popularity' is strictly between 3 and 7.
popularity = df['popularity']
df[(popularity > 3) & (popularity < 7)]

# Swap the positions of the two columns.
df = df[['popularity', 'grammer']]
df

# Select the row(s) holding the maximum 'popularity'.
df[df['popularity'] == df['popularity'].max()]

# Show the last 5 rows.
df.tail()

# Drop the last row.
# Use the actual label of the last row rather than `len(df) - 1`, which is
# only a valid label while the index is the default 0..n-1 range.
df = df.drop(index=df.index[-1])
df

# Append a row. DataFrame.append was removed in pandas 2.0; pd.concat is the
# replacement. Left disabled, as in the original, so the final frame is
# unchanged.
# row = {'grammer': 'Perl', 'popularity': 6.6}
# df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
# df

# 你可能感兴趣的:python、人工智能