pandas 用法千千万，没有实战一个语法都记不住。
本文例题来自和鲸训练营，非广告，亲测社区干货很多。
import pandas as pd
import numpy as np
# Notebook-style walkthrough of 20 basic pandas exercises.
# Bare expressions (e.g. a lone `df`) rely on notebook cell display; the
# statements are order-dependent because several steps modify `df`.

# Sample data: each column contains exactly one missing value.
data = {
    "grammer": ["Python", "C", "Java", "GO", np.nan, "SQL", "PHP", "Python"],
    "score": [1, 2, np.nan, 4, 5, 6, 7, 10],
}

# 1. Build a DataFrame from the dict
df = pd.DataFrame(data)
df
# Displayed result of the cell above:
'''
grammer score
0 Python 1.0
1 C 2.0
2 Java NaN
3 GO 4.0
4 NaN 5.0
5 SQL 6.0
6 PHP 7.0
7 Python 10.0
'''

# 2. Select the rows whose grammer is / contains the string "Python"
## Method 1: boolean mask (index the column first, then compare)
df[df['grammer'] == 'Python']
## Method 2: query expression (equality is `==`, not `=`)
df.query("grammer == 'Python'")
## Method 3: substring match; na=False turns the missing entry into False
## so the result is directly usable as a boolean mask
results = df['grammer'].str.contains("Python", na=False)
df[results]

# 3. Show all column names of df
df.columns

# 4. Rename the second column to 'popularity'
df = df.rename(columns={'score': 'popularity'})

# 5. Count how many times each language appears in the grammer column
df['grammer'].value_counts()

# 6. Fill missing values with the mean of the neighbouring values
# (linear interpolation of a single gap is the average of its two neighbours)
df['popularity'] = df['popularity'].interpolate()

# 7. Select the rows whose popularity is greater than 3
## Method 1
df.query("popularity > 3")
## Method 2
df[df['popularity'] > 3]

# 8. Drop duplicates based on the grammer column
df.drop_duplicates(subset=['grammer'])

# 9. Mean of the popularity column
df['popularity'].mean()

# 10. Convert the grammer column to a list
df['grammer'].to_list()

# 11. Save the DataFrame as CSV
df.to_csv('df.csv')

# 12. Number of rows and columns
df.shape

# 13. Select the rows whose popularity is greater than 3 and less than 7
## Method 1: each comparison must be parenthesised because `&` binds
## tighter than `>` and `<`
df[(df['popularity'] > 3) & (df['popularity'] < 7)]
## Method 2
df.query("popularity > 3 & popularity < 7")

# 14. Swap the positions of the two columns
cols = df.columns[[1, 0]]
df[cols]

# 15. Select the row(s) holding the maximum popularity
df[df['popularity'] == df['popularity'].max()]

# 16. Show the last 5 rows
df.tail(5)

# 17. Drop the last row (returns a new frame; df itself is unchanged)
# df.index[-1] is the last row's label, whatever the index looks like
df.drop(df.index[-1])

# 18. Add the row ['Perl', 6.6]
## Method 1: concatenate a one-row frame (DataFrame.append was removed in
## pandas 2.0); returns a new frame, df itself is unchanged
data2 = {'grammer': 'Perl', 'popularity': 6.6}
pd.concat([df, pd.DataFrame(data2, index=[len(df)])])
## Method 2: assign to a new label in place
df.loc[len(df)] = {'grammer': 'Perl', 'popularity': 6.6}

# 19. Sort by the popularity column, largest first
df.sort_values('popularity', ascending=False)

# 20. Length of every string in the grammer column
# .str.len() keeps missing entries missing instead of reporting len('nan') == 3
df['len'] = df['grammer'].str.len()
df