# pandas.DataFrame 增删改查详细操作 (detailed add / delete / update / query walkthrough)

import numpy as np
import pandas as pd

# Raw column data for the main demo frame. There is deliberately no 'debt'
# key: requesting that column below creates it filled with missing values.
data = dict(
    city=['Beijing', 'Shanghai', 'Guangzhou', 'Shenzhen', 'Hangzhou', 'Chongqing'],
    year=[2016, 2017, 2016, 2017, 2016, 2016],
    population=[2100, 2300, 1000, 700, 500, 500],
)
frame = pd.DataFrame(data, columns=['year', 'city', 'population', 'debt'])
# A frame can equally be built from existing arrays, e.g.
#   pd.DataFrame({'PassengerId': ids, 'Survived': predictions.astype(np.int32)})

# Three small city-indexed frames sharing the same columns; used to
# demonstrate concatenation.
df1 = pd.DataFrame(
    dict(apts=[55000, 60000], cars=[200000, 300000]),
    index=['Shanghai', 'Beijing'],
)
df2 = pd.DataFrame(
    dict(apts=[25000, 20000], cars=[150000, 120000]),
    index=['Hangzhou', 'Najing'],
)
df3 = pd.DataFrame(
    dict(apts=[30000, 10000], cars=[180000, 100000]),
    index=['Guangzhou', 'Chongqing'],
)

# Two frames sharing a 'cities' key column; used to demonstrate merging.
df4 = pd.DataFrame(
    dict(
        apts=[55000, 60000, 58000],
        cars=[200000, 300000, 250000],
        cities=['Shanghai', 'Beijing', 'Shenzhen'],
    )
)
df5 = pd.DataFrame(
    dict(
        salaries=[10000, 30000, 30000, 20000, 15000],
        cities=['Suzhou', 'Beijing', 'Shanghai', 'Guangzhou', 'Tianjin'],
    )
)

# Create: add rows, columns and whole frames.
# `.ix` was removed in pandas 1.0, so label-based `.loc` is used instead.

# Overwrites the row labelled 0 with 0..n-1 (one value per column). Assigning
# to a label that is not yet in the index would append a new row instead.
frame.loc[0] = np.arange(len(frame.columns))
frame.insert(0, 'temp', frame['year'])  # insert a column named 'temp' at position 0
frame.loc[:, 'xx'] = np.arange(len(frame))  # append a new column at the end

# reindex returns a new frame: same rows, plus a new empty column 'E'.
frame.reindex(index=list(frame.index), columns=list(frame.columns) + ['E'])

# DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
pd.concat([df1, df2])  # stack df2 below df1
pd.concat([df1, df2, df3])  # stack several frames row-wise
pd.concat([df1, df2, df3], axis=1)  # place frames side by side, aligned on the index

# Join two frames on a shared key column.
result = pd.merge(df4, df5, on='cities')  # default inner join: only matching keys
result2 = pd.merge(df4, df5, on='cities', how='outer')  # keep all keys; gaps become NaN

# Delete
# The deletions are demonstrated on a copy so that `frame` keeps the
# 'year', 'city' and 'debt' columns and rows 0-2 that the update and query
# sections further down read.
trimmed = frame.copy()
del trimmed['year']  # remove one column in place
trimmed = trimmed.drop(columns=['city', 'debt'])  # remove several columns
trimmed = trimmed.drop(index=[0, 1, 2])  # remove the rows labelled 0, 1 and 2

# dropna returns a new frame each time; `frame` itself is not modified.
frame.dropna()  # drop rows containing any NaN
frame.dropna(axis=1, how='all')  # drop columns that are entirely NaN
frame.dropna(axis=1, how='any')  # drop columns containing any NaN
frame.dropna(axis=0, how='all')  # drop rows that are entirely NaN
frame.dropna(axis=0, how='any')  # drop rows containing any NaN (the default)

# Update

# Single elements
frame.loc[0, 'city'] = 'YunCheng'  # by row label and column name
frame.iloc[0, 0] = 2011  # by integer row / column position
frame.at[0, 'city'] = 'YunCheng'  # label-based scalar accessor
frame.iat[0, 0] = 2010  # position-based scalar accessor

# fillna returns a new frame with missing values replaced by 1; assign the
# result back if the change should be kept.
frame.fillna(value=1)

# Whole columns
frame['year'] = 2000  # a scalar is broadcast to every row
# Use bracket assignment: `frame.debt = ...` only sets a plain Python
# attribute when the column does not exist. Size the values from the frame
# instead of hard-coding the row count.
frame['debt'] = np.arange(len(frame))
val = pd.Series([200, 300, 500])
frame['debt'] = val  # aligned on the index: rows 0-2 get values, other rows NaN

# Whole row
val = pd.Series(['aa', 2000, 500], index=['city', 'year', 'population'])
frame.loc[0] = val  # aligned on column names; columns absent from val become NaN


# Query
frame.index  # row labels (a RangeIndex for a freshly constructed frame)
frame.columns  # column labels (Index)
frame.values  # underlying data as an ndarray

# Single element
xx = frame.loc[0, 'city']  # scalar; same type as the stored value
xx = frame.loc[[0], ['city']]  # list selectors keep the result a DataFrame

# Rows
df = frame.loc[0:2]  # DataFrame; label slices include the end -> rows 0, 1, 2
df = frame.iloc[0:2]  # DataFrame; positional slices exclude the end -> 2 rows
df = frame[0:3]  # DataFrame; first three rows
df = frame.loc[0]  # Series; the row labelled 0 (`.ix` was removed in pandas 1.0)

# Columns
df = frame.loc[:, 'city']  # Series; one column
df = frame.loc[:, ['city', 'population']]  # DataFrame; several columns
df = frame.iloc[:, 0:2]  # DataFrame; columns at positions 0 and 1

df = frame['year']  # Series
df = frame.year  # Series; attribute access, same as above
df = frame[['population', 'year']]  # DataFrame; columns in the order requested
df = frame.filter(regex='population|year')  # DataFrame; columns in original order
frame[frame['year'] > 2016]  # rows whose year is greater than 2016
# An element-wise comparison over the whole frame raises TypeError on the
# string 'city' column, so restrict it to the numeric columns first.
numeric = frame.select_dtypes(include='number')
numeric[numeric > 2016]  # values <= 2016 become NaN, the rest are kept
# Compare against integers: string values such as '2016' never match an
# integer year column.
frame[frame['year'].isin([2016, 2015])]  # DataFrame; membership filter
frame[['city', 'year']][0:3]  # DataFrame; first three rows of two columns

# Block (rows and columns together)
df = frame.iloc[0:2, 0:2]  # DataFrame

# Conditions
df = frame.year.notnull()  # Series of booleans
df = frame['year'].notnull()  # Series; same as above
df = frame[frame.year.notnull()]  # DataFrame; rows where year is not null
df = frame[frame.year.notnull()].values  # ndarray (or use .to_numpy())
# Combine conditions into one mask rather than chaining two boolean lookups,
# which makes pandas realign the second mask against the filtered result.
mask = (frame['year'] == 2016) & (frame['city'] == 'Beijing')
df = frame[mask]  # DataFrame
df = frame.loc[mask, 'debt']  # Series


# Miscellaneous
df = frame.transpose()  # swap rows and columns
frame['year'].count()  # number of non-null entries in the column
frame['year'].value_counts()  # occurrences of each distinct value
frame.groupby(by='year').count()  # per-group non-null counts, returned as a DataFrame

# 你可能感兴趣的:(Python与机器/深度学习)