# Three ways to pull out a single row: .loc[32] looks up the index label 32,
# while .iloc[32, :] and .iloc[32] both take the row at integer position 32.
df.loc[32]
df.iloc[32,:]
df.iloc[32]
# A list of column names (double brackets) selects the column 'xxx' as a DataFrame.
df[['xxx']]
# 1. Extract the columns xxx1, xxx2 and xxx3:
df[['xxx1', 'xxx2', 'xxx3']]
# 2. Extract every column except the last three:
df.iloc[:, : -3]
# Three ways to pick entries 1, 10 and 15 of a single column.
# NOTE(review): method 1 addresses the column by position 0 while methods 2 and 3
# use the name 'col1' -- presumably 'col1' is the first column; confirm.
# Method 1: positional rows [1, 10, 15] of the column at position 0.
df.iloc[[1, 10, 15], 0]
# Method 2: select 'col1' first, then index the resulting Series with the list.
df['col1'][[1, 10, 15]]
# Method 3: Series.take() with the list of positions.
df['col1'].take([1, 10, 15])
# Boolean mask of repeated values in column 'xxx'.
df.xxx.duplicated()
# Boolean mask of rows that repeat an earlier row across all columns.
df.duplicated()
# Same check, but only the columns xxx1 and xxx2 are compared.
df.duplicated(subset=['xxx1', 'xxx2'])
# Two ways to check the index for repeated labels.
# Method 1: show the rows whose index label repeats an earlier label.
df[df.index.duplicated()]
# Method 2: a single flag that is True when no index label is repeated
# (the attribute is spelled `is_unique`).
df.index.is_unique
# Rows where 'xxx' is greater than 3.
df[df['xxx'] > 3]
# Rows where 'xxx' lies strictly between 3 and 7; each comparison is
# parenthesised before the two masks are combined with &.
df[(df['xxx'] > 3) & (df['xxx'] < 7)]
# Rows where 'xxx' equals the column maximum.
df[df['xxx'] == df['xxx'].max()]
# Last three rows among those where xxx1 + xxx2 exceeds 60000.
df[(df['xxx1'] + df['xxx2']) > 60000].tail(3)
# Rows of `data` whose 'xxx' value is missing.
data[data['xxx'].isnull()]
# Report, column by column, which rows of `data` hold missing values.
for columname in data.columns:
    # count() only counts non-missing entries, so a count different from
    # len(data) means this column contains at least one missing value.
    if data[columname].count() != len(data):
        # Index labels of the rows whose value in this column is missing.
        loc = data[columname][data[columname].isnull()].index.tolist()
        print('列名:"{}",第{}行位置有缺失值'.format(columname, loc))
# Collect the rows of `data` whose '换手率(%)' value is not a float.
# A vectorised mask replaces the row-by-row DataFrame.append() loop:
# DataFrame.append() was removed in pandas 2.0, and label lookups with
# range(len(data)) only work when the index is exactly 0..n-1.
# isinstance() also recognises numpy.float64 values, which `type(x) != float`
# would wrongly report as non-float.
# NOTE(review): `temp` is rebuilt from scratch here; the original appended to a
# `temp` created outside the visible lines -- confirm it started out empty.
is_float = data['换手率(%)'].map(lambda value: isinstance(value, float))
temp = data[~is_float]
temp.head()
# Two ways to find rows whose 'xxx' column holds the placeholder '--'.
# Method 1: isin() -- keeps rows of `data` whose value is in the list ['--'].
data[data['xxx'].isin(['--'])]
# Method 2: str.contains() -- keeps rows of `df` whose string contains '--'.
# NOTE(review): this line reads from `df` while method 1 reads from `data`;
# confirm that is intended. If 'xxx' can hold missing values, the mask
# presumably needs na=False to stay purely boolean -- verify.
df[df["xxx"].str.contains("--")]
# Two ways to select the rows whose 'grammer' column is 'Python'.
# Method 1: exact equality.
df[df['grammer'] == 'Python']
# Method 2: substring test. na=False makes missing values count as
# "no match", so the mask is purely boolean and no separate in-place
# fillna() step is needed before indexing.
results = df['grammer'].str.contains('Python', na=False)
df[results]
# Rows whose 'xxx' is England, Italy or Russia, restricted to columns 'xxx' and 'xxx1'.
# Method 1: filter the rows with isin(), then select the two columns.
df[df['xxx'].isin(['England', 'Italy', 'Russia'])] [['xxx', 'xxx1']]
# Method 2: pass the row mask and the column list to a single .loc call.
df.loc[df['xxx'].isin(['England', 'Italy', 'Russia']), ['xxx','xxx1']]
# Rows whose 'xxx' string begins with '数据'.
# Method 1: str.match(), which matches the pattern at the start of each string.
df[df['xxx'].str.match('数据')]
# Method 2: str.startswith(), which tests for the prefix.
df[df['xxx'].str.startswith('数据')]
# Values of col1 that do not occur in col2.
df['col1'][~df['col1'].isin(df['col2'])] # ~ negates the isin() mask
'''
isin()函数:判断数据是否存在,返回布尔型向量,可用于数据的过滤
Series.isin(vals):判断Series的内容是否在vals中
Index.isin(vals):判断索引值是否在vals中
DataFrame.isin(vals):
vals为列表时,判断DataFrame中数据是否在vals中
vals为字典时,判断DataFrame中对应列的数据是否在vals中
'''
import pandas as pd

# Stack col1 and col2 end to end, then count how often each value occurs.
# pd.concat() replaces Series.append(), which was removed in pandas 2.0.
temp = pd.concat([df['col1'], df['col2']])
temp.value_counts(ascending=False)  # without .index the result is a Series of counts
temp.value_counts(ascending=False).index[:3]  # Index holding the three most frequent values
# Two ways to locate the rows where col1 is divisible by 5.
# Method 1: filter with the boolean mask and read the index of the result.
df[df['col1'] % 5 == 0].index
# Method 2: convert the boolean mask to an array and pass it to np.argwhere.
np.argwhere(np.array(df['col1'] % 5 == 0))