# series [ˈsɪəriːz]: 系列 (sequence), 级数 (mathematical series)
import pandas as pd
# Three ways to build a Series.
s1 = pd.Series([1, 2, 3])  # plain list: pandas creates a default index
print(s1)
print(s1.index)
s2 = pd.Series({'山东': 1.1, '北京': 1.2})  # dict: keys become the index, values the data
s3 = pd.Series(data=[2.4, 5.6], index=['天津', '上海'])  # index and data given explicitly
print(s2)
print(s3)
print(s2.index)
# Positional access goes through .iloc; plain s2[0] on a string-labelled
# Series relies on a deprecated integer fallback.
print(s2.iloc[0])
print(s2['山东'])  # label-based access
import pandas as pd
# Adding elements: dict vs. Series.
s1 = pd.Series({'山东': 1.1, '北京': 1.2})
print(s1)
d1 = {'山东': 1.1, '北京': 1.2}
print(d1)
d1.update({'山西': 0.3})  # a dict gains a key/value pair in place via update()
print(d1)
# Series.append was removed in pandas 2.0; pd.concat is the replacement.
# The new element still has to be wrapped in a Series.
s2 = pd.concat([s1, pd.Series({'山西': 0.3})])
print(s1)  # s1 is unchanged
print(s2)  # s2 is a newly created object
print(s2['山西'])
newSeries = pd.Series({'陕西': 0.55, '广州': 0.7})
s3 = pd.concat([s2, newSeries])  # an existing Series can be concatenated directly
print(s3)
import pandas as pd
# Removing elements from a Series.
d1 = {'山东': 1.1, '北京': 1.2}
s1 = pd.Series(d1)  # build a Series straight from a dict
print(s1)
s3 = s1.drop('北京')  # drop() returns a new Series; s1 itself is untouched
print(s3)
# Series.append was removed in pandas 2.0; pd.concat is the replacement.
s4 = pd.concat([s1, pd.Series({'北京': 1.3})])  # index labels may repeat
print(s4)
s5 = s4.drop('北京')  # every entry carrying that label is removed
print(s5)
print('北京' in s4.index)  # True when the label is present in s4's index, else False
if '山西' in s4.index:  # check first so drop() cannot raise on a missing label
    # drop() is not in-place, so the result must be assigned back to take effect.
    s4 = s4.drop('山西')
import pandas as pd
# Modifying a Series element in place.
d1 = {'山东': 1.1, '北京': 1.5}
s1 = pd.Series(d1)
print(s1)
s1['山东'] = 1.8  # assignment by label mutates s1; no new object is created
print(s1)
# Positional access goes through .iloc; plain s1[0] on a string-labelled
# Series relies on a deprecated integer fallback.
print(s1.iloc[0])
print(s1['山东'])
import pandas as pd
# Ways to build a DataFrame.
# Row-oriented nested lists with no labels supplied.
df = pd.DataFrame([['小红', 2, 5],
                   ['小张', 7, 1]])
print(df)
# Same rows, with column labels added.
df = pd.DataFrame([['小红', 2, 5],
                   ['小张', 7, 1]], columns=['姓名', '年龄', '身高'])
# Same rows, with both row and column labels.
df = pd.DataFrame([['小红', 2, 5],
                   ['小张', 7, 1]], columns=['姓名', '年龄', '身高'], index=['page0', 'page2'])
# Column-oriented dict. Every column needs one value per row, so the two
# names must be separate list items (a single '小红,小张' string has length 1
# and does not match the two-row index).
df = pd.DataFrame({'姓名': ['小红', '小张'],
                   '年龄': [3, 5],
                   '身高': [1.43, 1.23]}, index=['page0', 'page1'])
print(df)
import pandas as pd
# Load the IMDB CSV from the working directory and summarise it.
df = pd.read_csv('IMDB-Movie-Data.csv')
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() would only add a stray "None" line.
df.info()
print(df.describe())
import pandas as pd
# Append a single row (given as a Series) to a DataFrame.
# Each column needs one value per row, so the two names are separate items.
df = pd.DataFrame({'姓名': ['小红', '小张'],
                   '年龄': [3, 5],
                   '身高': [1.43, 1.23]}, index=['page0', 'page1'])
s1 = pd.Series({"姓名": 'jiang', "年龄": 4, "身高": 1.56})
s1.name = 'index2'  # the Series name becomes the new row's label
# DataFrame.append was removed in pandas 2.0. Turn the Series into a one-row
# frame (to_frame().T), re-infer the column dtypes, then concatenate.
df2 = pd.concat([df, s1.to_frame().T.infer_objects()])
print(df2)
import pandas as pd
# Grow a DataFrame by rows and by columns.
# Each column needs one value per row, so the two names are separate items.
df = pd.DataFrame({'姓名': ['小红', '小张'],
                   '年龄': [3, 5],
                   '身高': [1.43, 1.23]}, index=['page0', 'page1'])
print(df)
# Row-wise merge. DataFrame.append was removed in pandas 2.0; pd.concat
# returns the stacked result as a new frame.
df2 = pd.concat([df, df])
print(df2)
df['分数'] = [69, 30]  # assigning a new label adds a column at the end
print(df)
df.insert(2, '籍贯', ['河北', '莫旗'])  # insert() modifies the existing frame in place
print(df)
import pandas as pd
# Remove columns and rows from a DataFrame.
df = pd.DataFrame({'姓名': ['小红', '小张'],
                   '年龄': [3, 5],
                   '身高': [1.43, 1.23]}, index=['page0', 'page1'])
df['分数'] = [69, 30]  # add a column at the end
df.insert(2, '籍贯', ['河北', '莫旗'])  # insert() modifies the existing frame in place
print(df)
df2 = df.drop('年龄', axis=1)  # drop a column: returns a new frame, df keeps the column
print(df2)
df3 = df2.drop('page0', axis=0)  # drop a row by its label
print(df3)
import pandas as pd
# Selecting columns and rows from the IMDB table with iloc / loc.
df_imdb = pd.read_csv('IMDB-Movie-Data.csv')
print(df_imdb)
print('*' * 30)
print(df_imdb.iloc[:, 1])  # iloc selects by integer position (presumably the Title column; depends on the CSV layout)
print(df_imdb.loc[:, 'Title'])  # loc selects the Title column by label
print(df_imdb.iloc[:, :3])  # first three columns by position
print(df_imdb.loc[:, ['Title', 'Rank', 'Genre']])  # three columns picked by label
print(df_imdb.iloc[0])  # first row by integer position
print(df_imdb.loc[0])  # row labelled 0: the label happens to be a number, but it is a label, not a position
print(df_imdb.iloc[5:10])  # positional slice: end position excluded
print(df_imdb.loc[5:10])  # label slice: end label included
# Each comparison must be parenthesised because & binds tighter than == and >.
df2 = df_imdb[(df_imdb['Year'] == 2014) & (df_imdb['Rating'] > 7.0)]
print(df2.shape)
import pandas as pd
# Modifying values in the IMDB table.
df_imdb = pd.read_csv('IMDB-Movie-Data.csv')
print(df_imdb)
print('*' * 30)
df_imdb.loc[0, 'Title'] = 'Guardians of the Galaxy2'  # overwrite a single cell
print(df_imdb)
# Boolean filtering produces a separate frame holding the matching rows.
df_s = df_imdb[(df_imdb['Year'] == 2014) & (df_imdb['Rating'] > 7)]
print(df_s.shape)
# Assigning through loc with a boolean mask changes df_imdb itself.
df_imdb.loc[(df_imdb['Year'] == 2014) & (df_imdb['Rating'] > 7),
            'Title'] += "-1"
print(df_imdb)
df_imdb['Title'] += '-1'  # append the suffix to every title
print(df_imdb)
import pandas as pd
# Load the IMDB table used by the row-iteration demos and preview it.
df_imdb = pd.read_csv('IMDB-Movie-Data.csv')
print(df_imdb.head())
print('*' * 30)
def iterRowsBy1(df=None):
    """Print each row's index label followed by the row, using ``iterrows()``.

    Args:
        df: DataFrame to walk. When omitted, the module-level ``df_imdb``
            is used, which keeps the original zero-argument call working.
    """
    if df is None:
        df = df_imdb
    for i, row in df.iterrows():  # the slower of the two iteration styles shown in this file
        print(i)
        print(row)
# Run the iterrows()-based row dump.
iterRowsBy1()
def iterRowsBy2(df=None):
    """Print every row as a named tuple, using ``itertuples()``.

    Args:
        df: DataFrame to walk. When omitted, the module-level ``df_imdb``
            is used, which keeps the original zero-argument call working.
    """
    if df is None:
        df = df_imdb
    for nt in df.itertuples():  # the faster of the two iteration styles shown in this file
        print(nt)
# Run the itertuples()-based row dump.
iterRowsBy2()