目录
一、iterrows 方法遍历 DataFrame
二、itertuples() 方式遍历DataFrame:
三、iteritems(旧方法)和 items(新方法)遍历 DataFrame
四、速度比拼(下述代码可单独运行):三种方法都遍历 30000 行(或 30000 列),每次访问其中一个元素。
for df2index, df2_row in df2.iterrows():
其中 df2index 为行索引(使用默认整数索引时,取值为 0 到 len(df2)-1)。
df2_row 是当前行对应的 Series,可以用 df2_row[0]、df2_row[1]、df2_row[2]、df2_row[3] 按列名访问,
得到的就是该行中每一列的值。
#后为当前语句运行结果
# Demo 1: walk a DataFrame row by row with DataFrame.iterrows().
# A "# ..." comment placed after a print shows what that statement prints
# (whitespace inside the sample output is condensed).
import numpy as np
import pandas as pd

# Small frame with named columns; it is not used by the loops below.
inp = [{'c1': 10, 'c2': 100}, {'c1': 11, 'c2': 110}, {'c1': 12, 'c2': 123}]
df = pd.DataFrame(inp)

t4 = np.arange(12)
print('t4:' + str(t4))
# t4:[ 0 1 2 3 4 5 6 7 8 9 10 11]
print('t4.reshape(3,4):' + str(t4.reshape(3, 4)))
# t4.reshape(3,4):[[ 0 1 2 3]
# [ 4 5 6 7]
# [ 8 9 10 11]]

# 3 rows x 4 columns; no labels are passed, so rows and columns are both
# labelled with consecutive integers starting at 0.
df2 = pd.DataFrame(t4.reshape(3, 4))
print('df2:' + df2.to_string())
# df2: 0 1 2 3
# 0 0 1 2 3
# 1 4 5 6 7
# 2 8 9 10 11

for df2index, df2_row in df2.iterrows():
    # df2index is the row label, df2_row is that row as a Series whose index
    # holds the column labels.
    print('df2index:' + str(df2index) + '#len(df2_row):' + str(len(df2_row)))
    # df2index:0#len(df2_row):4
    print('type(df2_row):' + str(type(df2_row)))
    # type(df2_row):<class 'pandas.core.series.Series'>
    print('df2_row:' + df2_row.to_string())
    # df2_row:0 0
    # 1 1
    # 2 2
    # 3 3

    # Look up the column whose label equals the current row label.
    print('df2_row[' + str(df2index) + ']:' + str(df2_row[df2index]))
    # df2_row[0]:0

    # Visit every column label of the row and read the value stored under it.
    for i in df2_row.index:
        print('df2_row[' + str(i) + ']:' + str(df2_row[i]))
    # df2_row[0]:0
    # df2_row[1]:1
    # df2_row[2]:2
    # df2_row[3]:3
完整运行结果:
t4:[ 0 1 2 3 4 5 6 7 8 9 10 11]
t4.reshape(3,4):[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
df2: 0 1 2 3
0 0 1 2 3
1 4 5 6 7
2 8 9 10 11
df2index:0#len(df2_row):4
df2_row:0 0
1 1
2 2
3 3
df2_row[0]:0
df2_row[0]:0
df2_row[1]:1
df2_row[2]:2
df2_row[3]:3
df2index:1#len(df2_row):4
df2_row:0 4
1 5
2 6
3 7
df2_row[1]:5
df2_row[0]:4
df2_row[1]:5
df2_row[2]:6
df2_row[3]:7
df2index:2#len(df2_row):4
df2_row:0 8
1 9
2 10
3 11
df2_row[2]:10
df2_row[0]:8
df2_row[1]:9
df2_row[2]:10
df2_row[3]:11
Process finished with exit code 0
因为我没有设置列名(默认列名 0、1、2、3 不是合法的属性名),itertuples() 会自动改用“下划线加位置序号”作为字段名(_1、_2、_3、_4;序号从 1 开始,因为位置 0 是 Index),同样可以访问:
# Demo 2: walk a DataFrame row by row with DataFrame.itertuples().
# Expects df2 to be the 3x4 frame holding 0..11 that the iterrows demo builds.
for df2_itertuples_row in df2.itertuples():
    # Each row arrives as a namedtuple: the Index field first, then one field
    # per column.
    print('df2index:' + str(df2_itertuples_row)
          + '#type(df2_itertuples_index):' + str(type(df2_itertuples_row)))
    print('df2_row:' + str(df2_itertuples_row))
    # The integer column labels are exposed as the fields _1 .. _4. Plain
    # attribute access is enough; as with any namedtuple the fields can also
    # be read by position, e.g. df2_itertuples_row[1].
    print(df2_itertuples_row._1, df2_itertuples_row._2,
          df2_itertuples_row._3, df2_itertuples_row._4)
# Output (the printed type is abbreviated here):
# df2index:Pandas(Index=0, _1=0, _2=1, _3=2, _4=3)#type(df2_itertuples_index):<class ...>
# df2_row:Pandas(Index=0, _1=0, _2=1, _3=2, _4=3)
# 0 1 2 3
# df2index:Pandas(Index=1, _1=4, _2=5, _3=6, _4=7)#type(df2_itertuples_index):<class ...>
# df2_row:Pandas(Index=1, _1=4, _2=5, _3=6, _4=7)
# 4 5 6 7
# df2index:Pandas(Index=2, _1=8, _2=9, _3=10, _4=11)#type(df2_itertuples_index):<class ...>
# df2_row:Pandas(Index=2, _1=8, _2=9, _3=10, _4=11)
# 8 9 10 11
# 3a. Column-by-column iteration with the legacy DataFrame.iteritems().
# Calling iteritems() on the author's pandas emitted
#   FutureWarning: iteritems is deprecated and will be removed in a future
#   version. Use .items instead.
# and pandas 2.0 removed the method altogether, so fall back to items() when
# the attribute no longer exists instead of failing with AttributeError.
iter_columns = getattr(df2, 'iteritems', df2.items)
for df2_iteritems_index, df2_iteritems_row in iter_columns():
    # df2_iteritems_index is the column label, df2_iteritems_row the column
    # as a Series indexed by the row labels.
    print('df2_iteritems_index:' + str(df2_iteritems_index)
          + '#len(df2_iteritems_row):' + str(len(df2_iteritems_row)))
    print('type(df2_iteritems_row):' + str(type(df2_iteritems_row)))
    # Read the three entries of the column by row label.
    print('df2_iteritems_row[0]:' + str(df2_iteritems_row[0])
          + '#df2_iteritems_row[1]' + str(df2_iteritems_row[1])
          + '#str(df2_iteritems_row[2])' + str(df2_iteritems_row[2]))
# Output when df2 is the 3x4 frame holding 0..11 (one group per column):
# df2_iteritems_index:0#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:0#df2_iteritems_row[1]4#str(df2_iteritems_row[2])8
# df2_iteritems_index:1#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:1#df2_iteritems_row[1]5#str(df2_iteritems_row[2])9
# df2_iteritems_index:2#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:2#df2_iteritems_row[1]6#str(df2_iteritems_row[2])10
# df2_iteritems_index:3#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:3#df2_iteritems_row[1]7#str(df2_iteritems_row[2])11
# -----------------------------------------------------------------------------------
# 3b. Column-by-column iteration with DataFrame.items(), the replacement for
# iteritems(); it runs without the deprecation warning.
for df2_iteritems_index, df2_iteritems_row in df2.items():
    # df2_iteritems_index is the column label, df2_iteritems_row the column
    # as a Series indexed by the row labels.
    print('df2_iteritems_index:' + str(df2_iteritems_index)
          + '#len(df2_iteritems_row):' + str(len(df2_iteritems_row)))
    print('type(df2_iteritems_row):' + str(type(df2_iteritems_row)))
    # Read the three entries of the column by row label.
    print('df2_iteritems_row[0]:' + str(df2_iteritems_row[0])
          + '#df2_iteritems_row[1]' + str(df2_iteritems_row[1])
          + '#str(df2_iteritems_row[2])' + str(df2_iteritems_row[2]))
# Output when df2 is the 3x4 frame holding 0..11 (one group per column):
# df2_iteritems_index:0#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:0#df2_iteritems_row[1]4#str(df2_iteritems_row[2])8
# df2_iteritems_index:1#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:1#df2_iteritems_row[1]5#str(df2_iteritems_row[2])9
# df2_iteritems_index:2#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:2#df2_iteritems_row[1]6#str(df2_iteritems_row[2])10
# df2_iteritems_index:3#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:3#df2_iteritems_row[1]7#str(df2_iteritems_row[2])11
完整代码:
import pandas as pd
import numpy as np
# df = pd.DataFrame({'A': ['A1', 'A2', 'A3'],
# 'B': ['B1', 'B2', 'B3'],
# 'C': ['C1', 'C2', 'C3']},
# index=['ONE', 'TWO', 'THREE'])
# s = pd.Series(['X2', 'X3', 'X4'], index=['TWO', 'THREE', 'FOUR'], name='X')
#
# df.insert(2, 'D', 0) #第一个参数是要加的列的位置,0为列的第一位,2为列的第3位,其实是给整列赋值。
# print(df)
# # D A B C
# # ONE 0 A1 B1 C1
# # TWO 0 A2 B2 C2
# # THREE 0 A3 B3 C3
#
# # df.insert(1, 'D', 0)
# # print(df)
#
# df.insert(len(df.columns), 'E', s)
# print(df)
# # D A B C E
# # ONE 0 A1 B1 C1 NaN
# # TWO 0 A2 B2 C2 X2
# # THREE 0 A3 B3 C3 X3
# ------------------------------
import pandas as pd
# Combined listing: build a 3x4 frame, then iterate it with itertuples(),
# the legacy iteritems() and its replacement items().
# Sample output in the comments has its inner whitespace condensed.

# Small frame with named columns; it is not used by the loops below.
inp = [{'c1': 10, 'c2': 100}, {'c1': 11, 'c2': 110}, {'c1': 12, 'c2': 123}]
df = pd.DataFrame(inp)

t4 = np.arange(12)
print('t4:' + str(t4))
# t4:[ 0 1 2 3 4 5 6 7 8 9 10 11]
print('t4.reshape(3,4):' + str(t4.reshape(3, 4)))
# t4.reshape(3,4):[[ 0 1 2 3]
# [ 4 5 6 7]
# [ 8 9 10 11]]

# 3 rows x 4 columns; no labels are passed, so rows and columns are both
# labelled with consecutive integers starting at 0.
df2 = pd.DataFrame(t4.reshape(3, 4))
print('df2:' + df2.to_string())
# df2: 0 1 2 3
# 0 0 1 2 3
# 1 4 5 6 7
# 2 8 9 10 11

# 1. iterrows() -- left disabled in this combined listing; uncomment to run.
# for df2index, df2_row in df2.iterrows():
#     print('df2index:'+str(df2index)+'#len(df2_row):'+str(len(df2_row)))
#     print('type(df2_row):'+str(type(df2_row)))
#     print('df2_row:'+df2_row.to_string())
#     print('df2_row['+str(df2index)+']:'+str(df2_row[df2index]))
#     for i in range(len(df2_row)):
#         print('df2_row['+str(i)+']:'+str(df2_row[i]))
# ---------------------------------
# 2. itertuples(): every row is a namedtuple (Index first, then the columns).
for df2_itertuples_row in df2.itertuples():
    print('df2index:' + str(df2_itertuples_row)
          + '#type(df2_itertuples_index):' + str(type(df2_itertuples_row)))
    print('df2_row:' + str(df2_itertuples_row))
    # The integer column labels are exposed as the fields _1 .. _4; the
    # fields can also be read by position, e.g. df2_itertuples_row[1].
    print(df2_itertuples_row._1, df2_itertuples_row._2,
          df2_itertuples_row._3, df2_itertuples_row._4)
# Output (the printed type is abbreviated here):
# df2index:Pandas(Index=0, _1=0, _2=1, _3=2, _4=3)#type(df2_itertuples_index):<class ...>
# df2_row:Pandas(Index=0, _1=0, _2=1, _3=2, _4=3)
# 0 1 2 3
# df2index:Pandas(Index=1, _1=4, _2=5, _3=6, _4=7)#type(df2_itertuples_index):<class ...>
# df2_row:Pandas(Index=1, _1=4, _2=5, _3=6, _4=7)
# 4 5 6 7
# df2index:Pandas(Index=2, _1=8, _2=9, _3=10, _4=11)#type(df2_itertuples_index):<class ...>
# df2_row:Pandas(Index=2, _1=8, _2=9, _3=10, _4=11)
# 8 9 10 11
# ----------------------------------------------
# 3a. Legacy iteritems(). Calling it on the author's pandas emitted
#   FutureWarning: iteritems is deprecated and will be removed in a future
#   version. Use .items instead.
# and pandas 2.0 removed it, so fall back to items() when it is missing.
iter_columns = getattr(df2, 'iteritems', df2.items)
for df2_iteritems_index, df2_iteritems_row in iter_columns():
    print('df2_iteritems_index:' + str(df2_iteritems_index)
          + '#len(df2_iteritems_row):' + str(len(df2_iteritems_row)))
    print('type(df2_iteritems_row):' + str(type(df2_iteritems_row)))
    # Read the three entries of the column by row label.
    print('df2_iteritems_row[0]:' + str(df2_iteritems_row[0])
          + '#df2_iteritems_row[1]' + str(df2_iteritems_row[1])
          + '#str(df2_iteritems_row[2])' + str(df2_iteritems_row[2]))
# Output for column 0 (columns 1-3 follow the same pattern):
# df2_iteritems_index:0#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:0#df2_iteritems_row[1]4#str(df2_iteritems_row[2])8
# -----------------------------------------------------------------------------------
# 3b. items(): the replacement for iteritems(), no deprecation warning.
for df2_iteritems_index, df2_iteritems_row in df2.items():
    print('df2_iteritems_index:' + str(df2_iteritems_index)
          + '#len(df2_iteritems_row):' + str(len(df2_iteritems_row)))
    print('type(df2_iteritems_row):' + str(type(df2_iteritems_row)))
    print('df2_iteritems_row[0]:' + str(df2_iteritems_row[0])
          + '#df2_iteritems_row[1]' + str(df2_iteritems_row[1])
          + '#str(df2_iteritems_row[2])' + str(df2_iteritems_row[2]))
# Output (one group per column):
# df2_iteritems_index:0#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:0#df2_iteritems_row[1]4#str(df2_iteritems_row[2])8
# df2_iteritems_index:1#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:1#df2_iteritems_row[1]5#str(df2_iteritems_row[2])9
# df2_iteritems_index:2#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:2#df2_iteritems_row[1]6#str(df2_iteritems_row[2])10
# df2_iteritems_index:3#len(df2_iteritems_row):3
# type(df2_iteritems_row):<class 'pandas.core.series.Series'>
# df2_iteritems_row[0]:3#df2_iteritems_row[1]7#str(df2_iteritems_row[2])11
代码:
import pandas as pd
import numpy as np
# df = pd.DataFrame({'A': ['A1', 'A2', 'A3'],
# 'B': ['B1', 'B2', 'B3'],
# 'C': ['C1', 'C2', 'C3']},
# index=['ONE', 'TWO', 'THREE'])
# s = pd.Series(['X2', 'X3', 'X4'], index=['TWO', 'THREE', 'FOUR'], name='X')
#
# df.insert(2, 'D', 0) #第一个参数是要加的列的位置,0为列的第一位,2为列的第3位,其实是给整列赋值。
# print(df)
# # D A B C
# # ONE 0 A1 B1 C1
# # TWO 0 A2 B2 C2
# # THREE 0 A3 B3 C3
#
# # df.insert(1, 'D', 0)
# # print(df)
#
# df.insert(len(df.columns), 'E', s)
# print(df)
# # D A B C E
# # ONE 0 A1 B1 C1 NaN
# # TWO 0 A2 B2 C2 X2
# # THREE 0 A3 B3 C3 X3
# ------------------------------
import pandas as pd
from datetime import datetime
from time import time
# Speed comparison: each method visits N_ITEMS rows (items() visits N_ITEMS
# columns) and reads one element per step.
from time import perf_counter  # high-resolution clock meant for measuring intervals

N_ITEMS = 30000   # rows visited by iterrows/itertuples, columns visited by items
N_FIELDS = 4      # length of the other axis

# Small frame with named columns; it is not used by the timed loops below.
inp = [{'c1': 10, 'c2': 100}, {'c1': 11, 'c2': 110}, {'c1': 12, 'c2': 123}]
df = pd.DataFrame(inp)

t4 = np.arange(N_ITEMS * N_FIELDS)
print('t4:' + str(t4))
# The label now reports the shape that is actually used.
print('t4.reshape(' + str(N_ITEMS) + ',' + str(N_FIELDS) + '):'
      + str(t4.reshape(N_ITEMS, N_FIELDS)))

df2 = pd.DataFrame(t4.reshape(N_ITEMS, N_FIELDS))
# str() prints pandas' abbreviated view; to_string() would render all
# N_ITEMS rows.
print('df2:' + str(df2))

# NOTE: every timed loop prints inside the loop, so each measurement includes
# the cost of writing to the console, not just the iteration itself.

# 1. iterrows(): one Series per row.
start = perf_counter()
for df2index, df2_row in df2.iterrows():
    print('df2_row[' + str(df2index) + ']:' + str(df2_row[0]))
print('iterrows方法耗时:' + str(perf_counter() - start))
# ---------------------------------
# 2. itertuples(): one namedtuple per row; _1 is the first column.
start = perf_counter()
for df2_itertuples_row in df2.itertuples():
    print(df2_itertuples_row._1)
# The label's spelling is left untouched so it matches output recorded earlier.
print('iitertuples方法耗时:' + str(perf_counter() - start))
# ----------------------------------------------
# 3a. The legacy iteritems() variant is not timed (deprecated, see the
# FutureWarning quoted in the earlier listing):
# for df2_iteritems_index, df2_iteritems_row in df2.iteritems():
#     print('df2_iteritems_row[0]:'+str(df2_iteritems_row[0]))
# -----------------------------------------------------------------------------------
# 3b. items() walks columns, so transpose the layout to N_FIELDS x N_ITEMS to
# make it take N_ITEMS steps as well.
df2 = pd.DataFrame(t4.reshape(N_FIELDS, N_ITEMS))
start = perf_counter()
for df2_iteritems_index, df2_iteritems_row in df2.items():
    print('df2_iteritems_row[0]:' + str(df2_iteritems_row[0]))
print('items方法耗时:' + str(perf_counter() - start))
结果
items方法耗时:3.169365882873535
iterrows方法耗时:2.4688100814819336
iitertuples方法耗时:0.17905783653259277 注意:itertuples 返回的每一行是 namedtuple(不可变),所以不能通过它直接修改 DataFrame 里的值