数据合并 concat
import numpy as np
import pandas as pd
NumPy的concatenate
# Two random 3x4 integer matrices (values drawn from 0..9) used as the
# inputs for the NumPy concatenation examples.
arr1 = np.random.randint(low=0, high=10, size=(3, 4))
arr2 = np.random.randint(low=0, high=10, size=(3, 4))
# Show both matrices, one after the other.
print(arr1, arr2, sep='\n')
[[0 2 5 2]
[6 3 1 0]
[4 2 6 9]]
[[2 5 4 1]
[5 5 4 4]
[4 3 3 3]]
# Join along axis 0 (the default): arr2's rows are appended below arr1's.
np.concatenate((arr1, arr2), axis=0)
array([[0, 2, 5, 2],
[6, 3, 1, 0],
[4, 2, 6, 9],
[2, 5, 4, 1],
[5, 5, 4, 4],
[4, 3, 3, 3]])
# Join along axis 1: arr2's columns are appended to the right of arr1's.
np.concatenate((arr1, arr2), axis=1)
array([[0, 2, 5, 2, 2, 5, 4, 1],
[6, 3, 1, 0, 5, 5, 4, 4],
[4, 2, 6, 9, 4, 3, 3, 3]])
Series上的concat
index 没有重复的情况
# Three Series of random integers (0..9) whose index labels do not overlap:
# 0-4, 5-8 and 9-11.
ser_obj1 = pd.Series(np.random.randint(low=0, high=10, size=5), index=range(5))
ser_obj2 = pd.Series(np.random.randint(low=0, high=10, size=4), index=range(5, 9))
ser_obj3 = pd.Series(np.random.randint(low=0, high=10, size=3), index=range(9, 12))
# Show the three Series, one after the other.
print(ser_obj1, ser_obj2, ser_obj3, sep='\n')
0 4
1 3
2 0
3 5
4 0
dtype: int32
5 1
6 1
7 4
8 8
dtype: int32
9 8
10 9
11 6
dtype: int32
# Stack the Series end to end (axis 0 is the default); with disjoint labels
# the result is one Series indexed 0..11.
pd.concat((ser_obj1, ser_obj2, ser_obj3), axis=0)
0 4
1 3
2 0
3 5
4 0
5 1
6 1
7 4
8 8
9 8
10 9
11 6
dtype: int32
# Place the Series side by side as columns. The labels are disjoint, so each
# row has a value from only one Series and the other cells are NaN.
pd.concat((ser_obj1, ser_obj2, ser_obj3), axis='columns')
index 有重复的情况
# Three Series of random integers (0..9) whose index labels overlap:
# 0-4, 0-3 and 0-2.
ser_obj1 = pd.Series(np.random.randint(low=0, high=10, size=5), index=range(0, 5))
ser_obj2 = pd.Series(np.random.randint(low=0, high=10, size=4), index=range(0, 4))
ser_obj3 = pd.Series(np.random.randint(low=0, high=10, size=3), index=range(0, 3))
# Show the three Series, one after the other.
print(ser_obj1, ser_obj2, ser_obj3, sep='\n')
0 1
1 4
2 7
3 6
4 0
dtype: int32
0 6
1 3
2 6
3 6
dtype: int32
0 2
1 0
2 0
dtype: int32
# Stack the Series end to end (axis 0 is the default). Labels are kept as-is,
# so the resulting index contains duplicates.
pd.concat((ser_obj1, ser_obj2, ser_obj3), axis=0)
0 1
1 4
2 7
3 6
4 0
0 6
1 3
2 6
3 6
0 2
1 0
2 0
dtype: int32
# Place the Series side by side as columns; the inner join keeps only the
# labels present in all three Series.
pd.concat((ser_obj1, ser_obj2, ser_obj3), axis='columns', join='inner')
DataFrame上的concat
# Two DataFrames of random integers (0..9): a 3x2 frame with rows a-c and
# columns A/B, and a 2x2 frame with rows a-b and columns C/D.
df_obj1 = pd.DataFrame(
    np.random.randint(low=0, high=10, size=(3, 2)),
    index=['a', 'b', 'c'],
    columns=['A', 'B'],
)
df_obj2 = pd.DataFrame(
    np.random.randint(low=0, high=10, size=(2, 2)),
    index=['a', 'b'],
    columns=['C', 'D'],
)
# Show both frames, one after the other.
print(df_obj1, df_obj2, sep='\n')
A B
a 7 2
b 4 7
c 1 3
C D
a 8 3
b 5 8
# Stack the frames vertically (axis 0 is the default); the column sets differ,
# so cells with no source value are NaN.
pd.concat((df_obj1, df_obj2), axis=0)
# Join the frames side by side, aligning on the row labels.
pd.concat((df_obj1, df_obj2), axis='columns')