```python
import pandas as pd
import numpy as np
```

```python
s = pd.Series([1,3,6,np.nan,44,1])
s
------------
0     1.0
1     3.0
2     6.0
3     NaN
4    44.0
5     1.0
dtype: float64
```

```python
dates = pd.date_range('20191217',periods=6)
dates
------------
DatetimeIndex(['2019-12-17', '2019-12-18', '2019-12-19', '2019-12-20',
               '2019-12-21', '2019-12-22'],
              dtype='datetime64[ns]', freq='D')
```

```python
df = pd.DataFrame(np.random.randn(6,4),index=dates,columns=['a','b','c','d'])
df
-----------------
                   a         b         c         d
2019-12-17 -0.148546 -0.693356  0.121830 -0.373264
2019-12-18  1.120692  1.023919  1.860231 -0.604606
2019-12-19  0.628022 -1.118451  2.173190 -1.822923
2019-12-20  0.133966 -1.366570  1.076824  0.395217
2019-12-21  1.433090 -1.166129  1.398020  0.118124
2019-12-22  0.707654  1.716963 -2.136076  0.758152
```

```python
dates = pd.date_range('20191217',periods=6)
df = pd.DataFrame(np.arange(24).reshape((6,4)),index=dates,columns=['A','B','C','D'])
df
----------------------------
             A   B   C   D
2019-12-17   0   1   2   3
2019-12-18   4   5   6   7
2019-12-19   8   9  10  11
2019-12-20  12  13  14  15
2019-12-21  16  17  18  19
2019-12-22  20  21  22  23
```

```python
print(df['A'])
----------------------
2019-12-17     0
2019-12-18     4
2019-12-19     8
2019-12-20    12
2019-12-21    16
2019-12-22    20
Freq: D, Name: A, dtype: int32
```

```python
print(df[0:3])
-----------------------
            A  B   C   D
2019-12-17  0  1   2   3
2019-12-18  4  5   6   7
2019-12-19  8  9  10  11
```

```python
print(df.loc['2019-12-17'])
----------------------
A    0
B    1
C    2
D    3
Name: 2019-12-17 00:00:00, dtype: int32
```

```python
print(df.loc[:,['A','B']])
-------------------------
             A   B
2019-12-17   0   1
2019-12-18   4   5
2019-12-19   8   9
2019-12-20  12  13
2019-12-21  16  17
2019-12-22  20  21
```

```python
print(df.iloc[3])
----------------------
A    12
B    13
C    14
D    15
Name: 2019-12-20 00:00:00, dtype: int32
```

```python
print(df.iloc[3,1])
---------------
13
```

```python
print(df.iloc[3:5,1:3])
---------------------
             B   C
2019-12-20  13  14
2019-12-21  17  18
```

```python
print(df.iloc[[1,3,5],1:3])
-----------------------
             B   C
2019-12-18   5   6
2019-12-20  13  14
2019-12-22  21  22
```

```python
print(df[df.A>8])
------------------
             A   B   C   D
2019-12-20  12  13  14  15
2019-12-21  16  17  18  19
2019-12-22  20  21  22  23
```

```python
df.iloc[2,2] = 1111
print(df)
----------------------
             A   B     C   D
2019-12-17   0   1     2   3
2019-12-18   4   5     6   7
2019-12-19   8   9  1111  11
2019-12-20  12  13    14  15
2019-12-21  16  17    18  19
2019-12-22  20  21    22  23
```

```python
df.loc['2019-12-17','B'] = 222
df
----------------------
             A    B     C   D
2019-12-17   0  222     2   3
2019-12-18   4    5     6   7
2019-12-19   8    9  1111  11
2019-12-20  12   13    14  15
2019-12-21  16   17    18  19
2019-12-22  20   21    22  23
```

```python
df.A[df.A>4] = 0
df
---------------
            A    B     C   D
2019-12-17  0  222     2   3
2019-12-18  4    5     6   7
2019-12-19  0    9  1111  11
2019-12-20  0   13    14  15
2019-12-21  0   17    18  19
2019-12-22  0   21    22  23
```

> Note: `df.A[df.A>4] = 0` is chained assignment. It can raise `SettingWithCopyWarning` and does not modify `df` when pandas Copy-on-Write is enabled; `df.loc[df.A>4, 'A'] = 0` is the reliable form.

> Note: column `B` is all zeros in the outputs below, so an assignment to `B` (for example `df['B'] = 0`) appears to have been run at this point but is not shown.

```python
df['E'] = pd.Series([1,2,3,4,5,6],index=pd.date_range('20191217',periods=6))
df
---------------------
            A  B     C   D  E
2019-12-17  0  0     2   3  1
2019-12-18  4  0     6   7  2
2019-12-19  0  0  1111  11  3
2019-12-20  0  0    14  15  4
2019-12-21  0  0    18  19  5
2019-12-22  0  0    22  23  6
```

```python
df.iloc[0,1] = np.nan
df.iloc[1,2] = np.nan
df
---------------
            A    B       C   D  E
2019-12-17  0  NaN     2.0   3  1
2019-12-18  4  0.0     NaN   7  2
2019-12-19  0  0.0  1111.0  11  3
2019-12-20  0  0.0    14.0  15  4
2019-12-21  0  0.0    18.0  19  5
2019-12-22  0  0.0    22.0  23  6
```

```python
df.dropna(axis=0,how='any') # how=['any','all']
------------------
            A    B       C   D  E
2019-12-19  0  0.0  1111.0  11  3
2019-12-20  0  0.0    14.0  15  4
2019-12-21  0  0.0    18.0  19  5
2019-12-22  0  0.0    22.0  23  6
```

```python
df.iloc[1,1] = np.nan
df
-----------------
            A    B       C   D  E
2019-12-17  0  NaN     2.0   3  1
2019-12-18  4  NaN     NaN   7  2
2019-12-19  0  0.0  1111.0  11  3
2019-12-20  0  0.0    14.0  15  4
2019-12-21  0  0.0    18.0  19  5
2019-12-22  0  0.0    22.0  23  6
```

```python
print(df.fillna(value=12))
------------------
            A     B       C   D  E
2019-12-17  0  12.0     2.0   3  1
2019-12-18  4  12.0    12.0   7  2
2019-12-19  0   0.0  1111.0  11  3
2019-12-20  0   0.0    14.0  15  4
2019-12-21  0   0.0    18.0  19  5
2019-12-22  0   0.0    22.0  23  6
```

```python
print(df.isnull())
---------------------
                A      B      C      D      E
2019-12-17  False   True  False  False  False
2019-12-18  False   True   True  False  False
2019-12-19  False  False  False  False  False
2019-12-20  False  False  False  False  False
2019-12-21  False  False  False  False  False
2019-12-22  False  False  False  False  False
```

```python
df1 = pd.DataFrame(np.ones((3,4))*0,columns=['a','b','c','d'])
df2 = pd.DataFrame(np.ones((3,4))*1,columns=['a','b','c','d'])
df3 = pd.DataFrame(np.ones((3,4))*2,columns=['a','b','c','d'])
print(df1)
print(df2)
print(df3)
-----------------------
     a    b    c    d
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
     a    b    c    d
0  1.0  1.0  1.0  1.0
1  1.0  1.0  1.0  1.0
2  1.0  1.0  1.0  1.0
     a    b    c    d
0  2.0  2.0  2.0  2.0
1  2.0  2.0  2.0  2.0
2  2.0  2.0  2.0  2.0
```

```python
res = pd.concat([df1,df2,df3],ignore_index=True)
res
--------------------
     a    b    c    d
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
3  1.0  1.0  1.0  1.0
4  1.0  1.0  1.0  1.0
5  1.0  1.0  1.0  1.0
6  2.0  2.0  2.0  2.0
7  2.0  2.0  2.0  2.0
8  2.0  2.0  2.0  2.0
```

```python
df4 = pd.DataFrame(np.ones((3,4))*0,columns=['a','b','c','d'])
df5 = pd.DataFrame(np.ones((3,4))*1,columns=['b','c','d','e'])
print(df4)
print(df5)
-----------------
     a    b    c    d
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
     b    c    d    e
0  1.0  1.0  1.0  1.0
1  1.0  1.0  1.0  1.0
2  1.0  1.0  1.0  1.0
```

```python
res = pd.concat([df4,df5],sort=True)
res
---------------------
     a    b    c    d    e
0  0.0  0.0  0.0  0.0  NaN
1  0.0  0.0  0.0  0.0  NaN
2  0.0  0.0  0.0  0.0  NaN
0  NaN  1.0  1.0  1.0  1.0
1  NaN  1.0  1.0  1.0  1.0
2  NaN  1.0  1.0  1.0  1.0
```

```python
res = pd.concat([df4,df5],axis=1)
res
----------------
     a    b    c    d    b    c    d    e
0  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
1  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
2  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
```

```python
df8 = pd.DataFrame(np.ones((3,4))*0,columns=['a','b','c','d'])
s1 = pd.Series([1,2,3,4],index=['a','b','c','d'])
res = df8.append(s1,ignore_index=True)
res
---------------
     a    b    c    d
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
3  1.0  2.0  3.0  4.0
```

> Note: `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0. On current pandas, `pd.concat([df8, s1.to_frame().T], ignore_index=True)` gives the same result.