pandas is an open source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.
1. Series
import pandas as pd
s = pd.Series([1,2,3,4,4,5])
print(s)
0 1
1 2
2 3
3 4
4 4
5 5
dtype: int64
2. DataFrame, which represents a two-dimensional array
import numpy as np
df = pd.DataFrame(np.random.randn(6,4),columns=list('ABCD'))
print(df)
A B C D
0 1.319645 0.200689 0.465237 1.096319
1 -1.161289 1.425719 -0.933799 0.916855
2 -0.400188 -1.230783 2.036401 0.040048
3 1.058809 0.386778 -0.412866 1.804066
4 -0.711314 1.303099 2.134634 0.324449
5 0.599744 -0.392085 -0.144836 0.167158
print(df.shape)
(6, 4)
print(df.describe())
A B C D
count 6.000000 6.000000 6.000000 6.000000
mean 0.087328 -0.043831 -0.509932 0.729124
std 0.753698 1.098022 1.182985 1.151292
min -1.110985 -1.479892 -1.701878 -0.283554
25% -0.254115 -0.961305 -1.521005 -0.107824
50% 0.131861 0.422144 -0.678687 0.242547
75% 0.652308 0.667418 0.328144 1.679714
max 0.928213 1.045899 1.137686 2.239049