numpy和pandas的基础用法
- numpy
  - numpy创建array
  - numpy的属性
  - numpy的基础运算
  - numpy的索引
  - numpy的合并
  - numpy的分割
  - numpy的copy和deepcopy
- pandas
  - pandas的基本介绍
  - pandas选择数据
  - pandas设置值
  - pandas处理丢失数据
  - pandas的合并
  - pandas用plot绘图
numpy
numpy创建array
import numpy as np
# Build a 2x3 matrix from nested lists; dtype fixes the element type.
a = np.array([[1, 2, 3],
              [2, 3, 4]], dtype=np.int32)
print(a.dtype)

# Constant-filled arrays; the argument is the (rows, cols) shape.
a = np.zeros((12, 3))
print(a)
a = np.ones((3, 4))
print(a)

# np.empty only allocates storage: the printed values are whatever was
# already in memory, so they differ from run to run.
b = np.empty((3, 4))
print(b)

# 0..11 laid out row by row in a 3x4 matrix.
a = np.arange(12).reshape((3, 4))
print(a)

# Six evenly spaced values from 1 to 10 (both ends included), as 2x3.
a = np.linspace(1, 10, 6).reshape((2, 3))
print(a)

# Iterating over a 2-D array yields one row per step.
for i in a:
    print(i)
numpy的属性
import numpy as np
# A small 2x3 matrix used to inspect the basic ndarray attributes.
array = np.array([[1, 2, 3], [2, 3, 4]])
print(array)

# Print every attribute next to its label: number of dimensions,
# shape as (rows, cols), and total element count.
for label, value in (
    ('number of dim(几维):', array.ndim),
    ('number of shape(行和列):', array.shape),
    ('number of size(元素个数):', array.size),
):
    print(label, value)
运行结果:
[[1 2 3]
 [2 3 4]]
number of dim(几维): 2
number of shape(行和列): (2, 3)
number of size(元素个数): 6

numpy的基础运算
import numpy as np
# Two 2x2 operands: `a` is written out by hand, `b` holds 0..3.
a = np.array([[1, 2], [3, 4]])
b = np.arange(4).reshape(2, 2)
print(a)
print(b)

# `*` multiplies element by element; dot() is the matrix product.
c = a * b
c_dot = a.dot(b)
print(c)
print(c_dot)

# Whole-array reductions over a 2x4 array of random samples.
c = np.random.random((2, 4))
print(c)
print(c.sum())
print(c.min())
print(c.max())

# 2..13 arranged as a 3x4 matrix for the remaining examples.
d = np.arange(2, 14).reshape((3, 4))
print(d)
print(d.argmin())        # flat index of the smallest element
print(d.argmax())        # flat index of the largest element
print(d.mean())          # mean over every element
print(d.mean(axis=0))    # mean of each column
print(d.cumsum())        # running total over the flattened array
print(np.diff(d))        # difference between neighbours within each row
print(d.nonzero())       # row indices and column indices of non-zero entries
print(np.sort(d))        # sorted copy; each row is sorted independently
print(d.transpose())     # transpose, long form
print(d.T)               # transpose, short form
print(np.dot(d.T, d))    # 4x4 matrix product of the transpose with d
print(d.clip(4, 9))      # values below 4 become 4, values above 9 become 9
numpy的索引
import numpy as np
# 3..14 arranged as a 3x4 matrix.
A = np.arange(3, 15).reshape((3, 4))
print(A)

# One element: row 2, column 1. A single [row, col] index avoids the
# intermediate row array that A[2][1] would create.
print(A[2, 1])

# Row 1, columns 1 up to (not including) 4.
print(A[1, 1:4])

# Iterating an array walks its first axis, i.e. the rows.
for row in A:
    print(row)

# To walk the columns, iterate over the transpose instead.
for column in A.T:
    print(column)

# flatten() returns all elements as a 1-D copy.
print(A.flatten())
numpy的合并
import numpy as np
# Turn two flat vectors into 3x1 column vectors by adding a second axis.
A = np.array([1, 1, 1])[:, np.newaxis]
print(A)
B = np.array([2, 2, 2])[:, np.newaxis]

# Vertical stacking puts B underneath A.
print(np.vstack((A, B)))

# Horizontal stacking puts B to the right of A.
side_by_side = np.hstack((A, B))
print(side_by_side)
print(side_by_side.shape)

# concatenate joins any number of arrays along the chosen axis.
print(np.concatenate([A, B, A, B], axis=1))
numpy的分割
import numpy as np
# 0..11 as a 3x4 matrix.
a = np.arange(12).reshape(3, 4)
print(a)

# Equal split into 2 pieces along the columns (axis 1).
print(np.split(a, indices_or_sections=2, axis=1))

# Equal split into 3 pieces along the rows (axis 0).
print(np.split(a, indices_or_sections=3, axis=0))

# Shorthands: vsplit cuts along the rows, hsplit along the columns.
print(np.vsplit(a, 3))
print(np.hsplit(a, 4))
numpy的copy和deepcopy
import numpy as np
# Base array for the demonstration.
a = np.arange(12).reshape((3, 4))
print(a)

# Plain assignment does not copy anything: `b` is a second name for the
# very same array object, so a write through `b` is visible through `a`.
b = a
b[0, 0] = 100
print(b is a)     # True
print(a[0, 0])    # 100

# copy() allocates independent storage (a "deep copy"): changes made to
# `c` never reach `a`.
c = a.copy()
c[0, 0] = -1
print(c is a)     # False
print(a[0, 0])    # still 100
print(c[0, 0])    # -1
pandas
pandas的基本介绍
import pandas as pd
import numpy as np
# A Series is a labelled 1-D array; np.nan marks a missing value.
s = pd.Series([1, 2, 3, np.nan, 44, 1])
print(s)

# Four consecutive daily timestamps starting at 2016-01-01.
dates = pd.date_range('20160101', periods=4)
print(dates)

# DataFrame with explicit row labels and the dates as column labels.
df1 = pd.DataFrame(np.arange(12).reshape((3, 4)),
                   index=["a", 'b', 'c'], columns=dates)
print(df1)

# Without labels, rows and columns are numbered from 0.
df2 = pd.DataFrame(np.arange(12).reshape(3, 4))
print(df2)

# A dict builds the frame column by column; scalar values are repeated
# to match the length of the longest column.
df3 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20200525'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(['test', 'train', 'test', 'train']),
        'F': 'foo',
    }
)
print(df3)
print(df3.dtypes)
print(df3.describe())

# sort_values returns a NEW frame and leaves df3 untouched, so the result
# has to be captured; printing df3 again would show the unsorted data.
sorted_df3 = df3.sort_values(by='E')
print(sorted_df3)
pandas选择数据
import numpy as np
import pandas as pd
# Six daily rows starting 2020-01-21, columns A-D holding 0..23.
dates = pd.date_range('20200121', periods=6)
df = pd.DataFrame(np.arange(24).reshape(6, 4),
                  index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# A column can be selected by key or by attribute access.
print(df['A'])
print(df.A)

# Slicing the frame directly selects rows by position.
print(df[0:3])

# Label-based selection: row and column selectors both go INSIDE the
# .loc[...] brackets. Passing ['A'] as a second print argument would
# only print a literal list next to the whole row.
row_a = df.loc['20200121', ['A']]
print(row_a)

# Position-based selection: rows 1-3, columns 1-2.
print(df.iloc[[1, 2, 3], 1:3])

# Boolean mask: keep the rows whose A value is greater than 8.
print(df[df.A > 8])
pandas设置值
import numpy as np
import pandas as pd
# Six daily rows starting 1996-01-21, columns A-D holding 0..23.
dates = pd.date_range('19960121', periods=6)
print(dates)
df = pd.DataFrame(np.arange(24).reshape((6, 4)),
                  index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# Assign by position: row 3, column 3.
df.iloc[3, 3] = 123
print(df)

# Assign by label: row 1996-01-24, column A.
df.loc['19960124', 'A'] = 46
print(df)

# Conditional assignment must be a single .loc call. The chained form
# df.B[mask] = 0 writes into an intermediate object, which raises
# SettingWithCopyWarning and leaves df unchanged under Copy-on-Write.
df.loc[df.A > 0, 'B'] = 0
print(df)
pandas处理丢失数据
import numpy as np
import pandas as pd
# Six daily rows starting 1996-01-21, columns A-D holding 0..23.
dates = pd.date_range('19960121', periods=6)
df = pd.DataFrame(np.arange(24).reshape((6, 4)),
                  index=dates, columns=['A', 'B', 'C', 'D'])

# An integer column cannot store NaN. Cast the column to float explicitly
# before inserting the missing value, instead of relying on the silent
# upcast that recent pandas versions deprecate.
df['B'] = df['B'].astype('float64')
df.iloc[0, 1] = np.nan
print(df)

# Drop every row (axis=0) that contains at least one missing value.
print(df.dropna(axis=0, how='any'))

# Replace missing values with 0.
print(df.fillna(value=0))

# True if any cell in the whole frame is missing.
print(df.isnull().values.any())
# Load a spreadsheet into a DataFrame. The path is specific to the
# author's machine, so a missing file is reported instead of aborting
# the rest of the script.
excel_path = '/Users/jerry/Desktop/DEMO.xls'
try:
    df = pd.read_excel(excel_path)
except FileNotFoundError:
    print('Excel file not found, skipping:', excel_path)
else:
    print(df)
pandas的合并
import numpy as np
import pandas as pd
# Three 3x4 frames with identical columns, filled with 0, 1 and 2.
df = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df1 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])
print(df)
print(df1)
print(df2)

# Stack the frames vertically; ignore_index renumbers the rows 0..8.
res = pd.concat([df, df1, df2], axis=0, ignore_index=True)
print(res)

# Two frames whose columns and row labels only partly overlap.
df3 = pd.DataFrame(np.ones((3, 4)) * 0,
                   columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
df4 = pd.DataFrame(np.ones((3, 4)) * 1,
                   columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])
print(df3)
print(df4)

# 'outer' keeps the union of the columns (gaps become NaN);
# 'inner' keeps only the columns present in both frames.
res1 = pd.concat([df3, df4], join='outer')
res2 = pd.concat([df3, df4], join='inner')
print(res1)
print(res2)

# Appending rows: DataFrame.append was removed in pandas 2.0, so
# pd.concat is used to get the same row-wise result.
df5 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df6 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df7 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])
res = pd.concat([df5, df6, df7], ignore_index=True)
print(res)
pandas用plot绘图
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Random walk: the running sum of 1000 samples drawn with
# np.random.randn, indexed 0..999.
steps = np.random.randn(1000)
data = pd.Series(steps, index=np.arange(1000)).cumsum()

# Draw the series as a line plot and open the figure window.
data.plot()
plt.show()