# Pandas
## Series
import pandas as pd
import numpy as np
# A Series built from a plain list; np.nan marks a missing entry.
# This object is rebound below before it is ever displayed.
s = pd.Series([1, 3, 5, np.nan, 6, 8])

# A Series built from a NumPy array of single-character strings.
data = np.array(list('abcd'))
s = pd.Series(data)
print(s)

# A Series built from a dict: the dict keys become the index labels.
data1 = dict(a=0., b=1., c=2.)
s = pd.Series(data1)
print(s)
## DataFrame
import pandas as pd
import numpy as np
print("*********步骤1*********\n")

# Seven consecutive timestamps starting at 2020-01-01, used as the row index.
dates = pd.date_range('20200101', periods=7)
print(dates)
print("--"*16)

# A 7x4 frame of random normal samples, one row per date, columns A-D.
samples = np.random.randn(7, 4)
df = pd.DataFrame(samples, index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# A frame built from a dict of columns with mixed types; the scalar
# entries ('A', 'B', 'F') are repeated to match the four-row columns.
column_spec = {
    'A': 1.,
    'B': pd.Timestamp('20200102'),
    'C': pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
    'D': np.full(4, 3, dtype='int32'),
    'E': pd.Categorical(["test", "train"] * 2),
    'F': 'foo',
}
df1 = pd.DataFrame(column_spec)
print(df1)
print("*********步骤2*********\n")

# A 6x5 frame holding the integers 0..29 row by row.
# The row labels are the bare letters 'a'..'f' (no padding), so label
# lookups such as df2.loc['b'] work as expected.
data2 = np.arange(30).reshape(6, 5)
df2 = pd.DataFrame(
    data2,
    index=['a', 'b', 'c', 'd', 'e', 'f'],
    columns=['A', 'B', 'C', 'D', 'E'],
)
print(df2)
print("--" * 10)

# head() shows the first five rows by default; tail(3) the last three.
print(df2.head())
print("--" * 10)
print(df2.tail(3))

# The three building blocks of a frame: row labels, column labels, raw values.
print("index is :")
print(df2.index)
print("columns is :")
print(df2.columns)
print("values is :")
print(df2.values)

# Label-based slice with a step: every second row from 'a' through 'f'
# (both ends inclusive), column 'A' only.
print(df2.loc['a':'f':2, 'A'])

# Summary statistics for each numeric column.
print(df2.describe())
print("*********步骤3*********\n")

# A 6x5 frame holding the integers 0..29, rows 'a'..'f', columns 'A'..'E'.
data3 = np.arange(30).reshape(6, 5)
df3 = pd.DataFrame(
    data3,
    index=['a', 'b', 'c', 'd', 'e', 'f'],
    columns=['A', 'B', 'C', 'D', 'E'],
)

# drop() returns a new frame and leaves df3 untouched.
a = df3.drop(['a'], axis=0)  # without row 'a'
b = df3.drop(['A'], axis=1)  # without column 'A'
print('-------原始数据df-----')
print(df3)
print('-------删除行---------')
print(a)
print(' -------删除列---------')
print(b)

# Stack the rows of `a` underneath `b`. DataFrame.append was removed in
# pandas 2.0; pd.concat is the supported replacement. Column 'A' is missing
# from `b`, so those rows are filled with NaN in the result.
c = pd.concat([b, a])
print(b)  # `b` itself is unchanged by the concatenation
print('------合并后产生的新数据------')
print(c)

# Move the row labels of `b` into an ordinary 'index' column, in place.
b.reset_index(inplace=True)
print(b)
print("*********步骤4*********\n")

# A 4x3 frame of random normal samples with columns col1..col3.
df4 = pd.DataFrame(np.random.randn(4, 3), columns=['col1', 'col2', 'col3'])
print("df4:", df4)

# items() yields one (column label, column Series) pair per column.
# It replaces iteritems(), which was removed in pandas 2.0.
# enumerate(..., start=1) supplies the 1-based column counter.
for i, s in enumerate(df4.items(), start=1):
    print("第%d列数据%s" % (i, s))
print("*********步骤5*********\n")

# Relative change between each element and the one before it,
# first for a Series and then column-wise for a frame.
s = pd.Series([1, 2, 3, 4, 5, 4])
print(s.pct_change())
random_pairs = np.random.randn(5, 2)
df5 = pd.DataFrame(random_pairs)
print(df5.pct_change())

# Covariance between two independent random Series.
s1 = pd.Series(np.random.randn(10))
s2 = pd.Series(np.random.randn(10))
print("s1:", s1)
print("s2:", s2)
covariance = s1.cov(s2)
print("协方差:", covariance)

# Covariance between two columns of a frame, then the full covariance matrix.
frame = pd.DataFrame(np.random.randn(10, 5), columns=list('abcde'))
pair_covariance = frame.loc[:, 'a'].cov(frame.loc[:, 'b'])
print(pair_covariance)
print(frame.cov())

# Ranking with a deliberate tie: 'd' is set equal to 'b'.
s3 = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
s3.loc['d'] = s3.loc['b']
print(s3.rank())
print("*********步骤6*********\n")

# Labels shared by the examples below. Keeping them in one place ensures
# that every frame uses exactly the same row and column names, so that
# reindex() lines existing rows up with their data instead of silently
# replacing them with NaN.
_sparse_rows = ['a', 'c', 'e', 'f', 'h']
_all_rows = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
_value_cols = ['one', 'two', 'three']

# reindex() to a superset of the row labels: the new rows
# ('b', 'd', 'g') are filled with NaN.
df6 = pd.DataFrame(np.random.randn(5, 3), index=_sparse_rows, columns=_value_cols)
df6 = df6.reindex(_all_rows)
print(df6)

# isnull() flags the missing entries of a column.
df7 = pd.DataFrame(np.random.randn(5, 3), index=_sparse_rows, columns=_value_cols)
df7 = df7.reindex(_all_rows)
print(df7['one'].isnull())

# sum() skips NaN by default, so only the five real values are added.
df8 = pd.DataFrame(np.random.randn(5, 3), index=_sparse_rows, columns=_value_cols)
df8 = df8.reindex(_all_rows)
print(df8)
print(df8['one'].sum())

# reindex() can also drop rows: 'e' is discarded and 'b' is introduced as
# NaN, which fillna(0) then replaces with zeros.
df9 = pd.DataFrame(np.random.randn(3, 3), index=['a', 'c', 'e'], columns=_value_cols)
df9 = df9.reindex(['a', 'b', 'c'])
print(df9)
print("NaN replaced with '0':")
print(df9.fillna(0))

# dropna() removes every row that contains at least one NaN, leaving only
# the five rows that had data originally.
df10 = pd.DataFrame(np.random.randn(5, 3), index=_sparse_rows, columns=_value_cols)
df10 = df10.reindex(_all_rows)
print(df10)
print("---"*10)
print(df10.dropna())