Pandas:建立 DataFrame、Series 及相应操作(切片、排序、复制、修改)

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import pandas as pd
import numpy as np


def main():
    """Demonstrate basic pandas usage: construction, inspection, selection, assignment.

    Builds a Series and a date-indexed DataFrame of random numbers, then prints
    the result of common operations (head/tail, sorting, label- and
    position-based selection, boolean filtering, in-place assignment, copying).

    Returns:
        None. All results are written to standard output.
    """
    # ---- Data structures ----
    s = pd.Series([i * 2 for i in range(1, 11)])
    print(s)
    dates = pd.date_range("20180301", periods=8)
    # randn draws samples from the standard normal distribution.
    df = pd.DataFrame(np.random.randn(8, 5), index=dates, columns=list("ABCDE"))
    print(df)
    # Example of building a DataFrame from a dict of heterogeneous columns:
    # df2 = pd.DataFrame({"A": 1, "B": pd.Timestamp("20180301"),  # a timestamp
    #                     "C": pd.Series(1, index=list(range(4)), dtype="float32"),  # index 0..3
    #                     "D": np.array([3] * 4, dtype="float32"),  # four 3s -> 4 rows
    #                     "E": pd.Categorical(["police", "student", "teacher", "doctor"])})

    # ---- Inspection ----
    print(df.head(3))  # first three rows
    print(df.tail(3))  # last three rows
    print(df.index)  # row index
    print(df.values)  # underlying values as an ndarray
    print(df.T)  # transpose
    print(df.sort_index(axis=1, ascending=False))  # columns in descending label order
    print(df.describe())  # descriptive statistics
    print(df.sort_values(by="C", ascending=True))  # rows ordered by column C, ascending
    # Other sort_values parameters: axis=0/1 picks the sorting direction,
    # na_position="last" puts missing values at the end; kind and inplace
    # control how the sort is carried out and are rarely needed.
    print(df.sort_values(by=["C", "B"], ascending=[True, False]))

    # ---- Selection / slicing ----
    print(type(df["A"]))  # a single column is a Series
    print(df[:3])  # first three rows
    print(df["20180301":"20180304"])  # row slice by index label (end inclusive)
    print(df.loc[dates[0]])  # first row, by label
    print(df.loc["20180301":"20180304", ["B", "D"]])
    print(df.at[dates[0], "C"])  # scalar: first row, column C
    print(df.iloc[1:3, 2:4])  # rows 2-3, columns 3-4 (by position)
    print(df.iloc[1, 4])  # second row, fifth column
    print(df.iat[1, 4])  # same scalar via the fast positional accessor
    # Combine both conditions into one mask. Chaining df[mask1][mask2] applies
    # a full-length mask to an already filtered frame, which makes pandas
    # reindex the mask and emit a UserWarning.
    print(df[(df.B > 0) & (df.A < 0)])
    print(df[df > 0])  # non-positive cells become NaN
    # E holds random floats, so this filter is almost certainly empty; it is
    # kept only to show the isin() syntax.
    print(df[df["E"].isin([3, 4])])

    # ---- Assignment ----
    s1 = pd.Series(list(range(10, 18)), index=dates)  # aligned on the same dates
    df["F"] = s1
    print(df)
    df.at[dates[0], "A"] = 0
    print(df)
    df.iat[1, 1] = 1
    df.loc[:, "D"] = np.array([4] * len(df))  # set every value in column D to 4
    print(df)
    df2 = df.copy()
    df2[df2 > 0] = -df2  # negate every positive cell of the copy
    print(df2)

# Run the demonstration only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

你可能感兴趣的:(python基础)