DataFrame

1. reindex

# Demo: re-labelling the rows and columns of a DataFrame with reindex().
#
# Written to run on both Python 2 and Python 3: print(...) with a single
# argument behaves the same on both, and print('') is used for blank lines
# because a bare print() would show "()" on Python 2.

# This snippet is self-contained, so import what it uses.
import numpy as np
from pandas import DataFrame

print('对DataFrame重新指定索引')
frame = DataFrame(np.arange(9).reshape(3, 3),
                  index=['a', 'c', 'd'],
                  columns=['Ohio', 'Texas', 'California'])
print(frame)
# Row 'b' does not exist in `frame`, so it is added and filled with NaN.
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
print(frame2)
print('')

print('重新指定column')
states = ['Texas', 'Utah', 'California']
# 'Utah' is not an existing column, so it comes back as an all-NaN column.
print(frame.reindex(columns=states))
print('')

print('对DataFrame重新指定索引并指定填元素充方法')
# Forward-fill along the rows first, then pick the columns in a second step.
# Passing method='ffill' together with columns=... applies the fill to the
# column axis too, which requires the existing (unsorted) column labels to be
# monotonic and therefore fails on current pandas.
filled = frame.reindex(index=['a', 'b', 'c', 'd'],
                       method='ffill').reindex(columns=states)
print(filled)
# The removed `.ix` indexer accepted labels that are missing from the frame;
# reindex() is the supported way to get the same NaN-padded selection.
reordered = frame.reindex(index=['a', 'b', 'd', 'c'], columns=states)
print(reordered)

2. 索引

import numpy as np
from pandas import Series, DataFrame

# Demo: selecting and assigning values in a Series and a DataFrame.
#
# Written to run on both Python 2 and Python 3: print(...) with a single
# argument behaves the same on both, and print('') is used for blank lines
# because a bare print() would show "()" on Python 2.

print('Series的索引,默认数字索引可以工作。')
obj = Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
print(obj['b'])
# The index holds string labels, so integer positions go through .iloc
# rather than relying on the positional fallback of plain [] indexing.
print(obj.iloc[3])
print(obj.iloc[[1, 3]])
print(obj[obj < 2])
print('')

print('Series的数组切片')
print(obj['b':'c'])  # label slices include both endpoints
obj['b':'c'] = 5
print(obj)
print('')

print('DataFrame的索引')
data = DataFrame(np.arange(16).reshape((4, 4)),
                 index=['Ohio', 'Colorado', 'Utah', 'New York'],
                 columns=['one', 'two', 'three', 'four'])
print(data)
print(data['two'])  # a single column
print(data[['three', 'one']])
print(data[:2])
print(data.loc['Colorado', ['two', 'three']])  # row label plus column labels
# Rows are chosen by label and columns by position; translate the positions
# into labels so that a single .loc call can express both.
print(data.loc[['Colorado', 'Utah'], data.columns[[3, 0, 1]]])
print(data.iloc[2])  # row at position 2 (counting from 0)
print(data.loc[:'Utah', 'two'])  # from the first row through 'Utah', column 'two'
print('')

print('根据条件选择')
print(data[data.three > 5])
print(data < 5)  # element-wise True / False
data[data < 5] = 0
print(data)

3. apply

# -*- coding: utf-8 -*- 

import numpy as np
from pandas import Series, DataFrame

# Demo: applying functions to a DataFrame column-wise, row-wise and
# element-wise.
#
# Written to run on both Python 2 and Python 3: print(...) with a single
# argument behaves the same on both, and print('') is used for blank lines
# because a bare print() would show "()" on Python 2.

print('函数')
frame = DataFrame(np.random.randn(4, 3),
                  columns=list('bde'),
                  index=['Utah', 'Ohio', 'Texas', 'Oregon'])
print(frame)
print(np.abs(frame))  # NumPy ufuncs work element-wise on a DataFrame
print('')

print('lambda以及应用')
f = lambda x: x.max() - x.min()
print(frame.apply(f))  # one value per column
print(frame.apply(f, axis=1))  # one value per row


def f(x):
    """Return the minimum and maximum of *x* as a Series labelled 'min'/'max'."""
    return Series([x.min(), x.max()], index=['min', 'max'])


# Because f returns a Series, apply() assembles the results into a DataFrame.
print(frame.apply(f))
print('')

print('applymap和map')
_format = lambda x: '%.2f' % x
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1; use the new
# name when this pandas provides it and fall back to applymap otherwise.
elementwise = frame.map if hasattr(DataFrame, 'map') else frame.applymap
print(elementwise(_format))
print(frame['e'].map(_format))

4. sort
# -*- coding: utf-8 -*- 

import numpy as np
from pandas import Series, DataFrame

# Demo: sorting by index, sorting by value, and ranking.
#
# Written to run on both Python 2 and Python 3: print(...) with a single
# argument behaves the same on both, and print('') is used for blank lines
# because a bare print() would show "()" on Python 2.

print('根据索引排序,对于DataFrame可以指定轴。')
obj = Series(range(4), index=['d', 'a', 'b', 'c'])
print(obj.sort_index())
frame = DataFrame(np.arange(8).reshape((2, 4)),
                  index=['three', 'one'],
                  columns=list('dabc'))
print(frame.sort_index())
print(frame.sort_index(axis=1))
print(frame.sort_index(axis=1, ascending=False))  # descending order
print('')

print('根据值排序')
obj = Series([4, 7, -3, 2])
print(obj.sort_values())  # Series.order() is obsolete
print('')

print('DataFrame指定列排序')
frame = DataFrame({'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1]})
print(frame)
print(frame.sort_values(by='b'))  # sort_index(by=...) is obsolete
print(frame.sort_values(by=['a', 'b']))
print('')

print('rank,求排名的平均位置(从1开始)')
obj = Series([7, -5, 7, 4, 2, 0, 4])
# Positions in ascending order: -5(1), 0(2), 2(3), 4(4), 4(5), 7(6), 7(7)
print(obj.rank())
print(obj.rank(method='first'))  # ties ranked by order of appearance, no averaging
# Descending order, ties take the largest rank of the group, so -5 gets rank 7.
print(obj.rank(ascending=False, method='max'))
frame = DataFrame({'b': [4.3, 7, -3, 2],
                   'a': [0, 1, 0, 1],
                   'c': [-2, 5, 8, -2.5]})
print(frame)
print(frame.rank(axis=1))  # rank across the columns within each row

5. fillna

# -*- coding: utf-8 -*- 

import numpy as np
from numpy import nan as NA
import pandas as pd
from pandas import Series, DataFrame, Index

# Demo: filling missing values with constants, per-column values,
# forward-fill, and a summary statistic.
#
# Written to run on both Python 2 and Python 3: print(...) with a single
# argument behaves the same on both, and print('') is used for blank lines
# because a bare print() would show "()" on Python 2.

print('填充0')
df = DataFrame(np.random.randn(7, 3))
# Label-based slices include both endpoints: rows 0..4 of column 1 and
# rows 0..2 of column 2 become NaN.
df.loc[:4, 1] = NA
df.loc[:2, 2] = NA
print(df.fillna(0))
df.fillna(0, inplace=True)
print(df)
print('')

print('不同行列填充不同的值')
# NOTE: df was filled in place just above, so no NaN is left here and this
# call prints df unchanged. Column 3 does not exist and is ignored.
print(df.fillna({1: 0.5, 3: -1}))
print('')

print('不同的填充方式')
df = DataFrame(np.random.randn(6, 3))
df.loc[2:, 1] = NA
df.loc[4:, 2] = NA
print(df)
# ffill() replaces the deprecated fillna(method='ffill') spelling.
print(df.ffill())
print(df.ffill(limit=2))  # fill at most 2 consecutive NaN values
print('')

print('用统计数据填充')
data = Series([1., NA, 3.5, NA, 7])
print(data.fillna(data.mean()))

你可能感兴趣的:(DataFrame)