# Module setup: imports plus process-wide NumPy / matplotlib / pandas settings.
# NOTE(review): this script contains a non-ASCII string literal (the column
# name used when masking outliers). Python 2 requires a PEP 263 encoding
# declaration on line 1 or 2 for that -- confirm the on-disk file has one.
# `division` makes `/` perform true division on Python 2.
from __future__ import division
import numpy as np
# NOTE(review): xlwt and os are not referenced in the visible part of the file.
import xlwt
import os
import matplotlib.pyplot as matplot_pyplot
from scipy.interpolate import lagrange
# Seed NumPy's global random generator with a fixed value.
np.random.seed(12345)
# Default size for new matplotlib figures: 10 x 6.
matplot_pyplot.rc('figure', figsize=(10, 6))
# NOTE(review): Series and DataFrame are imported by name but the visible code
# only uses the `pd.` prefix.
from pandas import Series, DataFrame
import pandas as pd
# Print arrays with 4 digits of precision; arrays with more than 500 elements
# are summarised instead of printed in full.
np.set_printoptions(precision=4, threshold=500)
# Show at most 100 rows when pandas displays a frame or series.
pd.options.display.max_rows = 100
# Source workbook and the path named for the cleaned output.
# NOTE(review): outputFile is assigned but never written to in the visible code.
inputFile = 'd:/data/catering_sale.xls'
outputFile = 'd:/data/sales.xls'
data = pd.read_excel(inputFile)

# Mark sales figures below 400 or above 5000 as missing so they can be
# interpolated later.  A single .loc assignment is used instead of the chained
# form data[col][mask] = ..., which may write into a temporary copy and leave
# `data` unchanged (pandas warns about it, and with copy-on-write it never
# reaches the frame).
data.loc[(data[u'销量'] < 400) | (data[u'销量'] > 5000), u'销量'] = np.nan
def ployinterp_column(s, n, k=5):
    """Estimate the entry of ``s`` at label ``n`` by Lagrange interpolation.

    The interpolation nodes are the ``k`` labels before ``n`` and the ``k``
    labels after it (``n`` itself is excluded).  Labels that are not present
    in the index, and entries that are null, are left out.

    Args:
        s: pandas Series; assumed to be indexed by integer labels.
        n: Integer label of the entry to estimate.
        k: Number of neighbouring labels taken on each side of ``n``.

    Returns:
        The interpolating polynomial through the remaining nodes, evaluated
        at ``n``.
    """
    window = list(range(n - k, n)) + list(range(n + 1, n + 1 + k))
    # Near either end of the series part of the window lies outside the
    # index.  reindex() yields NaN for such labels, whereas s[window] raises
    # KeyError on current pandas; the NaNs are dropped on the next line.
    y = s.reindex(window)
    y = y[y.notnull()]
    return lagrange(y.index, list(y))(n)
# Replace every missing cell of `data`, column by column, with the value
# interpolated from its neighbours.  Cells are filled in row order, so a
# value filled earlier can serve as a node for a later one.
for i in data.columns:
    # The null mask is computed once per column; filling a cell never
    # changes whether a later cell is null.
    missing = data[i].isnull()
    for j in range(len(data)):
        if missing[j]:
            # .loc writes into `data` itself; the chained form
            # data[i][j] = ... may only modify a temporary copy.
            data.loc[j, i] = ployinterp_column(data[i], j)
# Two small frames that share only the 'key' column.
df1 = pd.DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],
                    'data': range(7)})
df2 = pd.DataFrame({'key': ['a', 'b', 'd'],
                    'data2': range(3)})
# print(<one string>) emits the same text as the former print statement on
# Python 2 and also runs on Python 3.
print('df1:=\n' + str(df1))
print('df2;=\n' + str(df2))
# Merge on the shared column, first implicitly and then with on='key'.
# Neither result is stored or printed.
pd.merge(df1, df2)
pd.merge(df1, df2, on='key')
# Frames whose key columns have different names ('lkey' and 'rkey').
df3 = pd.DataFrame({
    'lkey': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],
    'data1': range(7),
})
# print(<one string>) emits the same text as the former print statement on
# Python 2 and also runs on Python 3.
print('df3:=\n' + str(df3))
df4 = pd.DataFrame({
    'rkey': ['a', 'b', 'd'],
    'data2': range(3),
})
print('df4:=\n' + str(df4))
# Name the key column of each side explicitly; merge defaults to an inner
# join, so only keys present on both sides are kept.
df3_merge_df4 = pd.merge(df3, df4, left_on='lkey', right_on='rkey')
print('df3_merge_df4:=\n' + str(df3_merge_df4))
# Outer join of df1 and df2 on the column they share: keys found in either
# frame are kept.
df1_merge_df2 = pd.merge(df1, df2, how='outer')
# print(<one string>) emits the same text as the former print statement on
# Python 2 and also runs on Python 3.
print('df1_merge_df2:=\n' + str(df1_merge_df2))
# Rebuild df1 and df2 so that both sides contain repeated keys.
df1 = pd.DataFrame({
    'key': ['b', 'b', 'a', 'c', 'a', 'b'],
    'data1': range(6),
})
df2 = pd.DataFrame({
    'key': ['a', 'b', 'a', 'b', 'd'],
    'data2': range(5),
})
# print(<one string>) emits the same text as the former print statement on
# Python 2 and also runs on Python 3.
print('df1:=\n' + str(df1))
print('df2:=\n' + str(df2))
# Left join: every df1 row is kept and paired with each matching df2 row.
df1_left_df2 = pd.merge(df1, df2, on='key', how='left')
# Print the left-join result itself; this line used to print the earlier
# outer-merge frame under the 'df1_left_df2' label.
print('df1_left_df2:=\n' + str(df1_left_df2))
# Inner join: only keys present in both frames are kept.
df1_inner_df2 = pd.merge(df1, df2, how='inner')
print('df1_inner_df2:=\n' + str(df1_inner_df2))
# Frames sharing two columns, 'key1' and 'key2'.
left = pd.DataFrame({
    'key1': ['foo', 'foo', 'bar'],
    'key2': ['one', 'two', 'one'],
    'key3': [1, 2, 3],
})
right = pd.DataFrame({
    'key1': ['foo', 'foo', 'bar', 'bar'],
    'key2': ['one', 'one', 'one', 'two'],
    'rval': [4, 5, 6, 7],
})
# print(<one string>) emits the same text as the former print statement on
# Python 2 and also runs on Python 3.
print('left:=\n' + str(left))
print('right:=\n' + str(right))
# Outer join on the (key1, key2) pair.  This result is computed but not
# printed.
left_merge_right = pd.merge(left, right, on=['key1', 'key2'], how='outer')
# Joining on key1 alone leaves 'key2' on both sides; pandas disambiguates the
# two columns with its default '_x' / '_y' suffixes.
left_on_right = pd.merge(left, right, on='key1')
print('left_on_right:=\n' + str(left_on_right))
# Same join with explicit suffixes for the overlapping column.
left_one_right = pd.merge(left, right, on='key1', suffixes=('_left', '_right'))
# Show the suffixed result; this line used to print left_on_right a second
# time, so the effect of `suffixes` was never displayed.
print('left_one_right:=\n' + str(left_one_right))
# left1 carries the join key as a column; right1 carries it as its index.
left1 = pd.DataFrame({
    'key': ['a', 'b', 'a', 'a', 'b', 'c'],
    'value': range(6),
})
right1 = pd.DataFrame({'group_val': [3.5, 7]}, index=['a', 'b'])
# print(<one string>) emits the same text as the former print statement on
# Python 2 and also runs on Python 3.
print('left1:=\n' + str(left1))
print('right1:=\n' + str(right1))
# Match left1['key'] against right1's index (inner join by default, so the
# 'c' row, which has no counterpart in right1, is dropped).
left_index_right = pd.merge(left1, right1, left_on='key', right_index=True)
print('left_index_right:=\n' + str(left_index_right))
# lefth holds a two-part key in columns; righth holds the same two parts as a
# two-level index.
lefth = pd.DataFrame({
    'key1': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'key2': [2000, 2001, 2002, 2001, 2002],
    'data': np.arange((5.)),
})
righth = pd.DataFrame(
    np.arange(12).reshape((6, 2)),
    index=[
        ['Nevada', 'Nevada', 'Ohio', 'Ohio', 'Ohio', 'Ohio'],
        [2001, 2000, 2000, 2000, 2001, 2002],
    ],
    columns=['event1', 'event2'],
)
# print(<one string>) emits the same text as the former print statement on
# Python 2 and also runs on Python 3.
print('lefth:=\n' + str(lefth))
print('righth:=\n' + str(righth))
# The two key columns are listed in the order of righth's index levels so
# that each (key1, key2) pair is matched against one index entry.
lefth_merge_righth = pd.merge(
    lefth,
    righth,
    left_on=['key1', 'key2'],
    right_index=True,
)
print('lefth_merge_righth:=\n' + str(lefth_merge_righth))
# Two frames with the same column names and partly overlapping indexes.
left2 = pd.DataFrame(
    [[1., 2], [3., 4], [5., 6]],
    index=['a', 'c', 'e'],
    columns=['Ohio', 'Nevada'],
)
right2 = pd.DataFrame(
    [[1., 2], [3., 4], [5., 6]],
    index=['a', 'b', 'e'],
    columns=['Ohio', 'Nevada'],
)
# print(<one string>) emits the same text as the former print statement on
# Python 2 and also runs on Python 3.
print('left2:=\n' + str(left2))
print('right2:=\n' + str(right2))
# Outer join using the index of both frames as the key.  Because the column
# names collide, pandas applies its default '_x' / '_y' suffixes.
left2_right2 = pd.merge(left2, right2, how='outer',
                        left_index=True, right_index=True)
print('left2_right2:=\n' + str(left2_right2))
# Rebuild left2 and right2 with distinct column names so they can be joined
# without suffixes.
left2 = pd.DataFrame(
    [[1., 2], [3., 4], [5., 6]],
    index=['a', 'c', 'e'],
    columns=['Ohio', 'Nevada'],
)
right2 = pd.DataFrame(
    [[7., 8.], [9., 10.], [11., 12.], [13, 14]],
    index=['b', 'c', 'd', 'e'],
    columns=['Missouri', 'Alabama'],
)
# print(<one string>) emits the same text as the former print statement on
# Python 2 and also runs on Python 3.
print('left2:=\n' + str(left2))
print('right2:=\n' + str(right2))
# Index-on-index outer join via DataFrame.join.  This result is computed but
# not printed.
left2_join_right2 = left2.join(right2, how='outer')
# Show the two input frames again before comparing merge and join.
# print(<one string>) emits the same text as the former print statement on
# Python 2 and also runs on Python 3.
print('left1:=\n' + str(left1))
print('right1:=\n' + str(right1))
# pd.merge matching left1['key'] against right1's index (inner join by
# default).
left1_merge_right1 = pd.merge(left1, right1, left_on='key', right_index=True)
print('left1_merge_right1:=\n' + str(left1_merge_right1))
# DataFrame.join with on='key' does the same matching but as a left join, so
# every left1 row is kept.
left1_join_right1 = left1.join(right1, on='key')
# The label now names the frame being printed; it previously read
# 'left2_join_right2', which is a different result.
print('left1_join_right1:=\n' + str(left1_join_right1))
# A third frame, to join several frames on their indexes in one call.
another = pd.DataFrame(
    [[7, 8], [9, 10], [11, 12], [16, 17]],
    index=['a', 'c', 'e', 'f'],
    columns=['New York', 'Oregon'],
)
# Passing a list joins left2 with each listed frame on the index (left join
# by default).
left2_join_right2_another = left2.join([right2, another])
# print(<one string>) emits the same text as the former print statement on
# Python 2 and also runs on Python 3.
print('another:=\n' + str(another))
print('left2:=\n' + str(left2))
print('right2:=\n' + str(right2))
print('left2_join_right2_another:=\n' + str(left2_join_right2_another))
# Same multi-frame join, keeping index labels found in any of the frames.
left2_outer_join_right2_another = left2.join([right2, another], how='outer')
# Print the outer-join result; this line used to print the left-join frame
# again under the outer-join label.
print('left2_outer_join_right2_another:=\n'
      + str(left2_outer_join_right2_another))
# The integers 0..11 laid out as a 3 x 4 array.
arr = np.arange(12).reshape((3, 4))
# print(<one object>) emits the same text as the former print statement on
# Python 2 and also runs on Python 3.
print(arr)