# Example: Series.unique() on a column that contains a NaN.
# The printed result below shows the NaN reported as one of the distinct values.
a=pd.DataFrame({'a':[1,2,3,4,1,1,1,1,1,1,1,1,np.nan],\
'b':[2,2,3,4,2,2,2,2,2,2,2,2,2],\
'c':[3,2,3,4,4,4,4,4,4,4,4,4,4],\
'd':[4,2,3,4,4,4,4,4,4,5,5,5,5]})
# Distinct values of column 'a'.
c=a['a'].unique()
print c ---------------------------------
[ 1. 2. 3. 4. nan]
# Same example with different leading values in column 'a', so the ordering
# of the result is visible.
a=pd.DataFrame({'a':[5,6,3,4,1,1,1,1,1,1,1,1,np.nan],\
'b':[2,2,3,4,2,2,2,2,2,2,2,2,2],\
'c':[3,2,3,4,4,4,4,4,4,4,4,4,4],\
'd':[4,2,3,4,4,4,4,4,4,5,5,5,5]})
# The output below lists the distinct values in the order they first occur
# in the column, not in sorted order.
c=a['a'].unique()
print c
# NOTE(review): in the output below NaN comes last after sorted(). Comparisons
# against NaN are always False, so presumably that position is not guaranteed
# for other inputs -- confirm before relying on it.
print sorted(c) -------------------------------------------------
[ 5. 6. 3. 4. 1. nan]
[1.0, 3.0, 4.0, 5.0, 6.0, nan]
# Example: select rows with a boolean mask that combines two column conditions.
a=pd.DataFrame({'a':[5,6,3,4,1,1,1,1,1,1,1,1,5],\
'b':[1,2,3,4,5,6,7,8,9,10,11,12,13],\
'c':[3,3,3,4,4,4,4,4,4,5,5,5,5],\
'd':[4,2,3,4,4,4,4,4,4,5,5,5,5]})
# Keep the rows where a == 1 and b == 5; each comparison is parenthesised
# before the two masks are combined with &.
d=a.loc[(a['a']==1)&(a['b']==5)]
# Number of matching rows.
print len(d)
# Value of column 'c' in the first matching row; indexing [0] fails if no
# row matched.
print d.loc[:,'c'].values[0]
----------------------------------
1
4
int()  # truncates toward zero; this equals rounding down only for non-negative numbers
round(),  # rounds to the nearest value; NOTE(review): exact halves round away from zero on Python 2 but to the nearest even number on Python 3
math.ceil()  # rounds up to the nearest integer
# Repeat every element of a list n times, keeping the original order.
a = [1, 2, 3, 4]
n = 2  # example repeat count; the snippet previously used n without defining it
b = [item for item in a for _ in range(n)]  # -> [1, 1, 2, 2, 3, 3, 4, 4]
5%2------》1
divmod(5,2)------》(2,1)
def tran_14(dataframe, period=14):
    """Sum consecutive blocks of ``period`` rows, aligned to the end of the frame.

    The leading ``len(dataframe) % period`` rows are discarded so that the
    remaining rows split into whole blocks; each block is then summed
    column-wise.

    Args:
        dataframe: pandas DataFrame whose rows are to be aggregated.
        period: number of rows per block (defaults to 14).

    Returns:
        A DataFrame with one row per block, indexed by the block number
        (index name ``'index14'``), holding the per-column sums. It is empty
        when the input has fewer than ``period`` rows.

    Raises:
        ValueError: if ``period`` is not positive.
    """
    if period <= 0:
        raise ValueError("period must be a positive integer")
    m, n = divmod(len(dataframe), period)
    # Copy the slice before adding the helper column, so the assignment
    # never targets a view of the caller's frame.
    new_dataframe = dataframe.iloc[n:, :].copy()
    # Block label for every remaining row: 0 repeated `period` times, then 1, ...
    new_dataframe['index14'] = [i for i in range(m) for _ in range(period)]
    return new_dataframe.groupby('index14').sum()
# Read one CSV file, using its first column as the row index.
# NOTE(review): w_file2 and i are not defined in this snippet; presumably a
# list of file paths and a loop counter -- confirm against the caller.
b=pd.read_csv(w_file2[i],index_col=0)
# Parse the index labels as dates written in YYYYMMDD form.
dateindex=pd.to_datetime(b.index,format='%Y%m%d')
# Replace the original index with the parsed dates.
b.index=pd.DatetimeIndex(dateindex)
一 出处http://blog.csdn.net/rumswell/article/details/9862089
from matplotlib.dates import AutoDateLocator, DateFormatter
# NOTE(review): figure and ax are not created in this snippet; presumably an
# existing matplotlib Figure and Axes -- confirm.
autodates = AutoDateLocator()
# Tick label format: full date and time.
yearsFmt = DateFormatter('%Y-%m-%d %H:%M:%S')
figure.autofmt_xdate() # adjust the appearance of the dates on the x axis
ax.xaxis.set_major_locator(autodates) # set the interval between date ticks
ax.xaxis.set_major_formatter(yearsFmt) # set the display format of the dates
ax.set_xticks() # set the x-axis tick positions; NOTE(review): written without arguments, presumably a placeholder
ax.set_xlim() # set the x-axis range; NOTE(review): written without arguments, presumably a placeholder
二
from matplotlib.dates import DateFormatter
# Tick label format: date only.
yearsFmt = DateFormatter('%Y-%m-%d')
# One tick position every 10 days between the two dates.
data_r=pd.date_range('2014/10/10','2015/12/27',freq='10D')
# Plot b with those tick positions, a grid, and tick labels rotated 45 degrees.
# NOTE(review): b is not defined in this snippet; presumably the date-indexed
# frame loaded from CSV -- confirm.
b1=b.plot(xticks=data_r,grid=True,rot=45)
# NOTE(review): presumably intended to blank out the legend -- confirm.
b1.legend('')
b1.xaxis.set_major_formatter(yearsFmt)
plt.grid(True)
# Use the w_file2[i] entry as the plot title.
b1.set_title(w_file2[i])
之前的id有一个大写的X,这里用re的search去掉它
def search_id(data):
    """Return the run of digits that follows the first capital ``X`` in *data*.

    Used to strip the ``X`` prefix from ids such as ``'X123'``.

    Args:
        data: the id string to clean.

    Returns:
        The digits captured after the first ``X`` (as a string), or *data*
        unchanged when it contains no ``X`` followed by digits.
    """
    found = re.search(r'X([0-9]+)', data)
    if found is None:
        # No 'X<digits>' to strip: hand the id back as-is rather than failing
        # with an AttributeError on None.
        return data
    return found.group(1)
# Clean every id in the list. The loop variable is not called `id`, so the
# builtin id() is not shadowed (or overwritten, as a Python 2 list
# comprehension would do at module level).
item_id = [search_id(raw_id) for raw_id in item_id]
附:
Python提供了两种不同的原始操作:match和search。match只从字符串的起始位置开始匹配,而search(Perl的默认行为)会扫描整个字符串,在任意位置寻找第一个匹配。
prog = re.compile(pattern)
result = prog.match(string)
跟
result = re.match(pattern, string)
是等价的。
. ^ $ * + ? { } [ ] \ | ( ) 是几个比较特殊的字符,含义见以下博客
http://www.cnblogs.com/huxi/archive/2010/07/04/1771073.html
\d 匹配任何十进制数;它相当于类 [0-9]。
\D 匹配任何非数字字符;它相当于类 [^0-9]。
\s 匹配任何空白字符;它相当于类 [ \t\n\r\f\v]。
\S 匹配任何非空白字符;它相当于类 [^ \t\n\r\f\v]。
\w 匹配任何字母数字字符;它相当于类 [a-zA-Z0-9_]。
\W 匹配任何非字母数字字符;它相当于类 [^a-zA-Z0-9_]。
http://www.runoob.com/python/python-reg-expressions.html(推荐)
#!/usr/bin/python
import re
# Demonstrate re.match with capture groups: group() is the whole match,
# group(1) and group(2) are the two parenthesised parts.
line = "Cats are smarter than dogs"
matchObj = re.match(r'(.*) are (.*?) .*', line, re.M | re.I)
if matchObj:
    # Each message is built as a single string so the call prints the same
    # text whether print is a statement (Python 2) or a function (Python 3).
    print("matchObj.group() :  " + matchObj.group())
    print("matchObj.group(1) :  " + matchObj.group(1))
    print("matchObj.group(2) :  " + matchObj.group(2))
else:
    print("No match!!")
----------------------------
matchObj.group() : Cats are smarter than dogs
matchObj.group(1) : Cats
matchObj.group(2) : smarter
import re
# Print the (start, end) span of the first occurrence of each pattern.
print(re.compile('www').search('www.runoob.com').span())  # match at the start of the string
print(re.compile('com').search('www.runoob.com').span())  # match away from the start
======================
(0, 3)
(11, 14)
import re
# Demonstrate re.search with capture groups: group() is the whole match,
# group(1) and group(2) are the two parenthesised parts.
line = "Cats are smarter than dogs"
searchObj = re.search(r'(.*) are (.*?) .*', line, re.M | re.I)
if searchObj:
    # Each message is built as a single string so the call prints the same
    # text whether print is a statement (Python 2) or a function (Python 3).
    print("searchObj.group() :  " + searchObj.group())
    print("searchObj.group(1) :  " + searchObj.group(1))
    print("searchObj.group(2) :  " + searchObj.group(2))
else:
    print("Nothing found!!")
-------------------------------
searchObj.group() : Cats are smarter than dogs
searchObj.group(1) : Cats
searchObj.group(2) : smarter
Series.str can be used to access the values of the series as strings and apply several methods to it.
例子在下方
以level为支点展开
DataFrame.stack(level=-1, dropna=True)
level : int, string, or list of these, default last level
Level(s) to stack, can pass level name
dropna : boolean, default True
Whether to drop rows in the resulting Frame/Series with no valid values
Examples
>>> s
a b
one 1. 2.
two 3. 4.
>>> s.stack()
one a 1
b 2
two a 3
b 4
Series.apply(func, convert_dtype=True, args=(), **kwds)
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.apply.html
# Approach 1: split a comma-separated string column, spread the pieces over
# columns with apply(pd.Series), then stack them into one value per row.
a = pd.DataFrame({'a': ['1', 'w,q,i'], 'b': ['2', 'o']}, columns=['b', 'a'])
# One list of pieces per row.
print(a.a.str.split(','))
print('--------------')
# One column per piece. The stray positional argument (1) passed to apply()
# was dropped: it landed on convert_dtype, for which a truthy value is
# already the default.
print(a.a.str.split(',').apply(pd.Series))
print('--------------')
# Long format: one piece per row under a two-level index.
print(a.a.str.split(',').apply(pd.Series).stack())
# ============================================== (console output follows)
0 [1]
1 [w, q, i]
dtype: object --------------
0 1 2
0 1 NaN NaN
1 w q i --------------
0 0 1
1 0 w
1 q
2 i
dtype: object
# Approach 2: build a frame straight from the split lists, indexed by column
# 'b', stack it to one piece per row, then tidy the column names.
a = pd.DataFrame({'a': ['1', 'w,q,i'], 'b': ['2', 'o']}, columns=['b', 'a'])
# Wide form: one column per piece, rows labelled by the value of 'b'.
b = pd.DataFrame(a.a.str.split(',').tolist(), index=a.b)
print(b)
print('1---------------------')
# Long form: one piece per row.
# NOTE(review): dropna() is a no-op when stack() already discards the empty
# cells; it is kept so the result stays the same if stack() retains them
# (presumably the case for the newer stack implementation -- confirm).
b = pd.DataFrame(a.a.str.split(',').tolist(), index=a.b).stack().dropna()
print(b)
print('2---------------------')
# Move the index levels into columns and keep only the piece (labelled 0)
# and the original 'b' value.
b = b.reset_index()[[0, 'b']]
print(b)
print('3---------------------')
# Give the piece column its original name.
b.columns = ['a', 'b']
print(b)
=========================================
0 1 2
b
2 1 None None
o w q i
1---------------------
b
2 0 1
o 0 w
1 q
2 i
dtype: object
2---------------------
0 b
0 1 2
1 w o
2 q o
3 i o
3---------------------
a b
0 1 2
1 w o
2 q o
3 i o
# Approach 3: for every source row build a Series whose index is the split
# pieces of row['a'] and whose value is row['b'], then concatenate them.
a=pd.DataFrame({'a':['1','w,q,i'],'b':['2','o']},columns=['b','a'])
# reset_index() moves the pieces out of the index; the output below shows
# them in a column named 'index' next to the values in a column named 0.
b=pd.concat([pd.Series(row['b'], row['a'].split(','))
for _, row in a.iterrows()]).reset_index()
print b
# The two ingredients used above, shown separately: the 'b' value of each row ...
print [row['b']for _, row in a.iterrows()]
# ... and the list of pieces of each row.
print [row['a'].split(',') for _, row in a.iterrows()]
# A one-element data list with a nested-list index; the output below shows
# the value repeated for each label.
# NOTE(review): this relies on how the pandas version in use handles a nested
# list as index -- confirm on the installed version.
print pd.Series([ 'o'],[ ['w', 'q', 'i']])
===============================
index 0
0 1 2
1 w o
2 q o
3 i o
['2', 'o']
[['1'], ['w', 'q', 'i']]
w o
q o
i o
dtype: object
pandas.Series(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)
# Two identical Series built the same way (one-element data, nested-list index).
# NOTE(review): the construction relies on how the pandas version in use
# handles a nested list as index -- confirm on the installed version.
c=pd.Series([ 'o'],[ ['w', 'q', 'i']])
d=pd.Series([ 'o'],[ ['w', 'q', 'i']])
# pd.concat appends d after c; the output below shows the duplicate index
# labels are kept rather than merged.
print pd.concat([c,d])
========================
w o
q o
i o
w o
q o
i o
dtype: object