Python 学习随笔

pandas.Series.unique() 查找所包含的数值

a=pd.DataFrame({'a':[1,2,3,4,1,1,1,1,1,1,1,1,np.nan],\
 'b':[2,2,3,4,2,2,2,2,2,2,2,2,2],\
 'c':[3,2,3,4,4,4,4,4,4,4,4,4,4],\
 'd':[4,2,3,4,4,4,4,4,4,5,5,5,5]})
c=a['a'].unique()
print c
---------------------------------
[ 1. 2. 3. 4. nan]

numpy 数组排序 sorted(Python 内置函数)

a=pd.DataFrame({'a':[5,6,3,4,1,1,1,1,1,1,1,1,np.nan],\
 'b':[2,2,3,4,2,2,2,2,2,2,2,2,2],\
 'c':[3,2,3,4,4,4,4,4,4,4,4,4,4],\
 'd':[4,2,3,4,4,4,4,4,4,5,5,5,5]})
c=a['a'].unique()
print c
print sorted(c)
-------------------------------------------------
[ 5. 6. 3. 4. 1. nan]
[1.0, 3.0, 4.0, 5.0, 6.0, nan]

已知dataframe中a,b的值输出c的值 (loc 补充)

a=pd.DataFrame({'a':[5,6,3,4,1,1,1,1,1,1,1,1,5],\
                'b':[1,2,3,4,5,6,7,8,9,10,11,12,13],\
                'c':[3,3,3,4,4,4,4,4,4,5,5,5,5],\
                'd':[4,2,3,4,4,4,4,4,4,5,5,5,5]})

d=a.loc[(a['a']==1)&(a['b']==5)]
print len(d)
print d.loc[:,'c'].values[0]
----------------------------------
1
4

取整

int()#向零取整(直接截断小数部分;对正数等同于向下取整)
round()#四舍五入
math.ceil()#向上取整

重复列表元素n次

a=[1,2,3,4]
b=[i for i in a for x in range(n)]

取余数

5%2------》1

divmod(5,2)------》(2,1)

统计周期内的和

def tran_14(dataframe, period=14):
    """Sum the rows of ``dataframe`` over consecutive fixed-length periods.

    The first ``len(dataframe) % period`` rows are dropped so that the
    remaining rows divide evenly into whole periods; each period is then
    collapsed into one row holding the column-wise sums.

    Args:
        dataframe: pandas DataFrame whose rows are to be grouped in order.
        period: number of rows per period. Defaults to 14, the original
            hard-coded value.

    Returns:
        A DataFrame with one row per complete period. Its index is the
        0-based period number and is named ``index14`` (the name is kept
        for backward compatibility even when ``period`` is not 14).

    Raises:
        ValueError: if ``period`` is not positive.
    """
    if period <= 0:
        raise ValueError("period must be a positive integer")
    full_periods, remainder = divmod(len(dataframe), period)
    # Work on a copy of the trimmed slice: adding the helper column to a bare
    # ``iloc`` slice is chained assignment on the caller's data and triggers
    # pandas' SettingWithCopyWarning.
    trimmed = dataframe.iloc[remainder:, :].copy()
    trimmed['index14'] = [
        number for number in range(full_periods) for _ in range(period)
    ]
    return trimmed.groupby('index14').sum()

转化为时间序列

# Load the CSV at w_file2[i], using its first column as the row index.
b=pd.read_csv(w_file2[i],index_col=0)
# Parse the index values as dates written in YYYYMMDD form.
dateindex=pd.to_datetime(b.index,format='%Y%m%d')
# Replace the original index with the parsed dates as a DatetimeIndex.
b.index=pd.DatetimeIndex(dateindex)

画时间序列

一 出处http://blog.csdn.net/rumswell/article/details/9862089

# Date-axis formatting snippet.
# NOTE(review): assumes `figure` and `ax` are an existing matplotlib
# Figure/Axes created outside this snippet — confirm.
from matplotlib.dates import AutoDateLocator, DateFormatter  
autodates = AutoDateLocator()  
yearsFmt = DateFormatter('%Y-%m-%d %H:%M:%S')  
figure.autofmt_xdate()        # adjust the appearance of the x-axis date labels
ax.xaxis.set_major_locator(autodates)       # set the interval between date ticks
ax.xaxis.set_major_formatter(yearsFmt)      # set the date display format
ax.set_xticks() # set the x-axis tick positions
ax.set_xlim()   # set the x-axis range

from matplotlib.dates import  DateFormatter
# Tick labels are rendered as year-month-day.
yearsFmt = DateFormatter('%Y-%m-%d')
# Tick positions: every 10 days starting 2014/10/10, up to 2015/12/27.
data_r=pd.date_range('2014/10/10','2015/12/27',freq='10D')
# Plot b with those ticks, a grid, and tick labels rotated 45 degrees.
b1=b.plot(xticks=data_r,grid=True,rot=45)
# NOTE(review): presumably passes an empty label to blank out the legend — confirm.
b1.legend('')
b1.xaxis.set_major_formatter(yearsFmt)
plt.grid(True)
# Title the plot with w_file2[i].
b1.set_title(w_file2[i])

正则re

之前的id有一个大写的X,这里用re的search去掉它

def search_id(data):
    """Return the digits that follow the first uppercase ``X`` in ``data``.

    Args:
        data: raw id string, e.g. ``'X12345'``.

    Returns:
        The run of digits captured after the ``X``, as a string. When
        ``data`` contains no ``X`` followed by digits there is nothing to
        strip, so ``data`` is returned unchanged instead of raising
        ``AttributeError`` on the missing match object.
    """
    matched = re.search(r'X([0-9]+)', data)
    if matched is None:
        # re.search returns None on no match; calling .group on it would crash.
        return data
    return matched.group(1)
# Normalise every id with search_id. The loop variable is deliberately not
# named `id`: that would shadow the builtin id(), and in Python 2 a list
# comprehension's loop variable leaks into the enclosing scope.
item_id=[search_id(raw_id) for raw_id in item_id]

附:
Python提供了两种不同的原始操作:match和search。match只从字符串的起点开始匹配,而search(Perl的默认行为)会扫描整个字符串,在任意位置查找第一个匹配。
prog = re.compile(pattern)
result = prog.match(string)

result = re.match(pattern, string)
以上两种写法是等价的。
. ^ $ * + ? { } [ ] \ | ( ) 是几个比较特殊的字符,含义见以下博客:
http://www.cnblogs.com/huxi/archive/2010/07/04/1771073.html

\d  匹配任何十进制数;它相当于类 [0-9]。

\D  匹配任何非数字字符;它相当于类 [^0-9]。

\s  匹配任何空白字符;它相当于类 [ \t\n\r\f\v]。 \S 匹配任何非空白字符;它相当于类 [^ \t\n\r\f\v]。

\w  匹配任何字母数字字符;它相当于类 [a-zA-Z0-9_]。

\W  匹配任何非字母数字字符;它相当于类 [^a-zA-Z0-9_]。

http://www.runoob.com/python/python-reg-expressions.html(推荐)

#!/usr/bin/python
import re

line = "Cats are smarter than dogs"

matchObj = re.match( r'(.*) are (.*?) .*', line, re.M|re.I)

if matchObj:
   print "matchObj.group() : ", matchObj.group()
   print "matchObj.group(1) : ", matchObj.group(1)
   print "matchObj.group(2) : ", matchObj.group(2)
else:
   print "No match!!"
   ----------------------------
matchObj.group() :  Cats are smarter than dogs
matchObj.group(1) :  Cats
matchObj.group(2) :  smarter
import re
print(re.search('www', 'www.runoob.com').span())  # 在起始位置匹配
print(re.search('com', 'www.runoob.com').span())         # 不在起始位置匹配

======================
(0, 3)
(11, 14)
import re

line = "Cats are smarter than dogs";

searchObj = re.search( r'(.*) are (.*?) .*', line, re.M|re.I)

if searchObj:
   print "searchObj.group() : ", searchObj.group()
   print "searchObj.group(1) : ", searchObj.group(1)
   print "searchObj.group(2) : ", searchObj.group(2)
else:
   print "Nothing found!!"
-------------------------------
searchObj.group() :  Cats are smarter than dogs
searchObj.group(1) :  Cats
searchObj.group(2) :  smarter

Series.str.split()

Series.str can be used to access the values of the series as strings and apply several methods to it.
例子在下方

pandas.DataFrame.stack

以level为支点展开
DataFrame.stack(level=-1, dropna=True)

level : int, string, or list of these, default last level
Level(s) to stack, can pass level name
dropna : boolean, default True
Whether to drop rows in the resulting Frame/Series with no valid values

Examples

>>> s
     a   b
one  1.  2.
two  3.  4.
>>> s.stack()
one a    1
    b    2
two a    3
    b    4

pandas.Series.apply

Series.apply(func, convert_dtype=True, args=(), **kwds)
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.apply.html

a=pd.DataFrame({'a':['1','w,q,i'],'b':['2','o']},columns=['b','a'])
print a.a.str.split(',')
print '--------------'
print a.a.str.split(',').apply(pd.Series,1)
print '--------------'
print a.a.str.split(',').apply(pd.Series,1).stack()
==============================================
0          [1]
1    [w, q, i]
dtype: object
--------------
   0    1    2
0  1  NaN  NaN
1  w    q    i
--------------
0  0    1
1  0    w
   1    q
   2    i
dtype: object

一列转化为多列 stack reset_index split

a=pd.DataFrame({'a':['1','w,q,i'],'b':['2','o']},columns=['b','a'])

b = pd.DataFrame(a.a.str.split(',').tolist(), index=a.b)
print b
print '1---------------------'
b = pd.DataFrame(a.a.str.split(',').tolist(), index=a.b).stack()
print b
print '2---------------------'
b = b.reset_index()[[0, 'b']] # var1 variable is currently labeled 0
print b
print '3---------------------'
b.columns = ['a', 'b'] # renaming var1
print b
=========================================
  0     1     2
b               
2  1  None  None
o  w     q     i
1---------------------
b   
2  0    1
o  0    w
   1    q
   2    i
dtype: object
2---------------------
   0  b
0  1  2
1  w  o
2  q  o
3  i  o
3---------------------
   a  b
0  1  2
1  w  o
2  q  o
3  i  o

一列转化为多列 pd.Series() concat() iterrows()

a=pd.DataFrame({'a':['1','w,q,i'],'b':['2','o']},columns=['b','a'])

b=pd.concat([pd.Series(row['b'], row['a'].split(','))
                    for _, row in a.iterrows()]).reset_index()

print b
print [row['b']for _, row in a.iterrows()]
print [row['a'].split(',') for _, row in a.iterrows()]
print pd.Series([ 'o'],[ ['w', 'q', 'i']])
===============================

  index  0
0     1  2
1     w  o
2     q  o
3     i  o

['2', 'o']

[['1'], ['w', 'q', 'i']]


w    o
q    o
i    o
dtype: object

pandas.Series(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)

concat()

c=pd.Series([ 'o'],[ ['w', 'q', 'i']])
d=pd.Series([ 'o'],[ ['w', 'q', 'i']])
print pd.concat([c,d])
========================
w    o
q    o
i    o
w    o
q    o
i    o
dtype: object

你可能感兴趣的:(python)