Pandas 工具包实战(12)字符串操作:series, dataframe

1 字符串操作

import pandas as pd
import numpy as np
# Sample Series: strings in mixed case plus one missing value (np.nan).
s = pd.Series(data=['A', 'b', 'B', 'gaer', 'AGER', np.nan])
s  # echo the Series (shows its contents when run in a notebook cell)
s.str.lower()  # convert every letter to lower case
s.str.upper()  # convert every letter to upper case
s.str.len()  # length of each data element

# Index whose labels carry leading and/or trailing spaces.
index = pd.Index(data=['  tang', '  yu   ', 'di'])
index  # Index(['  tang', '  yu   ', 'di'], dtype='object')
# strip() trims whitespace on both sides of every label:
index.str.strip()  # Index(['tang', 'yu', 'di'], dtype='object')
# rstrip() trims only the right-hand side, so leading spaces remain:
index.str.rstrip()  # Index(['  tang', '  yu', 'di'], dtype='object')

# Demo frame: 3 rows x 2 columns of random values; both column labels
# contain a space ('A a', 'B b').
df = pd.DataFrame(np.random.randn(3, 2), columns=['A a', 'B b'], index=range(3))
# Remove the spaces from the column labels.  regex=False requests a plain
# literal substitution explicitly, so the result does not depend on the
# installed pandas version's default for the `regex` argument.
df.columns = df.columns.str.replace(' ', '', regex=False)

# Underscore-delimited strings used to demonstrate str.split.
s = pd.Series(data=['a_b_C', 'c_d_e', 'f_g_h'])
# 0    a_b_C
# 1    c_d_e
# 2    f_g_h
# dtype: object

# Default form: every element becomes a list of its pieces.
s.str.split(pat='_')
# 0    [a, b, C]
# 1    [c, d, e]
# 2    [f, g, h]
# dtype: object

# expand=True spreads the pieces over separate columns.
s.str.split(pat='_', expand=True)
#    0  1  2
# 0  a  b  C
# 1  c  d  e
# 2  f  g  h

# n limits the number of splits; with n=1 each string is cut only once.
s.str.split(pat='_', n=1, expand=True)
#    0    1
# 0  a  b_C
# 1  c  d_e
# 2  f  g_h

# Strings that all start with 'A'; only some of them contain 'Ag'.
s = pd.Series(data=['A', 'Aas', 'Afgew', 'Ager', 'Agre', 'Ager'])
# 0        A
# 1      Aas
# 2    Afgew
# 3     Ager
# 4     Agre
# 5     Ager
# dtype: object

# Returns a Series of True/False values, one per element.
s.str.contains(pat='Ag')
# 0    False
# 1    False
# 2    False
# 3     True
# 4     True
# 5     True
# dtype: bool

# Each element is a '|'-separated list of labels.
s = pd.Series(data=['a', 'a|b', 'a|c'])
# 0      a
# 1    a|b
# 2    a|c
# dtype: object

# One indicator column per distinct label; 1 marks rows containing it.
s.str.get_dummies('|')
#    a  b  c
# 0  1  0  0
# 1  1  1  0
# 2  1  0  1

你可能感兴趣的:(Pandas 工具包实战(12)字符串操作:series, dataframe)