1.split不支持同时使用多种字符做分隔符,如果想实现这样的效果,可以用re,例如:
>>> s = 'Hello!This?Is!What?I!Want'
>>> s
'Hello!This?Is!What?I!Want'
>>> re.split('[!?]',s)
['Hello', 'This', 'Is', 'What', 'I', 'Want']
i = "25 192.168.19.32 192.168.1.30 [2014.09.28 09:01:35]env -i X='() { (a)=>\' bash -c 'echo date'; cat echo"
>>> re.split(r'[ \[\]]',i)
['25', '192.168.19.32', '192.168.1.30', '', '2014.09.28', '09:01:35', 'env', '-i', "X='()", '{', "(a)=>'", 'bash', '-c', "'echo", "date';", 'cat', 'echo']
>>> re.findall(r'[\w.:]+',i)
['25', '192.168.19.32', '192.168.1.30', '2014.09.28', '09:01:35', 'env', 'i', 'X', 'a', 'bash', 'c', 'echo', 'date', 'cat', 'echo']
>>> re.findall(r'[^]^[\s]+',i)
['25', '192.168.19.32', '192.168.1.30', '2014.09.28', '09:01:35', 'env', '-i', "X='()", '{', "(a)=>'", 'bash', '-c', "'echo", "date';", 'cat', 'echo']
2.例子
test.txt:
abcdstr1
astr2m supernbajack
jordon
# coding: utf-8
import re


def split_on_separators(original, separators):
    """Split ``original`` on each character of ``separators``, one at a time.

    The string is first split on the first separator character; every
    resulting piece is then split on the next character, and so on.  After
    each pass, pieces that are empty or contain only whitespace are dropped.

    Args:
        original: The string to split.
        separators: A string (or any iterable of strings) whose items are
            used as separators, in order.

    Returns:
        A list of the remaining pieces.  If ``separators`` is empty the
        result is ``[original]``, returned without any filtering.
    """
    # A regex version is much shorter, but may not satisfy every requirement:
    # return filter(lambda x: x.strip(), re.split(r"[%s]" % separators, original))
    result = [original]
    for sep in separators:
        # Re-split every piece produced so far and discard blank fragments.
        result = [
            piece
            for chunk in result
            for piece in chunk.split(sep)
            if piece.strip()
        ]
    return result


if __name__ == "__main__":
    # Use a context manager so the file handle is closed deterministically.
    with open('test.txt', 'r') as handle:
        line = handle.read()
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(split_on_separators(line, "\t\n"))
    # print(split_on_separators("a\tb\nc\tdstr1\nastr2 a\tlisa", "\t\n"))
打印结果:
['a', 'b', 'c', 'dstr1', 'astr2', 'm super', 'nba', 'jack', 'jordon']
――――――――――――――――――――
for r in result: temp.extend(filter(lambda x:x.strip(), r.split(sep)))
对这句的理解:
>>> a = ' 123\ra\tabc'
>>> a.strip()
'123\ra\tabc'
>>> filter(lambda x:x.strip(),' 123\ra\tabc')
'123aabc'
>>> filter(lambda x:x.strip(),'a\tb\nc\tdstr1\nastr2 a\tlisa')
'abcdstr1astr2alisa'
>>> b = filter(lambda x:x.strip(),'[a\tb\nc\tdstr1\nastr2 a\tlisa]')
>>> b
'[abcdstr1astr2alisa]'
>>> temp = []
>>> temp.extend(filter(lambda x:x.strip(),'a'.split('\n\t')))
>>> temp
['a']
>>> temp
['a']
>>> temp.extend(filter(lambda x:x.strip(),'str1'.split('\n\t')))
>>> temp
['a', 'str1']
>>> temp.extend(filter(lambda x:x.strip(),'str2'))
>>> temp
['a', 'str1', 's', 't', 'r', '2']
>>> temp = []
>>> temp.extend(filter(lambda x:x.strip(),' 123\ra\tabc'))
>>> temp
['1', '2', '3', 'a', 'a', 'b', 'c']
>>> temp.extend(filter(lambda x:x.strip(),' 123\ra\tabc'.split('\n\t')))
>>> temp
['1', '2', '3', 'a', 'a', 'b', 'c', ' 123\ra\tabc']
――――――――――――――――――――――――――――――――――
上面例子的应用,将含有多列的普通文本写到excel中(本例split分隔符为空格和\n):
# coding: utf-8
import re
import xlrd
import xlwt


def split_on_separators(original, separators):
    """Split ``original`` on each character of ``separators``, one at a time.

    After every pass, pieces that are empty or contain only whitespace are
    dropped.

    Args:
        original: The string to split.
        separators: A string (or any iterable of strings) whose items are
            used as separators, in order.

    Returns:
        A list of the remaining pieces.  If ``separators`` is empty the
        result is ``[original]``, returned without any filtering.
    """
    # A regex version is much shorter, but may not satisfy every requirement:
    # return filter(lambda x: x.strip(), re.split(r"[%s]" % separators, original))
    result = [original]
    for sep in separators:
        # Re-split every piece produced so far and discard blank fragments.
        result = [
            piece
            for chunk in result
            for piece in chunk.split(sep)
            if piece.strip()
        ]
    return result


def wexcel(infile, outefile):
    """Write the space/newline separated tokens of ``infile`` to a workbook.

    The text of ``infile`` is split on spaces and newlines; each token is
    written to column 0 of a sheet named ``sheet1``, one token per row, and
    the workbook is saved to ``outefile``.  The token list and every token
    are also printed, as in the original script.

    Args:
        infile: Path of the text file to read.
        outefile: Path of the ``.xls`` file to create.
    """
    # Read the file that was actually passed in (the original ignored
    # ``infile`` and used a module-level global), and close it when done.
    with open(infile, 'r') as handle:
        buf = split_on_separators(handle.read(), " \n")
    print(buf)

    w = xlwt.Workbook()
    sheet = w.add_sheet('sheet1')
    for i, cell in enumerate(buf):
        print(cell)
        # The original always called .decode('utf8'); only byte strings
        # (what Python 2 reads) have that step, text is written unchanged.
        if isinstance(cell, bytes):
            cell = cell.decode('utf8')
        sheet.write(i, 0, cell)
    w.save(outefile)


if __name__ == "__main__":
    wexcel('ex.txt', 'ex.xls')
打印结果(ex.txt写到ex.xls中):
ex.txt:
/ 24%
/backup 62%
/project 20%
memory 26.16%
――――――――――――――――后续―――――――――――――――
test.txt:
/ 24%
/backup 62%
/project 20%
memory 26.16%
line = file('D:/python27/test.txt','r').read()
>>> line.split('\n')
['/ 24%', '/backup 62%', '/project 20%', 'memory 26.16%']
>>> line.split()
['/', '24%', '/backup', '62%', '/project', '20%', 'memory', '26.16%']
>>> filter(lambda x:x.strip(),line.split())
['/', '24%', '/backup', '62%', '/project', '20%', 'memory', '26.16%']
>>> for i in [line]:
...     print i.split('\n')
...
['/ 24%', '/backup 62%', '/project 20%', 'memory 26.16%']
>>> for i in [line]:
...     print filter(lambda x:x.strip(), i.split('\n'))
...
['/ 24%', '/backup 62%', '/project 20%', 'memory 26.16%']
>>> filter(lambda x:x.strip(),line)
'/24%/backup62%/project20%memory26.16%'
>>> for i in line.split('\n'):
...     for sep in i.split(' '):
...         temp = []
...         temp.extend(filter(lambda x:x.strip(),i.split(' ')))
...     print temp
...
['/', '24%']
['/backup', '62%']
['/project', '20%']
['memory', '26.16%']
>>> for i in line.split('\n'):
...     for sep in i.split(' '):
...         temp = []
...         temp.extend(filter(lambda x:x.strip(),sep.split(' ')))
...     print sep
...
24%
62%
20%
26.16%