文本(TEXT)处理中最常用的是string、textwrap、re和difflib模块。
今天讲string,不多说,首先贴代码:
import string
s = 'Have a good day.'
print s
print string.capwords(s, None)
leet = string.maketrans('abcdefg', '1234567')
print s
print s.translate(leet)
values = {'var' : 'foo'}
t = string.Template("""
Variable : $var
Escape : $$
Variable in text: ${var}iable
""")
print 'TEMPLATE:', t.substitute(values)
s = """
Variable : %(var)s
Escape : %%
Variable in text: %(var)siable
"""
print 'INTERPOLATION:', s % values
t = string.Template("$var is here but $missing is not provided")
try:
print 'substitute() :', t.substitute(values)
except KeyError, err:
print 'ERROR:', str(err)
print 'safe_substitute():', t.safe_substitute(values)
贴各个函数的解释:
def capwords Found at: string
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join.  If the optional second argument sep is absent or None,
    runs of whitespace characters are replaced by a single space
    and leading and trailing whitespace are removed, otherwise
    sep is used to split and join the words.
    """
    # The parentheses matter: without them a truthy sep short-circuits the
    # "or" and is returned as-is instead of being used to join the words.
    return (sep or ' ').join(x.capitalize() for x in s.split(sep))
# Construct a translation string
def maketrans Found at: string
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 bytes long)
    suitable for use in string.translate.  The strings frm and to
    must be of the same length.

    Raises ValueError when the two arguments differ in length.
    """
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    # _idmap / _idmapL are module-level names defined elsewhere in string.py;
    # the list form of _idmap is built once on first use and then reused.
    global _idmapL
    if not _idmapL:
        _idmapL = list(_idmap)
    # Work on a copy so the shared cached list is never modified.
    table = _idmapL[:]
    # Each source character's ordinal is the slot that receives the
    # corresponding target character.
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return ''.join(table)
####################################################################
str.translate Found at: __builtin__
S.translate(table [,deletechars]) -> string
Return a copy of the string S, where all characters occurring
in the optional argument deletechars are removed, and the
remaining characters have been mapped through the given
translation table, which must be a string of length 256 or None.
If the table argument is None, no translation is applied and
the operation simply removes the characters in deletechars.
class Template Found at: string
class Template:
    """A string class for supporting $-substitutions."""
    # NOTE(review): _TemplateMetaclass is defined elsewhere in string.py, and
    # the methods below read self.pattern, which this excerpt never assigns --
    # presumably the metaclass provides it; confirm against the full source.
    __metaclass__ = _TemplateMetaclass

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        """Store the raw template text for later substitution."""
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError giving the line and column of an invalid placeholder.

        mo is the match object whose 'invalid' group matched.
        """
        i = mo.start('invalid')
        # Keep line endings so the joined length of the earlier lines is the
        # exact offset at which the last line begins.
        lines = self.template[:i].splitlines(True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError(
            'Invalid placeholder in string: line %d, col %d' %
            (lineno, colno))

    def substitute(self, *args, **kws):
        """Return the template text with every placeholder substituted.

        Values are looked up in at most one positional mapping and/or in the
        keyword arguments; when both are given they are combined with
        _multimap(kws, args[0]) (defined elsewhere in the module).

        Raises TypeError for more than one positional argument, ValueError
        for an invalid placeholder, and lets the mapping's lookup error
        (KeyError for a dict) propagate for a missing name.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _multimap(kws, args[0])
        else:
            mapping = args[0]

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                val = mapping[named]
                # We use this idiom instead of str() because the latter will
                # fail if val is a Unicode containing non-ASCII characters.
                return '%s' % (val,)
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """Like substitute(), but leave unresolved placeholders in place.

        A placeholder whose lookup raises KeyError, and any invalid
        placeholder, is copied to the result unchanged instead of raising.
        Raises TypeError for more than one positional argument.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _multimap(kws, args[0])
        else:
            mapping = args[0]

        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                try:
                    # We use this idiom instead of str() because the latter
                    # will fail if val is a Unicode containing non-ASCII
                    # characters.
                    return '%s' % (mapping[named],)
                except KeyError:
                    # Missing name: emit the placeholder text as written.
                    return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return mo.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(convert, self.template)
####################################################################
# NOTE: Everything below here is deprecated. Use string methods instead.
# This stuff will go away in Python 3.0.
# Backward compatible names for exceptions