1 """A collection of string operations (most are no longer used).2
3 Warning: most of the code you see here isn't normally used nowadays.4 Beginning with Python 1.6, many of these functions are implemented as5 methods on the standard string object. They used to be implemented by6 a built-in module called strop, but strop is now obsolete itself.7
8 Public module variables:9
10 whitespace -- a string containing all characters considered whitespace11 lowercase -- a string containing all characters considered lowercase letters12 uppercase -- a string containing all characters considered uppercase letters13 letters -- a string containing all characters considered letters14 digits -- a string containing all characters considered decimal digits15 hexdigits -- a string containing all characters considered hexadecimal digits16 octdigits -- a string containing all characters considered octal digits17 punctuation -- a string containing all characters considered punctuation18 printable -- a string containing all characters considered printable19
20 """
21
22 #Some strings for ctype-style character classification
# Character-class constants, in the spirit of C's ctype classification.
# The space character is whitespace too, so it leads the list; without it
# neither ``whitespace`` nor ``printable`` would contain ' '.
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
ascii_lowercase = lowercase
ascii_uppercase = uppercase
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw string: the backslash is a literal punctuation character here, not the
# start of an escape sequence.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + letters + punctuation + whitespace
36 #Case conversion helpers
37 #Use str to convert Unicode literal in case of -U
38 l = map(chr, xrange(256))39 _idmap = str('').join(l)40 dell41
42 #Functions which aren't available as string methods.
43
44 #Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Break s into words with s.split(sep), capitalize every word with
    capitalize(), and glue the words back together.

    When sep is absent or None the words are split on runs of
    whitespace (leading and trailing whitespace disappears) and are
    re-joined with a single space.  Otherwise sep is used both to
    split and to join.

    """
    glue = sep or ' '
    capitalized = [word.capitalize() for word in s.split(sep)]
    return glue.join(capitalized)
58
59 #Construct a translation string
# List form of the identity table; built lazily on the first maketrans() call.
_idmapL = None


def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string 256 characters long)
    suitable for use in string.translate.  Each character of frm is
    mapped to the character at the same position in to; every other
    character maps to itself.

    Raises ValueError when frm and to differ in length.

    """
    global _idmapL
    if len(fromstr) != len(tostr):
        # Call form of raise: same exception and message as the old
        # "raise ValueError, msg" statement, and consistent with the other
        # raises in this module.
        raise ValueError("maketrans arguments must have same length")
    if not _idmapL:
        _idmapL = list(_idmap)
    # Work on a copy so the cached identity list is never modified.
    table = _idmapL[:]
    for source, target in zip(fromstr, tostr):
        table[ord(source)] = target
    return ''.join(table)
80
81
82 ####################################################################
83 importre as _re84
85 class_multimap:86 """Helper class for combining multiple mappings.87
88 Used by .{safe_,}substitute() to combine the mapping and keyword89 arguments.90 """
91 def __init__(self, primary, secondary):92 self._primary =primary93 self._secondary =secondary94
95 def __getitem__(self, key):96 try:97 returnself._primary[key]98 exceptKeyError:99 returnself._secondary[key]100
101
102 class_TemplateMetaclass(type):103 pattern = r"""
104 %(delim)s(?:105 (?P%(delim)s) | # Escape sequence of two delimiters106 (?P%(id)s) | # delimiter and a Python identifier107 {(?P%(id)s)} | # delimiter and a braced identifier108 (?P) # Other ill-formed delimiter exprs109 )110 """
111
112 def __init__(cls, name, bases, dct):113 super(_TemplateMetaclass, cls).__init__(name, bases, dct)114 if 'pattern' indct:115 pattern =cls.pattern116 else:117 pattern = _TemplateMetaclass.pattern %{118 'delim': _re.escape(cls.delimiter),119 'id': cls.idpattern,120 }121 cls.pattern = _re.compile(pattern, _re.IGNORECASE |_re.VERBOSE)122
123
class Template:
    """A string class for supporting $-substitutions.

    The class-level ``delimiter`` and ``idpattern`` are combined by the
    metaclass into the compiled regex ``pattern`` that both substitution
    methods apply to ``self.template``.
    """
    __metaclass__ = _TemplateMetaclass

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError reporting the line and column of a bad placeholder.

        mo is the match object whose 'invalid' group matched.
        """
        i = mo.start('invalid')
        # keepends=True so the joined length of earlier lines equals their
        # offset in the template.
        lines = self.template[:i].splitlines(True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(*args, **kws):
        """substitute([mapping], **kws) -> string

        Return the template with every placeholder replaced by its value.
        Values come from the optional positional mapping and from the
        keyword arguments; when both supply a name, the keyword wins.

        Raises KeyError (from the mapping lookup) for a placeholder with
        no value, ValueError for an ill-formed placeholder, and TypeError
        for a bad number of positional arguments.
        """
        if not args:
            raise TypeError("descriptor 'substitute' of 'Template' object "
                            "needs an argument")
        # self is taken from *args so that "self" can be passed as a keyword
        self, args = args[0], args[1:]
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _multimap(kws, args[0])
        else:
            mapping = args[0]

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                val = mapping[named]
                # We use this idiom instead of str() because the latter will
                # fail if val is a Unicode containing non-ASCII characters.
                return '%s' % (val,)
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(*args, **kws):
        """safe_substitute([mapping], **kws) -> string

        Like substitute(), except that a placeholder with no value and an
        ill-formed placeholder are both left in the result unchanged
        instead of raising KeyError / ValueError.
        """
        if not args:
            raise TypeError("descriptor 'safe_substitute' of 'Template' object "
                            "needs an argument")
        # self is taken from *args so that "self" can be passed as a keyword
        self, args = args[0], args[1:]
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _multimap(kws, args[0])
        else:
            mapping = args[0]

        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                try:
                    # We use this idiom instead of str() because the latter
                    # will fail if val is a Unicode containing non-ASCII
                    return '%s' % (mapping[named],)
                except KeyError:
                    # No value available: keep the placeholder text as is.
                    return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return mo.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
209
210
211 ####################################################################
212 #NOTE: Everything below here is deprecated. Use string methods instead.
213 #This stuff will go away in Python 3.0.
214
215 #Backward compatible names for exceptions
# Every legacy exception name is simply another name for ValueError.
index_error = atoi_error = atof_error = atol_error = ValueError
221 #convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Return s.lower(): a copy of s converted to lowercase.

    """
    lowered = s.lower()
    return lowered
230 #Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Return s.upper(): a copy of s converted to uppercase.

    """
    uppered = s.upper()
    return uppered
239 #Swap lower case letters and UPPER CASE
def swapcase(s):
    """swapcase(s) -> string

    Return s.swapcase(): a copy of s in which upper case characters
    become lowercase and lowercase characters become upper case.

    """
    swapped = s.swapcase()
    return swapped
249 #Strip leading and trailing tabs and spaces
def strip(s, chars=None):
    """strip(s [,chars]) -> string

    Return s.strip(chars): a copy of s without leading and trailing
    whitespace.  When chars is given and is not None, the characters
    in chars are removed instead of whitespace.  A unicode chars
    causes s to be converted to unicode before stripping.

    """
    stripped = s.strip(chars)
    return stripped
261 #Strip leading tabs and spaces
def lstrip(s, chars=None):
    """lstrip(s [,chars]) -> string

    Return s.lstrip(chars): a copy of s without leading whitespace.
    When chars is given and is not None, the characters in chars are
    removed instead of whitespace.

    """
    stripped = s.lstrip(chars)
    return stripped
271 #Strip trailing tabs and spaces
def rstrip(s, chars=None):
    """rstrip(s [,chars]) -> string

    Return s.rstrip(chars): a copy of s without trailing whitespace.
    When chars is given and is not None, the characters in chars are
    removed instead of whitespace.

    """
    stripped = s.rstrip(chars)
    return stripped
281
282 #Split a string into a list of space/tab-separated words
def split(s, sep=None, maxsplit=-1):
    """split(s [,sep [,maxsplit]]) -> list of strings

    Return s.split(sep, maxsplit): the list of words in s, with sep
    as the delimiter string.  When maxsplit is given, at most maxsplit
    splits are made (so at most maxsplit+1 words are returned).  When
    sep is omitted or None, any run of whitespace separates words.

    (split and splitfields are synonymous)

    """
    pieces = s.split(sep, maxsplit)
    return pieces


# Historical alias for split.
splitfields = split
# Split a string into a list of words, working from the end of the string
def rsplit(s, sep=None, maxsplit=-1):
    """rsplit(s [,sep [,maxsplit]]) -> list of strings

    Return s.rsplit(sep, maxsplit): the list of words in s, with sep
    as the delimiter string, splitting from the end of the string
    towards the front.  When maxsplit is given, at most maxsplit splits
    are made.  When sep is omitted or None, any run of whitespace
    separates words.
    """
    pieces = s.rsplit(sep, maxsplit)
    return pieces
309 #Join fields with optional separator
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return sep.join(words): the items of list concatenated with sep
    between neighbouring items.  sep defaults to a single space.

    (joinfields and join are synonymous)

    """
    joined = sep.join(words)
    return joined


# Historical alias for join.
joinfields = join
323 #Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Return s.index(sub [,start [,end]]).  Behaves like find, except
    that ValueError is raised when the substring is not found.

    """
    position = s.index(*args)
    return position
332 #Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Return s.rindex(sub [,start [,end]]).  Behaves like rfind, except
    that ValueError is raised when the substring is not found.

    """
    position = s.rindex(*args)
    return position
341 #Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return s.count(sub[, start[, end]]): how many times substring sub
    occurs in s[start:end].  The optional start and end are
    interpreted as in slice notation.

    """
    occurrences = s.count(*args)
    return occurrences
352 #Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return s.find(sub [,start [,end]]): the lowest index in s at which
    substring sub is found, with sub contained within s[start:end].
    The optional start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    position = s.find(*args)
    return position
365 #Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return s.rfind(sub [,start [,end]]): the highest index in s at
    which substring sub is found, with sub contained within
    s[start:end].  The optional start and end are interpreted as in
    slice notation.

    Return -1 on failure.

    """
    position = s.rfind(*args)
    return position
378 #for a bit of speed
# Module-level aliases of the builtin conversion types, used by atof(),
# atoi() and atol() below ("for a bit of speed").
_float, _int, _long = float, int, long
383 #Convert string to float
def atof(s):
    """atof(s) -> float

    Return the floating point number that the string s represents,
    as produced by float(s).

    """
    number = _float(s)
    return number
392
393 #Convert string to integer
def atoi(s , base=10):
    """atoi(s [,base]) -> int

    Return the integer that the string s represents in the given
    base (default 10), as produced by int(s, base).  s must consist of
    one or more digits, possibly preceded by a sign.  With base 0 the
    base is chosen from the leading characters of s: 0 for octal, 0x
    or 0X for hexadecimal.  With base 16 a preceding 0x or 0X is
    accepted.

    """
    number = _int(s, base)
    return number
407
408 #Convert string to long integer
def atol(s, base=10):
    """atol(s [,base]) -> long

    Return the long integer that the string s represents in the given
    base (default 10), as produced by long(s, base).  s must consist
    of one or more digits, possibly preceded by a sign.  With base 0
    the base is chosen from the leading characters of s: 0 for octal,
    0x or 0X for hexadecimal.  With base 16 a preceding 0x or 0X is
    accepted.  A trailing L or l is not accepted, unless base is 0.

    """
    number = _long(s, base)
    return number
423
424 #Left-justify a string
def ljust(s, width, *args):
    """ljust(s, width[, fillchar]) -> string

    Return s.ljust(width[, fillchar]): s left-justified in a field of
    the given width.  Padding is done with spaces unless fillchar is
    supplied.  The string is never truncated.

    """
    padded = s.ljust(width, *args)
    return padded
435 #Right-justify a string
def rjust(s, width, *args):
    """rjust(s, width[, fillchar]) -> string

    Return s.rjust(width[, fillchar]): s right-justified in a field of
    the given width.  Padding is done with spaces unless fillchar is
    supplied.  The string is never truncated.

    """
    padded = s.rjust(width, *args)
    return padded
446 #Center a string
def center(s, width, *args):
    """center(s, width[, fillchar]) -> string

    Return s.center(width[, fillchar]): s centered in a field of the
    given width.  Padding is done with spaces unless fillchar is
    supplied.  The string is never truncated.

    """
    padded = s.center(width, *args)
    return padded
457 #Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
458 #Decadent feature: the argument may be a string or a number
459 #(Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad the numeric string x on the left with zeros until it fills a
    field of the given width.  x is never truncated.  An x that is not
    a string is first converted with repr().

    """
    text = x if isinstance(x, basestring) else repr(x)
    return text.zfill(width)
471 #Expand tabs in a string.
472 #Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return s.expandtabs(tabsize): a copy of s in which every tab
    character is replaced by as many spaces as the current column and
    tabsize (default 8) call for.

    """
    expanded = s.expandtabs(tabsize)
    return expanded
483 #Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletions]) -> string

    Return a copy of s from which every character found in the
    optional deletions argument has been removed and whose remaining
    characters have been mapped through table, which must be a string
    of length 256.  The deletions argument is not allowed for Unicode
    strings.

    """
    if not deletions and table is not None:
        # Add s[:0] so that if s is Unicode and table is an 8-bit string,
        # table is converted to Unicode.  This means that table *cannot*
        # be a dictionary -- for that feature, use u.translate() directly.
        widened = table + s[:0]
        return s.translate(widened)
    return s.translate(table, deletions)
502 #Capitalize a string, e.g. "aBc dEf" -> "Abc def".
def capitalize(s):
    """capitalize(s) -> string

    Return s.capitalize(): a copy of s in which only the first
    character is capitalized.

    """
    capitalized = s.capitalize()
    return capitalized
512 #Substring replacement (global)
def replace(s, old, new, maxreplace=-1):
    """replace (str, old, new[, maxreplace]) -> string

    Return s.replace(old, new, maxreplace): a copy of str in which
    occurrences of substring old are replaced by new.  When the
    optional maxreplace is given, only the first maxreplace
    occurrences are replaced.

    """
    replaced = s.replace(old, new, maxreplace)
    return replaced
523
524 #Try importing optional built-in module "strop" -- if it exists,
525 #it redefines some string operations that are 100-1000 times faster.
526 #It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.
528
# Prefer the implementations and character sets from the optional built-in
# "strop" module when it can be imported.
try:
    from strop import maketrans, lowercase, uppercase, whitespace
except ImportError:
    pass  # Use the original versions
else:
    # lowercase/uppercase may now differ from the literals defined earlier,
    # so the combined constant is rebuilt from the imported values.
    letters = lowercase + uppercase
534
535 ########################################################################
536 #the Formatter class
537 #see PEP 3101 for details and purpose of this class
538
539 #The hard parts are reused from the C implementation. They're exposed as "_"
540 #prefixed methods of str and unicode.
541
542 #The overall parser is implemented in str._formatter_parser.
543 #The field name parser is implemented in str._formatter_field_name_split
544
class Formatter(object):
    """Format-string engine whose individual steps can be overridden.

    format() / vformat() drive the process; parse(), get_field(),
    get_value(), convert_field(), format_field() and check_unused_args()
    are the separate steps it is built from.
    """

    def format(*args, **kwargs):
        """format(format_string, *args, **kwargs) -> string

        Format format_string with the given positional and keyword
        arguments by delegating to vformat().  Raises TypeError when
        self or format_string is missing.
        """
        if not args:
            raise TypeError("descriptor 'format' of 'Formatter' object "
                            "needs an argument")
        # self and format_string are taken from *args so that "self" and
        # "format_string" can also be passed as keyword arguments.
        self, args = args[0], args[1:]
        try:
            format_string, args = args[0], args[1:]
        except IndexError:
            if 'format_string' in kwargs:
                format_string = kwargs.pop('format_string')
            else:
                raise TypeError("format() missing 1 required positional "
                                "argument: 'format_string'")
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format format_string using the args sequence and kwargs mapping.

        Runs _vformat() with a recursion depth of 2, then hands the set of
        argument keys that were used to check_unused_args().
        """
        used_args = set()
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Do the actual formatting; recurses to expand format specs.

        used_args is updated in place with the key of every argument that
        is referenced.  Raises ValueError once recursion_depth drops
        below zero.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec = self._vformat(format_spec, args, kwargs,
                                            used_args, recursion_depth-1)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result)

    def get_value(self, key, args, kwargs):
        """Return args[key] for an integer key, kwargs[key] otherwise."""
        if isinstance(key, (int, long)):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass

    def format_field(self, value, format_spec):
        """Return the builtin format(value, format_spec)."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply a conversion to value.

        None leaves value unchanged, 's' applies str() and 'r' applies
        repr().  Any other conversion raises ValueError.
        """
        # do any conversion on the resulting object
        if conversion is None:
            return value
        elif conversion == 's':
            return str(value)
        elif conversion == 'r':
            return repr(value)
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Return format_string._formatter_parser()."""
        return format_string._formatter_parser()

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  used_args:    a set of which args have been used
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve field_name to an object.

        Returns (obj, first), where first is the leading key of
        field_name that was passed to get_value().
        """
        first, rest = field_name._formatter_field_name_split()

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first