标准C++字符串string以及MFC6.0字符串CString的tokenize和split函数

标准字符串的方法:

 /********************************************

   The tokenize and split functions for std::string

 *********************************************/

 6 #include <string> 

 7 #include <vector> 

 8 #include <iostream> 

 9 using namespace std; 

10   

// Index type used for positions inside a std::string.
typedef std::basic_string<char>::size_type S_T;

// "Not found" sentinel, taken from the library rather than spelled as -1.
static const S_T npos = std::string::npos;

13   

// Splits `src` at every character that appears in `tok`; each character of
// `tok` acts as a separate one-character delimiter.
//
//   src         string to split; must not be empty
//   tok         set of delimiter characters; must not be empty
//   trim        when true, empty fields are dropped; when false (the default)
//               every empty field is reported as `null_subst`
//   null_subst  placeholder stored for an empty field when `trim` is false
//
// Returns the fields in left-to-right order.
// Throws a `const char*` message when `src` or `tok` is empty.
std::vector<std::string> tokenize(const std::string& src, const std::string& tok,
                                  bool trim = false, const std::string& null_subst = "")
{
    if (src.empty() || tok.empty()) throw "tokenize: empty string";

    std::vector<std::string> fields;
    std::string::size_type start = 0;
    std::string::size_type hit;
    while ((hit = src.find_first_of(tok, start)) != std::string::npos)
    {
        if (hit != start)
            fields.push_back(src.substr(start, hit - start));
        else if (!trim)
            fields.push_back(null_subst);
        start = hit + 1;  // resume just past the delimiter
    }

    // Whatever follows the last delimiter is the final field (possibly empty).
    const std::string tail = src.substr(start);
    if (!tail.empty())
        fields.push_back(tail);
    else if (!trim)
        fields.push_back(null_subst);
    return fields;
}

34   

// Splits `src` on every occurrence of the complete string `delimit` (the whole
// string is the separator, not its individual characters). There is no trim
// option: every field is reported, and an empty field is reported as
// `null_subst`.
//
//   src         string to split; must not be empty
//   delimit     separator string; must not be empty
//   null_subst  placeholder stored for an empty field
//
// Returns the fields in left-to-right order; the result always has at least
// one element.
// Throws a `const char*` message when `src` or `delimit` is empty.
std::vector<std::string> split(const std::string& src, const std::string& delimit,
                               const std::string& null_subst = "")
{
    if (src.empty() || delimit.empty()) throw "split: empty string";

    std::vector<std::string> pieces;
    const std::string::size_type step = delimit.size();
    // Positions stay in the string's own unsigned size_type so that the
    // comparison with npos involves no signed/unsigned conversion.
    std::string::size_type from = 0;
    std::string::size_type hit;
    while ((hit = src.find(delimit, from)) != std::string::npos)
    {
        if (hit == from)
            pieces.push_back(null_subst);
        else
            pieces.push_back(src.substr(from, hit - from));
        from = hit + step;  // resume just past the separator
    }

    // The text after the last separator is always reported, even when empty.
    const std::string rest = src.substr(from);
    pieces.push_back(rest.empty() ? null_subst : rest);
    return pieces;
}

55   

56 // test 

57 int main(void) 

58 { 

59  string src = ",ab,cde;,,fg,," ; 

60  string tok = ",;" ; 

61   

62  vector<string> v1 = tokenize(src, tok ,true); 

63  vector<string> v2 = tokenize(src, tok ,false, "<null>"); 

64   

65  cout<<"-------------v1:"<<endl; 

66  for(int i=0; i<v1.size();i++) 

67  { 

68   cout<<v1[i].c_str()<<endl; 

69  } 

70     

71  cout<<"-------------v2:"<<endl; 

72  for(int j=0; j<v2.size();j++) 

73  { 

74   cout<<v2[j].c_str()<<endl; 

75  } 

76   

77  try{ 

78     

79   string s = "######123#4###56########789###"; 

80   string del = "";//"###"; 

81   vector<string> v3 = split(s, del, "<null>"); 

82   cout<<"-------------v3:"<<endl; 

83   for(int k=0; k<v3.size();k++) 

84   { 

85    cout<<v3[k].c_str()<<endl; 

86   } 

87  } 

88  catch (char *s) { 

89   cout<<s<<endl; 

90  } 

91   

92  return 0; 

93 }

CString的方法:

  1 #include <stdio.h> 

  2 #include <afx.h> 

  3   

  4 /* 

  5  * 该函数用delimits里的字符拆分s,传出一个CStringList指针pList, 

  6  * 若trim为真,则不保留分割后的空串(注意不是空白字符)。比如: 

  7  * Tokenize( "a,bc;,d,", ",;", &out_list, TRUE) 

  8  * 会返回3个串:a、bc、d。 

  9  * 若trim为FALSE,则用nullSubst用来替代分割后的空串,比如: 

 10  *  Tokenize( "a,bc;,d;", ",;", &out_list, FALSE,"[null]" ) 

 11  * 会返回5个串:a、bc、[null]、d、[null]。 

 12  * trim默认为FALSE,nullSubst默认为空串。 

 13  */

 14 void Tokenize(CString s, CString delimits, CStringList* pList, BOOL trim=FALSE, CString nullSubst="") 

 15 { 

 16  ASSERT( !s.IsEmpty() && !delimits.IsEmpty() ); 

 17   

 18  s += delimits[0]; 

 19  for( long index=-1; (index=s.FindOneOf((LPCTSTR)delimits))!=-1; ) 

 20  { 

 21   if(index != 0) pList->AddTail( s.Left(index) ); 

 22   else if(!trim) pList->AddTail(nullSubst); 

 23   s = s.Right(s.GetLength()-index-1); 

 24  } 

 25 } 

 26   

 27   

 28 /*  

 29  * 类似java字符串的split()方法。 

 30  * 使用一个完整的串delimit(而不是其中的某个字符)来分割src串,没有trim选项, 

 31  * 即严格分割。num用来确定最多分割为多少个串,如果是0(默认),则按照delimit 

 32  * 分割,若为1,则返回源串。 

 33  */

 34 void Split(const CString& src, CString delimit, CStringList* pOutList, int num=0, CString nullSubst="") 

 35 { 

 36  ASSERT( !src.IsEmpty() && !delimit.IsEmpty() ); 

 37  if(num==1)  

 38  { 

 39   pOutList->AddTail(src); 

 40   return; 

 41  } 

 42   

 43  int deliLen = delimit.GetLength(); 

 44  long index = -1, lastSearchPosition = 0, cnt = 0; 

 45   

 46  while( (index=src.Find(delimit, lastSearchPosition))!=-1 ) 

 47  { 

 48   if(index==lastSearchPosition) 

 49    pOutList->AddTail(nullSubst); 

 50   else

 51    pOutList->AddTail(src.Mid(lastSearchPosition, index-lastSearchPosition)); 

 52   lastSearchPosition = index + deliLen; 

 53   

 54   if(num) 

 55   { 

 56    ++cnt; 

 57    if(cnt+1==num) break; 

 58   } 

 59  } 

 60  CString lastOne = src.Mid(lastSearchPosition); 

 61  pOutList->AddTail( lastOne.IsEmpty()? nullSubst:lastOne); 

 62 } 

 63   

 64   

 65   

 66 // test 

 67 int main(void) 

 68 { 

 69  CString s = ",ab;cde,f,,;gh,,"; 

 70  CString sub = ",;"; 

 71  CStringList list1,list2; 

 72   

 73     

 74  Tokenize(s,sub,&list1,TRUE,"no use"); // <----- 

 75  printf("-------[Tokenize_trim]-------\n"); 

 76  POSITION pos1 = list1.GetHeadPosition(); 

 77  while( pos1!= NULL ) 

 78  { 

 79   printf( list1.GetNext(pos1) ); 

 80   printf("\n"); 

 81  } 

 82  Tokenize(s,sub,&list2,FALSE,"[null]"); // <----- 

 83  printf("------[Tokenize_no_trim]-----\n"); 

 84  POSITION pos2 = list2.GetHeadPosition(); 

 85  while( pos2!= NULL ) 

 86  { 

 87   printf( list2.GetNext(pos2) ); 

 88   printf("\n"); 

 89  } 

 90     

 91  CStringList list3; 

 92  s = "###0123###567######89###1000###"; 

 93  sub = "###"; 

 94  Split(s,sub,&list3, 3, "<null>"); // <----- 

 95  printf("------[Split]-----\n"); 

 96  POSITION pos3 = list3.GetHeadPosition(); 

 97  while( pos3!= NULL ) 

 98  { 

 99   printf( list3.GetNext(pos3) ); 

100   printf("\n"); 

101  } 

102  return 0;  

103 }

 

你可能感兴趣的:(String)