从字符串中提取单词、从字符串中提取汉字的函数



{ Extracts the distinct words of a string into a sorted list.

  str  - text to scan. It is passed by value and the caller's copy is
         left untouched.
  List - receives the words; it is created when nil, otherwise cleared
         first. On return it is sorted, ignores duplicates and has its
         Delimiter set to a space.

  A word is a run of ASCII letters, digits, apostrophes and hyphens.
  Letters are lower-cased, and runs that begin with a digit, a hyphen or
  an apostrophe are discarded. }
procedure StrToWordList(str: string; var List: TStringList);
var
  p: PChar;
  i: Integer;
  w: string;
begin
  if List = nil then List := TStringList.Create;
  List.Clear;

  { Sorted + dupIgnore makes the list drop repeated words. }
  List.Sorted := True;
  List.Duplicates := dupIgnore;

  { The buffer is edited in place below. A by-value string parameter still
    shares its buffer with the caller's string, and a PChar cast does not
    trigger copy-on-write, so take a private copy before writing to it. }
  UniqueString(str);
  p := PChar(str);

  { Replace every non-word character with a space and lower-case letters. }
  while p^ <> #0 do
  begin
    case p^ of
      'A'..'Z': p^ := Chr(Ord(p^) + 32);
      'a'..'z', '0'..'9', '''', '-': ;
      else p^ := #32;
    end;
    Inc(p);
  end;

  { Split the cleaned text on spaces into the list. }
  List.Delimiter := #32;
  List.DelimitedText := str;

  { A word must start with a letter; drop everything else. The empty-string
    check keeps w[1] from indexing past the end should the split ever yield
    an empty item (presumably possible when the caller's list has
    StrictDelimiter enabled). }
  for i := List.Count - 1 downto 0 do
  begin
    w := List[i];
    if (w = '') or CharInSet(w[1], ['0'..'9', '-', '''']) then
      List.Delete(i);
  end;
end;



{ Collects the distinct Chinese characters of a string.

  str  - text to scan.
  List - receives one entry per distinct character whose code lies in
         $4E00..$9FA5; it is created when nil, otherwise cleared first.
         On return it is sorted and ignores duplicates. }
procedure StrToHanZiList(str: string; var List: TStringList);
const
  FirstHan = #$4E00;
  LastHan = #$9FA5;
var
  cursor: PWideChar;
begin
  if List = nil then
    List := TStringList.Create;
  List.Clear;

  { A sorted list with dupIgnore silently drops repeated characters. }
  List.Sorted := True;
  List.Duplicates := dupIgnore;

  { Walk the text one WideChar at a time until the terminating #0. }
  cursor := PWideChar(str);
  while cursor^ <> #0 do
  begin
    if (cursor^ >= FirstHan) and (cursor^ <= LastHan) then
      List.Add(cursor^);
    Inc(cursor);
  end;
end;


 
   

你可能感兴趣的:(字符串)