TSearch & TFileSearch Version 2.2 -Boyer-Moore-Horspool search algorithm

unit Searches;

(*-----------------------------------------------------------------------------*
|  Components        TSearch & TFileSearch                                     |
|  Version:          2.2                                                       |
|  Last Update:      10 June 2004                                              |
|  Compilers:        Delphi 3 - Delphi 7                                       |
|  Author:           Angus Johnson - angusj-AT-myrealbox-DOT-com               |
|  Copyright:        © 2001 -2004  Angus Johnson                               |
|                                                                              |
|  Description:      Delphi implementation of the                              |
|                    Boyer-Moore-Horspool search algorithm.                    |
*-----------------------------------------------------------------------------*)

//10.06.04: Added support for widestring searches

interface

uses
  windows, sysutils, classes;

type

  TBaseSearch = class(TComponent)
  private
     fPos            : pchar;
     fEnd            : pchar;
     fPattern        : string;
     fPatLen         : integer;
     fPatInitialized : boolean;
     fCaseSensitive  : boolean;
     JumpShift       : integer;
     Shift           : array[#0..#255] of integer;
     CaseBlindTable  : array[#0..#255] of char;
     procedure InitPattern;
     procedure MakeCaseBlindTable;
     procedure SetCaseSensitive(CaseSens: boolean);
     procedure SetPattern(const Pattern: string);
     procedure SetWsPattern(const WsPattern: widestring);
     function  FindCaseSensitive: integer;
     function  FindCaseInsensitive: integer;
  protected
     fStart          : pchar;
     fDataLength     : integer;
     procedure ClearData;
     procedure SetData(Data: pchar; DataLength: integer); virtual;
  public
     constructor Create(aowner: tcomponent); override;
     destructor  Destroy; override;
     //The following Find functions return the 0 based offset of Pattern
     //else POSITION_EOF (-1) if the Pattern is not found  ...
     function  FindFirst: integer;
     function  FindNext: integer;
     function  FindFrom(StartPos: integer): integer;
     //To simplify searching for widestring patterns -
     //assign the WsPattern property instead of the Pattern property
     property WsPattern: widestring write SetWsPattern;
     property Data: pchar read fStart;
     property DataSize: integer read fDataLength;
  published
     property CaseSensitive: boolean read fCaseSensitive write SetCaseSensitive;
     property  Pattern: string read fPattern write SetPattern;
  end;

  TSearch = class(TBaseSearch)
  public
    //Changes visibility of base SetData() method to public ...
    //Note: TSearch does NOT own the data. To avoid the overhead of
    //copying it, it just gets a pointer to it.
    procedure SetData(Data: pchar; DataLength: integer); override;
  end;

  TFileSearch = class(TBaseSearch)
  private
    fFilename: string;
    procedure SetFilename(const Filename: string);
    procedure Closefile;
  public
    destructor Destroy; override;
  published
    //Assigning 'Filename' creates a memory map of the named file.
    //This memory mapping will be closed when either the Filename property is
    //assigned to '' or the FileSearch object is destroyed.
    property Filename: string read fFilename write SetFilename;
  end;

procedure Register;

const
  POSITION_EOF = -1;

implementation

procedure Register;
begin
  RegisterComponents('Samples', [TSearch, TFileSearch]);
end;

//------------------------------------------------------------------------------
// TBaseSearch methods ...
//------------------------------------------------------------------------------

procedure TBaseSearch.MakeCaseBlindTable;
var
  i: char;
begin
  for i:= #0 to #255 do
     CaseBlindTable[i]:= ansilowercase(i)[1];
end;
//------------------------------------------------------------------------------

constructor TBaseSearch.Create(AOwner: TComponent);
begin
  inherited Create(AOwner);
  fStart := nil;
  fPattern := '';
  fPatLen := 0;
  MakeCaseBlindTable;
  fCaseSensitive := false;      //Default to case insensitive searches.
  fPatInitialized := false;
end;
//------------------------------------------------------------------------------

destructor TBaseSearch.Destroy;
begin
  ClearData;
  inherited Destroy;
end;
//------------------------------------------------------------------------------

procedure TBaseSearch.ClearData;
begin
  fStart := nil;
  fPos := nil;
  fEnd := nil;
  fDataLength := 0;
end;
//------------------------------------------------------------------------------

procedure TBaseSearch.SetPattern(const Pattern: string);
begin
  if fPattern = Pattern then exit;
  fPattern := Pattern;
  fPatLen := length(Pattern);
  fPatInitialized := false;
end;
//------------------------------------------------------------------------------

procedure TBaseSearch.SetWsPattern(const WsPattern: widestring);
begin
  fPatLen := length(WsPattern)*2;
  fPatInitialized := false;
  if fPatLen = 0 then exit;
  SetString(fPattern, pchar(pointer(WsPattern)), fPatLen);
end;
//------------------------------------------------------------------------------

procedure TBaseSearch.SetData(Data: pchar; DataLength: integer);
begin
  ClearData;
  if (Data = nil) or (DataLength < 1) then exit;
  fStart := Data;
  fDataLength := DataLength;
  fEnd := fStart + fDataLength;
end;
//------------------------------------------------------------------------------

procedure TBaseSearch.SetCaseSensitive(CaseSens: boolean);
begin
  if fCaseSensitive = CaseSens then exit;
  fCaseSensitive := CaseSens;
  fPatInitialized := false;
end;
//------------------------------------------------------------------------------

procedure TBaseSearch.InitPattern;
var
  j: integer;
  i: char;
begin
  if fPatLen = 0 then exit;
  for i := #0 to #255 do Shift[i]:= fPatLen;
  if fCaseSensitive then
  begin
    for j := 1 to fPatLen-1 do
      Shift[fPattern[j]]:= fPatLen - j;
    JumpShift := Shift[fPattern[fPatLen]];
    Shift[fPattern[fPatLen]] := 0;
  end else
  begin
    for j := 1 to fPatLen-1 do
      Shift[CaseBlindTable[fPattern[j]]]:= fPatLen - j;
    JumpShift := Shift[CaseBlindTable[fPattern[fPatLen]]];
    Shift[CaseBlindTable[fPattern[fPatLen]]] := 0;
  end;
  fPatInitialized := true;
end;
//------------------------------------------------------------------------------

function TBaseSearch.FindFirst: integer;
begin
  fPos := fStart+fPatLen-1;
  result := FindNext;
end;
//------------------------------------------------------------------------------

function TBaseSearch.FindFrom(StartPos: integer): integer;
begin
  if StartPos < fPatLen-1 then  //ie: StartPos must never be less than fPatLen-1
    fPos := fStart+fPatLen-1 else   
    fPos := fStart+StartPos;
  result := FindNext;
end;
//------------------------------------------------------------------------------

function TBaseSearch.FindNext: integer;
begin
  if not fPatInitialized then InitPattern;
  if (fPatLen = 0) or (fPatLen >= fDataLength) or (fPos >= fEnd) then
  begin
     fPos := fEnd;
     result := POSITION_EOF;
     exit;
  end;
  if fCaseSensitive then
    result := FindCaseSensitive else
    result := FindCaseInsensitive;
end;
//------------------------------------------------------------------------------

function TBaseSearch.FindCaseSensitive: integer;
var
  i: integer;
  j: pchar;
begin
  result:= POSITION_EOF;
  while fPos < fEnd do
  begin
    i := Shift[fPos^];        //test last character first
    if i <> 0 then            //last char does not match
      inc(fPos,i)
    else
    begin                     //last char matches at least
      i := 1;
      j := fPos - fPatLen;
      while (i < fPatLen) and (fPattern[i] = (j+i)^) do inc(i);
      if (i = fPatLen) then
      begin
         result:= fPos-fStart-fPatLen+1;
         inc(fPos,fPatLen);
         break;               //FOUND!
      end
      else
        inc(fPos,JumpShift);
    end;
  end;
end;
//------------------------------------------------------------------------------

function TBaseSearch.FindCaseInsensitive: integer;
var
  i: integer;
  j: pchar;
begin
  result:= POSITION_EOF;
  while fPos < fEnd do
  begin
    i := Shift[CaseBlindTable[fPos^]];   //test last character first
    if i <> 0 then                       //last char does not match
      inc(fPos,i)
    else
    begin                                //last char matches at least
      i := 1;
      j := fPos - fPatLen;
      while (i < fPatLen) and
            (CaseBlindTable[fPattern[i]] = CaseBlindTable[(j+i)^]) do inc(i);
      if (i = fPatLen) then
      begin
         result:= fPos-fStart-fPatLen+1;
         inc(fPos,fPatLen);
         break;                          //FOUND!
      end
      else
        inc(fPos,JumpShift);
    end;
  end;
end;

//------------------------------------------------------------------------------
// TSearch methods ...
//------------------------------------------------------------------------------

procedure TSearch.SetData(Data: pchar; DataLength: integer);
begin
  inherited; //changes visibility of base method from protected to public
end;

//------------------------------------------------------------------------------
// TFileSearch methods ...
//------------------------------------------------------------------------------

destructor TFileSearch.Destroy;
begin
  CloseFile;
  inherited Destroy;
end;
//------------------------------------------------------------------------------

procedure TFileSearch.SetFilename(const Filename: string);
var
   filehandle: integer;
   filemappinghandle: thandle;
   size, highsize: integer;
begin
  if (csDesigning in ComponentState) then
  begin
    fFilename := Filename;
    exit;
  end;
  CloseFile;
  if (Filename = '') or not FileExists(Filename) then exit;
  filehandle := sysutils.FileOpen(Filename, fmopenread or fmsharedenynone);
  if filehandle = 0 then exit;                //error
  size := GetFileSize(filehandle, @highsize);
  if (size <= 0) or (highsize <> 0) then      //nb: files >2 gig not supported
  begin
     CloseHandle(filehandle);
     exit;
  end;
  filemappinghandle :=
    CreateFileMapping(filehandle, nil, page_readonly, 0, 0, nil);
  if GetLastError = error_already_exists then filemappinghandle := 0;
  if filemappinghandle <> 0 then
    SetData(MapViewOfFile(filemappinghandle,file_map_read,0,0,0),size);
  if fStart <> nil then fFilename := Filename;
  CloseHandle(filemappinghandle);
  CloseHandle(filehandle);
end;
//------------------------------------------------------------------------------

procedure TFileSearch.CloseFile;
begin
   if (csDesigning in ComponentState) then exit;
   if (fStart <> nil) then UnmapViewOfFile(fStart);
   fFilename := '';
   ClearData;
end;
//------------------------------------------------------------------------------

end.

 

你可能感兴趣的:(Algorithm)