A Simple C++ Template Class that Matches a String to a Wildcard Pattern

A recently implemented enhanced wildcard string matcher, whose features include:

  • Supporting wildcard character '*' for matching zero or more characters
  • Supporting wildcard character '?' for matching exactly one character
  • Supporting parentheses '(' and ')' for referencing the matches
  • Supporting escape character (back-slash)

C++ features demonstrated by this implementation:

  • Functors with a consideration of possible function pointers/user instantiated functors with user data
  • Specialized templates
  • Template rebinding

The implementation is maintained as part of qcpplib, quanben's ongoing C++ template library project, which is publicly available on GitHub at https://github.com/lincolnyu/qcpplib/

The current snapshot of the code follows:

  1 //

  2 //  qcpplib v1.00

  3 //  quanben's C++ template library

  4 //  

  5 //  Author Lincoln Yu

  6 //

  7 //  [email protected]

  8 //  https://github.com/lincolnyu/qcpplib

  9 //

 10 //  The MIT License (MIT)

 11 // 

 12 //  Copyright (c) <year> <copyright holders>

 13 // 

 14 //  Permission is hereby granted, free of charge, to any person obtaining a copy

 15 //  of this software and associated documentation files (the "Software"), to deal

 16 //  in the Software without restriction, including without limitation the rights

 17 //  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

 18 //  copies of the Software, and to permit persons to whom the Software is

 19 //  furnished to do so, subject to the following conditions:

 20 // 

 21 //  The above copyright notice and this permission notice shall be included in

 22 //  all copies or substantial portions of the Software.

 23 // 

 24 //  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

 25 //  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

 26 //  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

 27 //  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

 28 //  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

 29 //  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN

 30 //  THE SOFTWARE.

 31 //

 32 

 33 #if !defined (_WILDCARD_H_)

 34 #define _WILDCARD_H_

 35 

 36 #include <map>

 37 #include <vector>

 38 #include <cstring>

 39 #include <string>

 40 

 41 /// @brief Contains class definitions that deal with wildcard matching

 42 namespace Qtl { namespace String { namespace Wildcard {

 43 

 44     /// @brief An implementation of the functor that returns the length of string whose iterators are applicable 

 45     ///        to subtract operator

 46     template <class TStringRef, class TSubtractableIter>

 47     struct CharDistFunctorIndexed

 48     {

 49         size_t operator()(TStringRef str, TSubtractableIter iterBegin, TSubtractableIter iterEnd)

 50         {

 51             return (iterEnd-iterBegin);

 52         }

 53     };

 54     

 55     /// @brief An implementation of the functor that returns the position of the first character for character-based 

 56     ///        zero-terminated string

 57     struct StringBeginFunctorPsz

 58     {

 59         char * operator()(char *str)

 60         {

 61             return (str);

 62         }

 63     };

 64 

 65     /// @brief An implementation of the functor that returns the position of the first character for a std::string

 66     struct StringBeginFunctorStdStr

 67     {

 68         std::string::const_iterator operator()(const std::string& str)

 69         {

 70             return str.begin();

 71         }

 72     };

 73     

 74     /// @brief An implementation of the functor that determines if the position is at the end of a character-based

 75     ///        zero-terminated string

 76     struct StringEndFunctorPsz

 77     {

 78         bool operator()(char *iter, char *str)

 79         {

 80             return (*iter == 0);

 81         }

 82     };

 83 

 84     /// @brief An implementation of the functor that determines if the position is at the end of a std::string

 85     struct StringEndFunctorStdStr

 86     {

 87         bool operator()(std::string::const_iterator iter, const std::string& str)

 88         {

 89             return (iter==str.end());

 90         }

 91     };

 92 

 93     /// @brief An implementation of the functor that appends a character to a character-based zero-terminated string

 94     struct AppendCharFunctorPsz

 95     {

 96         void operator()(char *str, char *&iter, char ch)

 97         {

 98             *iter++ = ch;

 99         }

100     };

101 

102     /// @brief An implementation of the functor that appends a character to a std::string

103     struct AppendCharFunctorStdStr

104     {

105         void operator()(std::string &str, std::string::iterator &iter, char ch)

106         {

107             str.push_back(ch);

108         }

109     };

110 

111     /// @brief The default class that provides string functors

112     struct DefaultStringFunctorSelector

113     {

114         /// @brief The generic rebinder

115         template <class TStringRef, class TCharIter>

116         struct rebind

117         {

118             // unimplemented, compiler error occurs if getting here

119         };

120         

121         /// @brief The rebinder to the character array based string functors

122         template <>

123         struct rebind<char*,char*>

124         {

125             typedef StringBeginFunctorPsz                StringBeginFunctor;

126             typedef StringEndFunctorPsz                StringEndFunctor;

127             typedef CharDistFunctorIndexed<char*,char*>    CharDistFunctor;

128         };

129 

130         /// @brief The rebinder to the std::string based string functors

131         template <>

132         struct rebind<const std::string&, std::string::const_iterator>

133         {

134             typedef StringBeginFunctorStdStr            StringBeginFunctor;

135             typedef StringEndFunctorStdStr                StringEndFunctor;

136             typedef CharDistFunctorIndexed<std::string::const_iterator, std::string::const_iterator>    CharDistFunctor;

137         };

138     };

139 

140     /// @brief The default class that provides functor that appends character to string

141     struct DefaultAppendCharFunctorSelector

142     {

143         /// @brief The generic binder

144         template <class TStringRef, class TCharIter, class TChar>

145         struct rebind

146         {

147             // unimplemented, compiler error occurs if getting here

148         };

149 

150         /// @brief The rebinder to the functor that appends character to character-based zero-terminating string

151         template <>

152         struct rebind<char*, char*, char>

153         {

154             typedef AppendCharFunctorPsz    AppendCharFunctor;

155         };

156 

157         /// @brief The rebinder to the functor that appends character to std::string

158         template <>

159         struct rebind<std::string&, std::string::iterator, char>

160         {

161             typedef AppendCharFunctorStdStr    AppendCharFunctor;

162         };

163     };

164     

165     /// @brief A class that encapsulates a wildcard pattern

166     /// @param TString The type of the pattern string

167     /// @param TStringRef The type of the reference to the pattern string (for efficient parameter passing)

168     /// @param TCharIter The type of the iterator through the characters

169     /// @param TStringBeginFunctor The type of the functor that returns the iterator at the beginning of a string

170     /// @param TStringEndFunctor The type of the functor that determines if the iterator is at the end of a string

171     template <class TString=char*, class TStringRef=char*, class TCharIter=char*,

172         class TStringFunctorSelector=DefaultStringFunctorSelector>

173     class Pattern

174     {

175     public:

176         typedef TStringRef StringRef;

177 

178         /// @brief The type of iterator through the characters in the pattern string

179         typedef TCharIter CharIter;

180         

181         /// @brief The type of the functor that returns the iterator at the beginning of a string

182         typedef typename TStringFunctorSelector::template rebind<TStringRef, TCharIter>::StringBeginFunctor    StringBeginFunctor;

183         

184         /// @brief The type of the functor that determines if the iterator is at the end of a string

185         typedef typename TStringFunctorSelector::template rebind<TStringRef, TCharIter>::StringEndFunctor    StringEndFunctor;

186 

187         /// @brief The type of the functor that returns the distance between two characters 

188         typedef typename TStringFunctorSelector::template rebind<TStringRef, TCharIter>::CharDistFunctor CharDistFunctor;

189 

190     private:

191         /// @brief The pattern string

192         TString                 _pattern;

193         

194         /// @brief The functor that returns the beginning of the string

195         StringBeginFunctor         _getStringBegin;

196         

197         /// @brief The functor that returns if the iterator is at the end of the string

198         StringEndFunctor         _isStringEnd;

199 

200         /// @brief The functor that returns the distance between two characters

201         CharDistFunctor            _getCharDist;

202         

203         /// @brief The look-up table that maps iterator of pattern to the index of match result entry

204         std::map<CharIter, int> _mapIterToIndex;

205 

206     public:

207         // a typical wildcard pattern: 

208         //   a*b?C(*)

209         // 

210         /// @brief Instantiates a pattern with the pattern string and the functors

211         /// @param pattern The pattern string

212         /// @param stringBegin The functor that provides the beginning of the string

213         /// @param stringEnd The functor that determines the end of the string

214         /// @remarks A typical wildcard pattern is like: a*b?C(*)D\)

215         ///          where normal characters (alphanumerics, punctuation etc) expect exact match, asteroids match whatever 

216         ///          string of whatever length, question marks match any single character and an escape character 

217         ///          (back-slash) turns a succeeding special character to a normal matching character.

218         Pattern(TStringRef pattern, StringBeginFunctor stringBegin, StringBeginFunctor stringEnd) 

219             : _pattern(pattern), _getStringBegin(stringBegin), _isStringEnd(stringEnd)

220         {

221             PreProcessParentheses();

222         }

223 

224         /// @brief Instantiates a pattern with the pattern string

225         /// @param pattern The pattern string

226         Pattern(TStringRef pattern) : _pattern(pattern)

227         {

228             PreProcessParentheses();

229         }

230 

231     private:

232         /// @brief Creates the mapping from parenthesis pointer to index from the pattern string

233         void PreProcessParentheses()

234         {

235             _mapIterToIndex.clear();

236             int openingIndex = 0;

237             int closingIndex = 0;

238             for (CharIter iter = GetBegin(); !IsEnd(iter); ++iter)

239             {

240                 if (*iter=='\\')

241                 {

242                     ++iter;    // skip the character that follows

243                 }

244                 else if (*iter == '(')

245                 {

246                     _mapIterToIndex[iter] = closingIndex = openingIndex++;

247                 }

248                 else if (*iter == ')')

249                 {

250                     // NOTE We don't need to differentiate opening and closing parentheses as

251                     //      the matcher has the knowledge of the pattern characters

252                     _mapIterToIndex[iter] = closingIndex--;

253                 }

254             }

255         }

256 

257     public:

258         /// @brief Returns the beginning of the pattern string

259         /// @return The iterator point to the beginning of the pattern string

260         CharIter GetBegin()

261         {

262             return _getStringBegin(_pattern);

263         }

264 

265         /// @brief Determines if the iterator is at the end of the pattern string

266         /// @param The interator in question

267         /// @return true if the iterator is at the beginning of the pattern string

268         bool IsEnd(CharIter iter)

269         {

270             return _isStringEnd(iter, _pattern);

271         }

272 

273         /// @brief Returns the match entry index for the specified parenthesis pointer

274         /// @return The match entry index

275         int PatternIterToIndex(CharIter patternIter)

276         {

277             return _mapIterToIndex[patternIter];

278         }

279 

280         /// @brief Returns the distance between two characters (the number of characters in between plus one)

281         /// @param iterBegin The iterator that points to the character on the left hand

282         /// @param iterEnd The iterator that points to the character on the right hand

283         /// @return The distance

284         size_t GetQuotedLength(CharIter iterBegin, CharIter iterEnd)

285         {

286             return _getCharDist(_pattern, iterBegin, iterEnd);

287         }

288     };

289 

290     /// @brief A class that converts a wildcard pattern to its equivalent regular expression

291     /// @param TPattern The type of the pattern class 

292     /// @param TRegexStringRef The type of the reference to the string for regular expression

293     /// @param TRegexCharIter The iterator through characters in the string for regular expression

294     /// @param TPatternFunctorSelector The functor selector for pattern

295     /// @param TRegexAppendCharFunctorSelector The append-character functor selector for regular expression

296     /// @remarks NOTE TRegexChar has to be compatible with the character type TPattern::CharIter iterates through

297     template <class TPattern=Pattern<>, class TRegexStringRef=char*, class TRegexCharIter=char*, 

298         class TRegexChar=char, class TRegexAppendCharFunctorSelector=DefaultAppendCharFunctorSelector>

299     class WildCardToRegex

300     {

301     public:

302         /// @brief The type of the reference to regular expression string

303         typedef TRegexStringRef        RegexStringRef;

304         /// @brief The type of the iteartor through the characters in the regular expression string

305         typedef TRegexCharIter        RegexCharIter;

306         /// @brief The type of the character that can be append to the regular expression string

307         typedef TRegexChar            RegexChar;

308 

309         /// @brief The type of the reference to the wildcard string

310         typedef typename TPattern::StringRef    PatternStringRef;

311         /// @brief The type of the iterator through the characters in the wildcard string

312         typedef typename TPattern::CharIter        PatternStringIter;

313 

314         /// @brief

315         typedef typename TRegexAppendCharFunctorSelector::template rebind<RegexStringRef, RegexCharIter, RegexChar>::AppendCharFunctor

316             RegexAppendCharFunctor;

317 

318     private:

319         /// @brief The functor that appends character to the regular expression string

320         RegexAppendCharFunctor _regexAppendChar;

321 

322     public:

323         /// @brief Initialises a WildCardToRegex with the specified functor instances

324         /// @param regexAppendChar The functor that appends character to the regular expression string

325         WildCardToRegex(RegexAppendCharFunctor &regexAppendChar)

326             : _regexAppendChar(regexAppendChar)

327         {

328         }

329 

330         /// @brief Initialises a WildCardToRegex with the default settings

331         WildCardToRegex()

332         {

333         }

334 

335     public:

336         /// @brief Converts a wildcard string to its equivalent regular expression

337         /// @remarks This is supposed to comply with the rules set by the regex implementation in QSharp

338         ///          See https://qsharp.codeplex.com/SourceControl/latest#QSharp/QSharp.String.Rex/Creator.cs

339         ///          for more detail. It has yet to be tested though.

340         void Convert(TPattern &pattern, TRegexStringRef regex, TRegexCharIter iterRegex)

341         {

342             for (PatternStringIter iter = pattern.GetBegin(); !pattern.IsEnd(iter); ++iter)

343             {

344                 switch (*iter)

345                 {

346                 case '\\':

347                     _regexAppendChar(regex, iterRegex, *iter);

348                     ++iter;

349                     if (!pattern.IsEnd(iter))

350                     {

351                         _regexAppendChar(regex, iterRegex, *iter);

352                     }

353                     else

354                     {

355                         _regexAppendChar(regex, iterRegex, '\\');

356                     }

357                     break;

358                 case '*':

359                     _regexAppendChar(regex, iterRegex, '.');

360                     _regexAppendChar(regex, iterRegex, *iter);

361                     break;

362                 case '?':

363                     _regexAppendChar(regex, iterRegex, '.');

364                     break;

365                 case '(': case ')':

366                     _regexAppendChar(regex, iterRegex, *iter);

367                     break;

368                 case '[': case ']': case '{': case '}': case '^': case '.': case '-': case '+':

369                     _regexAppendChar(regex, iterRegex, '\\');

370                     _regexAppendChar(regex, iterRegex, *iter);

371                     break;

372                 default:

373                     _regexAppendChar(regex, iterRegex, *iter);

374                     break;

375                 }

376             }

377         }

378     };

379 

380     /// @brief A class that represents a match of quotation enclosed by a pair of parentheses in the pattern

381     /// @param TCharIter The type of iterator through the source string

382     /// @param TDiff The type of a integer number that indicates the length of string or the distance between characters

383     template <class TCharIter=char*, class TDiff=size_t>

384     class MatchQuote

385     {

386     public:

387         /// @brief The type of iterator through the source string

388         typedef TCharIter    CharIter;

389 

390         /// @brief The type of a integer number that indicates the length of string or the distance between characters

391         typedef TDiff        Diff;

392 

393     public:

394         /// @brief The beginning of the substring that matches

395         CharIter Begin;

396 

397         /// @brief The end of the substring that matches

398         CharIter End;

399     };    

400     

401     /// @brief A class that contains all the matched quotations

402     /// @param TCharIter The iterator through the source string

403     /// @param TDiff The type of the integer that indicates a string length or a character distance

404     template <class TCharIter=char*, class TDiff=size_t>

405     class MatchResult

406     {

407     public:

408         /// @brief The iterator through the source string

409         typedef TCharIter        CharIter;

410         /// @brief The type of the integer that indicates a string length or a character distance

411         typedef TDiff            Diff;

412         /// @brief The type of match entries listed in this object

413         typedef MatchQuote<CharIter, Diff>  MatchType;

414 

415     public:

416         /// @brief A list of matched quotation entries

417         std::vector<MatchType> Matches;

418 

419     public:

420         /// @brief Records the beginning of a quotation encountered

421         /// @param index The index of the match entry

422         /// @param iterChar The pointer to the source string where the quotation starts

423         void Open(int index, CharIter iterChar)

424         {

425             while (index >= Matches.size())

426             {

427                 Matches.push_back(MatchType());

428             }

429             Matches[index].Begin = iterChar;

430         }

431         

432         /// @brief Records the end of a quotation encountered

433         /// @param index The index of the match entry

434         /// @param iterChar The pointer to the source string where the quotation ends

435         void Close(int index, CharIter iterChar)

436         {

437             // cell index must have already been allocated in the array of Matches

438             Matches[index].End = iterChar;

439         }

440     };

441 

442     /// @brief A default trait class that provides types needed by Matcher

443     /// @param TChar 

444     template <class TStringRef=char*, class TCharIter=char*, class TDiff=size_t,

445         class TStringFunctorSelector=DefaultStringFunctorSelector>

446     struct MatcherTraits

447     {

448         /// @brief The type of the reference to the source string 

449         typedef TStringRef     StringRef;

450         /// @brief The type of iterator through the characters in the source string

451         typedef TCharIter      CharIter;

452         

453         /// @brief The type of the match result (matched quotation entry container)

454         typedef MatchResult<TCharIter, TDiff>    MatchResultType;

455         /// @brief The type of the reference to the match result

456         typedef MatchResultType &                MatchResultRef;

457         

458         /// @brief The type of the functor that returns the beginning of a string

459         typedef typename TStringFunctorSelector::template rebind<StringRef, CharIter>::StringBeginFunctor    StringBeginFunctor;

460         /// @brief The type of the functor that determines if an iterator is at the end of a string

461         typedef typename TStringFunctorSelector::template rebind<StringRef, CharIter>::StringEndFunctor        StringEndFunctor;

462     };

463     

464     /// @brief A wildcard string matcher

465     template <class Traits = MatcherTraits<>>

466     class Matcher

467     {

468     public:

469         /// @brief The type of the reference to the source string 

470         typedef typename Traits::StringRef            StringRef;

471         /// @brief The type of iterator through the characters in the source string

472         typedef typename Traits::CharIter             CharIter;

473         

474         /// @brief The type of the reference to the match result

475         typedef typename Traits::MatchResultRef        MatchResultRef;

476         

477         /// @brief The type of the functor that returns the beginning of a string

478         typedef typename Traits::StringBeginFunctor    StringBeginFunctor;

479         /// @brief The type of the functor that determines if an iterator is at the end of a string

480         typedef typename Traits::StringEndFunctor    StringEndFunctor;

481 

482     private:

483         /// @brief The functor that returns the beginning of a string

484         StringBeginFunctor     _stringBegin;

485 

486         /// @brief The functor that determines if an iterator is at the end of a string

487         StringEndFunctor    _stringEnd;

488                 

489     public:

490         /// @brief Instantiates a Matcher with the specified string functor instances

491         /// @param stringBegin The functor that returns the beginning of a string

492         /// @param stringEnd The functor that determines if an iterator is at the end of a string

493         Matcher(StringBeginFunctor &stringBegin, StringEndFunctor &stringEnd)

494             : _stringBegin(stringBegin), _stringEnd(stringEnd)

495         {

496         }

497         

498         /// @brief Instantiates a Matcher with default settings

499         Matcher()

500         {

501         }

502         

503     public:

504         /// @brief Match The source to the pattern

505         /// @param source The source string to match

506         /// @param pattern The pattern to match against

507         /// @param matchResult The container of matched quotation entries

508         /// @return true if the matching is successful (the pattern is completely consumed)

509         template <class TPattern>

510         bool Match(StringRef source, TPattern &pattern, MatchResultRef matchResult)

511         {

512             CharIter iterSource = _stringBegin(source);

513             TPattern::CharIter iterPattern = pattern.GetBegin();

514             return Match(source, iterSource, pattern, iterPattern, matchResult);

515         }

516         

517         /// @brief Match the source to the pattern (recursive)

518         /// @param source The source string to match

519         /// @param iterSource The iterator through the source string at its current position

520         /// @param pattern The pattern to match against

521         /// @param iterPattern The iterator through the pattern string at its current position

522         /// @param matchResult The container of matched quotation entries

523         /// @return true if the matching is successful (the pattern is completely consumed)

524         template <class TPattern>

525         bool Match(StringRef source, CharIter &iterSource, TPattern &pattern, typename TPattern::CharIter &iterPattern, 

526             MatchResultRef matchResult)

527         {

528             while (! pattern.IsEnd(iterPattern))

529             {

530                 if (*iterPattern == '\\')

531                 {

532                     ++iterPattern;

533                 }

534                 else if (*iterPattern == '*')

535                 {

536                     CharIter savedIterSource = iterSource;

537                     TPattern::CharIter savedIterPattern = iterPattern;

538                     // greedy strategy

539                     if (!_stringEnd(savedIterSource, source))

540                     {

541                         ++iterSource;

542                         if (Match(source, iterSource, pattern, savedIterPattern, matchResult))

543                         {

544                             return true;

545                         }

546                     }

547                     ++iterPattern;

548                     if (Match(source, savedIterSource, pattern, iterPattern, matchResult))

549                     {

550                         return true;

551                     }

552                     return false;

553                 }

554                 else if (*iterPattern == '?')

555                 {

556                     if (_stringEnd(iterSource, source))

557                     {

558                         return false;

559                     }

560                     ++iterPattern;

561                     ++iterSource;

562                     continue;

563                 }

564                 else if (*iterPattern == '(')

565                 {

566                     int index = pattern.PatternIterToIndex(iterPattern);

567                     matchResult.Open(index, iterSource);

568                     ++iterPattern;

569                     continue;

570                 }

571                 else if (*iterPattern == ')')

572                 {

573                     int index = pattern.PatternIterToIndex(iterPattern);

574                     matchResult.Close(index, iterSource);

575                     ++iterPattern;

576                     continue;

577                 }

578                 

579                 if (!_stringEnd(iterSource, source) && *iterPattern == *iterSource)

580                 {

581                     ++iterPattern;

582                     ++iterSource;

583                 }

584                 else

585                 {

586                     return false;

587                 }

588             }

589             return true;

590         }

591     };

592 }}}

593 

594 #endif

 

 

你可能感兴趣的:(template)