Monday, 30 April 2012

String Tokenizer in C++


#include <string>
#include <iostream>

using namespace std;

/// Splits a string into tokens separated by any character from a delimiter
/// set.
///
/// The tokenizer stores its own copy of the input text. Runs of consecutive
/// delimiters act as a single separator, so empty tokens are never produced.
/// Leading and trailing delimiters are ignored.
class ST {

public:

   /// Builds a tokenizer positioned on the first token of @p s.
   ///
   /// @param s  Text to tokenize (copied into the tokenizer).
   /// @param d  NUL-terminated set of delimiter characters. When null, the
   ///           whitespace set " \f\n\r\t\v" is used.
   ST(const std::string& s, const char* d = NULL) :
      delim_(d ? d : " \f\n\r\t\v"),
      str_(s),
      counted_(false),
      count_(0),
      begin_(std::string::npos),
      end_(std::string::npos) {

      begin_ = str_.find_first_not_of(delim_);
      if (begin_ != std::string::npos)
         end_ = str_.find_first_of(delim_, begin_);
   }

   /// Counts the tokens in the whole input text.
   ///
   /// The scan always starts at the beginning of the text, so the result does
   /// not depend on how many tokens have already been consumed through
   /// nextToken(). The value is computed once and cached.
   ///
   /// @return Total number of tokens in the input.
   std::string::size_type countTokens( ) {
      if (counted_)
         return count_;

      std::string::size_type n = 0;
      std::string::size_type i = 0;

      for (;;) {
         // Skip the delimiters in front of the next token, if any.
         i = str_.find_first_not_of(delim_, i);
         if (i == std::string::npos)
            break;
         ++n;
         // Jump to the delimiter that terminates this token.
         i = str_.find_first_of(delim_, i + 1);
         if (i == std::string::npos)
            break;
      }

      counted_ = true;
      count_ = n;
      return count_;
   }

   /// @return true while nextToken() still has a token to deliver.
   bool hasMoreTokens( ) const {
      // begin_ is npos exactly when no further token start was found.
      return begin_ != std::string::npos;
   }

   /// Copies the next token into @p s and advances past it.
   ///
   /// @param s  Receives the token. It is left untouched when the tokenizer
   ///           is already exhausted.
   void nextToken(std::string& s) {
      if (begin_ == std::string::npos)
         return;

      if (end_ == std::string::npos) {
         // Last token: it runs to the end of the text.
         s = str_.substr(begin_);
         begin_ = std::string::npos;
         return;
      }

      s = str_.substr(begin_, end_ - begin_);
      begin_ = str_.find_first_not_of(delim_, end_);
      end_ = (begin_ == std::string::npos)
                ? std::string::npos
                : str_.find_first_of(delim_, begin_);
   }

private:
   std::string delim_;              // set of delimiter characters
   std::string str_;                // private copy of the text being tokenized
   bool counted_;                   // true once count_ holds a valid result
   std::string::size_type count_;   // cached result of countTokens()
   // Positions are kept as size_type so that npos and large offsets are
   // represented exactly (an int would truncate them).
   std::string::size_type begin_;   // start of the next token, npos if none
   std::string::size_type end_;     // delimiter after the next token, or npos
};

int main( ) {
   string s = " razzle dazzle giddyup ";
   string tmp;

   ST st(s);

   cout << "there are " << st.countTokens( ) << " tokens.\n";
   while (st.hasMoreTokens( )) {
      st.nextToken(tmp);
      cout << "token = " << tmp << '\n';
   }
}

No comments:

Post a Comment