This morning I read a question on Stack Overflow where a user tried to solve a text-processing challenge with some really complex code.
With only 3 lines of code we can do that in C++, using the STL and optionally a Boost header.
It works in 3 steps:
1) transform to lower case,
2) split using multiple delimiters (boost::is_any_of),
3) sort the resulting vector.
….
#include <iostream>
#include <string>
#include <vector>
#include <iterator>
#include <algorithm>
using namespace std;
#include "boost/algorithm/string.hpp"
// Sample input: mixed case, several kinds of delimiters and an embedded newline.
const char* testStr = "This is. a sample piece of, text to illustrate \n this problem.";

// Demo program: lower-cases testStr, splits it into words on tab, space,
// newline, comma and period, then prints the words twice -- first in the
// order they were split, then sorted. Returns 0.
int main(int argc, char* argv[])
{
    // Command-line arguments are not used by this demo.
    (void)argc;
    (void)argv;

    const std::string inputText(testStr);
    std::cout << inputText << '\n' << "*******************" << '\n';

    // Keep the lower-cased copy in a named variable: older Boost releases
    // declare split()'s input as a non-const lvalue reference, which a
    // temporary cannot bind to on a conforming compiler.
    std::string lowered = boost::algorithm::to_lower_copy(inputText);

    // The repeated commas in the delimiter set are redundant but harmless;
    // is_any_of() only cares about which characters are present.
    std::vector<std::string> strs;
    boost::split(strs, lowered, boost::is_any_of("\t ,\n,,,."), boost::token_compress_on);

    // token_compress_on merges runs of adjacent delimiters, but a delimiter at
    // the very start or end of the input (here the final '.') still produces
    // an empty token. Drop those so only real words are counted and sorted.
    strs.erase(std::remove_if(strs.begin(), strs.end(),
                              [](const std::string& token) { return token.empty(); }),
               strs.end());

    std::cout << "list : " << strs.size() << '\n';

    std::cout << "without sort\n";
    std::copy(strs.begin(), strs.end(), std::ostream_iterator<std::string>(std::cout, "\n"));

    std::sort(strs.begin(), strs.end());

    std::cout << "with sort\n";
    std::copy(strs.begin(), strs.end(), std::ostream_iterator<std::string>(std::cout, "\n"));

    return 0;
}