| 1 | // (C) Copyright Jeremy Siek 2004 |
|---|
| 2 | // Distributed under the Boost Software License, Version 1.0. (See |
|---|
| 3 | // accompanying file LICENSE_1_0.txt or copy at |
|---|
| 4 | // http://www.boost.org/LICENSE_1_0.txt) |
|---|
| 5 | |
|---|
| 6 | /* |
|---|
| 7 | * stringtok.hpp -- Breaks a string into tokens. This is an example for lib3. |
|---|
| 8 | * |
|---|
| 9 | * Template function looks like this: |
|---|
| 10 | * |
|---|
| 11 | * template <typename Container> |
|---|
| 12 | * void stringtok (Container &l, |
|---|
| 13 | * string const &s, |
|---|
| 14 | * char const * const ws = " \t\n"); |
|---|
| 15 | * |
|---|
| 16 | * A nondestructive version of strtok() that handles its own memory and can |
|---|
| 17 | * be broken up by any character(s). Does all the work at once rather than |
|---|
| 18 | * in an invocation loop like strtok() requires. |
|---|
| 19 | * |
|---|
| 20 | * Container is any type that supports push_back(a_string), although |
|---|
| 21 | * list<string> and deque<string> are recommended for their O(1) push_back. |
|---|
| 22 | * (I prefer deque<> because op[]/at() is available as well.) The first |
|---|
| 23 | * parameter references an existing Container. |
|---|
| 24 | * |
|---|
| 25 | * s is the string to be tokenized. From the parameter declaration, it can |
|---|
| 26 | * be seen that s is not affected. Since references-to-const may refer to |
|---|
| 27 | * temporaries, you could use stringtok(some_container, readline("")) when |
|---|
| 28 | * using the GNU readline library. |
|---|
| 29 | * |
|---|
| 30 | * The final parameter is an array of characters that serve as whitespace. |
|---|
| 31 | * Whitespace characters default to one or more of tab, space, and newline, |
|---|
| 32 | * in any combination. |
|---|
| 33 | * |
|---|
| 34 | * 'l' need not be empty on entry. On return, 'l' will have the token |
|---|
| 35 | * strings appended. |
|---|
| 36 | * |
|---|
| 37 | * |
|---|
| 38 | * [Example: |
|---|
| 39 | * list<string> ls; |
|---|
| 40 | * stringtok (ls, " this \t is\t\n a test "); |
|---|
| 41 | * for (list<string>::const_iterator i = ls.begin(); |
|---|
| 42 | * i != ls.end(); ++i) |
|---|
| 43 | * { |
|---|
| 44 | * cerr << ':' << (*i) << ":\n"; |
|---|
| 45 | * } |
|---|
| 46 | * |
|---|
| 47 | * would print |
|---|
| 48 | * :this: |
|---|
| 49 | * :is: |
|---|
| 50 | * :a: |
|---|
| 51 | * :test: |
|---|
| 52 | * -end example] |
|---|
| 53 | * |
|---|
| 54 | * pedwards@jaj.com May 1999 |
|---|
| 55 | */ |
|---|
| 56 | |
|---|
| 57 | |
|---|
| 58 | #include <string> |
|---|
| 59 | #include <cstring> // for strchr |
|---|
| 60 | |
|---|
| 61 | |
|---|
| 62 | /***************************************************************** |
|---|
| 63 | * This is the only part of the implementation that I don't like. |
|---|
| 64 | * It can probably be improved upon by the reader... |
|---|
| 65 | */ |
|---|
| 66 | |
|---|
/*****************************************************************
 * Reports whether the character c is one of the delimiter characters
 * listed in wstr.
 *
 *   c     -- character to classify.
 *   wstr  -- null-terminated list of delimiter characters; it is handed
 *            straight to strchr(), so it must not be a null pointer.
 *
 * Returns true when c occurs in wstr ahead of its terminator, false
 * otherwise.
 *
 * strchr() considers the terminating '\0' to be part of the searched
 * string, so strchr(wstr, '\0') always succeeds.  The explicit test on c
 * keeps a NUL character from being reported as a delimiter that the
 * caller never listed.
 */
inline bool
isws (char c, char const * const wstr)
{
    return (c != '\0') && (std::strchr(wstr, c) != NULL);
}
|---|
| 73 | |
|---|
| 74 | |
|---|
namespace boost {

  /*****************************************************************
   * Breaks s into tokens and appends each token to l.
   *
   *   l   -- destination container; only push_back(std::string) is
   *          called on it.  Existing elements are left untouched and the
   *          new tokens are appended after them.
   *   s   -- string to tokenize; it is not modified.
   *   ws  -- null-terminated set of delimiter characters (space, tab and
   *          newline by default).  It is passed to std::string's search
   *          members, so it must not be a null pointer.
   *
   * A token is a maximal run of characters none of which appears in ws.
   * Leading, trailing and consecutive delimiters never yield empty
   * tokens, and a string made only of delimiters yields no tokens.
   *
   * Only characters actually listed in ws separate tokens; in particular
   * an embedded NUL character in s stays inside its token.
   *
   * This could still be templatized on basic_string, or on a more
   * generic StringT offering the same search members and substr().
   */
  template <typename Container>
  void
  stringtok (Container &l, std::string const &s, char const * const ws = " \t\n")
  {
    typedef std::string::size_type size_type;
    const size_type npos = std::string::npos;

    // Start of the first token, or npos when s holds nothing but delimiters.
    size_type first = s.find_first_not_of(ws);

    while (first != npos) {
      // s[first] is known not to be a delimiter, so resume one past it.
      const size_type stop = s.find_first_of(ws, first + 1);

      if (stop == npos) {
        // The final token runs to the end of the string.
        l.push_back(s.substr(first));
        return;
      }

      l.push_back(s.substr(first, stop - first));

      // Skip the delimiter just found and any that follow it.
      first = s.find_first_not_of(ws, stop + 1);
    }
  }


} // namespace boost
|---|