Planet
navi homePPSaboutscreenshotsdownloaddevelopmentforum

source: orxonox.OLD/trunk/src/lib/util/substring.cc @ 7474

Last change on this file since 7474 was 7474, checked in by bensch, 18 years ago

orxonox/trunk: SubString::split algorithm revisited: it now splits strings at the delimiters and, if you want it to, also erases the neighbours of those delimiters.
There is now also a way to keep empty entries.

File size: 12.7 KB
Line 
1/*
2   orxonox - the future of 3D-vertical-scrollers
3
4   Copyright (C) 2004 orx
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10
11   ### File Specific:
12   main-programmer: Christian Meyer
13   co-programmer: Benjamin Grauer
14
15   2005-06-10: some naming conventions
16
17//
18//  splitLine
19//  STL string tokenizer
20//
21//  Created by Clemens Wacha.
22//  Version 1.0
23//  Copyright (c) 2005 Clemens Wacha. All rights reserved.
24//
25
26*/
27
28#include "substring.h"
29
30#include <string.h>
31#include <cassert>
32
33
34/**
35 * @brief default constructor
36 */
37SubString::SubString()
38{}
39
40
41/**
42 * @brief create a SubString from
43 * @param string the String to Spilit
44 * @param delimiter the Character at which to split string (delimiter)
45 */
46SubString::SubString(const std::string& string, char delimiter)
47{
48  this->split(string, delimiter);
49}
50
51
52/**
53 * @brief Splits a String into multiple splitters.
54 * @param string the String to split
55 * @param delimiters multiple set of characters at what to split. (delimiters)
56 * @param delimiterNeighbours neighbours of the delimiters, that will be erased only when near a delimiter.
57 * @param escapeChar The Escape Character that overrides splitters commends and so on...
58 * @param safemode_char within these characters splitting won't happen
59 * @param comment_char the Comment character.
60 */
61SubString::SubString(const std::string& string,
62                     const std::string& delimiters, const std::string& delimiterNeighbours, bool emptyEntries,
63                     char escapeChar, char safemode_char, char comment_char)
64{
65  SubString::splitLine(this->strings, string, delimiters, delimiterNeighbours, emptyEntries, escapeChar, safemode_char, comment_char);
66}
67
68/**
69 * @brief creates a SubSet of a SubString.
70 * @param subString the SubString to take a set from.
71 * @param subSetBegin the beginning to the end
72 */
73SubString::SubString(const SubString& subString, unsigned int subSetBegin)
74{
75  for (unsigned int i = subSetBegin; i < subString.size(); i++)
76    this->strings.push_back(subString[i]);
77}
78
79
80/**
81 * @brief creates a SubSet of a SubString.
82 * @param subString the SubString to take a Set from
83 * @param subSetBegin the beginning to the end
84 * @param subSetEnd the end of the SubSet (max subString.size() will be checked internaly)
85 */
86SubString::SubString(const SubString& subString, unsigned int subSetBegin, unsigned int subSetEnd)
87{
88  for (unsigned int i = subSetBegin; i < subString.size() || i < subSetEnd; i++)
89    this->strings.push_back(subString[i]);
90}
91
92
93/**
94 * @brief removes the object from memory
95 */
96SubString::~SubString()
97{ }
98
99/** @brief An empty String */
100const std::string SubString::emptyString = "";
101/** @brief Helper that gets you a String consisting of all White Spaces */
102const std::string SubString::WhiteSpaces = " \n\t";
103/** @brief Helper that gets you a String consisting of all WhiteSpaces and the Comma */
104const std::string SubString::WhiteSpacesWithComma = " \n\t,";
105
106/**
107 * @brief stores the Value of subString in this SubString
108 * @param subString will be copied into this String.
109 * @returns this SubString.
110 */
111SubString& SubString::operator=(const SubString& subString)
112{
113  this->strings = subString.strings;
114  return *this;
115}
116
117
118/**
119 * @brief comparator.
120 * @param subString the SubString to compare against this one.
121 * @returns true if the Stored Strings match
122 */
123bool SubString::operator==(const SubString& subString) const
124{
125  return (this->strings == subString.strings);
126}
127
128/**
129 * @brief comparator.
130 * @param subString the SubString to compare against this one.
131 * @returns true if the Stored Strings match
132 */
133bool SubString::compare(const SubString& subString) const
134{
135  return (*this == subString);
136}
137
138/**
139 * @brief comparator.
140 * @param subString the SubString to compare against this one.
141 * @returns true if the Stored Strings match
142 */
143bool SubString::compare(const SubString& subString, unsigned int length) const
144{
145  if (length > this->size() || length > subString.size())
146    return false;
147
148  for (unsigned int i = 0; i < length; i++)
149    if (this->strings[i] != subString.strings[i])
150      return false;
151  return true;
152}
153
154
155/**
156 * @brief append operator
157 * @param subString the String to append.
158 * @returns a SubString where this and subString are appended.
159 */
160SubString SubString::operator+(const SubString& subString) const
161{
162  return SubString(*this) += subString;
163}
164
165
166/**
167 * @brief append operator.
168 * @param subString append subString to this SubString.
169 * @returns this substring appended with subString
170 */
171SubString& SubString::operator+=(const SubString& subString)
172{
173  for (unsigned int i = 0; i < subString.size(); i++)
174    this->strings.push_back(subString[i]);
175  return *this;
176}
177
178
179/**
180 * @brief Split the String at
181 * @param string where to split
182 * @param splitter delimiter.
183 */
184unsigned int SubString::split(const std::string& string, char splitter)
185{
186  this->strings.clear();
187  char split[2];
188  split[0] = splitter;
189  split[1] = '\0';
190  SubString::splitLine(this->strings, string, split);
191  return strings.size();
192}
193
194
195/**
196 * @brief Splits a String into multiple splitters.
197 * @param string the String to split
198 * @param delimiters multiple set of characters at what to split. (delimiters)
199 * @param delimiterNeighbours: Neighbours to the Delimiters that will be erased too.
200 * @param emptyEntries: If empty entries are added to the List of SubStrings
201 * @param escapeChar The Escape Character that overrides splitters commends and so on...
202 * @param safemode_char within these characters splitting won't happen
203 * @param comment_char the Comment character.
204 */
205unsigned int SubString::split(const std::string& string,
206                              const std::string& delimiters, const std::string& delimiterNeighbours, bool emptyEntries,
207                              char escapeChar,char safemode_char, char comment_char)
208{
209  this->strings.clear();
210  SubString::splitLine(this->strings, string, delimiters, delimiterNeighbours, emptyEntries, escapeChar, safemode_char, comment_char);
211  return this->strings.size();
212}
213
214
215/**
216 * @brief joins together all Strings of this Substring.
217 * @param delimiter the String between the subStrings.
218 * @returns the joined String.
219 */
220std::string SubString::join(const std::string& delimiter) const
221{
222  if (!this->strings.empty())
223  {
224    std::string retVal = this->strings[0];
225    for (unsigned int i = 1; i < this->strings.size(); i++)
226      retVal += delimiter + this->strings[i];
227    return retVal;
228  }
229  else
230    return SubString::emptyString;
231}
232
233
234/**
235 * @brief creates a SubSet of a SubString.
236 * @param subSetBegin the beginning to the end
237 * @returns the SubSet
238 *
239 * This function is added for your convenience, and does the same as
240 * SubString::SubString(const SubString& subString, unsigned int subSetBegin)
241 */
242SubString SubString::getSubSet(unsigned int subSetBegin) const
243{
244  return SubString(*this, subSetBegin);
245}
246
247
248/**
249 * @brief creates a SubSet of a SubString.
250 * @param subSetBegin the beginning to
251 * @param subSetEnd the end of the SubSet to select (if bigger than subString.size() it will be downset.)
252 * @returns the SubSet
253 *
254 * This function is added for your convenience, and does the same as
255 * SubString::SubString(const SubString& subString, unsigned int subSetBegin)
256 */
257SubString SubString::getSubSet(unsigned int subSetBegin, unsigned int subSetEnd) const
258{
259  return SubString(*this, subSetBegin, subSetEnd);
260}
261
262
263/**
264 * @brief splits line into tokens and stores them in ret.
265 * @param ret the Array, where the Splitted strings will be stored in
266 * @param offsets an Array of Offsets, here the distance from the inputstring
267 * to the beginning of the current token is stored
268 * @param line the inputLine to split
269 * @param delimiters a String of Delimiters (here the input will be splitted)
270 * @param delimiterNeighbour Naighbours to the Delimitter, that will be removed if they are to the left or the right of a Delimiter.
271 * @param emptyEntries: if empty Strings are added to the List of Strings.
272 * @param escape_char: Escape carater (escapes splitters)
273 * @param safemode_char: the beginning of the safemode is marked with this
274 * @param comment_char: the beginning of a comment is marked with this: (until the end of a Line)
275 * @param start_state: the Initial state on how to parse the String.
276 * @returns SPLIT_LINE_STATE the parser was in when returning
277 *
278 * This is the Actual Splitting Algorithm from Clemens Wacha
279 * Supports delimiters, escape characters,
280 * ignores special  characters between safemode_char and between comment_char and linend '\n'.
281 *
282 *
283 */
284SubString::SPLIT_LINE_STATE
285SubString::splitLine(std::vector<std::string>& ret,
286                     const std::string& line,
287                     const std::string& delimiters,
288                     const std::string& delimiterNeighbours,
289                     bool emptyEntries,
290                     char escape_char,
291                     char safemode_char,
292                     char comment_char,
293                     SPLIT_LINE_STATE start_state)
294{
295  SPLIT_LINE_STATE state = start_state;
296  unsigned int i = 0;
297  unsigned int fallBackNeighbours = 0;
298
299  std::string token;
300
301  if(start_state != SL_NORMAL && ret.size() > 0)
302  {
303    token = ret[ret.size()-1];
304    ret.pop_back();
305  }
306
307  while(i < line.size())
308  {
309    switch(state)
310    {
311      case SL_NORMAL:
312        if(line[i] == escape_char)
313        {
314          state = SL_ESCAPE;
315        }
316        else if(line[i] == safemode_char)
317        {
318          state = SL_SAFEMODE;
319        }
320        else if(line[i] == comment_char)
321        {
322          if (fallBackNeighbours > 0)
323            token = token.substr(0, token.size() - fallBackNeighbours);
324          /// FINISH
325          if(emptyEntries || token.size() > 0)
326          {
327            ret.push_back(token);
328            token.clear();
329          }
330          token += line[i];       // EAT
331          state = SL_COMMENT;
332        }
333        else if(delimiters.find(line[i]) != std::string::npos)
334        {
335          // line[i] is a delimiter
336          if (fallBackNeighbours > 0)
337            token = token.substr(0, token.size() - fallBackNeighbours);
338          /// FINISH
339          if(emptyEntries || token.size() > 0)
340          {
341            ret.push_back(token);
342            token.clear();
343          }
344          state = SL_NORMAL;
345        }
346        else
347        {
348          if (delimiterNeighbours.find(line[i]) != std::string::npos)
349          {
350            if (token.size() > 0)
351              ++fallBackNeighbours;
352            else
353            {
354              i++;
355              continue;
356            }
357          }
358          else
359            fallBackNeighbours = 0;
360          token += line[i];       // EAT
361        }
362        break;
363      case SL_ESCAPE:
364        if(line[i] == 'n') token += '\n';
365        else if(line[i] == 't') token += '\t';
366        else if(line[i] == 'v') token += '\v';
367        else if(line[i] == 'b') token += '\b';
368        else if(line[i] == 'r') token += '\r';
369        else if(line[i] == 'f') token += '\f';
370        else if(line[i] == 'a') token += '\a';
371        else if(line[i] == '?') token += '\?';
372        else token += line[i];  // EAT
373        state = SL_NORMAL;
374        break;
375      case SL_SAFEMODE:
376        if(line[i] == safemode_char)
377        {
378          state = SL_NORMAL;
379        }
380        else if(line[i] == escape_char)
381        {
382          state = SL_SAFEESCAPE;
383        }
384        else
385        {
386          token += line[i];       // EAT
387        }
388        break;
389
390      case SL_SAFEESCAPE:
391        if(line[i] == 'n') token += '\n';
392        else if(line[i] == 't') token += '\t';
393        else if(line[i] == 'v') token += '\v';
394        else if(line[i] == 'b') token += '\b';
395        else if(line[i] == 'r') token += '\r';
396        else if(line[i] == 'f') token += '\f';
397        else if(line[i] == 'a') token += '\a';
398        else if(line[i] == '?') token += '\?';
399        else token += line[i];  // EAT
400        state = SL_SAFEMODE;
401        break;
402
403      case SL_COMMENT:
404        if(line[i] == '\n')
405        {
406          /// FINISH
407          if(token.size() > 0)
408          {
409            ret.push_back(token);
410            token.clear();
411          }
412          state = SL_NORMAL;
413        }
414        else
415        {
416          token += line[i];       // EAT
417        }
418        break;
419
420      default:
421        // nothing
422        break;
423    }
424    i++;
425  }
426
427  /// FINISH
428  if (fallBackNeighbours > 0)
429    token = token.substr(0, token.size() - fallBackNeighbours);
430  if(emptyEntries || token.size() > 0)
431  {
432    ret.push_back(token);
433    token.clear();
434  }
435  return(state);
436}
437
438
439/**
440 * @brief Some nice debug information about this SubString
441 */
442void SubString::debug() const
443{
444  printf("Substring-information::count=%d ::", this->strings.size());
445  for (unsigned int i = 0; i < this->strings.size(); i++)
446    printf("s%d='%s'::", i, this->strings[i].c_str());
447  printf("\n");
448}
Note: See TracBrowser for help on using the repository browser.