Planet
navi homePPSaboutscreenshotsdownloaddevelopmentforum

source: orxonox.OLD/trunk/src/lib/util/substring.cc @ 7319

Last change on this file since 7319 was 7319, checked in by bensch, 18 years ago

orxonox/trunk: MUCH more comprehensive SubString

File size: 11.6 KB
Line 
1/*
2   orxonox - the future of 3D-vertical-scrollers
3
4   Copyright (C) 2004 orx
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10
11   ### File Specific:
12   main-programmer: Christian Meyer
13   co-programmer: Benjamin Grauer
14
15   2005-06-10: some naming conventions
16
17//
18//  splitLine
19//  STL string tokenizer
20//
21//  Created by Clemens Wacha.
22//  Version 1.0
23//  Copyright (c) 2005 Clemens Wacha. All rights reserved.
24//
25
26*/
27
28#include "substring.h"
29
30#include <string.h>
31#include <cassert>
32
33/**
34 * @brief create a SubString from
35 * @param string the String to Spilit
36 * @param splitter the Character at which to split string (delimiter)
37 */
38SubString::SubString(const std::string& string, char splitter)
39{
40  char split[2];
41  split[0] = splitter;
42  split[1] = '\0';
43  SubString::splitLine(this->strings, this->offsets,
44                       string, split);
45}
46
47/**
48 * @brief Splits a String into a SubString removing all whiteSpaces
49 * @param string the String to Split
50 * @param whiteSpaces MUST BE __TRUE__ or __FALSE__ (will be ignored)
51 */
52SubString::SubString(const std::string& string, bool whiteSpaces)
53{
54  SubString::splitLine(this->strings, this->offsets,
55                       string);
56}
57
58/**
59 * @brief Splits a String into multiple splitters.
60 * @param string the String to split
61 * @param splitters multiple set of characters at what to split. (delimiters)
62 * @param escapeChar The Escape Character that overrides splitters commends and so on...
63 * @param safemode_char within these characters splitting won't happen
64 * @param comment_char the Comment character.
65 */
66SubString::SubString(const std::string& string, const std::string& splitters, char escapeChar,char safemode_char, char comment_char)
67{
68  SubString::splitLine(this->strings, this->offsets,
69                       string, splitters, escapeChar, safemode_char, comment_char);
70}
71
72/**
73 * @brief creates a SubSet of a SubString.
74 * @param subString the SubString to take a set from.
75 * @param subSetBegin the beginning to the end
76 */
77SubString::SubString(const SubString& subString, unsigned int subSetBegin)
78{
79  for (unsigned int i = subSetBegin; i < subString.size(); i++)
80    this->strings.push_back(subString[i]);
81}
82
83
84/**
85 * @brief creates a SubSet of a SubString.
86 * @param subString the SubString to take a Set from
87 * @param subSetBegin the beginning to the end
88 * @param subSetEnd the end of the SubSet (max subString.size() will be checked internaly)
89 */
90SubString::SubString(const SubString& subString, unsigned int subSetBegin, unsigned int subSetEnd)
91{
92  for (unsigned int i = subSetBegin; i < subString.size() || i < subSetEnd; i++)
93    this->strings.push_back(subString[i]);
94}
95
96
97/**
98 * @brief removes the object from memory
99 */
100SubString::~SubString()
101{ }
102
103/**
104 * @brief An empty String
105 */
106const std::string SubString::emptyString = "";
107
108/**
109 * @brief stores the Value of subString in this SubString
110 * @param subString will be copied into this String.
111 * @returns this SubString.
112 */
113SubString& SubString::operator=(const SubString& subString)
114{
115  this->offsets = subString.offsets;
116  this->strings = subString.strings;
117  return *this;
118}
119
120
121/**
122 * @brief comparator.
123 * @param subString the SubString to compare against this one.
124 * @returns true if the Stored Strings match
125 */
126bool SubString::operator==(const SubString& subString)
127{
128  return (this->strings == subString.strings);
129}
130
131
132/**
133 * @brief append operator
134 * @param subString the String to append.
135 * @returns a SubString where this and subString are appended.
136 */
137SubString SubString::operator+(const SubString& subString) const
138{
139  return SubString(subString) += subString;
140}
141
142
143/**
144 * @brief append operator.
145 * @param subString append subString to this SubString.
146 * @returns this substring appended with subString
147 */
148SubString& SubString::operator+=(const SubString& subString)
149{
150  for (unsigned int i = 0; i < subString.size(); i++)
151    this->strings.push_back(subString[i]);
152}
153
154
155/**
156 * @brief Split the String at
157 * @param string where to split
158 * @param splitter delimiter.
159 */
160unsigned int SubString::split(const std::string& string, char splitter)
161{
162  this->offsets.clear();
163  this->strings.clear();
164  char split[2];
165  split[0] = splitter;
166  split[1] = '\0';
167  SubString::splitLine(this->strings, this->offsets, string, split);
168  return strings.size();
169}
170
171
172/**
173 * @brief Splits a String into a Substring removing all whiteSpaces
174 * @param string the String to Split
175 * @param whiteSpaces MUST BE __TRUE__
176 *
177 */
178unsigned int SubString::split(const std::string& string, bool whiteSpaces)
179{
180  this->offsets.clear();
181  this->strings.clear();
182  SubString::splitLine(this->strings, this->offsets, string);
183  return strings.size();
184}
185
186
187/**
188 * @brief Splits a String into multiple splitters.
189 * @param string the String to split
190 * @param splitters multiple set of characters at what to split. (delimiters)
191 * @param escapeChar The Escape Character that overrides splitters commends and so on...
192 * @param safemode_char within these characters splitting won't happen
193 * @param comment_char the Comment character.
194 */
195unsigned int SubString::split(const std::string& string, const std::string& splitters, char escapeChar,char safemode_char, char comment_char)
196{
197  this->offsets.clear();
198  this->strings.clear();
199  SubString::splitLine(this->strings, this->offsets,
200                       string, splitters, escapeChar, safemode_char);
201  return strings.size();
202}
203
204
205/**
206 * @brief joins together all Strings of this Substring.
207 * @param delimiter the String between the subStrings.
208 * @returns the joined String.
209 */
210std::string SubString::join(const std::string& delimiter) const
211{
212  if (!this->strings.empty())
213  {
214    std::string retVal = this->strings[0];
215    for (unsigned int i = 0; i < this->strings.size(); i++)
216      retVal += delimiter + this->strings[i];
217    return retVal;
218  }
219  else
220    return SubString::emptyString;
221}
222
223
224/**
225 * @brief creates a SubSet of a SubString.
226 * @param subSetBegin the beginning to the end
227 * @returns the SubSet
228 *
229 * This function is added for your convenience, and does the same as
230 * SubString::SubString(const SubString& subString, unsigned int subSetBegin)
231 */
232SubString SubString::getSubSet(unsigned int subSetBegin) const
233{
234  return SubString(*this, subSetBegin);
235}
236
237
238/**
239 * @brief creates a SubSet of a SubString.
240 * @param subSetBegin the beginning to
241 * @param subSetEnd the end of the SubSet to select (if bigger than subString.size() it will be downset.)
242 * @returns the SubSet
243 *
244 * This function is added for your convenience, and does the same as
245 * SubString::SubString(const SubString& subString, unsigned int subSetBegin)
246 */
247SubString SubString::getSubSet(unsigned int subSetBegin, unsigned int subSetEnd) const
248{
249  return SubString(*this, subSetBegin, subSetEnd);
250}
251
252
253/**
254 * @brief get a particular substring's offset
255 * @param i the ID of the substring to get the offset from
256 * @returns the offset or NULL if an invalid ID was given
257 */
258unsigned int SubString::getOffset(unsigned int i) const
259{
260  if( i < this->offsets.size() && i >= 0)
261    return this->offsets[i];
262  else
263    return 0;
264}
265
266
267/**
268 * @brief splits line into tokens and stores them in ret.
269 * @param ret the Array, where the Splitted strings will be stored in
270 * @param offsets an Array of Offsets, here the distance from the inputstring
271 * to the beginning of the current token is stored
272 * @param line the inputLine to split
273 * @param delimiters a String of Delimiters (here the input will be splitted)
274 * @param escape_char: Escape carater (escapes splitters)
275 * @param safemode_char: the beginning of the safemode is marked with this
276 * @param comment_char: the beginning of a comment is marked with this: (until the end of a Line)
277 * @param start_state: the Initial state on how to parse the String.
278 * @returns SPLIT_LINE_STATE the parser was in when returning
279 *
280 * This is the Actual Splitting Algorithm from Clemens Wacha
281 * Supports delimiters, escape characters,
282 * ignores special  characters between safemode_char and between comment_char and linend '\n'.
283 *
284 */
285SPLIT_LINE_STATE SubString::splitLine(std::vector<std::string>& ret, std::vector<unsigned int>& offsets,
286                                      const std::string& line, const std::string& delimiters,
287                                      char escape_char, char safemode_char, char comment_char,
288                                      SPLIT_LINE_STATE start_state)
289{
290  SPLIT_LINE_STATE state = start_state;
291  unsigned int i = 0;
292  std::string token;
293
294  if(start_state != SL_NORMAL && ret.size() > 0)
295  {
296    token = ret[ret.size()-1];
297    ret.pop_back();
298  }
299
300  while(i < line.size())
301  {
302    switch(state)
303    {
304      case SL_NORMAL:
305        if(line[i] == escape_char)
306        {
307          state = SL_ESCAPE;
308        }
309        else if(line[i] == safemode_char)
310        {
311          state = SL_SAFEMODE;
312        }
313        else if(line[i] == comment_char)
314        {
315          /// FINISH
316          if(token.size() > 0)
317          {
318            ret.push_back(token);
319            offsets.push_back(i);
320            token.clear();
321          }
322          token += line[i];       // EAT
323          state = SL_COMMENT;
324        }
325        else if(delimiters.find(line[i]) != std::string::npos)
326        {
327          // line[i] is a delimiter
328          /// FINISH
329          if(token.size() > 0)
330          {
331            ret.push_back(token);
332            offsets.push_back(i);
333            token.clear();
334          }
335        }
336        else
337        {
338          token += line[i];       // EAT
339        }
340        break;
341      case SL_ESCAPE:
342        if(line[i] == 'n') token += '\n';
343        else if(line[i] == 't') token += '\t';
344        else if(line[i] == 'v') token += '\v';
345        else if(line[i] == 'b') token += '\b';
346        else if(line[i] == 'r') token += '\r';
347        else if(line[i] == 'f') token += '\f';
348        else if(line[i] == 'a') token += '\a';
349        else if(line[i] == '?') token += '\?';
350        else token += line[i];  // EAT
351        state = SL_NORMAL;
352        break;
353      case SL_SAFEMODE:
354        if(line[i] == safemode_char)
355        {
356          state = SL_NORMAL;
357        }
358        else if(line[i] == escape_char)
359        {
360          state = SL_SAFEESCAPE;
361        }
362        else
363        {
364          token += line[i];       // EAT
365        }
366        break;
367      case SL_SAFEESCAPE:
368        if(line[i] == 'n') token += '\n';
369        else if(line[i] == 't') token += '\t';
370        else if(line[i] == 'v') token += '\v';
371        else if(line[i] == 'b') token += '\b';
372        else if(line[i] == 'r') token += '\r';
373        else if(line[i] == 'f') token += '\f';
374        else if(line[i] == 'a') token += '\a';
375        else if(line[i] == '?') token += '\?';
376        else token += line[i];  // EAT
377        state = SL_SAFEMODE;
378        break;
379      case SL_COMMENT:
380        if(line[i] == '\n')
381        {
382          /// FINISH
383          if(token.size() > 0)
384          {
385            ret.push_back(token);
386            offsets.push_back(i);
387            token.clear();
388          }
389          state = SL_NORMAL;
390        }
391        else
392        {
393          token += line[i];       // EAT
394        }
395        break;
396      default:
397        // nothing
398        break;
399    }
400    i++;
401  }
402
403  /// FINISH
404  if(token.size() > 0)
405  {
406    ret.push_back(token);
407    offsets.push_back(i);
408    token.clear();
409  }
410  return(state);
411}
412
413
414/**
415 * @brief Some nice debug information about this SubString
416 */
417void SubString::debug() const
418{
419  printf("Substring-information::count=%d ::", this->strings.size());
420  for (unsigned int i = 0; i < this->strings.size(); i++)
421    printf("s%d='%s'::", i, this->strings[i].c_str());
422  printf("\n");
423}
Note: See TracBrowser for help on using the repository browser.