Changeset 7221 in orxonox.OLD for trunk/src/lib/util/substring.cc
- Timestamp:
- Mar 15, 2006, 3:10:45 PM (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/lib/util/substring.cc
r5656 r7221 14 14 15 15 2005-06-10: some naming conventions 16 17 // 18 // splitLine 19 // STL string tokenizer 20 // 21 // Created by Clemens Wacha. 22 // Version 1.0 23 // Copyright (c) 2005 Clemens Wacha. All rights reserved. 24 // 25 16 26 */ 17 27 … … 24 34 #include "substring.h" 25 35 26 #include "debug.h"27 36 #include <string.h> 28 #include <assert.h> 29 30 SubString::SubString( const char* string, char splitter) 31 { 32 this->splittersCount = 0; 33 if (string == NULL) 34 { 35 this->strings = NULL; 36 this->offsets = NULL; 37 return; 38 } 39 40 for( int i = 0; i < strlen(string); i++) 41 if( string[i] == splitter) 42 this->splittersCount++; 43 44 this->splittersCount += 1; 45 46 this->strings = new char*[this->splittersCount]; 47 this->offsets = new unsigned int[this->splittersCount]; 48 assert (this->strings != NULL && this->offsets != NULL); 49 50 int i = 0; 51 int l = 0; 52 53 if( this->splittersCount > 1) 54 { 55 const char* offset = string; 56 const char* end = strchr( string, splitter); 57 while( end != NULL) 58 { 59 assert( i < this->splittersCount); 60 l = end - offset; 61 this->strings[i] = new char[l + 1]; 62 assert( strings[i] != NULL); 63 strncpy( strings[i], offset, l); 64 strings[i][l] = '\0'; 65 this->offsets[i] = offset - string; 66 i++; 67 end++; 68 offset = end; 69 end = strchr( offset, splitter); 70 } 71 72 l = strlen( offset); 73 strings[i] = new char[l + 1]; 74 strncpy( strings[i], offset, l); 75 strings[i][l] = '\0'; 76 this->offsets[i] = offset - string; 77 } 78 else 79 { 80 this->strings[0] = new char[strlen(string)+1]; 81 strcpy(this->strings[0], string); 82 this->offsets[0] = 0; 83 } 84 } 85 37 #include <cassert> 38 39 SubString::SubString(const std::string& string, char splitter) 40 { 41 char split[2]; 42 split[0] = splitter; 43 split[1] = '\0'; 44 SubString::splitLine(this->strings, this->offsets, 45 string, split); 46 } 86 47 87 48 /** … … 91 52 * 92 53 */ 93 SubString::SubString(const char* string, bool whiteSpaces) 94 { 95 this->splittersCount = 0; 96 if (string == NULL || whiteSpaces == false) 54 SubString::SubString(const std::string& string, bool whiteSpaces) 55 { 56 SubString::splitLine(this->strings, this->offsets, 57 string); 58 } 59 SubString::SubString(const std::string& string, const std::string& splitters, char escapeChar,char safemode_char, char comment_char) 60 { 61 SubString::splitLine(this->strings, this->offsets, 62 string, splitters, escapeChar, safemode_char); 63 } 64 65 /** 66 * An empty String 67 */ 68 const std::string SubString::emptyString = ""; 69 70 71 72 unsigned int SubString::split(const std::string& string, char splitter) 73 { 74 this->offsets.clear(); 75 this->strings.clear(); 76 char split[2]; 77 split[0] = splitter; 78 split[1] = '\0'; 79 SubString::splitLine(this->strings, this->offsets, string, split); 80 return strings.size(); 81 } 82 83 84 /** 85 * Splits a String into a Substring removing all whiteSpaces 86 * @param string the String to Split 87 * @param whiteSpaces MUST BE __TRUE__ 88 * 89 */ 90 unsigned int SubString::split(const std::string& string, bool whiteSpaces) 91 { 92 this->offsets.clear(); 93 this->strings.clear(); 94 SubString::splitLine(this->strings, this->offsets, string); 95 return strings.size(); 96 } 97 98 unsigned int SubString::split(const std::string& string, const std::string& splitters, char escapeChar,char safemode_char, char comment_char) 99 { 100 this->offsets.clear(); 101 this->strings.clear(); 102 SubString::splitLine(this->strings, this->offsets, 103 string, splitters, escapeChar, safemode_char); 104 return strings.size(); 105 } 106 107 108 /** 109 * @brief splits line into tokens and stores them in ret. 110 * @param ret the Array, where the Splitted strings will be stored in 111 * @param offsets an Array of Offsets, here the distance from the inputstring 112 * to the beginning of the current token is stored 113 * @param line the inputLine to split 114 * @param delimiters a String of Delimiters (here the input will be splitted) 115 * @param escape_char: Escape carater (escapes splitters) 116 * @param safemode_char: the beginning of the safemode is marked with this 117 * @param comment_char: the beginning of a comment is marked with this: (until the end of a Line) 118 * @param start_state: the Initial state on how to parse the String. 119 * @returns SPLIT_LINE_STATE the parser was in when returning 120 * 121 * Supports delimiters, escape characters, 122 * ignores special characters between safemode_char and between comment_char and linend '\n'. 123 * 124 */ 125 SPLIT_LINE_STATE SubString::splitLine(std::vector<std::string>& ret, std::vector<unsigned int>& offsets, 126 const std::string& line, const std::string& delimiters, 127 char escape_char, char safemode_char, char comment_char, 128 SPLIT_LINE_STATE start_state) 129 { 130 SPLIT_LINE_STATE state = start_state; 131 unsigned int i = 0; 132 std::string token; 133 134 if(start_state != SL_NORMAL && ret.size() > 0) 97 135 { 98 this->strings = NULL; 99 this->offsets = NULL; 100 return; 136 token = ret[ret.size()-1]; 137 ret.pop_back(); 101 138 } 102 139 103 // chop the input to the beginning of something usefull 104 if (strlen(string) > 0) 105 string = string + strspn(string, " \t\n"); 106 107 // count the Splitters 108 bool lastWasWhiteSpace = false; 109 for(unsigned int i = 0; i < strlen(string); i++) 110 if( string[i] == ' ' || string[i] == '\t' || string[i] == '\n' ) 111 lastWasWhiteSpace = true; 112 else 140 while(i < line.size()) 141 { 142 switch(state) 113 143 { 114 if (lastWasWhiteSpace) 115 this->splittersCount ++; 116 lastWasWhiteSpace = false; 144 case SL_NORMAL: 145 if(line[i] == escape_char) 146 { 147 state = SL_ESCAPE; 148 } 149 else if(line[i] == safemode_char) 150 { 151 state = SL_SAFEMODE; 152 } 153 else if(line[i] == comment_char) 154 { 155 /// FINISH 156 if(token.size() > 0) 157 { 158 ret.push_back(token); 159 offsets.push_back(i); 160 token.clear(); 161 } 162 token += line[i]; // EAT 163 state = SL_COMMENT; 164 } 165 else if(delimiters.find(line[i]) != std::string::npos) 166 { 167 // line[i] is a delimiter 168 /// FINISH 169 if(token.size() > 0) 170 { 171 ret.push_back(token); 172 offsets.push_back(i); 173 token.clear(); 174 } 175 } 176 else 177 { 178 token += line[i]; // EAT 179 } 180 break; 181 case SL_ESCAPE: 182 if(line[i] == 'n') token += '\n'; 183 else if(line[i] == 't') token += '\t'; 184 else if(line[i] == 'v') token += '\v'; 185 else if(line[i] == 'b') token += '\b'; 186 else if(line[i] == 'r') token += '\r'; 187 else if(line[i] == 'f') token += '\f'; 188 else if(line[i] == 'a') token += '\a'; 189 else if(line[i] == '?') token += '\?'; 190 else token += line[i]; // EAT 191 state = SL_NORMAL; 192 break; 193 case SL_SAFEMODE: 194 if(line[i] == safemode_char) 195 { 196 state = SL_NORMAL; 197 } 198 else if(line[i] == escape_char) 199 { 200 state = SL_SAFEESCAPE; 201 } 202 else 203 { 204 token += line[i]; // EAT 205 } 206 break; 207 case SL_SAFEESCAPE: 208 if(line[i] == 'n') token += '\n'; 209 else if(line[i] == 't') token += '\t'; 210 else if(line[i] == 'v') token += '\v'; 211 else if(line[i] == 'b') token += '\b'; 212 else if(line[i] == 'r') token += '\r'; 213 else if(line[i] == 'f') token += '\f'; 214 else if(line[i] == 'a') token += '\a'; 215 else if(line[i] == '?') token += '\?'; 216 else token += line[i]; // EAT 217 state = SL_SAFEMODE; 218 break; 219 case SL_COMMENT: 220 if(line[i] == '\n') 221 { 222 /// FINISH 223 if(token.size() > 0) 224 { 225 ret.push_back(token); 226 offsets.push_back(i); 227 token.clear(); 228 } 229 state = SL_NORMAL; 230 } 231 else 232 { 233 token += line[i]; // EAT 234 } 235 break; 236 default: 237 // nothing 238 break; 117 239 } 118 this->splittersCount += 1; 119 120 // allocate memory 121 this->strings = new char*[this->splittersCount]; 122 this->offsets = new unsigned int[this->splittersCount]; 123 assert (this->strings != NULL && this->offsets != NULL); 124 125 126 // split the String into substrings 127 int l = 0; 128 unsigned int i = 0; 129 if( this->splittersCount > 1) 240 i++; 241 } 242 243 /// FINISH 244 if(token.size() > 0) 130 245 { 131 const char* offset = string; 132 const char* end = offset + strcspn(offset, " \t\n"); 133 for (i = 0; i < this->splittersCount; i++) 134 { 135 assert( i < this->splittersCount); 136 l = end - offset; 137 this->strings[i] = new char[l + 1]; 138 assert( strings[i] != NULL); 139 strncpy( strings[i], offset, l); 140 strings[i][l] = '\0'; 141 this->offsets[i] = offset - string; 142 end += strspn(end, " \t\n"); 143 offset = end; 144 end = offset + strcspn(offset, " \t\n"); 145 } 246 ret.push_back(token); 247 offsets.push_back(i); 248 token.clear(); 146 249 } 147 else 148 { 149 unsigned int length = strcspn(string, " \t\n"); 150 this->strings[0] = new char[length+1]; 151 strncpy(this->strings[0], string, length); 152 this->strings[0][length] = '\0'; 153 offsets[0] = 0; 154 } 155 } 156 157 SubString::SubString(const char* string, const char* splitters, char escapeChar) 158 { 159 this->splittersCount = 0; 160 if (string == NULL || splitters == NULL) 161 { 162 this->strings = NULL; 163 this->offsets = NULL; 164 return; 165 } 166 167 // chop the input to the beginning of something usefull 168 if (strlen(string) > 0) 169 string = string + strspn(string, splitters); 170 171 // count the Splitters 172 bool lastWasSplitter = false; 173 for(unsigned int i = 0; i < strlen(string); i++) 174 { 175 176 if( strchr(splitters, string[i] )) 177 lastWasSplitter = true; 178 else 179 { 180 if (lastWasSplitter) 181 { 182 this->splittersCount ++; 183 lastWasSplitter = false; 184 } 185 } 186 } 187 this->splittersCount += 1; 188 189 // allocate memory 190 this->strings = new char*[this->splittersCount]; 191 this->offsets = new unsigned int[this->splittersCount]; 192 assert (this->strings != NULL && this->offsets != NULL); 193 194 195 // split the String into substrings 196 int l = 0; 197 unsigned int i = 0; 198 if( this->splittersCount > 1) 199 { 200 const char* offset = string; 201 const char* end = offset + strcspn(offset, splitters); 202 for (i = 0; i < this->splittersCount; i++) 203 { 204 assert( i < this->splittersCount); 205 l = end - offset; 206 this->strings[i] = new char[l + 1]; 207 assert( strings[i] != NULL); 208 strncpy( strings[i], offset, l); 209 strings[i][l] = '\0'; 210 this->offsets[i] = offset - string; 211 end += strspn(end, splitters); 212 offset = end; 213 end = offset + strcspn(offset, splitters); 214 } 215 } 216 else 217 { 218 unsigned int length = strcspn(string, splitters); 219 this->strings[0] = new char[length+1]; 220 strncpy(this->strings[0], string, length); 221 this->strings[0][length] = '\0'; 222 offsets[0] = 0; 223 } 224 } 225 250 return(state); 251 } 226 252 227 253 /** … … 229 255 */ 230 256 SubString::~SubString() 231 { 232 if (this->strings) 233 { 234 for(unsigned int i = 0; i < this->splittersCount; i++) 235 delete[] this->strings[i]; 236 delete[] this->strings; 237 } 238 delete[] this->offsets; 239 } 240 241 /** 242 * get a particular substring 243 * @param i the ID of the substring to return 244 * @returns the designated substring or NULL if an invalid ID was given 245 */ 246 const char* SubString::getString(unsigned int i) 247 { 248 if( i < this->splittersCount && i >= 0) 249 return this->strings[i]; 250 else 251 return NULL; 252 } 257 { } 253 258 254 259 /** … … 259 264 unsigned int SubString::getOffset(unsigned int i) 260 265 { 261 if( i < this-> splittersCount&& i >= 0)266 if( i < this->offsets.size() && i >= 0) 262 267 return this->offsets[i]; 263 268 else … … 270 275 void SubString::debug() const 271 276 { 272 PRINT(0)("Substring-information::count=%d ::", this->splittersCount); 273 if (this->strings != NULL) 274 for (unsigned int i = 0; i < this->splittersCount; i++) 275 PRINT(0)("s%d='%s'::", i, this->strings[i]); 276 PRINT(0)("\n"); 277 } 277 printf("Substring-information::count=%d ::", this->strings.size()); 278 for (unsigned int i = 0; i < this->strings.size(); i++) 279 printf("s%d='%s'::", i, this->strings[i].c_str()); 280 printf("\n"); 281 }
Note: See TracChangeset
for help on using the changeset viewer.