Planet

navi

home

PPS

about

screenshots

download

development

forum

Context Navigation

source: code/branches/tutorial/src/libraries/util/StringUtils.cc @ 10607

Last change on this file since 10607 was 7401, checked in by landauf, 15 years ago
merged doc branch back to trunk
Property svn:eol-style set to `native`
File size: 16.8 KB

Line
1	/*
2	* ORXONOX - the hottest 3D action shooter ever to exist
3	* > www.orxonox.net <
4	*
5	*
6	* License notice:
7	*
8	* This program is free software; you can redistribute it and/or
9	* modify it under the terms of the GNU General Public License
10	* as published by the Free Software Foundation; either version 2
11	* of the License, or (at your option) any later version.
12	*
13	* This program is distributed in the hope that it will be useful,
14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16	* GNU General Public License for more details.
17	*
18	* You should have received a copy of the GNU General Public License
19	* along with this program; if not, write to the Free Software
20	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21	*
22	* Author:
23	* Fabian 'x3n' Landau
24	* Co-authors:
25	* Benjamin Grauer
26	*
27	*/
28
29	/**
30	@file
31	@brief Implementation of several string manipulation functions.
32	*/
33
#include "StringUtils.h"

#include <algorithm>
#include <cctype>
#include <string>
#include <vector>
#include <boost/scoped_array.hpp>
#include "Convert.h"
#include "Math.h"
40
41	namespace orxonox
42	{
43	/// A blank string (""). Exists so that a blank string can be returned by reference.
44	std::string BLANKSTRING;
45
46	/// Returns a string of a unique number. This function is guaranteed to never return the same string twice.
47	std::string getUniqueNumberString()
48	{
49	return multi_cast<std::string>(getUniqueNumber());
50	}
51
/**
    @brief Removes all whitespaces (blanks, tabs and line breaks) from a string.
    @param str Pointer to the string that gets modified in place (must not be NULL)

    The string is compacted in a single pass: every char that is kept is moved to the
    front, afterwards the string is shortened to the number of kept chars.
*/
void strip(std::string* str)
{
    size_t kept = 0;
    for (size_t i = 0; i < str->size(); ++i)
    {
        const char c = (*str)[i];
        if (c != ' ' && c != '\t' && c != '\n')
            (*str)[kept++] = c;
    }
    str->resize(kept);
}
63
64	/// Returns a copy of a string without whitespaces.
65	std::string getStripped(const std::string& str)
66	{
67	std::string output(str);
68	strip(&output);
69	return output;
70	}
71
/**
    @brief Returns a copy of a string without leading and trailing whitespaces.
    @param str The string
    @return The part of @a str between the first and the last char that is neither a blank, a tab nor a line break
            (an empty string if there is no such char)

    Despite its name, this function removes whitespaces at both ends of the string.
*/
std::string removeTrailingWhitespaces(const std::string& str)
{
    const size_t first = str.find_first_not_of(" \t\n");
    if (first == std::string::npos)
        return std::string(); // empty or whitespaces only

    // A non-whitespace char exists, so the backward search always succeeds
    const size_t last = str.find_last_not_of(" \t\n");
    return str.substr(first, last - first + 1);
}
81
/**
    @brief Searches the next quotation mark that is not escaped by a backslash, beginning at position @a start.
    @param str The string
    @param start The first position to look at
    @return The position of the next quotation mark (@c std::string::npos if there is none)

    A quotation mark counts as escaped if it is preceded by an odd number of backslashes.
*/
size_t getNextQuote(const std::string& str, size_t start)
{
    for (size_t candidate = str.find('"', start);
         candidate != std::string::npos;
         candidate = str.find('"', candidate + 1))
    {
        // Count the uninterrupted run of backslashes directly in front of the candidate
        size_t escapes = 0;
        while (escapes < candidate && str[candidate - 1 - escapes] == '\\')
            ++escapes;

        // An even number of backslashes escape each other, so the quote itself is real
        if ((escapes % 2) == 0)
            return candidate;
    }

    return std::string::npos;
}
106
107	/**
108	@brief Returns true if pos is between two quotation marks.
109	@param str The string
110	@param pos The position to check
111	@return True if pos is between two quotation marks
112	*/
113	bool isBetweenQuotes(const std::string& str, size_t pos)
114	{
115	if (pos == std::string::npos)
116	return false;
117
118	size_t quotecount = 0;
119	size_t quote = static_cast<size_t>(-1);
120	while ((quote = getNextQuote(str, quote + 1)) < pos)
121	{
122	if (quote == pos)
123	return false;
124	quotecount++;
125	}
126
127	if (quote == std::string::npos)
128	return false;
129
130	return ((quotecount % 2) == 1);
131	}
132
133	/// Returns true if the string contains something like '..."between quotaton marks"...'.
134	bool hasStringBetweenQuotes(const std::string& str)
135	{
136	size_t pos1 = getNextQuote(str, 0);
137	size_t pos2 = getNextQuote(str, pos1 + 1);
138	return (pos1 != std::string::npos && pos2 != std::string::npos && pos2 > pos1 + 1);
139	}
140
141	/// If the string contains something like '..."between quotaton marks"...' then 'between quotaton marks' gets returned, otherwise "".
142	std::string getStringBetweenQuotes(const std::string& str)
143	{
144	size_t pos1 = getNextQuote(str, 0);
145	size_t pos2 = getNextQuote(str, pos1 + 1);
146	if (pos1 != std::string::npos && pos2 != std::string::npos)
147	return str.substr(pos1, pos2 - pos1 + 1);
148	else
149	return "";
150	}
151
/**
    @brief Removes enclosing quotation marks if available (including whitespaces at the outside of the quotation marks).
    @param str The string
    @return The stripped string without quotation marks. The unchanged string is returned if the first or the
            last non-whitespace char is not a quotation mark, or if both are the very same quotation mark.

    Only blanks, tabs and line breaks are accepted outside of the quotation marks. Backslashes in front of
    a quotation mark are not considered.
*/
std::string stripEnclosingQuotes(const std::string& str)
{
    // The first non-whitespace char must be the opening quotation mark
    const size_t start = str.find_first_not_of(" \t\n");
    if (start == std::string::npos || str[start] != '"')
        return str;

    // The last non-whitespace char must be a different, closing quotation mark
    const size_t end = str.find_last_not_of(" \t\n");
    if (end <= start || str[end] != '"')
        return str;

    return str.substr(start + 1, end - start - 1);
}
190
/**
    @brief Removes enclosing braces '{' and '}' (the braces must be exactly on the beginning and the end of the string).
    @param str The string
    @return The stripped string without braces

    Braces are removed repeatedly, as long as the remaining string still starts with '{' and ends with '}'.
*/
std::string stripEnclosingBraces(const std::string& str)
{
    const size_t length = str.size();

    // Number of brace pairs that can be peeled off from the outside
    size_t layers = 0;
    while (length - 2 * layers >= 2 && str[layers] == '{' && str[length - 1 - layers] == '}')
        ++layers;

    return str.substr(layers, length - 2 * layers);
}
204
/**
    @brief Determines if a string is a comment (starts with a comment-symbol).
    @param str The string
    @return True if the first non-whitespace chars of the string form a comment-symbol

    A comment is defined by a leading '#', '%', ';' or '//':
     1) \#comment in script-language style
     2) %comment in matlab style
     3) ;comment in unreal tournament config-file style
     4) //comment in code style

    Whitespaces (blanks, tabs and line breaks) are ignored, also between the two slashes of '//'.
*/
bool isComment(const std::string& str)
{
    // Whitespaces are disturbing, so look at the first char that is not a whitespace
    const size_t first = str.find_first_not_of(" \t\n");
    if (first == std::string::npos)
        return false;

    switch (str[first])
    {
        case '#':
        case '%':
        case ';':
            return true;
        case '/':
        {
            // A single slash is no comment, the next non-whitespace char must be a slash as well
            const size_t second = str.find_first_not_of(" \t\n", first + 1);
            return (second != std::string::npos && str[second] == '/');
        }
        default:
            return false;
    }
}
233
/// Determines if a string is empty (contains nothing but blanks, tabs and line breaks).
bool isEmpty(const std::string& str)
{
    // No need to copy and strip the string, it's enough to search for a char that is not a whitespace
    return str.find_first_not_of(" \t\n") == std::string::npos;
}
239
/**
    @brief Determines if a string contains only numbers and maximal one '.'.
    @param str The string
    @return True if every char is a digit between '0' and '9', except for at most one '.'

    Signs, exponents and whitespaces are not accepted. An empty string and a single "." contain
    no forbidden char and hence return true.
*/
bool isNumeric(const std::string& str)
{
    bool foundPoint = false;

    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        if ((*it) >= '0' && (*it) <= '9')
            continue;

        // The only accepted non-digit is the first decimal point
        if ((*it) == '.' && !foundPoint)
            foundPoint = true;
        else
            return false;
    }

    return true;
}
258
/**
    @brief Escapes special chars of the given string with a backslash. Existing backslashes will be doubled.

    Line breaks, tabs, quotation marks and other special chars are replaced by a human readable
    sequence of two chars: a backslash followed by a symbol. So for example "\n" will be converted to
    "\\" + "n". The handled chars are: backslash, \\n, \\t, \\v, \\b, \\r, \\f, \\a, the quotation mark and \\0.
    All other chars are copied unchanged.

    This is usually used when a string is written to a file.

    @see removeSlashes
*/
std::string addSlashes(const std::string& str)
{
    std::string escaped;
    escaped.reserve(str.size() * 2); // worst case: every char gets escaped

    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        char symbol = 0;
        switch (*it)
        {
            case '\\': symbol = '\\'; break;
            case '\n': symbol = 'n';  break;
            case '\t': symbol = 't';  break;
            case '\v': symbol = 'v';  break;
            case '\b': symbol = 'b';  break;
            case '\r': symbol = 'r';  break;
            case '\f': symbol = 'f';  break;
            case '\a': symbol = 'a';  break;
            case '"':  symbol = '"';  break;
            case '\0': symbol = '0';  break;
            default:
                // Ordinary char: copy it and proceed with the next one
                escaped += *it;
                continue;
        }
        escaped += '\\';
        escaped += symbol;
    }

    return escaped;
}
296
/**
    @brief Reverts the escaping of special chars in the given string. Double backslashes are interpreted as one backslash.

    A backslash followed by one of the symbols backslash, n, t, v, b, r, f, a, quotation mark or 0 is replaced
    by the char it stands for (for example "\\" + "n" becomes a line break or "\n"). A backslash followed
    by any other char is dropped while the following char is kept. A single backslash at the very end of the
    string is copied unchanged, as well as strings with at most one char.

    This is usually used when reading a string from a file.

    @see addSlashes
*/
std::string removeSlashes(const std::string& str)
{
    const size_t length = str.size();
    if (length <= 1)
        return str;

    std::string unescaped;
    unescaped.reserve(length);

    size_t index = 0;
    // Stop one char in front of the end because an escape sequence needs two chars
    while (index + 1 < length)
    {
        const char current = str[index];
        if (current != '\\')
        {
            unescaped += current;
            ++index;
            continue;
        }

        char decoded = 0;
        switch (str[index + 1])
        {
            case '\\': decoded = '\\'; break;
            case 'n':  decoded = '\n'; break;
            case 't':  decoded = '\t'; break;
            case 'v':  decoded = '\v'; break;
            case 'b':  decoded = '\b'; break;
            case 'r':  decoded = '\r'; break;
            case 'f':  decoded = '\f'; break;
            case 'a':  decoded = '\a'; break;
            case '"':  decoded = '"';  break;
            case '0':  decoded = '\0'; break;
            default:
                // Unknown sequence: drop the backslash, the next char is handled by the next iteration
                ++index;
                continue;
        }
        unescaped += decoded;
        index += 2;
    }

    // The last char is not part of an escape sequence, copy it
    if (index < length)
        unescaped += str[index];

    return unescaped;
}
344
/**
    @brief Replaces each uppercase char with its lowercase equivalent.
    @param str Pointer to the string that gets modified in place (must not be NULL)

    The conversion is done char by char with std::tolower.
*/
void lowercase(std::string* str)
{
    for (std::string::iterator it = str->begin(); it != str->end(); ++it)
    {
        // std::tolower requires a value representable as unsigned char, a negative char would be undefined behaviour
        *it = static_cast<char>(std::tolower(static_cast<unsigned char>(*it)));
    }
}
353
354	/// Returns a copy of the given string where all chars are converted to lowercase.
355	std::string getLowercase(const std::string& str)
356	{
357	std::string output(str);
358	lowercase(&output);
359	return output;
360	}
361
/**
    @brief Replaces each lowercase char with its uppercase equivalent.
    @param str Pointer to the string that gets modified in place (must not be NULL)

    The conversion is done char by char with std::toupper.
*/
void uppercase(std::string* str)
{
    for (std::string::iterator it = str->begin(); it != str->end(); ++it)
    {
        // std::toupper requires a value representable as unsigned char, a negative char would be undefined behaviour
        *it = static_cast<char>(std::toupper(static_cast<unsigned char>(*it)));
    }
}
370
371	/// Returns a copy of the given string where all chars are converted to uppercase.
372	std::string getUppercase(const std::string& str)
373	{
374	std::string output(str);
375	uppercase(&output);
376	return output;
377	}
378
/**
    @brief Compares two strings ignoring different casing.
    @param s1 First string
    @param s2 Second string
    @return s1 == s2 -> returns 0 / s1 < s2 -> returns -1 / s1 > s2 -> returns 1

    The strings are compared char by char after converting each char with std::toupper. If one string is
    a prefix of the other, the shorter string is considered smaller.
*/
int nocaseCmp(const std::string& s1, const std::string& s2)
{
    std::string::const_iterator it1 = s1.begin();
    std::string::const_iterator it2 = s2.begin();

    // Stop when either string's end has been reached
    for (; it1 != s1.end() && it2 != s2.end(); ++it1, ++it2)
    {
        // std::toupper requires a value representable as unsigned char
        const int upper1 = std::toupper(static_cast<unsigned char>(*it1));
        const int upper2 = std::toupper(static_cast<unsigned char>(*it2));

        if (upper1 != upper2) // letters differ?
            return (upper1 < upper2) ? -1 : 1;
    }

    // All compared chars are equal, so the lengths decide
    const size_t size1 = s1.size();
    const size_t size2 = s2.size();
    if (size1 == size2)
        return 0;
    return (size1 < size2) ? -1 : 1;
}
404
405
/**
    @brief Compares the first @a len chars of two strings ignoring different casing.
    @param s1 First string
    @param s2 Second string
    @param len Maximal number of chars to compare
    @return 0 if no difference was found / -1 if the first differing char is smaller in s1 / 1 if it is smaller in s2

    The strings are compared char by char after converting each char with std::toupper. The comparison stops
    after @a len chars or as soon as the end of either string is reached. In both cases 0 is returned, which
    means that a string compares equal to each of its prefixes, even if it is shorter than @a len.
*/
int nocaseCmp(const std::string& s1, const std::string& s2, size_t len)
{
    std::string::const_iterator it1 = s1.begin();
    std::string::const_iterator it2 = s2.begin();

    // Stop when either string's end has been reached or len chars were compared
    for (size_t compared = 0; compared < len && it1 != s1.end() && it2 != s2.end(); ++compared, ++it1, ++it2)
    {
        // std::toupper requires a value representable as unsigned char
        const int upper1 = std::toupper(static_cast<unsigned char>(*it1));
        const int upper2 = std::toupper(static_cast<unsigned char>(*it2));

        if (upper1 != upper2) // letters differ?
            return (upper1 < upper2) ? -1 : 1;
    }

    return 0;
}
431
432	/// Returns true if the string contains a comment, introduced by #, %, ; or //.
433	bool hasComment(const std::string& str)
434	{
435	return (getCommentPosition(str) != std::string::npos);
436	}
437
438	/// If the string contains a comment, the comment gets returned (including the comment symbol), an empty string otherwise.
439	std::string getComment(const std::string& str)
440	{
441	return str.substr(getCommentPosition(str));
442	}
443
444	/// If the string contains a comment, the position of the comment-symbol gets returned, @c std::string::npos otherwise.
445	size_t getCommentPosition(const std::string& str)
446	{
447	return getNextCommentPosition(str, 0);
448	}
449
/**
    @brief Returns the position of the next comment, starting with @a start.
    @param str The string
    @param start The first position to look at
    @return The first position >= @a start from which on the rest of the string is a comment
            (@c std::string::npos if there is none)

    The rest of the string is a comment if its first non-whitespace chars are '#', '%', ';' or '//' (blanks,
    tabs and line breaks are ignored, also between the two slashes). Because leading whitespaces are
    ignored, the returned position may point to a whitespace in front of the comment-symbol instead of
    the symbol itself. Quotation marks are not considered.
*/
size_t getNextCommentPosition(const std::string& str, size_t start)
{
    size_t pos = start;
    while (pos < str.size())
    {
        // The first non-whitespace char at or behind pos decides whether a comment starts here
        const size_t first = str.find_first_not_of(" \t\n", pos);
        if (first == std::string::npos)
            break; // only whitespaces left

        const char symbol = str[first];
        if (symbol == '#' || symbol == '%' || symbol == ';')
            return pos;

        if (symbol == '/')
        {
            // A slash only introduces a comment if the next non-whitespace char is a slash as well
            const size_t second = str.find_first_not_of(" \t\n", first + 1);
            if (second != std::string::npos && str[second] == '/')
                return pos;
        }

        // Every position up to 'first' sees the same non-comment char, so continue right behind it
        pos = first + 1;
    }

    return std::string::npos;
}
463
/**
    @brief Replaces individual characters
    @param str String to be manipulated (gets modified in place)
    @param target Character to be replaced
    @param replacement Replacement character
    @return Number of replacements
*/
size_t replaceCharacters(std::string& str, char target, char replacement)
{
    size_t replaced = 0;
    for (std::string::iterator it = str.begin(); it != str.end(); ++it)
    {
        if (*it != target)
            continue;

        *it = replacement;
        ++replaced;
    }
    return replaced;
}
484
/**
    @brief Calculates the Levenshtein distance between two strings.
    @param str1 First string
    @param str2 Second string
    @return The number of transformations

    The Levenshtein distance is defined by the number of transformations needed to convert str1
    into str2. Possible transformations are substituted, added, or removed characters.

    Each row of the distance matrix depends only on the row above it, hence only two rows are kept
    in memory instead of the whole matrix.
*/
unsigned int getLevenshteinDistance(const std::string& str1, const std::string& str2)
{
    const size_t cols = str1.size() + 1;
    const size_t rows = str2.size() + 1;

    std::vector<unsigned int> previous(cols); // row r-1 of the matrix
    std::vector<unsigned int> current(cols);  // row r of the matrix

    // First row: converting a prefix of str1 into an empty string requires one removal per char
    for (size_t c = 0; c < cols; ++c)
        previous[c] = static_cast<unsigned int>(c);

    for (size_t r = 1; r < rows; ++r)
    {
        // First column: converting an empty string into a prefix of str2 requires one addition per char
        current[0] = static_cast<unsigned int>(r);

        for (size_t c = 1; c < cols; ++c)
        {
            const unsigned int above = previous[c] + 1;
            const unsigned int left = current[c - 1] + 1;
            const unsigned int diagonal = previous[c - 1] + ((str1[c - 1] != str2[r - 1]) ? 1u : 0u);
            current[c] = std::min(std::min(above, left), diagonal);
        }

        previous.swap(current);
    }

    // After the last swap, 'previous' contains the last row of the matrix
    return previous[cols - 1];
}
518	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: