Line data Source code
1 : /*
2 : * Copyright (c) 2010-2015: G-CSC, Goethe University Frankfurt
3 : * Author: Andreas Vogel
4 : *
5 : * This file is part of UG4.
6 : *
7 : * UG4 is free software: you can redistribute it and/or modify it under the
8 : * terms of the GNU Lesser General Public License version 3 (as published by the
9 : * Free Software Foundation) with the following additional attribution
10 : * requirements (according to LGPL/GPL v3 §7):
11 : *
12 : * (1) The following notice must be displayed in the Appropriate Legal Notices
13 : * of covered and combined works: "Based on UG4 (www.ug4.org/license)".
14 : *
15 : * (2) The following notice must be displayed at a prominent place in the
16 : * terminal output of covered works: "Based on UG4 (www.ug4.org/license)".
17 : *
18 : * (3) The following bibliography is recommended for citation and must be
19 : * preserved in all covered files:
20 : * "Reiter, S., Vogel, A., Heppner, I., Rupp, M., and Wittum, G. A massively
21 : * parallel geometric multigrid solver on hierarchically distributed grids.
22 : * Computing and visualization in science 16, 4 (2013), 151-164"
23 : * "Vogel, A., Reiter, S., Rupp, M., Nägel, A., and Wittum, G. UG4 -- a novel
24 : * flexible software system for simulating pde based models on high performance
25 : * computers. Computing and visualization in science 16, 4 (2013), 165-179"
26 : *
27 : * This program is distributed in the hope that it will be useful,
28 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 : * GNU Lesser General Public License for more details.
31 : */
32 :
33 : #ifndef __H__COMMON_STRING_UTIL__
34 : #define __H__COMMON_STRING_UTIL__
35 :
36 : #include <string>
37 : #include <vector>
38 : #include <algorithm>
39 : #include <sstream>
40 : #include <cctype>
41 : #include <cstdio>
42 : #include <string>
43 : #include <cassert>
44 :
45 : #include "hash_function.h"
46 : #include "common/ug_config.h"
47 : #include "stringify.h"
48 :
49 : namespace ug{
50 :
51 : /**
52 : * \defgroup ugbase_common_util_strings String Utilities
53 : * \ingroup ugbase_common_util
54 : * \{
55 : */
56 :
57 : /**
58 : * \brief splits the string into parts based on a separating char
59 : * \details places the string parts, separated by the delimiter char, into \c vToken
60 : * \note any prior content of \c vToken will get deleted
61 : * \param[in] str original string
62 : * \param[in,out] vToken tokenized parts
63 : * \param[in] delimiter char used as separator
64 : */
65 : UG_API void TokenizeString( const std::string& str, std::vector<std::string>& vToken,
66 : const char delimiter=',' );
67 :
68 : /**
69 : * \brief splits the string into parts based on a separating character
70 : * \details returns the string parts using a separator char in order to indicate parts
71 : * \param[in] str original string
72 : * \param[in] delimiter char used as separator
73 : * \return tokenized parts
74 : */
75 : UG_API std::vector<std::string> TokenizeString( const std::string& str,
76 : const char delimiter=',' );
77 :
78 : /**
79 : * \brief splits the string into parts based on a separating character
80 : * \details returns the string parts using a separator char in order to indicate parts
81 : * \param[in] str original string
82 : * \param[in] delimiter char used as separator
83 : * \return tokenized parts
84 : */
85 : UG_API std::vector<std::string> TokenizeString( const char* str,
86 : const char delimiter=',' );
87 :
88 : /**
89 : * \brief splits the string into trimmed parts based on a separating char
90 : * \details places the generated tokens in the given vector.
91 : * \note any prior content of \c vToken will get deleted
92 : * \param[in] str original string
93 : * \param[in,out] vToken tokenized parts
94 : * \param[in] delimiter char used as separator
95 : */
96 : UG_API void TokenizeTrimString(const std::string& str, std::vector<std::string>& vToken,
97 : const char delimiter=',');
98 :
99 : /**
100 : * \brief splits the string into trimmed parts based on a separating char
101 : * \details returns the string parts separated by \c delimiter and trims all parts
102 : * \param[in] str original string
103 : * \param[in] delimiter char used as separator
104 : * \return tokenized and trimmed parts
105 : */
106 : UG_API std::vector<std::string> TokenizeTrimString( const std::string& str,
107 : const char delimiter=',' );
108 :
109 : /**
110 : * \brief removes all white space from a string, also within the string
111 : * \param[in,out] string the string to modify
112 : */
113 : UG_API void RemoveWhitespaceFromString(std::string& string);
114 :
115 : /**
116 : * \brief removes all white space from the front and end of a string
117 : * \param[in] str the string to trim (left unchanged)
118 : * \return the modified string
119 : */
120 : UG_API std::string TrimString(const std::string& str);
121 :
122 : /**
123 : * \brief creates a truncated string and may add truncation symbol at end
124 : * \param[in] str the string to truncate
125 : * \param[in] totalSize the total size of snippet
126 : * \param[in] replaceLast the number of last chars to be replaced by symbol
127 : * \param[in] replace the replace symbol
128 : * \return the modified string
129 : */
130 : UG_API std::string SnipString(const std::string& str, size_t totalSize,
131 : size_t replaceLast = 0, const char replace = '.');
132 :
133 : /**
134 : * \brief creates a truncated string and may add truncation symbol at front
135 : * \param[in] str the string to truncate
136 : * \param[in] totalSize the total size of snippet
137 : * \param[in] replaceFront the number of first chars to be replaced by symbol
138 : * \param[in] replace the replace symbol
139 : * \return the modified string
140 : */
141 : UG_API std::string SnipStringFront(const std::string& str, size_t totalSize,
142 : size_t replaceFront = 0, const char replace = '.');
143 :
144 : /**
145 : * \brief returns the number of digits of an integer (expressed with base 10)
146 : * \details Determines the number of digits for the passed base-10 number.
147 : * A minus sign is ignored.
148 : * \param[in] n number to count the number of digits
149 : * \returns number of digits
150 : */
151 : UG_API int NumberOfDigits(int n);
152 :
153 : /**
154 : * \brief appends a counter number to a string
155 : * \details This functions appends to a string a counter preceded by some
156 : * indicator.
157 : * If a \c maxCounter is passed, the field is adjusted to the maximum needed
158 : * width and additional space is filled by zeros.
159 : * \param[in,out] str string to append the counter
160 : * \param[in] indicator some string preceding the counter
161 : * \param[in] counter counter added
162 : * \param[in] maxCounter maximum counter to be added
163 : */
164 : UG_API void AppendCounterToString( std::string& str, std::string indicator,
165 : int counter, int maxCounter=-1 );
166 :
167 : /**
168 : * \brief padding a string with spaces to predefined length
169 : * \details Appends spaces to the given string so that the resulting string has
170 : * a predefined length of \c totalLength
171 : * \param[in] str string to be padded
172 : * \param[in] totalLength desired total length of the string
173 : * \returns padded string
174 : */
175 : UG_API std::string AppendSpacesToString(std::string& str, int totalLength);
176 :
177 : /**
178 : * \brief creates a hash key from a string value
179 : * \details this template function creates a hash key for a string value
180 : * \param[in] str string to create hash for
181 : * \returns hash key for given \c str
182 : * \note Implementation is copied from some book or website. Can't remember... (sreiter)
183 : */
184 : template <> UG_API size_t hash_key(const std::string& str);
185 :
186 : /**
187 : * \brief determines last occurrence of '/' or '\'
188 : * \param[in] str string to look in
189 : * \returns position of the last occurrence of '/' or '\' in \c str;
190 : * returns `std::string::npos` if none are found
191 : */
192 : std::string::size_type GetDirectorySeperatorPos(const std::string &str);
193 :
194 : /**
195 : * \brief returns best guess of a filename from a given string
196 : * \details returns the part of the string after the last '/' or '\' character
197 : * (e.g. `/sw/bla.txt` -> `bla.txt`)
198 : * \param[in] str to retrieve the filename from
199 : * \return best guess of the file name from given path; if no guess can be made
200 : * the complete string is returned
201 : */
202 : UG_API std::string FilenameWithoutPath(const std::string &str);
203 :
204 : /**
205 : * \brief returns best guess of a path without a filename from a given string
206 : * \details returns the part of the string before the last '/' or '\' character
207 : * (e.g. `/sw/bla.txt` -> `/sw/`)
208 : * \param[in] str to retrieve the path from
209 : * \return best guess of the path from given string; if no guess can be made
210 : * '.' is returned
211 : */
212 : UG_API std::string PathFromFilename(const std::string &str);
213 :
214 : /**
215 : * \brief returns the best guess of the filename from given string
216 : * \details returns the part of the string without path and extension
217 : * (e.g. `/sw/bla.txt` -> `bla`)
218 : * \param[in] str to retrieve filename from
219 : * \returns best guess of the filename without path and extension; if no guess
220 : * can be made, the whole string is returned
221 : */
222 : UG_API std::string FilenameWithoutExtension(std::string str);
223 :
224 : /**
225 : * \brief returns the best guess of the filename and path from given string
226 : * \details returns the part of the string without extension
227 : * (e.g. `/sw/bla.txt` -> `/sw/bla`)
228 : * \param[in] str to retrieve filename from
229 : * \returns best guess of the filename without extension; if no guess
230 : * can be made, the whole string is returned
231 : */
232 : UG_API std::string FilenameAndPathWithoutExtension(std::string str);
233 :
234 : /**
235 : * \brief returns the best guess of a file extensions from given string
236 : * \details returns the extension of the filename (e.g. `/sw/bla.txt` -> `txt`).
237 : * Everything after the last dot ('.') of \c str is considered the file extension.
238 : * \param[in] str to retrieve file extension from
239 : * \returns best guess of the file extension; empty string if no guess can be made
240 : */
241 : UG_API std::string GetFilenameExtension(const std::string &str);
242 :
243 : /**
244 : * \brief Substitutes substrings of given string with other substrings
245 : * \details Replaces each substring of \c target string that is equal to \c oldstr
246 : * with \c newstr
247 : * \param[in] target string to modify
248 : * \param[in] oldstr string to replace
249 : * \param[in] newstr replacement string
250 : * \return a copy of the specified \c target string where all occurrences of
251 : * \c oldstr are replaced with \c newstr.
252 : */
253 : UG_API std::string ReplaceAll( std::string target, const std::string& oldstr,
254 : const std::string& newstr );
255 :
256 : /**
257 : * \brief checks whether a given string starts with a specified substring
258 : * \details Checks whether \c str starts with \c search.
259 : * \param[in] str string
260 : * \param[in] search string to search for
261 : * \return \c true if \c str starts with \c search; \c false otherwise
262 : */
263 : UG_API bool StartsWith(const std::string& str, const std::string& search);
264 :
265 : /**
266 : * \brief Checks whether given string contains a specified substring
267 : * \details Checks whether \c str contains \c search.
268 : * \param[in] str string
269 : * \param[in] search string to search for
270 : * \return \c true if \c str contains \c search; \c false otherwise
271 : */
272 : UG_API bool Contains(const std::string& str, const std::string& search);
273 :
274 : /**
275 : * \brief Returns a lower case version of the specified string.
276 : * \note this function does not support custom locales.
277 : * Thus, only ascii strings shall be specified.
278 : * \param[in] str string to convert
279 : * \return a lower case version of the specified string
280 : */
281 : UG_API std::string ToLower(std::string str);
282 :
283 : /**
284 : * \brief Returns an upper case version of the specified string.
285 : * \note this function does not support custom locales.
286 : * Thus, only ascii strings shall be specified.
287 : * \param[in] str string to convert
288 : * \return an upper case version of the specified string
289 : */
290 : UG_API std::string ToUpper(std::string str);
291 :
292 : /**
293 : * \brief Finds and returns all duplicate elements of given vector
294 : * \details Searches for duplicates in the specified vector and returns a vector
295 : * containing all elements that occur multiple times.
296 : * \param[in] vec vector to analyze
297 : * \return a vector containing all elements that occur multiple times
298 : */
299 : UG_API std::vector<std::string> FindDuplicates(const std::vector<std::string>& vec);
300 :
301 : /**
302 : * \brief Builds a string with specified repetitions of given character
303 : * \param[in] c the character
304 : * \param[in] nr number of times to repeat \c c
305 : * \return string with \c nr times \c c
306 : */
307 : UG_API std::string repeat(char c, int nr);
308 :
309 : /**
310 : * \brief Calculate Levenshtein Distance of two strings
311 : * \details Levenshtein distance calculates the minimum number of edits to
312 : * transform one string into the other with allowable edit operations
313 : * insertion, deletion, or substitution of a single character.
314 : * \note taken from http://en.wikipedia.org/wiki/Levenshtein_distance
315 : * (check copyright or recreate!)
316 : * \param[in] s1 string 1
317 : * \param[in] s2 string 2
318 : * \return minimum number of edits needed to transform one string into the other
319 : */
320 : UG_API size_t LevenshteinDistance( const std::string& s1, const std::string& s2 );
321 :
322 :
323 : /**
324 : * \brief get some specified lines of a file
325 : * \param[in] filename file name
326 : * \param[in] fromline line number to start from
327 : * \param[in] toline line number to stop at
328 : * \param[in] includeLineNumbers if true, add the line number in front of each
329 : * line and a tab.
330 : * \return lines fromline to toline of file filename.
331 : */
332 : UG_API std::string GetFileLines( const char *filename, size_t fromline, size_t toline,
333 : bool includeLineNumbers );
334 :
335 : /**
336 : * \brief get a specific line of a file
337 : * \param filename file name
338 : * \param line line number to extract
339 : * \return the line of the file
340 : */
341 : UG_API std::string GetFileLine(const char *filename, size_t line);
342 :
343 : /**
344 : * \brief checks whether second string is longer than first string
345 : * \details This can be used to get the longest string in a vector of strings:
346 : *
347 : * int maxLength = (*max_element(vecStr.begin(), vecStr.end(), IsLonger)).size();
348 : * \param[in] a
349 : * \param[in] b
350 : * \return \c true if \c b is longer than \c a; \c false otherwise
351 : */
352 : UG_API bool IsLonger(const std::string &a, const std::string &b);
353 :
354 :
/**
 * \brief Converts an object supporting '`std::cout << obj`' to a string
 * \tparam T type of the object; must support `std::ostream operator<<()`
 * \param[in] t object to convert to string
 * \return the text produced by streaming \c t with operator<< into a string stream
 */
template<typename T>
inline std::string ToString(const T &t)
{
	// stream the object into an in-memory buffer ...
	std::stringstream buffer;
	buffer << t;

	// ... and hand back whatever operator<< has written
	std::string text = buffer.str();
	return text;
}
368 :
369 : /**
370 : * \brief returns a string suitable for XML files
371 : * this functions escapes the characters <, >, ', " and &
372 : * @sa http://www.hdfgroup.org/HDF5/XML/xml_escape_chars.htm
373 : * @param[in] s
374 : * @return escaped string
375 : */
376 : UG_API std::string XMLStringEscape(std::string s);
377 :
378 : /**
379 : * \brief wildcard matches like bla.* or *.bla or t?st
380 : * @param[in] str a string
381 : * @param[in] pattern a pattern with wildcards * or ?
382 : * @return true if match otherwise false
383 : */
384 : UG_API bool WildcardMatch(const char *str, const char *pattern);
385 :
386 : /**
387 : * this function replaces XML special characters with their escaped versions:
388 : * & -> &amp;
389 : * " -> &quot;
390 : * ' -> &apos;
391 : * < -> &lt;
392 : * > -> &gt;
393 : * @param s a normal text
394 : * @return a text where special XML characters are escaped
395 : */
396 : UG_API std::string XMLStringEscape(std::string s);
397 :
398 : // end group ugbase_common_util_strings
399 : /// \}
400 :
401 : /**
402 : * \brief returns a "shifted" string
403 : * one-line strings are not shifted
404 : * two line strings are shifted like this: input:
405 : * "MyLine1\nMyLine2\n"
406 : * Output:
407 : * "\n | MyLine1\n | MyLine2"
408 : * note that they get an additional \n at the beginning, and
409 : * doubled \n and \n at the end are removed, so you can use
410 : * ConfigShift like this
411 : * \code
412 : * strstr << "MySubcomponent = " << ConfigShift(comp1.config_string()) << "\n"
413 : * "MySubcomponent2 = " << ConfigShift(comp2.config_string()) << "\n"
414 : * \endcode
415 : * Depending on comp1.config_string(), this results in
416 : * "MySubcomponent1 = sub1 ... " or "MySubcomponent =\n | sub1.1\n | sub1.2 ..."
417 : * @param[in] s
418 : * @return shifted string
419 : */
420 : UG_API std::string ConfigShift(std::string s);
421 :
422 : template<typename T>
423 : inline std::string OstreamShift(const T &t)
424 : {
425 : std::stringstream ss; ss << t;
426 : return ConfigShift(ss.str());
427 : }
428 :
429 : /**
430 : * Helper function to display byte sizes like 2411724 => 2,3 MB
431 : * @param[in] s size in bytes
432 : * @param[in] length if != 0, fixes the returned string length to this length (for tables etc.).
433 : * @return string describing the size s=1024 -> 1 kb
434 : */
435 : std::string GetBytesSizeString(size_t s, int length=0);
436 :
/**
 * \brief textual representation of a boolean
 * \param[in] b value to describe
 * \return "TRUE" if \c b is set, "FALSE" otherwise
 */
inline const char *TrueFalseString(bool b)
{
	if(b)
		return "TRUE";
	return "FALSE";
}
441 :
/**
 * \brief textual representation of a switch state
 * \param[in] b state to describe
 * \return "ON" if \c b is set, "OFF" otherwise
 */
inline const char *OnOffString(bool b)
{
	if(b)
		return "ON";
	return "OFF";
}
446 :
447 :
/**
 * \brief formats values printf-style and returns the result as a string
 * \details Calls `std::snprintf` twice: once without a buffer to learn the
 * required length, then again to write into a buffer of exactly that size.
 * The returned string has the full reported length, so characters following
 * an embedded '\0' (e.g. produced by `%c`) are preserved.
 * \tparam Args types of the values consumed by \c format
 * \param[in] format printf-style format string
 * \param[in] args values referenced by the conversion specifiers of \c format
 * \return the formatted text; an empty string if `std::snprintf` reports an
 * error (debug builds assert in that case)
 */
template< typename... Args >
inline std::string GetStringPrintf( const char* format, Args... args )
{
	// first pass: with a null buffer of size 0 only the required length is computed
	const int length = std::snprintf( nullptr, 0, format, args... );
	assert( length >= 0 );

	// the assert vanishes with NDEBUG; never size a buffer from a negative length
	if( length < 0 )
		return std::string();

	// the vector owns the buffer, so nothing leaks if string construction throws
	std::vector<char> buf( static_cast<std::size_t>( length ) + 1 );
	std::snprintf( buf.data(), buf.size(), format, args... );

	// the length is known: do not rely on the first '\0' to find the end
	return std::string( buf.data(), static_cast<std::size_t>( length ) );
}
461 :
462 : } // end namespace ug
463 :
464 : #endif /*__H__COMMON_STRING_UTIL__*/
|