LCOV - code coverage report
Current view: top level - ugbase/common/util - string_util.h (source / functions) Coverage Total Hit
Test: coverage.info Lines: 0.0 % 11 0
Test Date: 2025-09-21 23:31:46 Functions: 0.0 % 4 0

            Line data    Source code
       1              : /*
       2              :  * Copyright (c) 2010-2015:  G-CSC, Goethe University Frankfurt
       3              :  * Author: Andreas Vogel
       4              :  * 
       5              :  * This file is part of UG4.
       6              :  * 
       7              :  * UG4 is free software: you can redistribute it and/or modify it under the
       8              :  * terms of the GNU Lesser General Public License version 3 (as published by the
       9              :  * Free Software Foundation) with the following additional attribution
      10              :  * requirements (according to LGPL/GPL v3 §7):
      11              :  * 
      12              :  * (1) The following notice must be displayed in the Appropriate Legal Notices
      13              :  * of covered and combined works: "Based on UG4 (www.ug4.org/license)".
      14              :  * 
      15              :  * (2) The following notice must be displayed at a prominent place in the
      16              :  * terminal output of covered works: "Based on UG4 (www.ug4.org/license)".
      17              :  * 
      18              :  * (3) The following bibliography is recommended for citation and must be
      19              :  * preserved in all covered files:
      20              :  * "Reiter, S., Vogel, A., Heppner, I., Rupp, M., and Wittum, G. A massively
      21              :  *   parallel geometric multigrid solver on hierarchically distributed grids.
      22              :  *   Computing and visualization in science 16, 4 (2013), 151-164"
      23              :  * "Vogel, A., Reiter, S., Rupp, M., Nägel, A., and Wittum, G. UG4 -- a novel
      24              :  *   flexible software system for simulating pde based models on high performance
      25              :  *   computers. Computing and visualization in science 16, 4 (2013), 165-179"
      26              :  * 
      27              :  * This program is distributed in the hope that it will be useful,
      28              :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      29              :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
      30              :  * GNU Lesser General Public License for more details.
      31              :  */
      32              : 
      33              : #ifndef __H__COMMON_STRING_UTIL__
      34              : #define __H__COMMON_STRING_UTIL__
      35              : 
      36              : #include <string>
      37              : #include <vector>
      38              : #include <algorithm> 
      39              : #include <sstream>
      40              : #include <cctype>
      41              : #include <cstdio>
      42              : #include <string>
      43              : #include <cassert>
      44              : 
      45              : #include "hash_function.h"
      46              : #include "common/ug_config.h"
      47              : #include "stringify.h"
      48              : 
      49              : namespace ug{
      50              : 
      51              : /**
      52              :  * \defgroup ugbase_common_util_strings String Utilities
      53              :  * \ingroup ugbase_common_util
      54              :  * \{
      55              :  */
      56              : 
      57              : /**
      58              :  * \brief splits the string into parts based on a separating char
      59              :  * \details splits \c str at each occurrence of the separator char and stores the parts in \c vToken
      60              :  * \note any prior content of \c vToken will get deleted
      61              :  * \param[in]     str       original string
      62              :  * \param[in,out] vToken    tokenized parts
      63              :  * \param[in]     delimiter char used as separator
      64              :  */
      65              : UG_API void TokenizeString( const std::string& str, std::vector<std::string>& vToken, 
      66              :                             const char delimiter=',' );
      67              : 
      68              : /**
      69              :  * \brief splits the string into parts based on a separating character
      70              :  * \details returns the string parts using a separator char in order to indicate parts
      71              :  * \param[in] str       original string
      72              :  * \param[in] delimiter char used as separator
      73              :  * \return tokenized parts
      74              :  */
      75              : UG_API std::vector<std::string> TokenizeString( const std::string& str,
      76              :                                                 const char delimiter=',' );
      77              : 
      78              : /**
      79              :  * \brief splits the string into parts based on a separating character
      80              :  * \details returns the string parts using a separator char in order to indicate parts
      81              :  * \param[in] str       original string
      82              :  * \param[in] delimiter char used as separator
      83              :  * \return tokenized parts
      84              :  */
      85              : UG_API std::vector<std::string> TokenizeString( const char* str,
      86              :                                                 const char delimiter=',' );
      87              : 
      88              : /**
      89              :  * \brief splits the string into trimmed parts based on a separating char
      90              :  * \details places the generated tokens in the given vector.
      91              :  * \note any prior content of \c vToken will get deleted
      92              :  * \param[in]     str       original string
      93              :  * \param[in,out] vToken    tokenized parts
      94              :  * \param[in]     delimiter char used as separator
      95              :  */
      96              : UG_API void TokenizeTrimString(const std::string& str, std::vector<std::string>& vToken,
      97              :                                                            const char delimiter=',');
      98              : 
      99              : /**
     100              :  * \brief splits the string into trimmed parts based on a separating char
     101              :  * \details returns the string parts separated by \c delimiter and trims all parts
     102              :  * \param[in] str       original string
     103              :  * \param[in] delimiter char used as separator
     104              :  * \return tokenized and trimmed parts
     105              :  */
     106              : UG_API std::vector<std::string> TokenizeTrimString( const std::string& str, 
     107              :                                                     const char delimiter=',' );
     108              : 
     109              : /**
     110              :  * \brief removes all white space from a string, also within the string
     111              :  * \param[in,out] string the string to modify
     112              :  */
     113              : UG_API void RemoveWhitespaceFromString(std::string& string);
     114              : 
     115              : /**
     116              :  * \brief removes all white space from the front and end of a string
     117              :  * \param[in] str the string to trim
     118              :  * \return the modified string
     119              :  */
     120              : UG_API std::string TrimString(const std::string& str);
     121              : 
     122              : /**
     123              :  * \brief creates a truncated string and may add truncation symbol at end
     124              :  * \param[in] str               the string to truncate
     125              :  * \param[in] totalSize         the total size of snippet
     126              :  * \param[in] replaceLast       the number of last chars to be replaced by symbol
     127              :  * \param[in] replace           the replace symbol
     128              :  * \return the modified string
     129              :  */
     130              : UG_API std::string SnipString(const std::string& str, size_t totalSize,
     131              :                               size_t replaceLast = 0, const char replace = '.');
     132              : 
     133              : /**
     134              :  * \brief creates a truncated string and may add truncation symbol at front
     135              :  * \param[in] str               the string to truncate
     136              :  * \param[in] totalSize         the total size of snippet
     137              :  * \param[in] replaceFront      the number of first chars to be replaced by symbol
     138              :  * \param[in] replace           the replace symbol
     139              :  * \return the modified string
     140              :  */
     141              : UG_API std::string SnipStringFront(const std::string& str, size_t totalSize,
     142              :                                    size_t replaceFront = 0, const char replace = '.');
     143              : 
     144              : /**
     145              :  * \brief returns the number of digits of an integer (expressed with base 10)
     146              :  * \details Determines the number of digits for the passed base-10 number.
     147              :  *   A minus sign is ignored.
     148              :  * \param[in] n number to count the number of digits
     149              :  * \returns number of digits
     150              :  */
     151              : UG_API int NumberOfDigits(int n);
     152              : 
     153              : /**
     154              :  * \brief appends a counter number to a string
     155              :  * \details This function appends to a string a counter preceded by some
     156              :  *   indicator.
     157              :  *   If a \c maxCounter is passed, the field is adjusted to the maximum needed 
     158              :  *   width and additional space is filled by zeros.
     159              :  * \param[in,out] str        string to append the counter
     160              :  * \param[in]     indicator  some string preceding the counter
     161              :  * \param[in]     counter    counter added
     162              :  * \param[in]     maxCounter maximum counter to be added
     163              :  */
     164              : UG_API void AppendCounterToString( std::string& str, std::string indicator,
     165              :                                    int counter, int maxCounter=-1 );
     166              : 
     167              : /**
     168              :  * \brief padding a string with spaces to predefined length
     169              :  * \details Appends spaces to the given string so that the resulting string has
     170              :  *   a predefined length of \c totalLength
     171              :  * \param[in] str         string to be padded
     172              :  * \param[in] totalLength desired total length of the string
     173              :  * \returns padded string
     174              :  */
     175              : UG_API std::string AppendSpacesToString(std::string& str, int totalLength);
     176              : 
     177              : /**
     178              :  * \brief creates a hash key from a string value
     179              :  * \details this template function creates a hash key for a string value
     180              :  * \param[in] str string to create hash for
     181              :  * \returns hash key for given \c str
     182              :  * \note Implementation is copied from some book or website. Can't remember... (sreiter)
     183              :  */
     184              : template <> UG_API size_t hash_key(const std::string& str);
     185              : 
     186              : /**
     187              :  * \brief determines last occurrence of '/' or '\'
     188              :  * \param[in] str string to look in
     189              :  * \returns position of the last occurrence of '/' or '\' in \c str; 
     190              :  *   returns `std::string::npos` if none are found
     191              :  */
     192              : std::string::size_type GetDirectorySeperatorPos(const std::string &str);
     193              : 
     194              : /**
     195              :  * \brief returns best guess of a filename from a given string
     196              :  * \details returns the part of the string after the last '/' or '\' character 
     197              :  *   (e.g. `/sw/bla.txt` -> `bla.txt`)
     198              :  * \param[in] str to retrieve the filename from
     199              :  * \return best guess of the file name from given path; if no guess can be made
     200              :  *   the complete string is returned
     201              :  */
     202              : UG_API std::string FilenameWithoutPath(const std::string &str);
     203              : 
     204              : /**
     205              :  * \brief returns best guess of a path without a filename from a given string
     206              :  * \details returns the part of the string before the last '/' or '\' character 
     207              :  *   (e.g. `/sw/bla.txt` -> `/sw/`)
     208              :  * \param[in] str to retrieve the path from
     209              :  * \return best guess of the path from given string; if no guess can be made
     210              :  *   '.' is returned
     211              :  */
     212              : UG_API std::string PathFromFilename(const std::string &str);
     213              : 
     214              : /**
     215              :  * \brief returns the best guess of the filename from given string
     216              :  * \details returns the part of the string without path and extension
     217              :  *   (e.g. `/sw/bla.txt` -> `bla`)
     218              :  * \param[in] str to retrieve filename from
     219              :  * \returns best guess of the filename without path and extension; if no guess 
     220              :  *   can be made, the whole string is returned
     221              :  */
     222              : UG_API std::string FilenameWithoutExtension(std::string str);
     223              : 
     224              : /**
     225              :  * \brief returns the best guess of the filename and path from given string
     226              :  * \details returns the part of the string without extension
     227              :  *   (e.g. `/sw/bla.txt` -> `/sw/bla`)
     228              :  * \param[in] str to retrieve filename from
     229              :  * \returns best guess of the filename without extension; if no guess
     230              :  *   can be made, the whole string is returned
     231              :  */
     232              : UG_API std::string FilenameAndPathWithoutExtension(std::string str);
     233              : 
     234              : /**
     235              :  * \brief returns the best guess of a file extension from given string
     236              :  * \details returns the extension of the filename (e.g. `/sw/bla.txt` -> `txt`).
     237              :  *   Everything after the last dot ('.') of \c str is considered the file extension.
     238              :  * \param[in] str to retrieve file extension from
     239              :  * \returns best guess of the file extension; empty string if no guess can be made
     240              :  */
     241              : UG_API std::string GetFilenameExtension(const std::string &str);
     242              : 
     243              : /**
     244              :  * \brief Substitutes substrings of given string with other substrings
     245              :  * \details Replaces each substring of \c target string that is equal to \c oldstr 
     246              :  *   with \c newstr
     247              :  * \param[in] target string to modify
     248              :  * \param[in] oldstr string to replace
     249              :  * \param[in] newstr replacement string
     250              :  * \return a copy of the specified \c target string where all occurrences of
     251              :  *   \c oldstr are replaced with \c newstr.
     252              :  */
     253              : UG_API std::string ReplaceAll( std::string target, const std::string& oldstr, 
     254              :                                const std::string& newstr );
     255              : 
     256              : /**
     257              :  * \brief checks whether a given string starts with a specified substring
     258              :  * \details Checks whether \c str starts with \c search.
     259              :  * \param[in] str    string
     260              :  * \param[in] search string to search for
     261              :  * \return \c true if \c str starts with \c search; \c false otherwise
     262              :  */
     263              : UG_API bool StartsWith(const std::string& str, const std::string& search);
     264              : 
     265              : /**
     266              :  * \brief Checks whether given string contains a specified substring
     267              :  * \details Checks whether \c str contains \c search.
     268              :  * \param[in] str    string
     269              :  * \param[in] search string to search for
     270              :  * \return \c true if \c str contains \c search; \c false otherwise
     271              :  */
     272              : UG_API bool Contains(const std::string& str, const std::string& search);
     273              : 
     274              : /**
     275              :  * \brief Returns a lower case version of the specified string.
     276              :  * \note this function does not support custom locales.
     277              :  *   Thus, only ascii strings shall be specified.
     278              :  * \param[in] str string to convert
     279              :  * \return a lower case version of the specified string
     280              :  */
     281              : UG_API std::string ToLower(std::string str);
     282              : 
     283              : /**
     284              :  * \brief Returns an upper case version of the specified string.
     285              :  * \note this function does not support custom locales.
     286              :  *   Thus, only ascii strings shall be specified.
     287              :  * \param[in] str string to convert
     288              :  * \return an upper case version of the specified string
     289              :  */
     290              : UG_API std::string ToUpper(std::string str);
     291              : 
     292              : /**
     293              :  * \brief Finds and returns all duplicate elements of given vector
     294              :  * \details Searches for duplicates in the specified vector and returns a vector 
     295              :  *   containing all elements that occur multiple times.
     296              :  * \param[in] vec vector to analyze
     297              :  * \return a vector containing all elements that occur multiple times
     298              :  */
     299              : UG_API std::vector<std::string> FindDuplicates(const std::vector<std::string>& vec);
     300              : 
     301              : /**
     302              :  * \brief Builds a string with specified repetitions of given character
     303              :  * \param[in] c  the character
     304              :  * \param[in] nr number of times to repeat \c c
     305              :  * \return string with \c nr times \c c
     306              :  */
     307              : UG_API std::string repeat(char c, int nr);
     308              : 
     309              : /**
     310              :  * \brief Calculate Levenshtein Distance of two strings
     311              :  * \details Levenshtein distance calculates the minimum number of edits to 
     312              :  *   transform one string into the other with allowable edit operations 
     313              :  *   insertion, deletion, or substitution of a single character.
     314              :  * \note taken from http://en.wikipedia.org/wiki/Levenshtein_distance
     315              :  *   (check copyright or recreate!)
     316              :  * \param[in] s1 string 1
     317              :  * \param[in] s2 string 2
     318              :  * \return minimum number of edits needed to transform one string into the other
     319              :  */
     320              : UG_API size_t LevenshteinDistance( const std::string& s1, const std::string& s2 );
     321              : 
     322              : 
     323              : /**
     324              :  * \brief get some specified lines of a file
     325              :  * \param[in] filename           file name
     326              :  * \param[in] fromline           line number to start from
     327              :  * \param[in] toline             line number to stop at
     328              :  * \param[in] includeLineNumbers if true, add the line number in front of each 
     329              :  *   line and a tab.
     330              :  * \return lines fromline to toline of file filename.
     331              :  */
     332              : UG_API std::string GetFileLines( const char *filename, size_t fromline, size_t toline, 
     333              :                                  bool includeLineNumbers );
     334              : 
     335              : /**
     336              :  * \brief get a specific line of a file
     337              :  * \param filename file name
     338              :  * \param line     line number to extract
     339              :  * \return the line of the file
     340              :  */
     341              : UG_API std::string GetFileLine(const char *filename, size_t line);
     342              : 
     343              : /**
     344              :  * \brief checks whether second string is longer than first string
     345              :  * \details This can be used to get the longest string in a vector of strings:
     346              :  * 
     347              :  *     int maxLength = (*max_element(vecStr.begin(), vecStr.end(), IsLonger)).size();
     348              :  * \param[in] a
     349              :  * \param[in] b
     350              :  * \return \c true if \c b is longer than \c a; \c false otherwise
     351              :  */
     352              : UG_API bool IsLonger(const std::string &a, const std::string &b);
     353              : 
     354              : 
     355              : /**
     356              :  * \brief Convert a object supporting '`std::cout << obj`' to a string
     357              :  * \tparam T type of the object; must support `std::ostream operator<<()`
     358              :  * \param[in] t object to convert to string
     359              :  * \return a string with the object as if you would use operator << (like `std::cout`)
     360              :  */
     361              : template<typename T>
     362            0 : inline std::string ToString(const T &t)
     363              : {
     364            0 :     std::stringstream out;
     365            0 :     out << t;
     366            0 :     return out.str();
     367            0 : }
     368              : 
     369              : /**
     370              :  * \brief returns a string suitable for XML files
     371              :  * This function escapes the characters <, >, ', " and &
     372              :  * @sa http://www.hdfgroup.org/HDF5/XML/xml_escape_chars.htm
     373              :  * @param[in] s
     374              :  * @return escaped string
     375              :  */
     376              : UG_API std::string XMLStringEscape(std::string s);
     377              : 
     378              : /**
     379              :  * \brief wildcard matches like bla.* or *.bla or t?st
     380              :  * @param[in] str a string
     381              :  * @param[in] pattern a pattern with wildcards * or ?
     382              :  * @return true if match otherwise false
     383              :  */
     384              : UG_API bool WildcardMatch(const char *str, const char *pattern);
     385              : 
     386              : /**
     387              :  * this function replaces XML special characters with their escaped versions:
     388              :  * & -> &amp;
     389              :  * " -> &quot;
     390              :  * ' -> &apos;
     391              :  * < -> &lt;
     392              :  * > -> &gt;
     393              :  * @param s a normal text
     394              :  * @return a text where special XML characters are escaped
     395              :  */
     396              : UG_API std::string XMLStringEscape(std::string s);
     397              : 
     398              : // end group ugbase_common_util_strings
     399              : /// \}
     400              : 
     401              : /**
     402              :  * \brief returns a "shifted" string
     403              :  * one-line strings are not shifted
     404              :  * two line strings are shifted like this: input:
     405              :  * "MyLine1\nMyLine2\n"
     406              :  * Output:
     407              :  * "\n | MyLine1\n | MyLine2"
     408              :  * note that they get an additional \n at the beginning, and
     409              :  * doubled \n and \n at the end are removed, so you can use
     410              :  * ConfigShift like this
     411              :  * \code
     412              :  * strstr <<      "MySubcomponent = " << ConfigShift(comp1.config_string()) << "\n"
     413              :  *                              "MySubcomponent2 = " << ConfigShift(comp2.config_string()) << "\n"
     414              :  * \endcode
     415              :  * Depending on comp1.config_string(), this results in
     416              :  * "MySubcomponent = sub1 ... " or "MySubcomponent =\n | sub1.1\n | sub1.2 ..."
     417              :  * @param[in] s
     418              :  * @return shifted string
     419              :  */
     420              : UG_API std::string ConfigShift(std::string s);
     421              : 
     422              : template<typename T>
     423              : inline std::string OstreamShift(const T &t)
     424              : {
     425              :         std::stringstream ss; ss << t;
     426              :         return ConfigShift(ss.str());
     427              : }
     428              : 
     429              : /**
     430              :  * Helper function to display byte sizes like 2411724 => 2,3 MB
     431              :  * @param[in] s size in bytes
     432              :  * @param[in] length if != 0, fixes the returned string length to this length (for tables etc.).
     433              :  * @return string describing the size s=1024 -> 1 kb
     434              :  */
     435              : std::string GetBytesSizeString(size_t s, int length=0);
     436              : 
     437              : inline const char *TrueFalseString(bool b)
     438              : {
     439              :         return b ? "TRUE" : "FALSE";
     440              : }
     441              : 
     442              : inline const char *OnOffString(bool b)
     443              : {
     444              :         return b ? "ON" : "OFF";
     445              : }
     446              : 
     447              : 
     448              : template< typename... Args >
     449            0 : inline std::string GetStringPrintf( const char* format, Args... args )
     450              : {
     451            0 :   int length = std::snprintf( nullptr, 0, format, args... );
     452              :   assert( length >= 0 );
     453              : 
     454            0 :   char* buf = new char[length + 1];
     455              :   std::snprintf( buf, length + 1, format, args... );
     456              : 
     457            0 :   std::string str( buf );
     458            0 :   delete[] buf;
     459            0 :   return str;
     460              : }
     461              : 
     462              : } // end namespace ug
     463              : 
     464              : #endif /*__H__COMMON_STRING_UTIL__*/
        

Generated by: LCOV version 2.0-1