LCOV - code coverage report
Current view: top level - ugbase/lib_grid/file_io/externals/src/tokstream - tokstream.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 0.0 % 261 0
Test Date: 2025-09-21 23:31:46 Functions: 0.0 % 29 0

            Line data    Source code
       1              : /****
       2              :  * Copyright (c) 2008 Nicolas Tessore
       3              :  *
       4              :  * Permission is hereby granted, free of charge, to any person obtaining a copy
       5              :  * of this software and associated documentation files (the "Software"), to deal
       6              :  * in the Software without restriction, including without limitation the rights
       7              :  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
       8              :  * copies of the Software, and to permit persons to whom the Software is
       9              :  * furnished to do so, subject to the following conditions:
      10              :  *
      11              :  * The above copyright notice and this permission notice shall be included in
      12              :  * all copies or substantial portions of the Software.
      13              :  *
      14              :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      15              :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      16              :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      17              :  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      18              :  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      19              :  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
      20              :  * THE SOFTWARE.
      21              :  ****/
      22              : 
      23              : /**
      24              :  * \mainpage
      25              :  *
      26              :  * The <em>tokstream</em> library is a simple, flexible and fast tokenizer
      27              :  * written in C.
      28              :  *
      29              :  * It contains only one struct, tokstream, and a number
      30              :  * of associated functions, which are all prefixed with a <tt>ts_</tt> tag.
      31              :  *
      32              :  * \section building Building
      33              :  *
      34              :  * Since the library consists of nothing more than a pair of header and
      35              :  * implementation files, the easiest solution is to directly compile it with
      36              :  * your project.
      37              :  *
      38              :  * If you, however, prefer to compile <em>tokstream</em> as a library, refer
      39              :  * to the \ref folders "cmake folder".
      40              :  *
      41              :  * \section folders Folder structure
      42              :  *
      43              :  * You are currently seeing documentation from the <em>doc</em> folder of the
      44              :  * library. The <tt>Doxyfile</tt> for building documentation is also located
      45              :  * there.
      46              :  *
      47              :  * A <tt>CMakeLists.txt</tt> file for building the library with
      48              :  * <a href="http://www.cmake.org/" target="_blank">CMake</a> can be found
      49              :  * in the <em>cmake</em> folder.
      50              :  *
      51              :  * The sources are located in the <em>tokstream</em> folder.
      52              :  *
      53              :  * \section license License
      54              :  *
      55              :  * The <em>tokstream</em> library is released under the MIT License:
      56              :  *
      57              :  * \verbatim
      58              : Copyright (c) 2008 Nicolas Tessore
      59              : 
      60              : Permission is hereby granted, free of charge, to any person obtaining a copy
      61              : of this software and associated documentation files (the "Software"), to deal
      62              : in the Software without restriction, including without limitation the rights
      63              : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      64              : copies of the Software, and to permit persons to whom the Software is
      65              : furnished to do so, subject to the following conditions:
      66              : 
      67              : The above copyright notice and this permission notice shall be included in
      68              : all copies or substantial portions of the Software.
      69              : 
      70              : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      71              : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      72              : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      73              : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      74              : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      75              : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
      76              : THE SOFTWARE.
      77              : \endverbatim
      78              :  */
      79              : 
      80              : #include <stdlib.h>
      81              : #include <stdio.h>
      82              : #include <string.h>
      83              : 
      84              : /**
      85              :  * \file tokstream.h
      86              :  *
      87              :  * \brief The tokstream library header
      88              :  *
      89              :  * This is the header file of the tokstream library. It contains a number of
      90              :  * functions, all prefixed with <strong>ts_</strong>, to operate on files as a
      91              :  * stream of tokens.
      92              :  */
      93              : #include "../../include/tokstream/tokstream.h"
      94              : 
      95              : 
      96              : /****
      97              :  * settings
      98              :  */
      99              : 
     100              : /* default read buffer size */
     101              : #define TS_BUFSIZ BUFSIZ
     102              : 
     103              : /* number of possible characters */
     104              : #define TS_CHARMAP_SIZE 256
     105              : 
     106              : /* character flag map data type */
     107              : typedef char ts_charmap[TS_CHARMAP_SIZE];
     108              : 
     109              : 
     110              : /****
     111              :  * tokstream structs
     112              :  */
     113              : 
     114              : struct ts_state
     115              : {
     116              :     ts_charmap sep;
     117              :     ts_charmap sep2;
     118              :     ts_charmap delim;
     119              : 
     120              :     int eof;
     121              :     int error;
     122              : 
     123              :     int buf_rev;
     124              : 
     125              :     char* cur;
     126              :     char* tok;
     127              : 
     128              :     long int pos;
     129              :     int line_no;
     130              :     int char_no;
     131              : 
     132              :     int tok_len;
     133              :     long int tok_pos;
     134              :     int tok_line_no;
     135              :     int tok_char_no;
     136              : 
     137              :     char* tok_buf;
     138              : };
     139              : 
     140              : /**
     141              :  * \struct tokstream tokstream.h
     142              :  *
     143              :  * \brief Token stream data structure
     144              :  *
     145              :  * Data structure representing a token stream.
     146              :  *
     147              :  * Like a FILE object, a tokstream object will always be created dynamically
     148              :  * by a call to ts_open(), and deleted by the corresponding call to ts_close().
     149              :  *
     150              :  * The structure has no publicly accessible members.
     151              :  */
     152              : struct tokstream
     153              : {
     154              :     FILE* fp;
     155              : 
     156              :     char* file;
     157              : 
     158              :     char* buf;
     159              :     int buf_size;
     160              :     int buf_len;
     161              :     int buf_rev;
     162              : 
     163              :     struct ts_state* state;
     164              :     struct ts_state* stack;
     165              :     int stack_size;
     166              : };
     167              : 
     168              : 
     169              : /****
     170              :  * charmap operations
     171              :  */
     172              : 
     173              : #define ts_charmap_clr(map) memset(map, 0, sizeof(ts_charmap))
     174              : #define ts_charmap_cpy(dst, src) memcpy(dst, src, sizeof(ts_charmap))
     175              : 
     176              : #define ts_charmap_0(map, c) (map[(int)c] = 0)
     177              : #define ts_charmap_1(map, c) (map[(int)c] = 1)
     178              : 
     179              : #define ts_charmap_get(map, c) (map[(int)c])
     180              : 
     181              : 
     182              : /****
     183              :  * inlining macros
     184              :  */
     185              : 
     186              : /* check if buffer is valid, if not get new, return status */
     187              : #define ts_bad_buf(ts) ((!ts->state->cur || !(*ts->state->cur)) && ts_read(ts))
     188              : 
     189              : #define ts_issep(ts) ts_charmap_get(ts->state->sep, *ts->state->cur)
     190              : #define ts_cissep(ts, c) ts_charmap_get(ts->state->sep, c)
     191              : #define ts_isdelim(ts) ts_charmap_get(ts->state->delim, *ts->state->cur)
     192              : #define ts_cisdelim(ts, c) ts_charmap_get(ts->state->delim, c)
     193              : 
     194              : /* advance cursor */
     195              : #define ts_adv_cur(ts) \
     196              :     do { \
     197              :         /* count chars and lines */ \
     198              :         if(*ts->state->cur == '\n') \
     199              :         { \
     200              :             ++ts->state->line_no; \
     201              :             ts->state->char_no = 1; \
     202              :         } \
     203              :         else \
     204              :         { \
     205              :             ++ts->state->char_no; \
     206              :         } \
     207              :          \
     208              :         /* increase cursor and position */ \
     209              :         ++ts->state->cur; \
     210              :         ++ts->state->pos; \
     211              :     } while(0) \
     212              : 
     213              : /* expand token */
     214              : #define ts_exp_tok(ts) \
     215              :     do { \
     216              :         /* advance cursor */ \
     217              :         ts_adv_cur(ts); \
     218              :          \
     219              :         /* increase token length */ \
     220              :         ++ts->state->tok_len; \
     221              :     } while(0) \
     222              : 
     223              : /* copy token to buffer */
     224              : #define ts_copy_tok(ts) \
     225              :     do { \
     226              :         /* free old token */ \
     227              :         free(ts->state->tok_buf); \
     228              :          \
     229              :         /* allocate space and copy token */ \
     230              :         ts->state->tok_buf = strncpy(malloc(ts->state->tok_len+1), ts->state->tok, ts->state->tok_len); \
     231              :          \
     232              :         /* terminate token */ \
     233              :         ts->state->tok_buf[ts->state->tok_len] = '\0'; \
     234              :     } while(0) \
     235              : 
     236              : 
     237              : /* copy string with allocation */
     238              : #define ts_strdup(str) strcpy(malloc(strlen(str)+1), str)
     239              : 
     240              : 
     241              : /****
     242              :  * internal functions declaration
     243              :  */
     244              : 
     245              : /* initialize a new state */
     246              : void ts_state_init(struct ts_state* state);
     247              : 
     248              : /* copy new state */
     249              : void ts_state_copy(struct ts_state* dst, const struct ts_state* src);
     250              : 
     251              : /* clean an old state */
     252              : void ts_state_clean(struct ts_state* state);
     253              : 
     254              : /* read new buffer for tokstream */
     255              : int ts_read(tokstream* ts);
     256              : 
     257              : /* normalize buffer contents */
     258              : int ts_normalize(tokstream* ts);
     259              : 
     260              : 
     261              : /****
     262              :  * implementation
     263              :  */
     264              : 
     265              : /**
     266              :  * \relatesalso tokstream
     267              :  *
     268              :  * \brief Open a new tokstream from file
     269              :  *
     270              :  * This opens the file given in the argument and constructs a tokstream around
     271              :  * it.
     272              :  *
     273              :  * Before reading can begin, you have to set the separators and delimiters for
     274              :  * the tokstream by using ts_sep(), ts_delim() and the corresponding on and
     275              :  * off functions.
     276              :  *
     277              :  * \sa ts_sep(), ts_sep_on(), ts_sep_off()
     278              :  * \sa ts_delim(), ts_delim_on(), ts_delim_off()
     279              :  *
     280              :  * \param file The filename to be opened.
     281              :  *
     282              :  * \returns Returns a pointer to the opened tokstream, or NULL on error.
     283              :  */
     284            0 : tokstream* ts_open(const char* file)
     285              : {
     286              :     FILE* fp;
     287              :     tokstream* ts;
     288              : 
     289              :     /* open file and keep it open */
     290            0 :     fp = fopen(file, "rb");
     291            0 :     if(!fp)
     292              :         return NULL;
     293              : 
     294              :     /* allocate new tokstream */
     295            0 :     ts = malloc(sizeof(tokstream));
     296              : 
     297              :     /* initialize */
     298              : 
     299              :     /* set file pointer */
     300            0 :     ts->fp = fp;
     301              : 
     302              :     /* copy filename */
     303            0 :     ts->file = ts_strdup(file);
     304              : 
     305              :     /* create buffer */
     306            0 :     ts->buf_size = TS_BUFSIZ;
     307            0 :     ts->buf = malloc(ts->buf_size);
     308            0 :     ts->buf_len = 0;
     309              : 
     310              :     /* start with 0 buffer revision */
     311            0 :     ts->buf_rev = 0;
     312              : 
     313              :     /* initialize state stack */
     314            0 :     ts->stack = malloc(sizeof(struct ts_state));
     315            0 :     ts->state = ts->stack;
     316            0 :     ts->stack_size = 1;
     317              : 
     318              :     /* initialize main state */
     319            0 :     ts_state_init(ts->state);
     320              : 
     321              :     /* set file status */
     322            0 :     ts->state->eof = feof(ts->fp);
     323            0 :     ts->state->error = ferror(ts->fp);
     324              : 
     325            0 :     return ts;
     326              : }
     327              : 
     328              : /**
     329              :  * \relatesalso tokstream
     330              :  *
     331              :  * \brief Close a tokstream
     332              :  *
     333              :  * Closes the file of the tokstream and frees all allocated memory.
     334              :  *
     335              :  * \param ts The tokstream to be closed.
     336              :  */
     337            0 : void ts_close(tokstream* ts)
     338              : {
     339              :     /* close file */
     340            0 :     fclose(ts->fp);
     341              : 
     342              :     /* clean all stacked states */
     343            0 :     while(ts->state >= ts->stack)
     344              :     {
     345            0 :         ts_state_clean(ts->state);
     346            0 :         --ts->state;
     347              :     }
     348              : 
     349              :     /* free memory */
     350            0 :     free(ts->stack);
     351            0 :     free(ts->buf);
     352            0 :     free(ts->file);
     353            0 :     free(ts);
     354            0 : }
     355              : 
     356              : /**
     357              :  * \relatesalso tokstream
     358              :  *
     359              :  * \brief Push the current tokstream state onto stack
     360              :  *
     361              :  * This saves all current information of the tokstream, but does not alter any
     362              :  * of its properties. Reading will continue just like before the call.
     363              :  *
     364              :  * Using ts_pop(), reading can later be continued exactly from the moment
     365              :  * ts_push() was called. Every setting in the tokstream is restored.
     366              :  *
     367              :  * It is possible to push multiple states on top of each other onto the stack.
     368              :  * The limit of this is defined by available memory.
     369              :  *
     370              :  * If pushing the state onto the stack fails (due to memory allocation), all of
     371              :  * the stack is lost. If possible, a single empty state is pushed onto the
     372              :  * stack, so that ts_ functions will not fail mysteriously.
     373              :  *
     374              :  * \sa ts_pop()
     375              :  *
     376              :  * \param ts The tokstream of which the state is to be saved to the stack.
     377              :  *
     378              :  * \returns In case of an error, a non-zero value is returned.
     379              :  */
     380            0 : int ts_push(tokstream* ts)
     381              : {
     382              :     /* no current state */
     383            0 :     ts->state = NULL;
     384              : 
     385              :     /* resize stack */
     386            0 :     ts->stack = realloc(ts->stack, (ts->stack_size + 1) * sizeof(struct ts_state));
     387              : 
     388              :     /* check there was enough memory available */
     389            0 :     if(!ts->stack)
     390              :     {
     391              :         /* create fallout state */
     392            0 :         ts->stack = malloc(sizeof(struct ts_state));
     393            0 :         ts->state = ts->stack;
     394              : 
     395              :         /* clean fallout state if possible */
     396            0 :         if(ts->state)
     397            0 :             ts_state_init(ts->state);
     398              : 
     399              :         /* return error */
     400            0 :         return 1;
     401              :     }
     402              : 
     403              :     /* duplicate previous state */
     404            0 :     ts_state_copy(ts->stack + ts->stack_size + 1, ts->stack + ts->stack_size);
     405              : 
     406              :     /* increment stack counter */
     407            0 :     ++ts->stack_size;
     408              : 
     409              :     /* set current state */
     410            0 :     ts->state = ts->stack + ts->stack_size;
     411              : 
     412              :     /* success */
     413            0 :     return 0;
     414              : }
     415              : 
     416              : /**
     417              :  * \relatesalso tokstream
     418              :  *
     419              :  * \brief Pop the current state from the stack
     420              :  *
     421              :  * This returns the tokstream to a state which was previously pushed to the
     422              :  * stack with ts_push(). Reading will continue as if the calling of ts_push()
     423              :  * and subsequently ts_pop() never occurred.
     424              :  *
     425              :  * \sa ts_push()
     426              :  *
     427              :  * \param ts The tokstream of which a stacked state is to be restored.
     428              :  *
     429              :  * \returns On attempting to pop the last state from the stack, the function
     430              :  *          returns a non-zero value.
     431              :  */
     432            0 : int ts_pop(tokstream* ts)
     433              : {
     434              :     /* prevent stack underflow */
     435            0 :     if(ts->state == ts->stack)
     436              :         return 1;
     437              : 
     438              :     /* clean discarded state */
     439            0 :     ts_state_clean(ts->state);
     440              : 
     441              :     /* resize state stack */
     442            0 :     ts->stack = realloc(ts->stack, (ts->stack_size - 1) * sizeof(struct ts_state));
     443              : 
     444              :     /* decrement stack counter */
     445            0 :     --ts->stack_size;
     446              : 
     447              :     /* set current state */
     448            0 :     ts->state = ts->stack + ts->stack_size;
     449              : 
     450              :     /* check if buffer changed */
     451            0 :     if(ts->buf_rev > ts->state->buf_rev)
     452              :     {
     453              :         /* invalidate cursor and token */
     454            0 :         ts->state->cur = NULL;
     455            0 :         ts->state->tok = NULL;
     456              :     }
     457              : 
     458              :     /* success */
     459              :     return 0;
     460              : }
     461              : 
     462              : /**
     463              :  * \name Stream information
     464              :  *
     465              :  * Functions to get status information about a token stream.
     466              :  *
     467              :  * \{
     468              :  */
     469              : 
     470              : /**
     471              :  * \relatesalso tokstream
     472              :  *
     473              :  * \brief Check if a tokstream is at EOF
     474              :  *
     475              :  * Checks if the tokstream has reached end-of-file (EOF). Calls feof() for the
     476              :  * underlying file object.
     477              :  *
     478              :  * \param ts The tokstream to check for EOF.
     479              :  *
     480              :  * \returns Returns a non-zero value if file is at EOF.
     481              :  */
     482            0 : int ts_eof(const tokstream* ts)
     483              : {
     484              :     /* delay eof to end of buffer */
     485            0 :     return ts->state->eof && (!ts->state->cur || !(*ts->state->cur));
     486              : }
     487              : 
     488              : /**
     489              :  * \relatesalso tokstream
     490              :  *
     491              :  * \brief Get file error flag of a tokstream
     492              :  *
     493              :  * This function returns the value of ferror() from <em>after the last read
     494              :  * operation</em>. Because of buffering, this is asynchronous with ts_get()
     495              :  * calls.
     496              :  *
     497              :  * \param ts The tokstream of which to get the file error flag.
     498              :  *
     499              :  * \returns Report the last value of ferror() for the tokstream's FILE* object.
     500              :  */
     501            0 : int ts_error(const tokstream* ts)
     502              : {
     503              :     /* delay errors to end of buffer */
     504            0 :     return ts->state->error && (!ts->state->cur || !(*ts->state->cur));
     505              : }
     506              : 
     507              : /**
     508              :  * \relatesalso tokstream
     509              :  *
     510              :  * \brief Return current line number
     511              :  *
     512              :  * This function gives the number of the line at which the current token was
     513              :  * read in the file. If there is no current token, the line number of the next
     514              :  * processed character is returned.
     515              :  *
     516              :  * \param ts The stream to return its line number.
     517              :  *
     518              :  * \returns Returns the line number of the current token.
     519              :  */
     520            0 : int ts_line(const tokstream* ts)
     521              : {
     522            0 :     if(!ts->state->tok)
     523            0 :         return ts->state->line_no;
     524              : 
     525            0 :     return ts->state->tok_line_no;
     526              : }
     527              : 
     528              : /**
     529              :  * \relatesalso tokstream
     530              :  *
     531              :  * \brief Return current character position
     532              :  *
     533              :  * This function gives the position of the character at which the current token
     534              :  * was read in its line. If there is no current token, the character position
     535              :  * of the next processed character is returned.
     536              :  *
     537              :  * \param ts The stream to return its character position.
     538              :  *
     539              :  * \returns Returns the character position of the current token.
     540              :  */
     541            0 : int ts_char(const tokstream* ts)
     542              : {
     543            0 :     if(!ts->state->tok)
     544            0 :         return ts->state->char_no;
     545              : 
     546            0 :     return ts->state->tok_char_no;
     547              : }
     548              : 
     549              : /**
     550              :  * \relatesalso tokstream
     551              :  *
     552              :  * \brief Return the current token
     553              :  *
     554              :  * This function returns the exact same token that was fetched and returned
     555              :  * by the last call to ts_get(). If no such call was ever made, it will return
     556              :  * NULL.
     557              :  *
     558              :  * The string returned belongs to the tokstream object. It will be invalid
     559              :  * on the next call to ts_get().
     560              :  *
     561              :  * \sa ts_get()
     562              :  *
     563              :  * \param ts The stream to get the current token of.
     564              :  *
     565              :  * \returns A string containing the current token, or NULL if no such token
     566              :  *          exists.
     567              :  */
     568            0 : const char* ts_tok(const tokstream* ts)
     569              : {
     570              :     /* check if token is valid */
     571            0 :     if(!ts->state->tok)
     572              :         return NULL;
     573              : 
     574              :     /* return buffered token */
     575            0 :     return ts->state->tok_buf;
     576              : }
     577              : 
     578              : /**
     579              :  * \}
     580              :  */
     581              : 
     582              : /**
     583              :  * \name Token getters
     584              :  *
     585              :  * Functions to get tokens from the input stream, modifying the current token.
     586              :  *
     587              :  * \{
     588              :  */
     589              : 
     590              : /**
     591              :  * \relatesalso tokstream
     592              :  *
     593              :  * \brief Get the next token from stream
     594              :  *
     595              :  * Search the input stream for the next token, according to current separator
     596              :  * and delimiter settings.
     597              :  *
     598              :  * The string returned belongs to the tokstream object. It will be invalid
     599              :  * on the next call to ts_get().
     600              :  *
     601              :  * \note This fetches the <em>next</em> token from the stream. If you want to
     602              :  *       get the <em>current</em> token, use ts_tok().
     603              :  *
     604              :  * \sa ts_tok()
     605              :  *
     606              :  * \param ts The stream to get the token from.
     607              :  *
     608              :  * \returns A zero-terminated string containing the next token is returned, or
     609              :  *          NULL in case of an error (i.e. EOF occurred while getting token).
     610              :  */
     611            0 : const char* ts_get(tokstream* ts)
     612              : {
     613              :     /* check if buffer is good */
     614            0 :     if(ts_bad_buf(ts))
     615              :         return NULL;
     616              : 
     617              :     /* seek beginning of token */
     618            0 :     while(ts_issep(ts))
     619              :     {
     620              :         /* advance cursor */
     621            0 :         ts_adv_cur(ts);
     622              : 
     623              :         /* check if buffer is still good */
     624            0 :         if(ts_bad_buf(ts))
     625              :             return NULL;
     626              :     }
     627              : 
     628              :     /* tokenize string beginning from cursor */
     629            0 :     ts->state->tok = ts->state->cur;
     630              : 
     631              :     /* reset token length */
     632            0 :     ts->state->tok_len = 0;
     633              : 
     634              :     /* store position of token */
     635            0 :     ts->state->tok_pos = ts->state->pos;
     636            0 :     ts->state->tok_line_no = ts->state->line_no;
     637            0 :     ts->state->tok_char_no = ts->state->char_no;
     638              : 
     639              :     /* expand token */
     640            0 :     ts_exp_tok(ts);
     641              : 
     642              :     /* check if token is not a delimiter */
     643            0 :     if(!ts_cisdelim(ts, *ts->state->tok))
     644              :     {
     645              :         /* move cursor forward until separator or delimiter */
     646            0 :         while(!ts_issep(ts) && !ts_isdelim(ts))
     647              :         {
     648              :             /* expand token */
     649            0 :             ts_exp_tok(ts);
     650              : 
     651              :             /* buffer ends here, and so does token */
     652            0 :             if(!(*ts->state->cur))
     653              :                 break;
     654              :         }
     655              :     }
     656              : 
     657              :     /* copy token to token buffer */
     658            0 :     ts_copy_tok(ts);
     659              : 
     660              :     /* return the token found, from buffer */
     661            0 :     return ts->state->tok_buf;
     662              : }
     663              : 
     664              : /**
     665              :  * \relatesalso tokstream
     666              :  *
     667              :  * \brief Unget current token
     668              :  *
     669              :  * Put the current token back into the stream, so that the next call to ts_get()
     670              :  * might again return it.
     671              :  *
     672              :  * This is useful when you need to change separators and delimiters, as well
     673              :  * as when you need to peek at the next token.
     674              :  *
     675              :  * \note This might cause a buffer refresh.
     676              :  *
     677              :  * \param ts The stream to unget the token to.
     678              :  *
     679              :  * \returns If the previous token is no longer available and cannot be ungot,
     680              :  *          a non-zero value is returned.
     681              :  */
     682            0 : int ts_unget(tokstream* ts)
     683              : {
     684              :     /* check if there is a token in buffer */
     685            0 :     if(!ts->state->tok)
     686              :         return 1;
     687              : 
     688              :     /* set cursor to token */
     689            0 :     ts->state->cur = ts->state->tok;
     690            0 :     ts->state->pos = ts->state->tok_pos;
     691            0 :     ts->state->line_no = ts->state->tok_line_no;
     692            0 :     ts->state->char_no = ts->state->tok_char_no;
     693              : 
     694              :     /* no current token */
     695            0 :     free(ts->state->tok_buf);
     696            0 :     ts->state->tok_buf = NULL;
     697            0 :     ts->stack->tok = NULL;
     698              : 
     699              :     /* success */
     700            0 :     return 0;
     701              : }
     702              : 
     703              : /**
     704              :  * \relatesalso tokstream
     705              :  *
     706              :  * \brief Get rest of line from stream
     707              :  *
     708              :  * Stores the rest of the current line (everything until newline character)
     709              :  * as the current token and returns it.
     710              :  *
     711              :  * The string will begin with the first non-separator character. If there is no
     712              :  * non-separator character until the end of line, the returned string will be
     713              :  * empty.
     714              :  *
     715              :  * The newline will be consumed, the stream will be positioned at the beginning
     716              :  * of the next line.
     717              :  *
     718              :  * Subsequent calls to ts_tok() will return the line as returned by this
     719              :  * function.
     720              :  *
     721              :  * \param ts The stream to get the line from.
     722              :  *
     723              :  * \returns Returns a string containing the line, or NULL if an error occurred.
     724              :  */
     725            0 : const char* ts_getline(tokstream* ts)
     726              : {
     727              :     /* tokenize until newline */
     728            0 :     if(!ts_seekc(ts, '\n'))
     729              :         return NULL;
     730              : 
     731              :     /* advance cursor past newline */
     732            0 :     ts_adv_cur(ts);
     733              : 
     734              :     /* return the line from token buffer */
     735            0 :     return ts->state->tok_buf;
     736              : }
     737              : 
     738              : /**
     739              :  * \}
     740              :  */
     741              : 
     742              : /**
     743              :  * \name Input skipping
     744              :  *
     745              :  * Functions to skip over parts of the input without modifying the current
     746              :  * stream status, ie. current token.
     747              :  *
     748              :  * \{
     749              :  */
     750              : 
     751              : /**
     752              :  * \relatesalso tokstream
     753              :  *
     754              :  * \brief Skip over the next token
     755              :  *
     756              :  * Using this function, the next token can be skipped without invalidating the
     757              :  * current token. This might be useful if the next token is already known, ie.
     758              :  * from a call to a seek function.
     759              :  *
     760              :  * \param ts The stream in which to skip a token.
     761              :  *
     762              :  * \returns Returns a non-zero value if an error occurred.
     763              :  */
     764            0 : int ts_skip(tokstream* ts)
     765              : {
     766              :     /* check if buffer is good */
     767            0 :     if(ts_bad_buf(ts))
     768              :         return 1;
     769              : 
     770              :     /* seek beginning of token */
     771            0 :     while(ts_issep(ts))
     772              :     {
     773              :         /* advance cursor */
     774            0 :         ts_adv_cur(ts);
     775              : 
     776              :         /* check if buffer is still good */
     777            0 :         if(ts_bad_buf(ts))
     778              :             return 1;
     779              :     }
     780              : 
     781              :     /* advance cursor */
     782            0 :     ts_adv_cur(ts);
     783              : 
     784              :     /* check if token is not a delimiter */
     785            0 :     if(!ts_cisdelim(ts, *ts->state->tok))
     786              :     {
     787              :         /* move cursor forward until separator or delimiter */
     788            0 :         while(!ts_issep(ts) && !ts_isdelim(ts))
     789              :         {
     790              :             /* advance cursor */
     791            0 :             ts_adv_cur(ts);
     792              : 
     793              :             /* buffer ends here, and so does token */
     794            0 :             if(!(*ts->state->cur))
     795              :                 break;
     796              :         }
     797              :     }
     798              : 
     799              :     /* done */
     800              :     return 0;
     801              : }
     802              : 
     803              : /**
     804              :  * \relatesalso tokstream
     805              :  *
     806              :  * \brief Skip line in stream
     807              :  *
     808              :  * Discards the current line in the stream and sets the stream position to the
     809              :  * beginning of the next line.
     810              :  *
     811              :  * \note Invalidates current token.
     812              :  *
     813              :  * \param ts The stream in which to skip a line.
     814              :  *
     815              :  * \returns On error, a non-zero value is returned.
     816              :  */
     817            0 : int ts_skipline(tokstream* ts)
     818              : {
     819              :     /* invalidate token */
     820            0 :     ts->state->tok = NULL;
     821              : 
     822              :     /* check if buffer is good */
     823            0 :     if(ts_bad_buf(ts))
     824              :         return 1;
     825              : 
     826              :     /* increment cursor until we find newline */
     827            0 :     while(*ts->state->cur != '\n')
     828              :     {
     829              :         /* advance cursor */
     830            0 :         ts_adv_cur(ts);
     831              : 
     832              :         /* make sure buffer is still filled */
     833            0 :         if(ts_bad_buf(ts))
     834              :             return 1;
     835              :     }
     836              : 
     837              :     /* advance past newline */
     838            0 :     ts_adv_cur(ts);
     839              : 
     840              :     /* success */
     841            0 :     return 0;
     842              : }
     843              : 
     844              : /**
     845              :  * \name Input seeking
     846              :  *
     847              :  * Seek to specific position in token stream.
     848              :  *
     849              :  * \{
     850              :  */
     851              : 
     852              : /**
     853              :  * \relatesalso tokstream
     854              :  *
     855              :  * \brief Seek to token
     856              :  *
     857              :  * The searched token will be the <em>current</em> token. The next call to
     858              :  * ts_get() will fetch a new token.
     859              :  *
     860              :  * \param ts The token stream to operate on.
     861              :  * \param tok The token to seek.
     862              :  *
     863              :  * \returns A non-zero value is returned to indicate the token was not found.
     864              :  */
     865              : 
     866            0 : int ts_seek(tokstream* ts, const char* tok)
     867              : {
     868              :         /* get tokens from ts until tok is found */
     869              :         do
     870              :         {
     871              :                 /* check if current token is right */
     872            0 :                 if(strcmp(ts->state->tok, tok) == 0)
     873              :                         return 0;
     874              :         }
     875            0 :         while(ts_get(ts) != NULL);
     876              : 
     877              :         /* token was not found */
     878              :         return 1;
     879              : }
     880              : 
     881              : /**
     882              :  * \relatesalso tokstream
     883              :  *
     884              :  * \brief Seek to character
     885              :  *
     886              :  * Stores the input until it encounters the \a c character and returns it as a
     887              :  * token.
     888              :  *
     889              :  * The string will begin with the first non-separator character. If there is no
     890              :  * non-separator character until the character \a c is found, the returned
     891              :  * string will be empty.
     892              :  *
     893              :  * The character \a c will not be consumed, it can be part of the next token.
     894              :  *
     895              :  * Subsequent calls to ts_tok() will return the token as returned by this
     896              :  * function.
     897              :  *
     898              :  * \param ts The stream to get the token from.
     899              :  * \param c The character that ends the token.
     900              :  *
     901              :  * \returns Returns a string containing the token, or NULL if an error occurred.
     902              :  */
     903            0 : const char* ts_seekc(tokstream* ts, char c)
     904              : {
     905              :     /* check if buffer is good */
     906            0 :     if(ts_bad_buf(ts))
     907              :         return NULL;
     908              : 
     909              :     /* seek beginning of token */
     910            0 :     while(ts_issep(ts) && *ts->state->cur != c)
     911              :     {
     912              :         /* advance cursor */
     913            0 :         ts_adv_cur(ts);
     914              : 
     915              :         /* check if buffer is still good */
     916            0 :         if(ts_bad_buf(ts))
     917              :             return NULL;
     918              :     }
     919              : 
     920              :     /* tokenize string beginning from cursor */
     921            0 :     ts->state->tok = ts->state->cur;
     922              : 
     923              :     /* reset token length */
     924            0 :     ts->state->tok_len = 0;
     925              : 
     926              :     /* store position of token */
     927            0 :     ts->state->tok_pos = ts->state->pos;
     928            0 :     ts->state->tok_line_no = ts->state->line_no;
     929            0 :     ts->state->tok_char_no = ts->state->char_no;
     930              : 
     931              :     /* move cursor forward until char is found */
     932            0 :     while(*ts->state->cur != c)
     933              :     {
     934              :         /* expand token */
     935            0 :         ts_exp_tok(ts);
     936              : 
     937              :         /* buffer ends here, and so does token */
     938            0 :         if(!(*ts->state->cur))
     939              :             break;
     940              :     }
     941              : 
     942              :     /* copy token to token buffer */
     943            0 :     ts_copy_tok(ts);
     944              : 
     945              :     /* return the token buffer */
     946            0 :     return ts->state->tok_buf;
     947              : }
     948              : 
     949              : /**
     950              :  * \relatesalso tokstream
     951              :  *
     952              :  * \brief Seek to any character from array
     953              :  *
     954              :  * Stores the input until it encounters any of the \a ca characters and returns
     955              :  * it as a token.
     956              :  *
     957              :  * The string will begin with the first non-separator character. If there is no
     958              :  * non-separator character until a character in \a ca is found, the returned
     959              :  * string will be empty.
     960              :  *
     961              :  * The character from \a ca will not be consumed, it can be part of the next
     962              :  * token.
     963              :  *
     964              :  * Subsequent calls to ts_tok() will return the token as returned by this
     965              :  * function.
     966              :  *
     967              :  * \param ts The stream to get the token from.
     968              :  * \param ca The characters that end the token.
     969              :  *
     970              :  * \returns Returns a string containing the token, or NULL if an error occurred.
     971              :  */
     972            0 : const char* ts_seekca(tokstream* ts, const char* ca)
     973              : {
     974              :     /* check if buffer is good */
     975            0 :     if(ts_bad_buf(ts))
     976              :         return NULL;
     977              : 
     978              :     /* seek beginning of token */
     979            0 :     while(ts_issep(ts) && !strchr(ca, *ts->state->cur))
     980              :     {
     981              :         /* advance cursor */
     982            0 :         ts_adv_cur(ts);
     983              : 
     984              :         /* check if buffer is still good */
     985            0 :         if(ts_bad_buf(ts))
     986              :             return NULL;
     987              :     }
     988              : 
     989              :     /* tokenize string beginning from cursor */
     990            0 :     ts->state->tok = ts->state->cur;
     991              : 
     992              :     /* reset token length */
     993            0 :     ts->state->tok_len = 0;
     994              : 
     995              :     /* store position of token */
     996            0 :     ts->state->tok_pos = ts->state->pos;
     997            0 :     ts->state->tok_line_no = ts->state->line_no;
     998            0 :     ts->state->tok_char_no = ts->state->char_no;
     999              : 
    1000              :     /* move cursor forward until char is found */
    1001            0 :     while(!strchr(ca, *ts->state->cur))
    1002              :     {
    1003              :         /* expand token */
    1004            0 :         ts_exp_tok(ts);
    1005              : 
    1006              :         /* buffer ends here, and so does token */
    1007            0 :         if(!(*ts->state->cur))
    1008              :             break;
    1009              :     }
    1010              : 
    1011              :     /* copy token to token buffer */
    1012            0 :     ts_copy_tok(ts);
    1013              : 
    1014              :     /* return the token buffer */
    1015            0 :     return ts->state->tok_buf;
    1016              : }
    1017              : 
    1018              : /**
    1019              :  * \}
    1020              :  */
    1021              : 
    1022              : /**
    1023              :  * \name Separator and delimiter control
    1024              :  *
    1025              :  * Functions to set which characters act as separators and which act as
    1026              :  * delimiters.
    1027              :  *
    1028              :  * \{
    1029              :  */
    1030              : 
    1031              : /**
    1032              :  * \relatesalso tokstream
    1033              :  *
    1034              :  * \brief Set separator characters
    1035              :  */
    1036            0 : void ts_sep(tokstream* ts, const char* sep)
    1037              : {
    1038              :     /* turn all separator flags off */
    1039            0 :     ts_charmap_clr(ts->state->sep);
    1040              : 
    1041              :     /* set separators */
    1042            0 :     for(; *sep; ++sep)
    1043            0 :         ts_charmap_1(ts->state->sep, *sep);
    1044              : 
    1045              :     /* make backup of separator flags */
    1046            0 :     ts_charmap_cpy(ts->state->sep2, ts->state->sep);
    1047              : 
    1048              :     /* renormalize buffer */
    1049            0 :     ts_normalize(ts);
    1050            0 : }
    1051              : 
    1052              : /**
    1053              :  * \relatesalso tokstream
    1054              :  *
    1055              :  * \brief Set character as separator
    1056              :  */
    1057            0 : void ts_sep_on(tokstream* ts, char c)
    1058              : {
    1059              :     /* set separator */
    1060            0 :     ts_charmap_1(ts->state->sep, c);
    1061            0 :     ts_charmap_1(ts->state->sep2, c);
    1062              : 
    1063              :     /* renormalize buffer */
    1064            0 :     ts_normalize(ts);
    1065            0 : }
    1066              : 
    1067              : /**
    1068              :  * \relatesalso tokstream
    1069              :  *
    1070              :  * \brief Unset character as separator
    1071              :  */
    1072            0 : void ts_sep_off(tokstream* ts, char c)
    1073              : {
    1074              :     /* unset separator */
    1075            0 :     ts_charmap_0(ts->state->sep, c);
    1076            0 :     ts_charmap_0(ts->state->sep2, c);
    1077              : 
    1078              :     /* renormalize buffer */
    1079            0 :     ts_normalize(ts);
    1080            0 : }
    1081              : 
    1082              : /**
    1083              :  * \relatesalso tokstream
    1084              :  *
    1085              :  * \brief Set delimiter characters
    1086              :  *
    1087              :  * */
    1088            0 : void ts_delim(tokstream* ts, const char* delim)
    1089              : {
    1090              :     /* turn all delimiter flags off */
    1091            0 :     ts_charmap_clr(ts->state->delim);
    1092              : 
    1093              :     /* restore all separator flags */
    1094            0 :     ts_charmap_cpy(ts->state->sep, ts->state->sep2);
    1095              : 
    1096              :     /* set delimiters */
    1097            0 :     for(; *delim; ++delim)
    1098              :     {
    1099              :         /* remove sep flag */
    1100            0 :         ts_charmap_0(ts->state->sep, *delim);
    1101              : 
    1102              :         /* set delim flag */
    1103            0 :         ts_charmap_1(ts->state->delim, *delim);
    1104              :     }
    1105              : 
    1106              :     /* renormalize buffer */
    1107            0 :     ts_normalize(ts);
    1108            0 : }
    1109              : 
    1110              : /**
    1111              :  * \relatesalso tokstream
    1112              :  *
    1113              :  * \brief Set character as delimiter
    1114              :  */
    1115            0 : void ts_delim_on(tokstream* ts, char c)
    1116              : {
    1117              :     /* remove sep flag */
    1118            0 :     ts_charmap_0(ts->state->sep, c);
    1119              : 
    1120              :     /* set delimiter */
    1121            0 :     ts_charmap_1(ts->state->delim, c);
    1122              : 
    1123              :     /* renormalize buffer */
    1124            0 :     ts_normalize(ts);
    1125            0 : }
    1126              : 
    1127              : /**
    1128              :  * \relatesalso tokstream
    1129              :  *
    1130              :  * \brief Unset character as delimiter
    1131              :  */
    1132            0 : void ts_delim_off(tokstream* ts, char c)
    1133              : {
    1134              :     /* unset delimiter */
    1135            0 :     ts_charmap_0(ts->state->delim, c);
    1136              : 
    1137              :     /* restore sep flag */
    1138            0 :     if(ts_charmap_get(ts->state->sep2, c))
    1139            0 :         ts_charmap_1(ts->state->sep, c);
    1140              : 
    1141              :     /* renormalize buffer */
    1142            0 :     ts_normalize(ts);
    1143            0 : }
    1144              : 
    1145              : /**
    1146              :  * \}
    1147              :  */
    1148              : 
    1149              : /**
    1150              :  * \relatesalso tokstream
    1151              :  *
    1152              :  * \brief Set input buffer size for stream
    1153              :  */
    1154            0 : int ts_bufsiz(tokstream* ts, int size)
    1155              : {
    1156              :     /* invalidate buffer for all states */
    1157            0 :     ts->state->cur = NULL;
    1158            0 :     ts->state->tok = NULL;
    1159            0 :     ++ts->buf_rev;
    1160              : 
    1161              :     /* reallocate buffer */
    1162            0 :     ts->buf = realloc(ts->buf, size);
    1163              : 
    1164              :     /* check if realloc failed */
    1165            0 :     if(!ts->buf)
    1166              :     {
    1167              :         /* allocate old buffer size */
    1168            0 :         ts->buf = malloc(ts->buf_size);
    1169              : 
    1170              :         /* error */
    1171            0 :         return 1;
    1172              :     }
    1173              : 
    1174              :     /* set size of buffer */
    1175            0 :     ts->buf_size = size;
    1176              : 
    1177              :     /* success */
    1178            0 :     return 0;
    1179              : }
    1180              : 
    1181              : 
    1182              : /****
    1183              :  * internal functions
    1184              :  */
    1185              : 
    1186            0 : void ts_state_init(struct ts_state* state)
    1187              : {
    1188            0 :     ts_charmap_clr(state->sep);
    1189            0 :     ts_charmap_clr(state->sep2);
    1190            0 :     ts_charmap_clr(state->delim);
    1191              : 
    1192            0 :     state->eof = 0;
    1193            0 :     state->error = 0;
    1194              : 
    1195            0 :     state->buf_rev = 0;
    1196              : 
    1197            0 :     state->cur = NULL;
    1198            0 :     state->tok = NULL;
    1199              : 
    1200            0 :     state->pos = 0;
    1201            0 :     state->line_no = 1;
    1202            0 :     state->char_no = 1;
    1203              : 
    1204            0 :     state->tok_len = 0;
    1205            0 :     state->tok_pos = 0;
    1206            0 :     state->tok_line_no = 1;
    1207            0 :     state->tok_char_no = 1;
    1208              : 
    1209            0 :     state->tok_buf = NULL;
    1210            0 : }
    1211              : 
    1212            0 : void ts_state_copy(struct ts_state* dst, const struct ts_state* src)
    1213              : {
    1214            0 :     ts_charmap_cpy(dst->sep, src->sep);
    1215            0 :     ts_charmap_cpy(dst->sep2, src->sep2);
    1216            0 :     ts_charmap_cpy(dst->delim, src->delim);
    1217              : 
    1218            0 :     dst->eof = src->eof;
    1219            0 :     dst->error = src->error;
    1220              : 
    1221            0 :     dst->buf_rev = src->buf_rev;
    1222              : 
    1223            0 :     dst->cur = src->cur;
    1224            0 :     dst->tok = src->tok;
    1225              : 
    1226            0 :     dst->pos = src->pos;
    1227            0 :     dst->line_no = src->line_no;
    1228            0 :     dst->char_no = src->char_no;
    1229              : 
    1230            0 :     dst->tok_len = src->tok_len;
    1231            0 :     dst->tok_pos = src->tok_pos;
    1232            0 :     dst->tok_line_no = src->tok_line_no;
    1233            0 :     dst->tok_char_no = src->tok_char_no;
    1234              : 
    1235            0 :     dst->tok_buf = src->tok_buf ? ts_strdup(src->tok_buf) : NULL;
    1236            0 : }
    1237              : 
    1238            0 : void ts_state_clean(struct ts_state* state)
    1239              : {
    1240            0 :     free(state->tok_buf);
    1241            0 : }
    1242              : 
    1243            0 : int ts_read(tokstream* ts)
    1244              : {
    1245              :     int seek_err;
    1246              : 
    1247              :     /* check if file is at eof already */
    1248            0 :     if(ts->state->eof)
    1249              :         return 1;
    1250              : 
    1251              :     /* seek to file position */
    1252            0 :     seek_err = fseek(ts->fp, ts->state->pos, SEEK_SET);
    1253              : 
    1254              :     /* update error and eof data */
    1255            0 :     ts->state->eof = feof(ts->fp);
    1256            0 :     ts->state->error = ferror(ts->fp);
    1257              : 
    1258              :     /* if there was a seek error, rather not give fp */
    1259            0 :     if(seek_err)
    1260              :         return 1;
    1261              : 
    1262              :     /* invalidate cursor and token */
    1263            0 :     ts->state->cur = NULL;
    1264            0 :     ts->state->tok = NULL;
    1265              : 
    1266              :     /* increase buffer revision */
    1267            0 :     ++ts->buf_rev;
    1268            0 :     ts->state->buf_rev = ts->buf_rev;
    1269              : 
    1270              :     /* get BUFSIZ chars from file to buffer */
    1271            0 :     ts->buf_len = fread(ts->buf, 1, ts->buf_size-1, ts->fp);
    1272              : 
    1273              :     /* terminate buffer string */
    1274            0 :     ts->buf[ts->buf_len] = '\0';
    1275              : 
    1276              :     /* set error indicators */
    1277            0 :     ts->state->eof = feof(ts->fp);
    1278            0 :     ts->state->error = ferror(ts->fp);
    1279              : 
    1280              :     /* break on error before updating tokstream */
    1281            0 :     if(ts->state->error)
    1282              :         return 1;
    1283              : 
    1284              :     /* set cursor to beginning of buffer */
    1285            0 :     ts->state->cur = ts->buf;
    1286              : 
    1287              :     /* normalize tokstream */
    1288            0 :     ts_normalize(ts);
    1289              : 
    1290              :     /* success */
    1291            0 :     return 0;
    1292              : }
    1293              : 
    1294            0 : int ts_normalize(tokstream* ts)
    1295              : {
    1296              :     int trim;
    1297              : 
    1298              :     /* test buffer */
    1299            0 :     if(ts->buf_rev == 0)
    1300              :         return 0;
    1301              : 
    1302              :     /* don't trim when at eof */
    1303              :     trim = 0;
    1304            0 :     if(!ts->state->eof)
    1305              :     {
    1306              :         /* trim token chars from end of buffer */
    1307            0 :         char* back = ts->buf + ts->buf_len - 1;
    1308            0 :         while(ts->buf_len > 0)
    1309              :         {
    1310              :             /* trim until separator or delimiter */
    1311            0 :             if(ts_cissep(ts, *back) || ts_cisdelim(ts, *back))
    1312              :                 break;
    1313              : 
    1314              :             /* trim buffer */
    1315            0 :             --back;
    1316            0 :             --ts->buf_len;
    1317              :         }
    1318              : 
    1319              :         /* check whether buffer was trimmed */
    1320            0 :         if(*(back+1))
    1321              :         {
    1322              :             /* buffer was trimmed */
    1323              :             trim = 1;
    1324              : 
    1325              :             /* terminate trimmed buffer */
    1326            0 :             *(back+1) = '\0';
    1327              :         }
    1328              :     }
    1329              : 
    1330              :     if(trim)
    1331              :     {
    1332              :         /* buffer changed, update buffer revision */
    1333            0 :         ++ts->buf_rev;
    1334            0 :         ++ts->state->buf_rev;
    1335              :     }
    1336              : 
    1337              :     /* report changes to buffer */
    1338              :     return trim;
    1339              : }
        

Generated by: LCOV version 2.0-1