Line data Source code
1 : /****
2 : * Copyright (c) 2008 Nicolas Tessore
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a copy
5 : * of this software and associated documentation files (the "Software"), to deal
6 : * in the Software without restriction, including without limitation the rights
7 : * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 : * copies of the Software, and to permit persons to whom the Software is
9 : * furnished to do so, subject to the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included in
12 : * all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 : * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 : * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 : * THE SOFTWARE.
21 : ****/
22 :
23 : /**
24 : * \mainpage
25 : *
26 : * The <em>tokstream</em> library is a simple, flexible and fast tokenizer
27 : * written in C.
28 : *
29 : * It contains only one struct, tokstream, and a number
30 : * of associated functions, which are all prefixed with a <tt>ts_</tt> tag.
31 : *
32 : * \section building Building
33 : *
34 : * Since the library consists of nothing more than a pair of header and
35 : * implementation files, the easiest solution is to directly compile it with
36 : * your project.
37 : *
38 : * If you, however, prefer to compile <em>tokstream</em> as a library, refer
39 : * to the \ref folders "cmake folder".
40 : *
41 : * \section folders Folder structure
42 : *
43 : * You are currently seeing documentation from the <em>doc</em> folder of the
44 : * library. The <tt>Doxyfile</tt> for building documentation is also located
45 : * there.
46 : *
47 : * A <tt>CMakeLists.txt</tt> file for building the library with
48 : * <a href="http://www.cmake.org/" target="_blank">CMake</a> can be found
49 : * in the <em>cmake</em> folder.
50 : *
51 : * The sources are located in the <em>tokstream</em> folder.
52 : *
53 : * \section license License
54 : *
55 : * The <em>tokstream</em> library is released under the MIT License:
56 : *
57 : * \verbatim
58 : Copyright (c) 2008 Nicolas Tessore
59 :
60 : Permission is hereby granted, free of charge, to any person obtaining a copy
61 : of this software and associated documentation files (the "Software"), to deal
62 : in the Software without restriction, including without limitation the rights
63 : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
64 : copies of the Software, and to permit persons to whom the Software is
65 : furnished to do so, subject to the following conditions:
66 :
67 : The above copyright notice and this permission notice shall be included in
68 : all copies or substantial portions of the Software.
69 :
70 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
71 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
72 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
73 : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
74 : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
75 : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
76 : THE SOFTWARE.
77 : \endverbatim
78 : */
79 :
80 : #include <stdlib.h>
81 : #include <stdio.h>
82 : #include <string.h>
83 :
84 : /**
85 : * \file tokstream.h
86 : *
87 : * \brief The tokstream library header
88 : *
89 : * This is the header file of the tokstream library. It contains a number of
90 : * functions, all prefixed with <strong>ts_</strong>, to operate on files as a
91 : * stream of tokens.
92 : */
93 : #include "../../include/tokstream/tokstream.h"
94 :
95 :
96 : /****
97 : * settings
98 : */
99 :
/* size of the read buffer a new stream starts with: stdio's BUFSIZ */
#define TS_BUFSIZ BUFSIZ

/* one map slot for every possible byte value */
#define TS_CHARMAP_SIZE 256

/* table of per-character flags, one char per slot */
typedef char ts_charmap[TS_CHARMAP_SIZE];
108 :
109 :
110 : /****
111 : * tokstream structs
112 : */
113 :
114 : struct ts_state
115 : {
116 : ts_charmap sep;
117 : ts_charmap sep2;
118 : ts_charmap delim;
119 :
120 : int eof;
121 : int error;
122 :
123 : int buf_rev;
124 :
125 : char* cur;
126 : char* tok;
127 :
128 : long int pos;
129 : int line_no;
130 : int char_no;
131 :
132 : int tok_len;
133 : long int tok_pos;
134 : int tok_line_no;
135 : int tok_char_no;
136 :
137 : char* tok_buf;
138 : };
139 :
140 : /**
141 : * \struct tokstream tokstream.h
142 : *
143 : * \brief Token stream data structure
144 : *
145 : * Data structure representing a token stream.
146 : *
147 : * Like a FILE object, a tokstream object will always be created dynamically
148 : * by a call to ts_open(), and deleted by the according call to ts_close().
149 : *
150 : * The structure has no publicly accessible members.
151 : */
struct tokstream
{
    /* underlying file and a private copy of its name */
    FILE *fp;
    char *file;

    /* read buffer */
    char *buf;
    int buf_size;               /* allocated size of buf */
    int buf_len;
    int buf_rev;                /* bumped whenever the buffer is invalidated */

    /* stack of states; state points at the one currently in use */
    struct ts_state *state;
    struct ts_state *stack;
    int stack_size;             /* number of states held by stack */
};
167 :
168 :
169 : /****
170 : * charmap operations
171 : */
172 :
/* reset every flag of a map / copy one whole map onto another */
#define ts_charmap_clr(map) memset((map), 0, sizeof(ts_charmap))
#define ts_charmap_cpy(dst, src) memcpy((dst), (src), sizeof(ts_charmap))

/*
 * Flag access. The character is converted to unsigned char before it is used
 * as an index: plain char may be signed, and a byte >= 0x80 would otherwise
 * become a negative index and address memory in front of the map.
 */
#define ts_charmap_0(map, c) ((map)[(unsigned char)(c)] = 0)
#define ts_charmap_1(map, c) ((map)[(unsigned char)(c)] = 1)

#define ts_charmap_get(map, c) ((map)[(unsigned char)(c)])
180 :
181 :
182 : /****
183 : * inlining macros
184 : */
185 :
/*
 * True when there is no unread character left (no cursor, or cursor on the
 * terminating NUL) and ts_read() reports a non-zero status. ts_read() is only
 * called when the buffer is actually exhausted.
 */
#define ts_bad_buf(ts) ((!(ts)->state->cur || !(*(ts)->state->cur)) && ts_read(ts))

/*
 * Character class tests, for the character under the cursor or for an explicit
 * character c. Characters are looked up by their unsigned value so that bytes
 * >= 0x80 cannot turn into a negative map index where char is signed.
 */
#define ts_issep(ts) ts_charmap_get((ts)->state->sep, (unsigned char)*(ts)->state->cur)
#define ts_cissep(ts, c) ts_charmap_get((ts)->state->sep, (unsigned char)(c))
#define ts_isdelim(ts) ts_charmap_get((ts)->state->delim, (unsigned char)*(ts)->state->cur)
#define ts_cisdelim(ts, c) ts_charmap_get((ts)->state->delim, (unsigned char)(c))
193 :
/*
 * Advance the cursor by one character, keeping line number, character number
 * and position in step. A newline under the cursor starts a new line: the
 * line number goes up and the character number restarts at 1.
 */
#define ts_adv_cur(ts) \
    do { \
        /* count chars and lines */ \
        if(*(ts)->state->cur == '\n') \
        { \
            ++(ts)->state->line_no; \
            (ts)->state->char_no = 1; \
        } \
        else \
        { \
            ++(ts)->state->char_no; \
        } \
        \
        /* increase cursor and position */ \
        ++(ts)->state->cur; \
        ++(ts)->state->pos; \
    } while(0)
212 :
/*
 * Extend the current token by the character under the cursor: the cursor is
 * advanced (with all position bookkeeping) and the token length grows by one.
 */
#define ts_exp_tok(ts) \
    do { \
        /* advance cursor */ \
        ts_adv_cur(ts); \
        \
        /* increase token length */ \
        ++(ts)->state->tok_len; \
    } while(0)
222 :
/*
 * Replace the token copy (tok_buf) with a fresh zero-terminated copy of the
 * tok_len characters starting at tok. If no memory is available tok_buf is
 * left NULL instead of copying through a NULL pointer.
 */
#define ts_copy_tok(ts) \
    do { \
        /* free old token */ \
        free((ts)->state->tok_buf); \
        \
        /* allocate space for token and terminator */ \
        (ts)->state->tok_buf = malloc((size_t)(ts)->state->tok_len + 1); \
        \
        if((ts)->state->tok_buf) \
        { \
            /* copy and terminate token */ \
            strncpy((ts)->state->tok_buf, (ts)->state->tok, (size_t)(ts)->state->tok_len); \
            (ts)->state->tok_buf[(ts)->state->tok_len] = '\0'; \
        } \
    } while(0)
235 :
236 :
/*
 * Copy a string into freshly allocated memory.
 *
 * Returns the copy, which the caller releases with free(), or NULL when str
 * is NULL or no memory is available. Being a function rather than a macro,
 * the argument is evaluated exactly once.
 */
static char* ts_strdup(const char* str)
{
    size_t size;
    char* copy;

    if(!str)
        return NULL;

    /* length including the terminator */
    size = strlen(str) + 1;

    copy = malloc(size);
    if(!copy)
        return NULL;

    memcpy(copy, str, size);

    return copy;
}
239 :
240 :
241 : /****
242 : * internal functions declaration
243 : */
244 :
245 : /* initialize a new state */
246 : void ts_state_init(struct ts_state* state);
247 :
248 : /* copy new state */
249 : void ts_state_copy(struct ts_state* dst, const struct ts_state* src);
250 :
251 : /* clean an old state */
252 : void ts_state_clean(struct ts_state* state);
253 :
254 : /* read new buffer for tokstream */
255 : int ts_read(tokstream* ts);
256 :
257 : /* normalize buffer contents */
258 : int ts_normalize(tokstream* ts);
259 :
260 :
261 : /****
262 : * implementation
263 : */
264 :
265 : /**
266 : * \relatesalso tokstream
267 : *
268 : * \brief Open a new tokstream from file
269 : *
270 : * This opens the file given in the argument and constructs a tokstream around
271 : * it.
272 : *
273 : * Before reading can begin, you have to set the separators and delimiters for
274 : * the tokstream by using ts_sep(), ts_delim() and the according on and
275 : * off functions.
276 : *
277 : * \sa ts_sep(), ts_sep_on(), ts_sep_off()
278 : * \sa ts_delim(), ts_delim_on(), ts_delim_off()
279 : *
280 : * \param file The filename to be opened.
281 : *
282 : * \returns Returns a pointer to the opened tokstream, or NULL on error.
283 : */
284 0 : tokstream* ts_open(const char* file)
285 : {
286 : FILE* fp;
287 : tokstream* ts;
288 :
289 : /* open file and keep it open */
290 0 : fp = fopen(file, "rb");
291 0 : if(!fp)
292 : return NULL;
293 :
294 : /* allocate new tokstream */
295 0 : ts = malloc(sizeof(tokstream));
296 :
297 : /* initialize */
298 :
299 : /* set file pointer */
300 0 : ts->fp = fp;
301 :
302 : /* copy filename */
303 0 : ts->file = ts_strdup(file);
304 :
305 : /* create buffer */
306 0 : ts->buf_size = TS_BUFSIZ;
307 0 : ts->buf = malloc(ts->buf_size);
308 0 : ts->buf_len = 0;
309 :
310 : /* start with 0 buffer revision */
311 0 : ts->buf_rev = 0;
312 :
313 : /* initialize state stack */
314 0 : ts->stack = malloc(sizeof(struct ts_state));
315 0 : ts->state = ts->stack;
316 0 : ts->stack_size = 1;
317 :
318 : /* initialize main state */
319 0 : ts_state_init(ts->state);
320 :
321 : /* set file status */
322 0 : ts->state->eof = feof(ts->fp);
323 0 : ts->state->error = ferror(ts->fp);
324 :
325 0 : return ts;
326 : }
327 :
328 : /**
329 : * \relatesalso tokstream
330 : *
331 : * \brief Close a tokstream
332 : *
333 : * Closes the file of the tokstream and frees all allocated memory.
334 : *
335 : * \param ts The tokstream to be closed.
336 : */
337 0 : void ts_close(tokstream* ts)
338 : {
339 : /* close file */
340 0 : fclose(ts->fp);
341 :
342 : /* clean all stacked states */
343 0 : while(ts->state >= ts->stack)
344 : {
345 0 : ts_state_clean(ts->state);
346 0 : --ts->state;
347 : }
348 :
349 : /* free memory */
350 0 : free(ts->stack);
351 0 : free(ts->buf);
352 0 : free(ts->file);
353 0 : free(ts);
354 0 : }
355 :
356 : /**
357 : * \relatesalso tokstream
358 : *
359 : * \brief Push the current tokstream state onto stack
360 : *
361 : * This saves all current information of the tokstream, but does not alter any
362 : * of its properties. Reading will continue just like before the call.
363 : *
364 : * Using ts_pop(), reading can later be continued exactly from the moment
365 : * ts_push() was called. Every setting in the tokstream is restored.
366 : *
367 : * It is possible to push multiple states on top of each other onto the stack.
368 : * The limit of this is defined by available memory.
369 : *
370 : * If pushing the state onto the stack fails (due to memory allocation), all of
371 : * the stack is lost. If possible, a single empty state is pushed onto the
372 : * stack, so that ts_ functions will not fail mysteriously.
373 : *
374 : * \sa ts_pop()
375 : *
376 : * \param ts The tokstream of which the state is to be saved to the stack.
377 : *
378 : * \returns In case of an error, a non-zero value is returned.
379 : */
380 0 : int ts_push(tokstream* ts)
381 : {
382 : /* no current state */
383 0 : ts->state = NULL;
384 :
385 : /* resize stack */
386 0 : ts->stack = realloc(ts->stack, (ts->stack_size + 1) * sizeof(struct ts_state));
387 :
388 : /* check there was enough memory available */
389 0 : if(!ts->stack)
390 : {
391 : /* create fallout state */
392 0 : ts->stack = malloc(sizeof(struct ts_state));
393 0 : ts->state = ts->stack;
394 :
395 : /* clean fallout state if possible */
396 0 : if(ts->state)
397 0 : ts_state_init(ts->state);
398 :
399 : /* return error */
400 0 : return 1;
401 : }
402 :
403 : /* duplicate previous state */
404 0 : ts_state_copy(ts->stack + ts->stack_size + 1, ts->stack + ts->stack_size);
405 :
406 : /* increment stack counter */
407 0 : ++ts->stack_size;
408 :
409 : /* set current state */
410 0 : ts->state = ts->stack + ts->stack_size;
411 :
412 : /* success */
413 0 : return 0;
414 : }
415 :
416 : /**
417 : * \relatesalso tokstream
418 : *
419 : * \brief Pop the current state from the stack
420 : *
421 : * This returns the tokstream to a state which was previously pushed to the
422 : * stack with ts_push(). Reading will continue as if the calling of ts_push()
423 : * and subsequently ts_pop() never occurred.
424 : *
425 : * \sa ts_push()
426 : *
427 : * \param ts The tokstream of which a stacked state is to be restored.
428 : *
429 : * \returns On attempting to pop the last state from the stack, the function
430 : * returns a non-zero value.
431 : */
432 0 : int ts_pop(tokstream* ts)
433 : {
434 : /* prevent stack underflow */
435 0 : if(ts->state == ts->stack)
436 : return 1;
437 :
438 : /* clean discarded state */
439 0 : ts_state_clean(ts->state);
440 :
441 : /* resize state stack */
442 0 : ts->stack = realloc(ts->stack, (ts->stack_size - 1) * sizeof(struct ts_state));
443 :
444 : /* decrement stack counter */
445 0 : --ts->stack_size;
446 :
447 : /* set current state */
448 0 : ts->state = ts->stack + ts->stack_size;
449 :
450 : /* check if buffer changed */
451 0 : if(ts->buf_rev > ts->state->buf_rev)
452 : {
453 : /* invalidate cursor and token */
454 0 : ts->state->cur = NULL;
455 0 : ts->state->tok = NULL;
456 : }
457 :
458 : /* success */
459 : return 0;
460 : }
461 :
462 : /**
463 : * \name Stream information
464 : *
465 : * Functions to get status information about a token stream.
466 : *
467 : * \{
468 : */
469 :
470 : /**
471 : * \relatesalso tokstream
472 : *
473 : * \brief Check if a tokstream is at EOF
474 : *
475 : * Checks if the tokstream has reached end-of-file (EOF). The EOF flag stored
476 : * for the stream is only reported once the read buffer has been used up.
477 : *
478 : * \param ts The tokstream to check for EOF.
479 : *
480 : * \returns Returns a non-zero value if file is at EOF.
481 : */
482 0 : int ts_eof(const tokstream* ts)
483 : {
484 : /* delay eof to end of buffer */
485 0 : return ts->state->eof && (!ts->state->cur || !(*ts->state->cur));
486 : }
487 :
488 : /**
489 : * \relatesalso tokstream
490 : *
491 : * \brief Get file error flag of a tokstream
492 : *
493 : * This function returns the value of ferror() from <em>after the last read
494 : * operation</em>. Because of buffering, this is asynchronous with ts_get()
495 : * calls.
496 : *
497 : * \param ts The tokstream of which to get the file error flag.
498 : *
499 : * \returns Report the last value of ferror() for the tokstream's FILE* object.
500 : */
501 0 : int ts_error(const tokstream* ts)
502 : {
503 : /* delay errors to end of buffer */
504 0 : return ts->state->error && (!ts->state->cur || !(*ts->state->cur));
505 : }
506 :
507 : /**
508 : * \relatesalso tokstream
509 : *
510 : * \brief Return current line number
511 : *
512 : * This function gives the number of the line at which the current token was
513 : * read in the file. If there is no current token, the line number of the next
514 : * processed character is returned.
515 : *
516 : * \param ts The stream to return its line number.
517 : *
518 : * \returns Returns the line number of the current token.
519 : */
520 0 : int ts_line(const tokstream* ts)
521 : {
522 0 : if(!ts->state->tok)
523 0 : return ts->state->line_no;
524 :
525 0 : return ts->state->tok_line_no;
526 : }
527 :
528 : /**
529 : * \relatesalso tokstream
530 : *
531 : * \brief Return current character position
532 : *
533 : * This function gives the position of the character at which the current token
534 : * was read in its line. If there is no current token, the character position
535 : * of the next processed character is returned.
536 : *
537 : * \param ts The stream to return its character position.
538 : *
539 : * \returns Returns the character position of the current token.
540 : */
541 0 : int ts_char(const tokstream* ts)
542 : {
543 0 : if(!ts->state->tok)
544 0 : return ts->state->char_no;
545 :
546 0 : return ts->state->tok_char_no;
547 : }
548 :
549 : /**
550 : * \relatesalso tokstream
551 : *
552 : * \brief Return the current token
553 : *
554 : * This function returns the exact same token that was fetched and returned
555 : * by the last call to ts_get(). If no such call was ever made, it will return
556 : * NULL.
557 : *
558 : * The string returned belongs to the tokstream object. It will be invalid
559 : * on the next call to ts_get().
560 : *
561 : * \sa ts_get()
562 : *
563 : * \param ts The stream to get the current token of.
564 : *
565 : * \returns A string containing the current token, or NULL if no such token
566 : * exists.
567 : */
568 0 : const char* ts_tok(const tokstream* ts)
569 : {
570 : /* check if token is valid */
571 0 : if(!ts->state->tok)
572 : return NULL;
573 :
574 : /* return buffered token */
575 0 : return ts->state->tok_buf;
576 : }
577 :
578 : /**
579 : * \}
580 : */
581 :
582 : /**
583 : * \name Token getters
584 : *
585 : * Functions to get tokens from the input stream, modifying the current token.
586 : *
587 : * \{
588 : */
589 :
590 : /**
591 : * \relatesalso tokstream
592 : *
593 : * \brief Get the next token from stream
594 : *
595 : * Search the input stream for the next token, according to current separator
596 : * and delimiter settings.
597 : *
598 : * The string returned belongs to the tokstream object. It will be invalid
599 : * on the next call to ts_get().
600 : *
601 : * \note This fetches the <em>next</em> token from the stream. If you want to
602 : * get the <em>current</em> token, use ts_tok().
603 : *
604 : * \sa ts_tok()
605 : *
606 : * \param ts The stream to get the token from.
607 : *
608 : * \returns A zero-terminated string containing the next token is returned, or
609 : * NULL in case of an error (ie. EOF occurred while getting token).
610 : */
611 0 : const char* ts_get(tokstream* ts)
612 : {
613 : /* check if buffer is good */
614 0 : if(ts_bad_buf(ts))
615 : return NULL;
616 :
617 : /* seek beginning of token */
618 0 : while(ts_issep(ts))
619 : {
620 : /* advance cursor */
621 0 : ts_adv_cur(ts);
622 :
623 : /* check if buffer is still good */
624 0 : if(ts_bad_buf(ts))
625 : return NULL;
626 : }
627 :
628 : /* tokenize string beginning from cursor */
629 0 : ts->state->tok = ts->state->cur;
630 :
631 : /* reset token length */
632 0 : ts->state->tok_len = 0;
633 :
634 : /* store position of token */
635 0 : ts->state->tok_pos = ts->state->pos;
636 0 : ts->state->tok_line_no = ts->state->line_no;
637 0 : ts->state->tok_char_no = ts->state->char_no;
638 :
639 : /* expand token */
640 0 : ts_exp_tok(ts);
641 :
642 : /* check if token is not a delimiter */
643 0 : if(!ts_cisdelim(ts, *ts->state->tok))
644 : {
645 : /* move cursor forward until separator or delimiter */
646 0 : while(!ts_issep(ts) && !ts_isdelim(ts))
647 : {
648 : /* expand token */
649 0 : ts_exp_tok(ts);
650 :
651 : /* buffer ends here, and so does token */
652 0 : if(!(*ts->state->cur))
653 : break;
654 : }
655 : }
656 :
657 : /* copy token to token buffer */
658 0 : ts_copy_tok(ts);
659 :
660 : /* return the token found, from buffer */
661 0 : return ts->state->tok_buf;
662 : }
663 :
664 : /**
665 : * \relatesalso tokstream
666 : *
667 : * \brief Unget current token
668 : *
669 : * Put the current token back into the stream, so that the next call to ts_get()
670 : * might again return it.
671 : *
672 : * This is useful when you need to change separators and delimiters, as well
673 : * as when you need to peek at the next token.
674 : *
675 : * \note This might cause a buffer refresh.
676 : *
677 : * \param ts The stream to unget the token to.
678 : *
679 : * \returns If the previous token is no longer available and cannot be ungot,
680 : * a non-zero value is returned.
681 : */
682 0 : int ts_unget(tokstream* ts)
683 : {
684 : /* check if there is a token in buffer */
685 0 : if(!ts->state->tok)
686 : return 1;
687 :
688 : /* set cursor to token */
689 0 : ts->state->cur = ts->state->tok;
690 0 : ts->state->pos = ts->state->tok_pos;
691 0 : ts->state->line_no = ts->state->tok_line_no;
692 0 : ts->state->char_no = ts->state->tok_char_no;
693 :
694 : /* no current token */
695 0 : free(ts->state->tok_buf);
696 0 : ts->state->tok_buf = NULL;
697 0 : ts->stack->tok = NULL;
698 :
699 : /* success */
700 0 : return 0;
701 : }
702 :
703 : /**
704 : * \relatesalso tokstream
705 : *
706 : * \brief Get rest of line from stream
707 : *
708 : * Stores the rest of the current line (everything until newline character)
709 : * as the current token and returns it.
710 : *
711 : * The string will begin with the first non-separator character. If there is no
712 : * non-separator character until the end of line, the returned string will be
713 : * empty.
714 : *
715 : * The newline will be consumed, the stream will be positioned at the beginning
716 : * of the next line.
717 : *
718 : * Subsequent calls to ts_tok() will return the line as returned by this
719 : * function.
720 : *
721 : * \param ts The stream to get the line from.
722 : *
723 : * \returns Returns a string containing the line, or NULL if an error occurred.
724 : */
725 0 : const char* ts_getline(tokstream* ts)
726 : {
727 : /* tokenize until newline */
728 0 : if(!ts_seekc(ts, '\n'))
729 : return NULL;
730 :
731 : /* advance cursor past newline */
732 0 : ts_adv_cur(ts);
733 :
734 : /* return the line from token buffer */
735 0 : return ts->state->tok_buf;
736 : }
737 :
738 : /**
739 : * \}
740 : */
741 :
742 : /**
743 : * \name Input skipping
744 : *
745 : * Functions to skip over parts of the input without modifying the current
746 : * stream status, ie. current token.
747 : *
748 : * \{
749 : */
750 :
751 : /**
752 : * \relatesalso tokstream
753 : *
754 : * \brief Skip over the next token
755 : *
756 : * Using this function, the next token can be skipped without invalidating the
757 : * current token. This might be useful if the next token is already known, ie.
758 : * from a call to a seek function.
759 : *
760 : * \param ts The stream in which to skip a token.
761 : *
762 : * \returns Returns a non-zero value if an error occurred.
763 : */
764 0 : int ts_skip(tokstream* ts)
765 : {
766 : /* check if buffer is good */
767 0 : if(ts_bad_buf(ts))
768 : return 1;
769 :
770 : /* seek beginning of token */
771 0 : while(ts_issep(ts))
772 : {
773 : /* advance cursor */
774 0 : ts_adv_cur(ts);
775 :
776 : /* check if buffer is still good */
777 0 : if(ts_bad_buf(ts))
778 : return 1;
779 : }
780 :
781 : /* advance cursor */
782 0 : ts_adv_cur(ts);
783 :
784 : /* check if token is not a delimiter */
785 0 : if(!ts_cisdelim(ts, *ts->state->tok))
786 : {
787 : /* move cursor forward until separator or delimiter */
788 0 : while(!ts_issep(ts) && !ts_isdelim(ts))
789 : {
790 : /* advance cursor */
791 0 : ts_adv_cur(ts);
792 :
793 : /* buffer ends here, and so does token */
794 0 : if(!(*ts->state->cur))
795 : break;
796 : }
797 : }
798 :
799 : /* done */
800 : return 0;
801 : }
802 :
803 : /**
804 : * \relatesalso tokstream
805 : *
806 : * \brief Skip line in stream
807 : *
808 : * Discards the current line in the stream and sets the stream position to the
809 : * beginning of the next line.
810 : *
811 : * \note Invalidates current token.
812 : *
813 : * \param ts The stream in which to skip a line.
814 : *
815 : * \returns On error, a non-zero value is returned.
816 : */
817 0 : int ts_skipline(tokstream* ts)
818 : {
819 : /* invalidate token */
820 0 : ts->state->tok = NULL;
821 :
822 : /* check if buffer is good */
823 0 : if(ts_bad_buf(ts))
824 : return 1;
825 :
826 : /* increment cursor until we find newline */
827 0 : while(*ts->state->cur != '\n')
828 : {
829 : /* advance cursor */
830 0 : ts_adv_cur(ts);
831 :
832 : /* make sure buffer is still filled */
833 0 : if(ts_bad_buf(ts))
834 : return 1;
835 : }
836 :
837 : /* advance past newline */
838 0 : ts_adv_cur(ts);
839 :
840 : /* success */
841 0 : return 0;
842 : }
843 :
844 : /**
845 : * \name Input seeking
846 : *
847 : * Seek to specific position in token stream.
848 : *
849 : * \{
850 : */
851 :
852 : /**
853 : * \relatesalso tokstream
854 : *
855 : * \brief Seek to token
856 : *
857 : * The searched token will be the <em>current</em> token. The next call to
858 : * ts_get() will fetch a new token.
859 : *
860 : * \param ts The token stream to operate on.
861 : * \param tok The token to seek.
862 : *
863 : * \returns A non-zero value is returned to indicate the token was not found.
864 : */
865 :
866 0 : int ts_seek(tokstream* ts, const char* tok)
867 : {
868 : /* get tokens from ts until tok is found */
869 : do
870 : {
871 : /* check if current token is right */
872 0 : if(strcmp(ts->state->tok, tok) == 0)
873 : return 0;
874 : }
875 0 : while(ts_get(ts) != NULL);
876 :
877 : /* token was not found */
878 : return 1;
879 : }
880 :
881 : /**
882 : * \relatesalso tokstream
883 : *
884 : * \brief Seek to character
885 : *
886 : * Stores the input until it encounters the \a c character and returns it as a
887 : * token.
888 : *
889 : * The string will begin with the first non-separator character. If there is no
890 : * non-separator character until the character \a c is found, the returned
891 : * string will be empty.
892 : *
893 : * The character \a c will not be consumed, it can be part of the next token.
894 : *
895 : * Subsequent calls to ts_tok() will return the token as returned by this
896 : * function.
897 : *
898 : * \param ts The stream to get the token from.
899 : * \param c The character that ends the token.
900 : *
901 : * \returns Returns a string containing the token, or NULL if an error occurred.
902 : */
903 0 : const char* ts_seekc(tokstream* ts, char c)
904 : {
905 : /* check if buffer is good */
906 0 : if(ts_bad_buf(ts))
907 : return NULL;
908 :
909 : /* seek beginning of token */
910 0 : while(ts_issep(ts) && *ts->state->cur != c)
911 : {
912 : /* advance cursor */
913 0 : ts_adv_cur(ts);
914 :
915 : /* check if buffer is still good */
916 0 : if(ts_bad_buf(ts))
917 : return NULL;
918 : }
919 :
920 : /* tokenize string beginning from cursor */
921 0 : ts->state->tok = ts->state->cur;
922 :
923 : /* reset token length */
924 0 : ts->state->tok_len = 0;
925 :
926 : /* store position of token */
927 0 : ts->state->tok_pos = ts->state->pos;
928 0 : ts->state->tok_line_no = ts->state->line_no;
929 0 : ts->state->tok_char_no = ts->state->char_no;
930 :
931 : /* move cursor forward until char is found */
932 0 : while(*ts->state->cur != c)
933 : {
934 : /* expand token */
935 0 : ts_exp_tok(ts);
936 :
937 : /* buffer ends here, and so does token */
938 0 : if(!(*ts->state->cur))
939 : break;
940 : }
941 :
942 : /* copy token to token buffer */
943 0 : ts_copy_tok(ts);
944 :
945 : /* return the token buffer */
946 0 : return ts->state->tok_buf;
947 : }
948 :
949 : /**
950 : * \relatesalso tokstream
951 : *
952 : * \brief Seek to any character from array
953 : *
954 : * Stores the input until it encounters any of the \a ca characters and returns
955 : * it as a token.
956 : *
957 : * The string will begin with the first non-separator character. If there is no
958 : * non-separator character until a character in \a ca is found, the returned
959 : * string will be empty.
960 : *
961 : * The character from \a ca will not be consumed, it can be part of the next
962 : * token.
963 : *
964 : * Subsequent calls to ts_tok() will return the token as returned by this
965 : * function.
966 : *
967 : * \param ts The stream to get the token from.
968 : * \param ca The characters that end the token.
969 : *
970 : * \returns Returns a string containing the token, or NULL if an error occurred.
971 : */
972 0 : const char* ts_seekca(tokstream* ts, const char* ca)
973 : {
974 : /* check if buffer is good */
975 0 : if(ts_bad_buf(ts))
976 : return NULL;
977 :
978 : /* seek beginning of token */
979 0 : while(ts_issep(ts) && !strchr(ca, *ts->state->cur))
980 : {
981 : /* advance cursor */
982 0 : ts_adv_cur(ts);
983 :
984 : /* check if buffer is still good */
985 0 : if(ts_bad_buf(ts))
986 : return NULL;
987 : }
988 :
989 : /* tokenize string beginning from cursor */
990 0 : ts->state->tok = ts->state->cur;
991 :
992 : /* reset token length */
993 0 : ts->state->tok_len = 0;
994 :
995 : /* store position of token */
996 0 : ts->state->tok_pos = ts->state->pos;
997 0 : ts->state->tok_line_no = ts->state->line_no;
998 0 : ts->state->tok_char_no = ts->state->char_no;
999 :
1000 : /* move cursor forward until char is found */
1001 0 : while(!strchr(ca, *ts->state->cur))
1002 : {
1003 : /* expand token */
1004 0 : ts_exp_tok(ts);
1005 :
1006 : /* buffer ends here, and so does token */
1007 0 : if(!(*ts->state->cur))
1008 : break;
1009 : }
1010 :
1011 : /* copy token to token buffer */
1012 0 : ts_copy_tok(ts);
1013 :
1014 : /* return the token buffer */
1015 0 : return ts->state->tok_buf;
1016 : }
1017 :
1018 : /**
1019 : * \}
1020 : */
1021 :
1022 : /**
1023 : * \name Separator and delimiter control
1024 : *
1025 : * Functions to set which characters act as separators and which act as
1026 : * delimiters.
1027 : *
1028 : * \{
1029 : */
1030 :
1031 : /**
1032 : * \relatesalso tokstream
1033 : *
1034 : * \brief Set separator characters
1035 : */
1036 0 : void ts_sep(tokstream* ts, const char* sep)
1037 : {
1038 : /* turn all separator flags off */
1039 0 : ts_charmap_clr(ts->state->sep);
1040 :
1041 : /* set separators */
1042 0 : for(; *sep; ++sep)
1043 0 : ts_charmap_1(ts->state->sep, *sep);
1044 :
1045 : /* make backup of separator flags */
1046 0 : ts_charmap_cpy(ts->state->sep2, ts->state->sep);
1047 :
1048 : /* renormalize buffer */
1049 0 : ts_normalize(ts);
1050 0 : }
1051 :
1052 : /**
1053 : * \relatesalso tokstream
1054 : *
1055 : * \brief Set character as separator
1056 : */
1057 0 : void ts_sep_on(tokstream* ts, char c)
1058 : {
1059 : /* set separator */
1060 0 : ts_charmap_1(ts->state->sep, c);
1061 0 : ts_charmap_1(ts->state->sep2, c);
1062 :
1063 : /* renormalize buffer */
1064 0 : ts_normalize(ts);
1065 0 : }
1066 :
1067 : /**
1068 : * \relatesalso tokstream
1069 : *
1070 : * \brief Unset character as separator
1071 : */
1072 0 : void ts_sep_off(tokstream* ts, char c)
1073 : {
1074 : /* unset separator */
1075 0 : ts_charmap_0(ts->state->sep, c);
1076 0 : ts_charmap_0(ts->state->sep2, c);
1077 :
1078 : /* renormalize buffer */
1079 0 : ts_normalize(ts);
1080 0 : }
1081 :
1082 : /**
1083 : * \relatesalso tokstream
1084 : *
1085 : * \brief Set delimiter characters
1086 : *
1087 : * */
1088 0 : void ts_delim(tokstream* ts, const char* delim)
1089 : {
1090 : /* turn all delimiter flags off */
1091 0 : ts_charmap_clr(ts->state->delim);
1092 :
1093 : /* restore all separator flags */
1094 0 : ts_charmap_cpy(ts->state->sep, ts->state->sep2);
1095 :
1096 : /* set delimiters */
1097 0 : for(; *delim; ++delim)
1098 : {
1099 : /* remove sep flag */
1100 0 : ts_charmap_0(ts->state->sep, *delim);
1101 :
1102 : /* set delim flag */
1103 0 : ts_charmap_1(ts->state->delim, *delim);
1104 : }
1105 :
1106 : /* renormalize buffer */
1107 0 : ts_normalize(ts);
1108 0 : }
1109 :
1110 : /**
1111 : * \relatesalso tokstream
1112 : *
1113 : * \brief Set character as delimiter
1114 : */
1115 0 : void ts_delim_on(tokstream* ts, char c)
1116 : {
1117 : /* remove sep flag */
1118 0 : ts_charmap_0(ts->state->sep, c);
1119 :
1120 : /* set delimiter */
1121 0 : ts_charmap_1(ts->state->delim, c);
1122 :
1123 : /* renormalize buffer */
1124 0 : ts_normalize(ts);
1125 0 : }
1126 :
1127 : /**
1128 : * \relatesalso tokstream
1129 : *
1130 : * \brief Unset character as delimiter
1131 : */
1132 0 : void ts_delim_off(tokstream* ts, char c)
1133 : {
1134 : /* unset delimiter */
1135 0 : ts_charmap_0(ts->state->delim, c);
1136 :
1137 : /* restore sep flag */
1138 0 : if(ts_charmap_get(ts->state->sep2, c))
1139 0 : ts_charmap_1(ts->state->sep, c);
1140 :
1141 : /* renormalize buffer */
1142 0 : ts_normalize(ts);
1143 0 : }
1144 :
1145 : /**
1146 : * \}
1147 : */
1148 :
1149 : /**
1150 : * \relatesalso tokstream
1151 : *
1152 : * \brief Set input buffer size for stream
1153 : */
1154 0 : int ts_bufsiz(tokstream* ts, int size)
1155 : {
1156 : /* invalidate buffer for all states */
1157 0 : ts->state->cur = NULL;
1158 0 : ts->state->tok = NULL;
1159 0 : ++ts->buf_rev;
1160 :
1161 : /* reallocate buffer */
1162 0 : ts->buf = realloc(ts->buf, size);
1163 :
1164 : /* check if realloc failed */
1165 0 : if(!ts->buf)
1166 : {
1167 : /* allocate old buffer size */
1168 0 : ts->buf = malloc(ts->buf_size);
1169 :
1170 : /* error */
1171 0 : return 1;
1172 : }
1173 :
1174 : /* set size of buffer */
1175 0 : ts->buf_size = size;
1176 :
1177 : /* success */
1178 0 : return 0;
1179 : }
1180 :
1181 :
1182 : /****
1183 : * internal functions
1184 : */
1185 :
1186 0 : void ts_state_init(struct ts_state* state)
1187 : {
1188 0 : ts_charmap_clr(state->sep);
1189 0 : ts_charmap_clr(state->sep2);
1190 0 : ts_charmap_clr(state->delim);
1191 :
1192 0 : state->eof = 0;
1193 0 : state->error = 0;
1194 :
1195 0 : state->buf_rev = 0;
1196 :
1197 0 : state->cur = NULL;
1198 0 : state->tok = NULL;
1199 :
1200 0 : state->pos = 0;
1201 0 : state->line_no = 1;
1202 0 : state->char_no = 1;
1203 :
1204 0 : state->tok_len = 0;
1205 0 : state->tok_pos = 0;
1206 0 : state->tok_line_no = 1;
1207 0 : state->tok_char_no = 1;
1208 :
1209 0 : state->tok_buf = NULL;
1210 0 : }
1211 :
1212 0 : void ts_state_copy(struct ts_state* dst, const struct ts_state* src)
1213 : {
1214 0 : ts_charmap_cpy(dst->sep, src->sep);
1215 0 : ts_charmap_cpy(dst->sep2, src->sep2);
1216 0 : ts_charmap_cpy(dst->delim, src->delim);
1217 :
1218 0 : dst->eof = src->eof;
1219 0 : dst->error = src->error;
1220 :
1221 0 : dst->buf_rev = src->buf_rev;
1222 :
1223 0 : dst->cur = src->cur;
1224 0 : dst->tok = src->tok;
1225 :
1226 0 : dst->pos = src->pos;
1227 0 : dst->line_no = src->line_no;
1228 0 : dst->char_no = src->char_no;
1229 :
1230 0 : dst->tok_len = src->tok_len;
1231 0 : dst->tok_pos = src->tok_pos;
1232 0 : dst->tok_line_no = src->tok_line_no;
1233 0 : dst->tok_char_no = src->tok_char_no;
1234 :
1235 0 : dst->tok_buf = src->tok_buf ? ts_strdup(src->tok_buf) : NULL;
1236 0 : }
1237 :
1238 0 : void ts_state_clean(struct ts_state* state)
1239 : {
1240 0 : free(state->tok_buf);
1241 0 : }
1242 :
1243 0 : int ts_read(tokstream* ts)
1244 : {
1245 : int seek_err;
1246 :
1247 : /* check if file is at eof already */
1248 0 : if(ts->state->eof)
1249 : return 1;
1250 :
1251 : /* seek to file position */
1252 0 : seek_err = fseek(ts->fp, ts->state->pos, SEEK_SET);
1253 :
1254 : /* update error and eof data */
1255 0 : ts->state->eof = feof(ts->fp);
1256 0 : ts->state->error = ferror(ts->fp);
1257 :
1258 : /* if there was a seek error, rather not give fp */
1259 0 : if(seek_err)
1260 : return 1;
1261 :
1262 : /* invalidate cursor and token */
1263 0 : ts->state->cur = NULL;
1264 0 : ts->state->tok = NULL;
1265 :
1266 : /* increase buffer revision */
1267 0 : ++ts->buf_rev;
1268 0 : ts->state->buf_rev = ts->buf_rev;
1269 :
1270 : /* get BUFSIZ chars from file to buffer */
1271 0 : ts->buf_len = fread(ts->buf, 1, ts->buf_size-1, ts->fp);
1272 :
1273 : /* terminate buffer string */
1274 0 : ts->buf[ts->buf_len] = '\0';
1275 :
1276 : /* set error indicators */
1277 0 : ts->state->eof = feof(ts->fp);
1278 0 : ts->state->error = ferror(ts->fp);
1279 :
1280 : /* break on error before updating tokstream */
1281 0 : if(ts->state->error)
1282 : return 1;
1283 :
1284 : /* set cursor to beginning of buffer */
1285 0 : ts->state->cur = ts->buf;
1286 :
1287 : /* normalize tokstream */
1288 0 : ts_normalize(ts);
1289 :
1290 : /* success */
1291 0 : return 0;
1292 : }
1293 :
1294 0 : int ts_normalize(tokstream* ts)
1295 : {
1296 : int trim;
1297 :
1298 : /* test buffer */
1299 0 : if(ts->buf_rev == 0)
1300 : return 0;
1301 :
1302 : /* don't trim when at eof */
1303 : trim = 0;
1304 0 : if(!ts->state->eof)
1305 : {
1306 : /* trim token chars from end of buffer */
1307 0 : char* back = ts->buf + ts->buf_len - 1;
1308 0 : while(ts->buf_len > 0)
1309 : {
1310 : /* trim until separator or delimiter */
1311 0 : if(ts_cissep(ts, *back) || ts_cisdelim(ts, *back))
1312 : break;
1313 :
1314 : /* trim buffer */
1315 0 : --back;
1316 0 : --ts->buf_len;
1317 : }
1318 :
1319 : /* check whether buffer was trimmed */
1320 0 : if(*(back+1))
1321 : {
1322 : /* buffer was trimmed */
1323 : trim = 1;
1324 :
1325 : /* terminate trimmed buffer */
1326 0 : *(back+1) = '\0';
1327 : }
1328 : }
1329 :
1330 : if(trim)
1331 : {
1332 : /* buffer changed, update buffer revision */
1333 0 : ++ts->buf_rev;
1334 0 : ++ts->state->buf_rev;
1335 : }
1336 :
1337 : /* report changes to buffer */
1338 : return trim;
1339 : }
|