From 218021b512f7bb71f553966cd5447f71a49002f9 Mon Sep 17 00:00:00 2001 From: metamuffin Date: Wed, 9 Apr 2025 12:41:51 +0200 Subject: token buffer parser --- attocc.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--------- makefile | 2 +- 2 files changed, 121 insertions(+), 19 deletions(-) diff --git a/attocc.c b/attocc.c index 785c5d1..4c901c6 100644 --- a/attocc.c +++ b/attocc.c @@ -16,8 +16,10 @@ along with this program. If not, see . */ #include +#include #include #include +#include #include #ifdef LINT @@ -587,31 +589,35 @@ void token_print(struct token tok) { printf("%4lu ", tok.position); switch (tok.kind) { case TOK_IDENTIFIER: - printf("TOK_IDENTIFIER:%s, ", tok.data.identifier); + printf("TOK_IDENTIFIER:%s", tok.data.identifier); break; case TOK_KEYWORD: - printf("TOK_KEYWORD:%s, ", KEYWORD_NAMES[tok.data.keyword]); + printf("TOK_KEYWORD:%s", KEYWORD_NAMES[tok.data.keyword]); break; case TOK_CONSTANT: switch (tok.data.constant_kind) { case CONST_INT: - printf("TOK_CONSTANT:CONST_INT:%i, ", tok.data.constant_int_value); + printf("TOK_CONSTANT:CONST_INT:%i", tok.data.constant_int_value); break; case CONST_STR: - printf("TOK_CONSTANT:CONST_STR:%s, ", tok.data.constant_str_value); + printf("TOK_CONSTANT:CONST_STR:%s", tok.data.constant_str_value); break; case CONST_CHAR: - printf("TOK_CONSTANT:CONST_CHAR:%c, ", tok.data.constant_char_value); + printf("TOK_CONSTANT:CONST_CHAR:%c", tok.data.constant_char_value); break; } break; case TOK_OPERATOR: - printf("TOK_OPERATOR:%s, ", OPERATOR_NAMES[tok.data.operator] ); + printf("TOK_OPERATOR:%s", OPERATOR_NAMES[tok.data.operator] ); break; case TOK_SEPERATOR: - printf("TOK_SEPERATOR:%s, ", SEPERATOR_NAMES[tok.data.seperator]); + printf("TOK_SEPERATOR:%s", SEPERATOR_NAMES[tok.data.seperator]); + break; + case TOK_ERROR: + printf("TOK_ERROR:%s", tok.data.error_message); break; case TOK_END: + printf("TOK_END"); break; } printf("\n"); @@ -939,6 +945,102 @@ void debug_node(struct node *node, int k) { } 
#endif

+/* Number of tokens of lookahead held by a token_buffer. */
+enum { TOKEN_BUFFER_SIZE = 4 };
+
+/*
+ * Fixed-size lookahead window over a token iterator.
+ *
+ * tokens[0] is the next token to be handed out; higher indices lie further
+ * ahead in the input. iter is the iterator the window is refilled from.
+ */
+struct token_buffer {
+  struct token tokens[TOKEN_BUFFER_SIZE];
+  struct token_iter *iter;
+};
+
+/*
+ * Create a lookahead window by pulling up to TOKEN_BUFFER_SIZE tokens
+ * from iter.
+ *
+ * If the input ends early, every slot after the TOK_END token is filled
+ * with a copy of that TOK_END token. No slot is ever left uninitialized,
+ * so peeking at any valid offset is always defined.
+ */
+struct token_buffer token_buffer_new(struct token_iter *iter) {
+  struct token_buffer buf;
+  buf.iter = iter;
+  for (int i = 0; i < TOKEN_BUFFER_SIZE; i++) {
+    if (i > 0 && buf.tokens[i - 1].kind == TOK_END) {
+      /* Input exhausted: pad with the end marker, do not touch iter. */
+      buf.tokens[i] = buf.tokens[i - 1];
+    } else {
+      buf.tokens[i] = token_iter_next(iter);
+    }
+  }
+  return buf;
+}
+
+/*
+ * Remove and return the token at the front of the window, shifting the
+ * remaining tokens forward by one and refilling the last slot.
+ *
+ * Once TOK_END has reached the last slot it is kept there instead of
+ * pulling from the iterator again, so the iterator is never advanced
+ * past its end marker.
+ *
+ * NOTE(review): the returned token is handed to the caller by value;
+ * presumably the caller releases it with token_free -- confirm.
+ */
+struct token token_buffer_next(struct token_buffer *buf) {
+  const int last = TOKEN_BUFFER_SIZE - 1;
+  struct token out = buf->tokens[0];
+  for (int i = 0; i < last; i++) {
+    buf->tokens[i] = buf->tokens[i + 1];
+  }
+  if (buf->tokens[last].kind != TOK_END) {
+    buf->tokens[last] = token_iter_next(buf->iter);
+  }
+  return out;
+}
+
+/*
+ * Return a pointer to the token off positions ahead (0 = next token)
+ * without consuming it. The pointer refers to storage inside buf and is
+ * overwritten by the next call to token_buffer_next.
+ *
+ * An offset outside [0, TOKEN_BUFFER_SIZE) is a programming error:
+ * a message is written to stderr and the process exits with status 3.
+ */
+struct token *token_buffer_peek(struct token_buffer *buf, int off) {
+  /* Reject negative offsets too; they would index before the array. */
+  if (off >= 0 && off < TOKEN_BUFFER_SIZE) {
+    return &buf->tokens[off];
+  }
+  fprintf(stderr, "peek too far\n");
+  exit(3);
+}
+
+/*
+ * Guess how many tokens at the front of the window spell a type name.
+ *
+ * Returns 2 for `struct`/`enum`/`union` followed by an identifier,
+ * 1 for a single identifier, and -1 if the window does not start with
+ * either form.
+ */
+int peek_type_size(struct token_buffer *buf) {
+  struct token *first = token_buffer_peek(buf, 0);
+
+  if (first->kind == TOK_IDENTIFIER) {
+    return 1;
+  }
+  if (first->kind != TOK_KEYWORD) {
+    return -1;
+  }
+
+  int is_tag_keyword = first->data.keyword == KW_STRUCT ||
+                       first->data.keyword == KW_ENUM ||
+                       first->data.keyword == KW_UNION;
+  if (!is_tag_keyword) {
+    return -1;
+  }
+
+  /* A tag keyword only counts as a type when a tag name follows it. */
+  struct token *second = token_buffer_peek(buf, 1);
+  return second->kind == TOK_IDENTIFIER ? 2 : -1;
+}
+
+/*
+ * Classify the top-level construct at the front of the window using only
+ * lookahead, and print its kind ("type", "function" or "global").
+ * Nothing is consumed; unrecognised input prints nothing.
+ */
+void toplevel(struct token_buffer *buf) {
+  int ts = peek_type_size(buf);
+  if (ts < 0) {
+    return;
+  }
+
+  /* ts is 1 or 2 here, so ts + 1 stays inside the lookahead window. */
+  struct token *a = token_buffer_peek(buf, ts);
+  struct token *b = token_buffer_peek(buf, ts + 1);
+
+  if (a->kind == TOK_SEPERATOR && a->data.seperator == SEP_LCURLY) {
+    /* `<type> {` : likely a type definition */
+    printf("type\n");
+  } else if (a->kind == TOK_IDENTIFIER && b->kind == TOK_SEPERATOR &&
+             b->data.seperator == SEP_LPAREN) {
+    /* `<type> name (` : likely a function */
+    printf("function\n");
+  } else if (a->kind == TOK_IDENTIFIER && b->kind == TOK_OPERATOR &&
+             b->data.operator == OP_ASSIGN) {
+    /* `<type> name =` : likely a global variable */
+    printf("global\n");
+  }
+}
+
+/*
+ * Release the token stored in *a and replace it with the next token
+ * from iter.
+ *
+ * Returns non-zero when the new token is TOK_END or TOK_ERROR (the
+ * caller should stop), zero otherwise.
+ */
+int token_step(struct token_iter *iter, struct token *a) {
+  token_free(*a);
+  *a = token_iter_next(iter);
+  return a->kind == TOK_END || a->kind == TOK_ERROR;
+}
+
+void toplevel2(struct 
token_iter *iter) { + struct token a = token_iter_next(iter); + + if (a.kind == TOK_KEYWORD) { + switch (a.data.keyword) { + case KW_ENUM: + a = token_iter_next(iter); + if (a.kind == TOK_IDENTIFIER) { + char *type_name = strdup(a.data.identifier); + if (token_step(iter, &a)) + return; + } + break; + default: + printf("unhandled keyword: "); + token_print(a); + exit(1); + } + } +} + #ifdef TEST char test_clean = 1; void assert(char cond, char *label) { @@ -954,16 +1056,16 @@ void assert_eq(int a, int b, char *label) { } } void test() { + printf("sizeof(struct token) = %li\n", sizeof(struct token)); + printf("sizeof(struct token_iter) = %li\n", sizeof(struct token_iter)); + printf("sizeof(struct token_buffer) = %li\n", sizeof(struct token_buffer)); assert_eq(strlen("sizeof"), 6, "strlen 1"); assert_eq(strlen(""), 0, "strlen 2"); assert_eq(strlen("a"), 1, "strlen 3"); assert(is_alpha('a'), "alpha 1"); assert(is_alpha('z'), "alpha 2"); assert(is_alpha('k'), "alpha 3"); - assert(!!tokenize("enum"), "tok 1"); - assert(!!tokenize("int x = 0"), "tok 2"); - // assert(!!tokenize("\"Hello\""), "tok 3"); - // assert(!!tokenize("'\n'"), "tok 4"); + // TODO test tokenize } #endif @@ -991,10 +1093,6 @@ int main(int argc, char **argv) { return 1; } -#ifdef DEBUG - printf("\n=== READ INPUT ===\n"); -#endif - int source_len = 0; char *source = NULL; int size; @@ -1015,20 +1113,24 @@ int main(int argc, char **argv) { struct linemap linemap; struct token_iter iter; + // struct token_buffer buf; struct token tok; linemap = linemap_new(source); iter = token_iter_new(source); + // buf = token_buffer_new(&iter); + // toplevel(&buf); + // toplevel2(&iter); while (1) { tok = token_iter_next(&iter); - if (tok.kind == TOK_END) - break; - #ifdef DEBUG token_print(tok); #endif + if (tok.kind == TOK_END) + break; + token_free(tok); }; diff --git a/makefile b/makefile index 0364204..dae0c7d 100644 --- a/makefile +++ b/makefile @@ -1,5 +1,5 @@ .PHONY: all clean test -ALL = attocc attocc-small 
attocc-debug +ALL = attocc attocc-small attocc-debug attocc-test all: $(ALL) clean: rm $(ALL) -- cgit v1.2.3-70-g09d2