summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormetamuffin <metamuffin@disroot.org>2025-04-09 12:41:51 +0200
committermetamuffin <metamuffin@disroot.org>2025-04-09 12:41:51 +0200
commit218021b512f7bb71f553966cd5447f71a49002f9 (patch)
treea425555a3cad5f16fe72c1e4dc12c6820bbe8399
parent5ac1cdde1743e818db9ae362376cb52770145973 (diff)
downloadattocc-218021b512f7bb71f553966cd5447f71a49002f9.tar
attocc-218021b512f7bb71f553966cd5447f71a49002f9.tar.bz2
attocc-218021b512f7bb71f553966cd5447f71a49002f9.tar.zst
token buffer parser
-rw-r--r--attocc.c138
-rw-r--r--makefile2
2 files changed, 121 insertions, 19 deletions
diff --git a/attocc.c b/attocc.c
index 785c5d1..4c901c6 100644
--- a/attocc.c
+++ b/attocc.c
@@ -16,8 +16,10 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include <fcntl.h>
+#include <math.h>
#include <stdio.h>
#include <stdlib.h>
+#include <sys/cdefs.h>
#include <unistd.h>
#ifdef LINT
@@ -587,31 +589,35 @@ void token_print(struct token tok) {
printf("%4lu ", tok.position);
switch (tok.kind) {
case TOK_IDENTIFIER:
- printf("TOK_IDENTIFIER:%s, ", tok.data.identifier);
+ printf("TOK_IDENTIFIER:%s", tok.data.identifier);
break;
case TOK_KEYWORD:
- printf("TOK_KEYWORD:%s, ", KEYWORD_NAMES[tok.data.keyword]);
+ printf("TOK_KEYWORD:%s", KEYWORD_NAMES[tok.data.keyword]);
break;
case TOK_CONSTANT:
switch (tok.data.constant_kind) {
case CONST_INT:
- printf("TOK_CONSTANT:CONST_INT:%i, ", tok.data.constant_int_value);
+ printf("TOK_CONSTANT:CONST_INT:%i", tok.data.constant_int_value);
break;
case CONST_STR:
- printf("TOK_CONSTANT:CONST_STR:%s, ", tok.data.constant_str_value);
+ printf("TOK_CONSTANT:CONST_STR:%s", tok.data.constant_str_value);
break;
case CONST_CHAR:
- printf("TOK_CONSTANT:CONST_CHAR:%c, ", tok.data.constant_char_value);
+ printf("TOK_CONSTANT:CONST_CHAR:%c", tok.data.constant_char_value);
break;
}
break;
case TOK_OPERATOR:
- printf("TOK_OPERATOR:%s, ", OPERATOR_NAMES[tok.data.operator] );
+ printf("TOK_OPERATOR:%s", OPERATOR_NAMES[tok.data.operator] );
break;
case TOK_SEPERATOR:
- printf("TOK_SEPERATOR:%s, ", SEPERATOR_NAMES[tok.data.seperator]);
+ printf("TOK_SEPERATOR:%s", SEPERATOR_NAMES[tok.data.seperator]);
+ break;
+ case TOK_ERROR:
+ printf("TOK_ERROR:%s", tok.data.error_message);
break;
case TOK_END:
+ printf("TOK_END");
break;
}
printf("\n");
@@ -939,6 +945,102 @@ void debug_node(struct node *node, int k) {
}
#endif
+/*
+ * Fixed-size lookahead window over a token stream.
+ * token_buffer_next() hands out tokens[0] and refills the last slot from
+ * `iter`; token_buffer_peek() gives access to any slot without consuming it.
+ */
+struct token_buffer {
+ /* Upcoming tokens in stream order; index 0 is the next one to be consumed. */
+ struct token tokens[4];
+ /* Iterator the buffer pulls further tokens from (stored as a pointer, not copied). */
+ struct token_iter *iter;
+};
+
+/*
+ * Create a lookahead buffer over `iter` and pre-fill it with upcoming tokens.
+ *
+ * Tokens are pulled from `iter` until every slot is filled or a TOK_END token
+ * is read. If the input ends early, the remaining slots are set to copies of
+ * that TOK_END token, so no slot of the returned buffer is left
+ * uninitialised.
+ *
+ * The buffer stores the `iter` pointer itself; the iterator must stay valid
+ * for as long as the buffer is used.
+ */
+struct token_buffer token_buffer_new(struct token_iter *iter) {
+  struct token_buffer buf;
+  int capacity = (int)(sizeof buf.tokens / sizeof buf.tokens[0]);
+  int filled = 0;
+
+  while (filled < capacity) {
+    buf.tokens[filled] = token_iter_next(iter);
+    if (buf.tokens[filled].kind == TOK_END) {
+      /* Pad the tail with the end marker instead of leaving it indeterminate. */
+      for (int i = filled + 1; i < capacity; i++)
+        buf.tokens[i] = buf.tokens[filled];
+      break;
+    }
+    filled++;
+  }
+  buf.iter = iter;
+  return buf;
+}
+
+/*
+ * Pop the front token of the buffer.
+ * The remaining tokens move one slot towards the front and the last slot is
+ * refilled with the next token from the underlying iterator.
+ * Returns the token that was at the front before the call.
+ */
+struct token token_buffer_next(struct token_buffer *buf) {
+  struct token front = buf->tokens[0];
+  struct token *slot = buf->tokens;
+  struct token *last = &buf->tokens[3];
+
+  while (slot < last) {
+    slot[0] = slot[1];
+    slot++;
+  }
+  *last = token_iter_next(buf->iter);
+  return front;
+}
+
+/*
+ * Return a pointer to the buffered token `off` positions ahead without
+ * consuming anything (off == 0 is the next token to be consumed).
+ *
+ * `off` must lie inside the buffer; any other value, including a negative
+ * one, prints "peek too far" to stderr and terminates the process with exit
+ * status 3.
+ */
+struct token *token_buffer_peek(struct token_buffer *buf, int off) {
+  int capacity = (int)(sizeof buf->tokens / sizeof buf->tokens[0]);
+
+  /* The lower bound matters too: a negative offset would index before the array. */
+  if (off >= 0 && off < capacity)
+    return &buf->tokens[off];
+  fprintf(stderr, "peek too far\n");
+  exit(3);
+}
+
+/*
+ * Look at the front of the buffer and report how many tokens a type name
+ * occupies there, without consuming them.
+ *
+ * Returns 1 for a lone identifier, 2 for `struct`/`enum`/`union` followed by
+ * an identifier, and -1 when neither shape is present.
+ */
+int peek_type_size(struct token_buffer *buf) {
+  struct token *first = token_buffer_peek(buf, 0);
+
+  if (first->kind == TOK_IDENTIFIER)
+    return 1;
+  if (first->kind != TOK_KEYWORD)
+    return -1;
+
+  switch (first->data.keyword) {
+  case KW_STRUCT:
+  case KW_ENUM:
+  case KW_UNION:
+    break;
+  default:
+    /* Any other keyword does not start a type here. */
+    return -1;
+  }
+
+  /* A tag keyword only counts when an identifier follows it. */
+  return token_buffer_peek(buf, 1)->kind == TOK_IDENTIFIER ? 2 : -1;
+}
+
+/*
+ * Classify the top-level construct at the front of the buffer by peeking at
+ * the two tokens that follow the leading type, and print the guess
+ * ("type", "function" or "global") to stdout. Nothing is consumed and
+ * nothing is printed when no pattern matches.
+ */
+void toplevel(struct token_buffer *buf) {
+  int type_len = peek_type_size(buf);
+  if (type_len < 0)
+    return;
+
+  struct token *first = token_buffer_peek(buf, type_len);
+  struct token *second = token_buffer_peek(buf, type_len + 1);
+
+  /* `<type> {` : likely a type definition */
+  if (first->kind == TOK_SEPERATOR && first->data.seperator == SEP_LCURLY) {
+    printf("type\n");
+    return;
+  }
+
+  /* Both remaining patterns need a name right after the type. */
+  if (first->kind != TOK_IDENTIFIER)
+    return;
+
+  if (second->kind == TOK_SEPERATOR && second->data.seperator == SEP_LPAREN) {
+    /* `<type> name (` : likely a function */
+    printf("function\n");
+  } else if (second->kind == TOK_OPERATOR &&
+             second->data.operator == OP_ASSIGN) {
+    /* `<type> name =` : likely a global variable */
+    printf("global\n");
+  }
+}
+
+/*
+ * Advance `*a` to the next token: the current token is released with
+ * token_free() and replaced by the next one read from `iter`.
+ * Returns 1 when the new token is TOK_END or TOK_ERROR, 0 otherwise.
+ */
+int token_step(struct token_iter *iter, struct token *a) {
+  token_free(*a);
+  *a = token_iter_next(iter);
+
+  switch (a->kind) {
+  case TOK_END:
+  case TOK_ERROR:
+    return 1;
+  default:
+    return 0;
+  }
+}
+
+/*
+ * Experimental top-level parser working directly on the token iterator.
+ *
+ * Reads one token; if it is the `enum` keyword, reads the following token
+ * and, when that is an identifier, copies its name and steps past it.
+ * Any other keyword prints "unhandled keyword: " followed by the token and
+ * exits with status 1. Non-keyword input is ignored.
+ */
+void toplevel2(struct token_iter *iter) {
+  struct token a = token_iter_next(iter);
+
+  if (a.kind == TOK_KEYWORD) {
+    switch (a.data.keyword) {
+    case KW_ENUM:
+      a = token_iter_next(iter);
+      if (a.kind == TOK_IDENTIFIER) {
+        /* Copy the name first: token_step() frees the token that holds it. */
+        char *type_name = strdup(a.data.identifier);
+        int stop = token_step(iter, &a);
+        /* The name is not consumed by anything yet; release it so neither
+           the early return nor the fall-through leaks the copy. */
+        free(type_name);
+        if (stop)
+          return;
+      }
+      break;
+    default:
+      printf("unhandled keyword: ");
+      token_print(a);
+      exit(1);
+    }
+  }
+  /* NOTE(review): the token still held in `a` is not passed to token_free()
+     on the paths that reach this point; confirm whether that is intended. */
+}
+
#ifdef TEST
char test_clean = 1;
void assert(char cond, char *label) {
@@ -954,16 +1056,16 @@ void assert_eq(int a, int b, char *label) {
}
}
void test() {
+ // Print struct sizes for reference, then run the self-checks below.
+ // sizeof yields a size_t, so the matching conversion is %zu.
+ printf("sizeof(struct token) = %zu\n", sizeof(struct token));
+ printf("sizeof(struct token_iter) = %zu\n", sizeof(struct token_iter));
+ printf("sizeof(struct token_buffer) = %zu\n", sizeof(struct token_buffer));
assert_eq(strlen("sizeof"), 6, "strlen 1");
assert_eq(strlen(""), 0, "strlen 2");
assert_eq(strlen("a"), 1, "strlen 3");
assert(is_alpha('a'), "alpha 1");
assert(is_alpha('z'), "alpha 2");
assert(is_alpha('k'), "alpha 3");
- assert(!!tokenize("enum"), "tok 1");
- assert(!!tokenize("int x = 0"), "tok 2");
- // assert(!!tokenize("\"Hello\""), "tok 3");
- // assert(!!tokenize("'\n'"), "tok 4");
+ // TODO test tokenize
}
#endif
@@ -991,10 +1093,6 @@ int main(int argc, char **argv) {
return 1;
}
-#ifdef DEBUG
- printf("\n=== READ INPUT ===\n");
-#endif
-
int source_len = 0;
char *source = NULL;
int size;
@@ -1015,20 +1113,24 @@ int main(int argc, char **argv) {
struct linemap linemap;
struct token_iter iter;
+ // struct token_buffer buf;
struct token tok;
linemap = linemap_new(source);
iter = token_iter_new(source);
+ // buf = token_buffer_new(&iter);
+ // toplevel(&buf);
+ // toplevel2(&iter);
while (1) {
tok = token_iter_next(&iter);
- if (tok.kind == TOK_END)
- break;
-
#ifdef DEBUG
token_print(tok);
#endif
+ if (tok.kind == TOK_END)
+ break;
+
token_free(tok);
};
diff --git a/makefile b/makefile
index 0364204..dae0c7d 100644
--- a/makefile
+++ b/makefile
@@ -1,5 +1,5 @@
.PHONY: all clean test
-ALL = attocc attocc-small attocc-debug
+ALL = attocc attocc-small attocc-debug attocc-test
all: $(ALL)
clean:
rm $(ALL)