From 218021b512f7bb71f553966cd5447f71a49002f9 Mon Sep 17 00:00:00 2001
From: metamuffin <metamuffin@disroot.org>
Date: Wed, 9 Apr 2025 12:41:51 +0200
Subject: token buffer parser

---
 attocc.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
 makefile |   2 +-
 2 files changed, 121 insertions(+), 19 deletions(-)

diff --git a/attocc.c b/attocc.c
index 785c5d1..4c901c6 100644
--- a/attocc.c
+++ b/attocc.c
@@ -16,8 +16,10 @@
     along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
 #include <fcntl.h>
+#include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <sys/cdefs.h>
 #include <unistd.h>
 
 #ifdef LINT
@@ -587,31 +589,35 @@ void token_print(struct token tok) {
   printf("%4lu ", tok.position);
   switch (tok.kind) {
   case TOK_IDENTIFIER:
-    printf("TOK_IDENTIFIER:%s, ", tok.data.identifier);
+    printf("TOK_IDENTIFIER:%s", tok.data.identifier);
     break;
   case TOK_KEYWORD:
-    printf("TOK_KEYWORD:%s, ", KEYWORD_NAMES[tok.data.keyword]);
+    printf("TOK_KEYWORD:%s", KEYWORD_NAMES[tok.data.keyword]);
     break;
   case TOK_CONSTANT:
     switch (tok.data.constant_kind) {
     case CONST_INT:
-      printf("TOK_CONSTANT:CONST_INT:%i, ", tok.data.constant_int_value);
+      printf("TOK_CONSTANT:CONST_INT:%i", tok.data.constant_int_value);
       break;
     case CONST_STR:
-      printf("TOK_CONSTANT:CONST_STR:%s, ", tok.data.constant_str_value);
+      printf("TOK_CONSTANT:CONST_STR:%s", tok.data.constant_str_value);
       break;
     case CONST_CHAR:
-      printf("TOK_CONSTANT:CONST_CHAR:%c, ", tok.data.constant_char_value);
+      printf("TOK_CONSTANT:CONST_CHAR:%c", tok.data.constant_char_value);
       break;
     }
     break;
   case TOK_OPERATOR:
-      printf("TOK_OPERATOR:%s, ", OPERATOR_NAMES[tok.data.operator] );
+      printf("TOK_OPERATOR:%s", OPERATOR_NAMES[tok.data.operator] );
       break;
   case TOK_SEPERATOR:
-    printf("TOK_SEPERATOR:%s, ", SEPERATOR_NAMES[tok.data.seperator]);
+    printf("TOK_SEPERATOR:%s", SEPERATOR_NAMES[tok.data.seperator]);
+    break;
+  case TOK_ERROR:
+    printf("TOK_ERROR:%s", tok.data.error_message);
     break;
   case TOK_END:
+    printf("TOK_END");
     break;
   }
   printf("\n");
@@ -939,6 +945,102 @@ void debug_node(struct node *node, int k) {
 }
 #endif
 
+struct token_buffer { // fixed four-token lookahead window over a token_iter
+  struct token tokens[4]; // tokens[0] is the token token_buffer_next returns
+  struct token_iter *iter; // iterator the window is filled and refilled from
+};
+
+// Fills the window from iter; a TOK_END is repeated into the unused slots.
+struct token_buffer token_buffer_new(struct token_iter *iter) {
+  struct token_buffer buf;
+  buf.iter = iter;
+  for (int i = 0; i < 4; i++) {
+    int ended = i > 0 && buf.tokens[i - 1].kind == TOK_END; // no read past end
+    buf.tokens[i] = ended ? buf.tokens[i - 1] : token_iter_next(iter);
+  }
+  return buf;
+}
+
+struct token token_buffer_next(struct token_buffer *buf) { // pop front token
+  struct token *slot = buf->tokens;
+  struct token front = *slot;
+  for (; slot < buf->tokens + 3; slot++) slot[0] = slot[1]; // shift left
+  *slot = token_iter_next(buf->iter); // refill the freed last slot
+  return front;
+}
+
+// Pointer to the token `off` slots ahead (0 = next out); exit(3) if outside.
+struct token *token_buffer_peek(struct token_buffer *buf, int off) {
+  if (off >= 0 && off < 4) return &buf->tokens[off]; // reject negatives too
+  fprintf(stderr, "peek too far\n");
+  exit(3);
+}
+
+// Number of leading tokens that look like a type: 2 for struct/enum/union
+// followed by an identifier, 1 for a lone identifier, -1 for anything else.
+int peek_type_size(struct token_buffer *buf) {
+  struct token *head = token_buffer_peek(buf, 0);
+  if (head->kind == TOK_IDENTIFIER)
+    return 1;
+  if (head->kind != TOK_KEYWORD)
+    return -1;
+  switch (head->data.keyword) {
+  case KW_STRUCT:
+  case KW_ENUM:
+  case KW_UNION:
+    return token_buffer_peek(buf, 1)->kind == TOK_IDENTIFIER ? 2 : -1;
+  default:
+    return -1;
+  }
+}
+
+// Prints a guess ("type", "function", "global") for the next top-level item.
+void toplevel(struct token_buffer *buf) {
+  int type_len = peek_type_size(buf);
+  if (type_len < 0)
+    return;
+  struct token *first = token_buffer_peek(buf, type_len);
+  struct token *second = token_buffer_peek(buf, type_len + 1);
+  if (first->kind == TOK_SEPERATOR) {
+    if (first->data.seperator == SEP_LCURLY)
+      printf("type\n"); // likely type definition
+    return;
+  }
+  if (first->kind != TOK_IDENTIFIER)
+    return;
+  if (second->kind == TOK_SEPERATOR && second->data.seperator == SEP_LPAREN)
+    printf("function\n"); // likely function
+  else if (second->kind == TOK_OPERATOR && second->data.operator == OP_ASSIGN)
+    printf("global\n"); // likely global variable
+}
+
+int token_step(struct token_iter *iter, struct token *a) { // nonzero = stop
+  token_free(*a); // release the token that is about to be overwritten
+  *a = token_iter_next(iter);
+  return !(a->kind != TOK_END && a->kind != TOK_ERROR);
+}
+
+// Work-in-progress declaration parser: reads one token and, for `enum`,
+// steps over the identifier that may follow; any other keyword exits(1).
+void toplevel2(struct token_iter *iter) {
+  struct token a = token_iter_next(iter);
+  if (a.kind == TOK_KEYWORD) {
+    switch (a.data.keyword) {
+    case KW_ENUM:
+      a = token_iter_next(iter);
+      // The tag name is not stored yet, so no heap copy of it is made here;
+      // token_step frees the identifier token before fetching the next one.
+      if (a.kind == TOK_IDENTIFIER && token_step(iter, &a))
+        return;
+      break;
+    default:
+      printf("unhandled keyword: ");
+      token_print(a);
+      exit(1);
+    }
+  }
+}
+
 #ifdef TEST
 char test_clean = 1;
 void assert(char cond, char *label) {
@@ -954,16 +1056,16 @@ void assert_eq(int a, int b, char *label) {
   }
 }
 void test() {
+  printf("sizeof(struct token) = %zu\n", sizeof(struct token)); // size_t: %zu
+  printf("sizeof(struct token_iter) = %zu\n", sizeof(struct token_iter));
+  printf("sizeof(struct token_buffer) = %zu\n", sizeof(struct token_buffer));
   assert_eq(strlen("sizeof"), 6, "strlen 1");
   assert_eq(strlen(""), 0, "strlen 2");
   assert_eq(strlen("a"), 1, "strlen 3");
   assert(is_alpha('a'), "alpha 1");
   assert(is_alpha('z'), "alpha 2");
   assert(is_alpha('k'), "alpha 3");
-  assert(!!tokenize("enum"), "tok 1");
-  assert(!!tokenize("int x = 0"), "tok 2");
-  // assert(!!tokenize("\"Hello\""), "tok 3");
-  // assert(!!tokenize("'\n'"), "tok 4");
+  // TODO test tokenize
 }
 #endif
 
@@ -991,10 +1093,6 @@ int main(int argc, char **argv) {
     return 1;
   }
 
-#ifdef DEBUG
-  printf("\n=== READ INPUT ===\n");
-#endif
-
   int source_len = 0;
   char *source = NULL;
   int size;
@@ -1015,20 +1113,24 @@ int main(int argc, char **argv) {
 
   struct linemap linemap;
   struct token_iter iter;
+  // struct token_buffer buf;
   struct token tok;
 
   linemap = linemap_new(source);
   iter = token_iter_new(source);
+  // buf = token_buffer_new(&iter);
+  // toplevel(&buf);
+  // toplevel2(&iter);
 
   while (1) {
     tok = token_iter_next(&iter);
-    if (tok.kind == TOK_END)
-      break;
-
 #ifdef DEBUG
     token_print(tok);
 #endif
 
+    if (tok.kind == TOK_END)
+      break;
+
     token_free(tok);
   };
 
diff --git a/makefile b/makefile
index 0364204..dae0c7d 100644
--- a/makefile
+++ b/makefile
@@ -1,5 +1,5 @@
 .PHONY: all clean test
-ALL = attocc attocc-small attocc-debug
+ALL = attocc attocc-small attocc-debug attocc-test
 all: $(ALL)
 clean:
 	rm $(ALL)
-- 
cgit v1.2.3-70-g09d2