summary refs log tree commit diff
diff options
context:
space:
mode:
author metamuffin <metamuffin@disroot.org> 2025-06-19 13:03:21 +0200
committer metamuffin <metamuffin@disroot.org> 2025-06-19 13:03:21 +0200
commit 2c7a36cd8edd17ac87de017a9cc9047ef557c564 (patch)
tree 7798d79f674ea758c2a4d7bd0c56547f50db6da2
parent d1f5732d0088855be1857bf49ff75e2ca0a6317f (diff)
download attocc-2c7a36cd8edd17ac87de017a9cc9047ef557c564.tar
attocc-2c7a36cd8edd17ac87de017a9cc9047ef557c564.tar.bz2
attocc-2c7a36cd8edd17ac87de017a9cc9047ef557c564.tar.zst
start reimplementing parser loops
-rw-r--r-- attocc.c 192
1 files changed, 144 insertions, 48 deletions
diff --git a/attocc.c b/attocc.c
index b233703..5a575fa 100644
--- a/attocc.c
+++ b/attocc.c
@@ -16,6 +16,7 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include <fcntl.h>
+#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/cdefs.h>
@@ -136,7 +137,7 @@ enum keyword {
KW_WHILE,
};
-enum operator{
+enum operator {
OP_SIZEOF, // len=6
OP_SHIFT_LEFT, // len=2
OP_SHIFT_RIGHT,
@@ -441,7 +442,7 @@ struct token token_iter_next(struct token_iter *iter) {
if (strncmp(op, p, strlen(op)) == 0) {
p += strlen(op);
tok.kind = TOK_OPERATOR;
- tok.data.operator= i;
+ tok.data.operator = i;
match = 1;
break;
}
@@ -604,8 +605,8 @@ void token_print(struct token tok) {
}
break;
case TOK_OPERATOR:
- printf("TOK_OPERATOR:%s", OPERATOR_NAMES[tok.data.operator] );
- break;
+ printf("TOK_OPERATOR:%s", OPERATOR_NAMES[tok.data.operator]);
+ break;
case TOK_SEPERATOR:
printf("TOK_SEPERATOR:%s", SEPERATOR_NAMES[tok.data.seperator]);
break;
@@ -644,6 +645,7 @@ enum primitive_type {
struct type {
enum type_kind kind; // category of the type; read_type sets TY_ENUM or TY_PRIMITIVE
int index; // TY_ENUM: position in gcx->enums; TY_PRIMITIVE: id of the primitive type
+ int pointer_level; // NOTE(review): presumably the pointer indirection depth; nothing visible here reads or writes it yet - confirm
};
struct constant {
@@ -659,12 +661,21 @@ struct global_context {
char **enums;
};
+// Look up a previously declared enum by its tag name.
+//
+// Returns the index of the matching entry in gcx->enums, or -1 when there is
+// no enum with that name. Anonymous enums are stored with a NULL name (see
+// enum_decl) and therefore never match; a NULL `name` also yields -1 instead
+// of being dereferenced.
+int gcx_find_enum(struct global_context *gcx, char *name) {
+  if (name == NULL)
+    return -1;
+  for (int i = 0; i < gcx->num_enums; i++) {
+    // Skip anonymous enums: their slot holds NULL, not a string.
+    if (gcx->enums[i] != NULL && strcmp(name, gcx->enums[i]) == 0)
+      return i;
+  }
+  return -1;
+}
+
int enum_decl(struct global_context *gcx, struct token_iter *iter,
- struct token *a, char *name) {
+ struct token *a, char *name, int *index) {
gcx->enums =
realloc_failsafe(gcx->enums, sizeof(char *) * (gcx->num_enums + 1));
int enum_index = gcx->num_enums;
+ *index = gcx->num_enums;
gcx->enums[enum_index] = name;
gcx->num_enums += 1;
@@ -676,7 +687,7 @@ int enum_decl(struct global_context *gcx, struct token_iter *iter,
if (token_step(iter, a))
return 1;
- if (a->kind == TOK_OPERATOR && a->data.operator== OP_ASSIGN) {
+ if (a->kind == TOK_OPERATOR && a->data.operator == OP_ASSIGN) {
if (token_step(iter, a))
return 1;
if (a->kind == TOK_CONSTANT && a->data.constant_kind == CONST_INT) {
@@ -720,6 +731,54 @@ int enum_decl(struct global_context *gcx, struct token_iter *iter,
}
}
+// Parse a type specifier beginning at the current token `a`.
+//
+// On success returns 0, fills ty->kind and ty->index, and leaves `a` on the
+// first token after the type. Returns 1 when `a` does not start a supported
+// type or when stepping the token iterator fails. ty->pointer_level is not
+// touched here.
+//
+// Supported forms:
+//   enum [name] { ... }   declares the enum via enum_decl
+//   enum name             refers to an already declared enum
+//   int
+// `struct` and `union` are not implemented yet: they hit TRAP and fail.
+int read_type(struct global_context *gcx, struct token_iter *iter,
+              struct token *a, struct type *ty) {
+  if (a->kind != TOK_KEYWORD)
+    return 1;
+  if (a->data.keyword == KW_STRUCT) {
+    TRAP;
+    return 1;
+  } else if (a->data.keyword == KW_UNION) {
+    TRAP;
+    return 1;
+  } else if (a->data.keyword == KW_ENUM) {
+    if (token_step(iter, a))
+      return 1;
+
+    ty->kind = TY_ENUM;
+
+    // Optional tag name; stays NULL for an anonymous enum.
+    char *name = NULL;
+    if (a->kind == TOK_IDENTIFIER) {
+      name = strdup_failsafe(a->data.identifier);
+      if (token_step(iter, a)) {
+        free(name);
+        return 1;
+      }
+    }
+    if (a->kind == TOK_SEPERATOR && a->data.seperator == SEP_LCURLY) {
+      if (token_step(iter, a)) {
+        free(name);
+        return 1;
+      }
+      // enum_decl stores `name` in gcx->enums, so it must not be freed here.
+      if (enum_decl(gcx, iter, a, name, &ty->index))
+        return 1;
+    } else {
+      // Reference to an existing enum: a tag name is mandatory.
+      if (name == NULL) {
+        fprintf(stderr, "expected enum name or body after 'enum'\n");
+        return 1;
+      }
+      ty->index = gcx_find_enum(gcx, name);
+      if (ty->index == -1) {
+        fprintf(stderr, "unknown enum %s\n", name);
+        free(name);
+        return 1;
+      }
+      // The name was only needed for the lookup.
+      free(name);
+      // No further token_step: the tag name was already consumed above, so
+      // `a` is already the first token after the type.
+    }
+    return 0;
+  } else if (a->data.keyword == KW_INT) {
+    ty->kind = TY_PRIMITIVE;
+    // NOTE(review): the previous toplevel code used PRIMTY_INT here; confirm
+    // that 0 is the same value.
+    ty->index = 0;
+    if (token_step(iter, a))
+      return 1;
+    return 0;
+  } else {
+    return 1;
+  }
+}
+
int function_decl(struct global_context *gcx, struct token_iter *iter,
struct token *a, char *fun_name) {
if (token_step(iter, a)) // lparen
@@ -760,6 +819,7 @@ void gcx_print(struct global_context *gcx) {
int toplevel(struct token_iter *iter) {
struct token a = token_iter_next(iter);
struct global_context gcx;
+ int ret;
char *name = NULL;
@@ -771,60 +831,96 @@ int toplevel(struct token_iter *iter) {
struct type type;
while (1) {
- if (a.kind == TOK_END) {
+ if (a.kind == TOK_END)
break;
- } else if (a.kind == TOK_KEYWORD) {
- switch (a.data.keyword) {
- case KW_ENUM:
+ ret = read_type(&gcx, iter, &a, &type);
+ if (!ret) {
+ // type read success, might be a variable or function or was typedef
+ if (a.kind == TOK_SEPERATOR && a.data.seperator == SEP_SEMICOLON) {
+ // was a type decl, continue
if (token_step(iter, &a))
return 1;
- if (a.kind == TOK_IDENTIFIER) {
- name = strdup_failsafe(a.data.identifier);
- if (token_step(iter, &a))
- return 1;
- }
- if (a.kind == TOK_SEPERATOR && a.data.seperator == SEP_LCURLY) {
- if (token_step(iter, &a))
- return 1;
- if (enum_decl(&gcx, iter, &a, name))
- return 1;
- if (a.kind != TOK_SEPERATOR && a.data.seperator != SEP_SEMICOLON) {
- fprintf(stderr, "expected semicolon after enum decl\n");
- return 1;
- }
- if (token_step(iter, &a))
- return 1;
- }
- break;
- case KW_INT:
- type.kind = TY_PRIMITIVE;
- type.index = PRIMTY_INT;
+ } else if (a.kind == TOK_IDENTIFIER) {
+ // either var or func
+ char *name = strdup_failsafe(a.data.identifier);
if (token_step(iter, &a))
return 1;
- break;
- default:
- fprintf(stderr, "unhandled keyword: ");
- token_print(a);
- return 1;
- }
- } else if (a.kind == TOK_IDENTIFIER) {
- name = strdup_failsafe(a.data.identifier);
- if (token_step(iter, &a))
- return 1;
- if (a.kind == TOK_OPERATOR && a.data.operator== OP_ASSIGN) {
- fprintf(stderr, "todo: handle global var\n");
- return 1;
- } else if (a.kind == TOK_SEPERATOR && a.data.seperator == SEP_LPAREN) {
- if (function_decl(&gcx, iter, &a, name))
+
+ if (a.kind == TOK_OPERATOR && a.data.operator == OP_ASSIGN) {
+ // variable / constant
+ TRAP
+ } else if (a.kind == TOK_SEPERATOR && a.data.seperator == SEP_LPAREN) {
+ // function decl
+ TRAP
+ } else {
+ fprintf(stderr, "unknown type-ident started toplevel statement\n");
return 1;
+ }
+ } else {
+ fprintf(stderr, "unknown type started toplevel statement\n");
+ return 1;
}
} else {
- fprintf(stderr, "undhandled toplevel token: ");
- token_print(a);
+ fprintf(stderr, "unknown toplevel statement\n");
return 1;
}
}
+ // while (1) {
+ // if (a.kind == TOK_END) {
+ // break;
+ // } else if (a.kind == TOK_KEYWORD) {
+ // switch (a.data.keyword) {
+ // case KW_ENUM:
+ // if (token_step(iter, &a))
+ // return 1;
+ // if (a.kind == TOK_IDENTIFIER) {
+ // name = strdup_failsafe(a.data.identifier);
+ // if (token_step(iter, &a))
+ // return 1;
+ // }
+ // if (a.kind == TOK_SEPERATOR && a.data.seperator == SEP_LCURLY) {
+ // if (token_step(iter, &a))
+ // return 1;
+ // if (enum_decl(&gcx, iter, &a, name))
+ // return 1;
+ // if (a.kind != TOK_SEPERATOR && a.data.seperator != SEP_SEMICOLON) {
+ // fprintf(stderr, "expected semicolon after enum decl\n");
+ // return 1;
+ // }
+ // if (token_step(iter, &a))
+ // return 1;
+ // }
+ // break;
+ // case KW_INT:
+ // type.kind = TY_PRIMITIVE;
+ // type.index = PRIMTY_INT;
+ // if (token_step(iter, &a))
+ // return 1;
+ // break;
+ // default:
+ // fprintf(stderr, "unhandled keyword: ");
+ // token_print(a);
+ // return 1;
+ // }
+ // } else if (a.kind == TOK_IDENTIFIER) {
+ // name = strdup_failsafe(a.data.identifier);
+ // if (token_step(iter, &a))
+ // return 1;
+ // if (a.kind == TOK_OPERATOR && a.data.operator == OP_ASSIGN) {
+ // fprintf(stderr, "todo: handle global var\n");
+ // return 1;
+ // } else if (a.kind == TOK_SEPERATOR && a.data.seperator == SEP_LPAREN) {
+ // if (function_decl(&gcx, iter, &a, name))
+ // return 1;
+ // }
+ // } else {
+ // fprintf(stderr, "unhandled toplevel token: ");
+ // token_print(a);
+ // return 1;
+ // }
+ // }
+
#ifdef DEBUG
gcx_print(&gcx);
#endif