/* attocc - A minimal C compiler. Copyright (C) 2024 metamuffin This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ #include #include #include #include #include #include #ifdef LINT #define TEST #define DEBUG #endif #define TRAP kill(getpid(), SIGTRAP); const int NUM_KEYWORDS = 32; static char *KEYWORDS[] = { "auto", "break", "case", "char", "const", "continue", "default", "do", "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", "register", "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while"}; static char *SEPERATORS = "()[]{};,:"; const int NUM_OPERATORS = 36; static char *OPERATORS[] = { "sizeof", "<<", ">>", "+=", "-=", "*=", "/=", "%=", "<<=", ">>=", "&=", "|=", "^=", "++", "--", "==", "!=", "<=", ">=", "||", "&&", "+", "-", "*", "/", "%", "<", ">", "~", "&", "|", "^", "!", "=", ".", "?", }; #ifdef DEBUG static char *OPERATOR_NAMES[] = { "OP_SIZEOF", "OP_SHIFT_LEFT", "OP_SHIFT_RIGHT", "OP_ADD_ASSIGN", "OP_SUB_ASSIGN", "OP_MUL_ASSIGN", "OP_DIV_ASSIGN", "OP_MOD_ASSIGN", "OP_SHIFT_LEFT_ASSIGN", "OP_SHIFT_RIGHT_ASSIGN", "OP_BITWISE_AND_ASSIGN", "OP_BITWISE_OR_ASSIGN", "OP_BITWISE_XOR_ASSIGN", "OP_INCREMENT", "OP_DECREMENT", "OP_EQUAL", "OP_NOT_EQUAL", "OP_LESS_EQUAL", "OP_GREATER_EQUAL", "OP_LOGICAL_OR", "OP_LOGICAL_AND", "OP_ADD", "OP_SUB", "OP_MUL_OR_POINTER", "OP_DIV", "OP_MOD", "OP_LESS", "OP_GREATER", "OP_BITWISE_NOT", "OP_BITWISE_AND_OR_REF", "OP_BITWISE_OR", "OP_BITWISE_XOR", "OP_LOGICAL_NOT", "OP_ASSIGN", "OP_MEMBER_ACCESS", "OP_TERNARY", }; static char *KEYWORD_NAMES[] = { "KW_AUTO", "KW_BREAK", "KW_CASE", "KW_CHAR", "KW_CONST", "KW_CONTINUE", "KW_DEFAULT", "KW_DO", "KW_DOUBLE", "KW_ELSE", "KW_ENUM", "KW_EXTERN", "KW_FLOAT", "KW_FOR", "KW_GOTO", "KW_IF", "KW_INT", "KW_LONG", "KW_REGISTER", "KW_RETURN", "KW_SHORT", "KW_SIGNED", "KW_SIZEOF", "KW_STATIC", "KW_STRUCT", "KW_SWITCH", "KW_TYPEDEF", "KW_UNION", "KW_UNSIGNED", "KW_VOID", "KW_VOLATILE", "KW_WHILE", }; static char *SEPERATOR_NAMES[] = { "SEP_LPAREN", "SEP_RPAREN", "SEP_LSQUARE", "SEP_RSQUARE", "SEP_LCURLY", "SEP_RCURLY", "SEP_SEMICOLON", "SEP_COMMA", "SEP_DOT", "SEP_COLOR", }; #endif enum keyword { KW_AUTO, KW_BREAK, KW_CASE, KW_CHAR, KW_CONST, KW_CONTINUE, KW_DEFAULT, KW_DO, KW_DOUBLE, KW_ELSE, KW_ENUM, KW_EXTERN, KW_FLOAT, KW_FOR, KW_GOTO, KW_IF, KW_INT, KW_LONG, KW_REGISTER, KW_RETURN, KW_SHORT, KW_SIGNED, KW_SIZEOF, KW_STATIC, KW_STRUCT, KW_SWITCH, KW_TYPEDEF, KW_UNION, KW_UNSIGNED, KW_VOID, KW_VOLATILE, KW_WHILE, }; enum operator{ OP_SIZEOF, // len=6 OP_SHIFT_LEFT, // len=2 OP_SHIFT_RIGHT, OP_ADD_ASSIGN, OP_SUB_ASSIGN, OP_MUL_ASSIGN, OP_DIV_ASSIGN, OP_MOD_ASSIGN, OP_SHIFT_LEFT_ASSIGN, OP_SHIFT_RIGHT_ASSIGN, OP_BITWISE_AND_ASSIGN, OP_BITWISE_OR_ASSIGN, OP_BITWISE_XOR_ASSIGN, OP_INCREMENT, OP_DECREMENT, OP_EQUAL, OP_NOT_EQUAL, OP_LESS_EQUAL, OP_GREATER_EQUAL, OP_LOGICAL_OR, OP_LOGICAL_AND, OP_ADD, // len=1 OP_SUB, OP_MUL_OR_POINTER, OP_DIV, OP_MOD, OP_LESS, OP_GREATER, OP_BITWISE_NOT, OP_BITWISE_AND_OR_REF, OP_BITWISE_OR, OP_BITWISE_XOR, OP_LOGICAL_NOT, OP_ASSIGN, OP_MEMBER_ACCESS, OP_TERNARY, }; enum seperator { SEP_LPAREN, SEP_RPAREN, SEP_LSQUARE, SEP_RSQUARE, SEP_LCURLY, SEP_RCURLY, SEP_SEMICOLON, SEP_COMMA, SEP_DOT, SEP_COLOR, }; enum token_kind { TOK_END, TOK_IDENTIFIER, TOK_KEYWORD, TOK_CONSTANT, TOK_SEPERATOR, TOK_OPERATOR, TOK_ERROR, }; enum constant_kind { CONST_INT, CONST_STR, CONST_CHAR, }; union token_data { char *identifier; enum keyword keyword; struct { enum constant_kind constant_kind; union { int constant_int_value; char constant_char_value; char *constant_str_value; }; }; enum seperator seperator; enum operator operator; char *error_message; }; struct token { enum token_kind kind; union token_data data; unsigned long position; }; char is_numeric(char c) { return c >= '0' && c <= '9'; } char is_octal(char c) { return c >= '0' && c <= '7'; } char is_hexadecial(char c) { return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); } char is_alpha(char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); } char is_alphanumeric(char c) { return is_alpha(c) || is_numeric(c); } char is_ident(char c) { return is_alphanumeric(c) || c == '_'; } char map_escape(char c) { switch (c) { case 'n': return '\n'; case 't': return '\t'; case 'b': return '\b'; case 'r': return '\r'; case '\'': return '\''; case '"': return '"'; case '0': return '\0'; default: return '\0'; } } void *realloc_failsafe(void *old, unsigned long size) { void *p = realloc(old, size); if (!p) { fprintf(stderr, "malloc failed for size %lu\n", size); exit(2); } return p; } void *memcpy(void *dest, const void *src, unsigned long size) { char *dest_ = (char *)dest; char *src_ = (char *)src; while (size--) *dest_++ = *src_++; return dest; } // not to specification but this program only needs to check equality int strncmp(const char *a, const char *b, unsigned long len) { while (len--) { if (*a != *b) return *a - *b; a++; b++; } return 0; } unsigned long strlen(const char *s) { int len = 0; while (*s++) len++; return len; } char *strdup(const char *s) { int len = strlen(s); char *new_s = realloc(NULL, len + 1); if (!new_s) return NULL; memcpy(new_s, s, len + 1); return new_s; } char *strdup_failsafe(char *s) { char *p = strdup(s); if (!p) { fprintf(stderr, "strdup failed\n"); exit(2); } return p; } struct error { char *message; unsigned long position; }; struct linemap { unsigned long *lines; }; struct linemap linemap_new(char *source) { struct linemap lm; unsigned long i = 0; lm.lines = realloc_failsafe(NULL, sizeof(unsigned long)); int num_lines = 1; lm.lines[0] = 0; while (source[i]) { if (source[i] == '\n') { num_lines += 1; lm.lines = realloc_failsafe(lm.lines, num_lines * sizeof(unsigned long)); lm.lines[num_lines - 1] = i; } i += 1; } lm.lines = realloc_failsafe(lm.lines, (num_lines + 1) * sizeof(unsigned long)); lm.lines[num_lines] = 0xffffffff; return lm; } int linemap_find(struct linemap *lm, unsigned long position) { int line = 0; while (lm->lines[line] < position) line += 1; return line; } void print_error(struct error error, char *filename, struct linemap *linemap) { int line = linemap_find(linemap, error.position); int column = error.position - linemap->lines[line]; if (!error.message) error.message = ""; printf("error: %s\n", error.message); printf("\tin %s:%i:%i\n", filename, line + 1, column + 1); } struct token *token_push(struct token **tokens, unsigned long *num_tokens) { *num_tokens += 1; *tokens = realloc_failsafe(*tokens, sizeof(struct token) * *num_tokens); return &(*tokens)[*num_tokens - 1]; } struct token_iter { char *end; char *start; char *p; }; struct token_iter token_iter_new(char *source) { struct token_iter iter; iter.end = source + strlen(source); iter.start = source; iter.p = source; return iter; } struct token t_error(char *message) { struct token t; t.kind = TOK_ERROR; t.data.error_message = message; return t; } struct token token_iter_next(struct token_iter *iter) { char *p = iter->p; unsigned long remaining = iter->end - p; unsigned long position = p - iter->start; struct token tok; tok.kind = TOK_END; while (*p) { tok.position = position; //* whitespace if (p[0] == ' ' || p[0] == '\t' || p[0] == '\n') { p++; continue; } //* comments if (remaining >= 2) { if (p[0] == '/' && p[1] == '/') { p += 2; for (char c; (c = *p++) && c != '\n';) ; continue; } else if (p[0] == '/' && p[1] == '*') { p += 2; for (char c = '\0', d; (d = c, c = *p++) && !(c == '/' && d == '*');) ; continue; } else if (p[0] == '#') { p += 1; for (char c; (c = *p++) && c != '\n';) ; continue; } } //* seperators char match = 0; for (int i = 0; SEPERATORS[i]; i++) { if (p[0] == SEPERATORS[i]) { p++; tok.kind = TOK_SEPERATOR; tok.data.seperator = i; match = 1; break; } } if (match) break; //* operators match = 0; for (int i = 0; i < NUM_OPERATORS; i++) { char *op = OPERATORS[i]; if (remaining >= strlen(op)) { if (strncmp(op, p, strlen(op)) == 0) { tok.kind = TOK_OPERATOR; tok.data.operator= i; p += strlen(op); match = 1; break; } } } if (match) break; //* keyword match = 0; for (int i = 0; i < NUM_KEYWORDS; i++) { char *kw = KEYWORDS[i]; if (remaining >= strlen(kw) + 1) { if (strncmp(kw, p, strlen(kw)) == 0 && !is_ident(p[strlen(kw)])) { tok.kind = TOK_KEYWORD; tok.data.keyword = i; p += strlen(kw); match = 1; break; } } } if (match) break; //* number constant if (is_numeric(p[0])) { int value = 0; if (remaining >= 2 && p[1] == 'x') { p += 2; for (char c; (c = *p++) && is_hexadecial(c);) { value *= 0x10; value += c <= '9' ? c - '0' : 10 + (c <= 'F' ? c - 'A' : c - 'a'); } } else if (p[0] == '0') { p += 1; for (char c; (c = *p++) && is_octal(c);) { value *= 010; value += c - '0'; } } else { for (char c; (c = *p++) && is_numeric(c);) { value *= 10; value += c - '0'; } } p--; tok.kind = TOK_CONSTANT; tok.data.constant_kind = CONST_INT; tok.data.constant_int_value = value; break; } //* string constant if (p[0] == '"') { p++; char *str = NULL; int str_len = 0; for (char c; (c = *p++) && c != '"';) { // TODO escape str_len++; str = realloc_failsafe(str, str_len); str[str_len - 1] = c; } // null termination str_len++; str = realloc_failsafe(str, str_len); str[str_len - 1] = '\0'; tok.kind = TOK_CONSTANT; tok.data.constant_kind = CONST_STR; tok.data.constant_str_value = str; break; } //* char constant if (p[0] == '\'') { if (!*p++) return t_error("eof"); char chr = p[0]; if (p[0] == '\\') { if (!*p++) return t_error("eof"); chr = map_escape(p[0]); } if (!*p++) return t_error("eof"); if (*p++ != '\'') return t_error("expected '\n"); tok.kind = TOK_CONSTANT; tok.data.constant_kind = CONST_CHAR; tok.data.constant_char_value = chr; break; } //* identifier if (is_ident(p[0]) && !is_numeric(p[0])) { char *ident_start = p; for (char c; (c = *p++) && is_ident(c);) ; p--; int ident_len = p - ident_start; char *ident_str = realloc_failsafe(NULL, ident_len + 1); for (int i = 0; i < ident_len; i++) ident_str[i] = ident_start[i]; ident_str[ident_len] = '\0'; tok.kind = TOK_IDENTIFIER; tok.data.identifier = ident_str; break; } return t_error("unknown token"); } iter->p = p; return tok; } void token_free(struct token tok) { switch (tok.kind) { case TOK_IDENTIFIER: free(tok.data.identifier); break; case TOK_CONSTANT: switch (tok.data.constant_kind) { case CONST_STR: free(tok.data.constant_str_value); break; default: break; } default: break; } } #ifdef DEBUG void token_print(struct token tok) { printf("%4lu ", tok.position); switch (tok.kind) { case TOK_IDENTIFIER: printf("TOK_IDENTIFIER:%s", tok.data.identifier); break; case TOK_KEYWORD: printf("TOK_KEYWORD:%s", KEYWORD_NAMES[tok.data.keyword]); break; case TOK_CONSTANT: switch (tok.data.constant_kind) { case CONST_INT: printf("TOK_CONSTANT:CONST_INT:%i", tok.data.constant_int_value); break; case CONST_STR: printf("TOK_CONSTANT:CONST_STR:%s", tok.data.constant_str_value); break; case CONST_CHAR: printf("TOK_CONSTANT:CONST_CHAR:%c", tok.data.constant_char_value); break; } break; case TOK_OPERATOR: printf("TOK_OPERATOR:%s", OPERATOR_NAMES[tok.data.operator] ); break; case TOK_SEPERATOR: printf("TOK_SEPERATOR:%s", SEPERATOR_NAMES[tok.data.seperator]); break; case TOK_ERROR: printf("TOK_ERROR:%s", tok.data.error_message); break; case TOK_END: printf("TOK_END"); break; } printf("\n"); } #endif struct node { enum node_kind { NO_BINOP, NO_UNOP, NO_MEMBER_ACCESS, NO_STRUCT, NO_UNION, NO_ENUM, NO_TYPE_PRIMITIVE, NO_TYPE_POINTER, NO_FUNCTION, NO_COMPOUND, NO_TR_UNIT, } kind; union node_data { struct node *type_pointer; enum node_primitive_type { PRIMTY_U64, PRIMTY_I32, PRIMTY_U8, } primitive_type; struct node_data_tr_unit { unsigned long num_elements; struct node **elements; } tr_unit; struct node_data_function { struct node *return_type; char *name; unsigned long num_arguments; struct function_argument { char *name; struct node *type; } *arguments; struct node *body; } function; struct node_data_compound { unsigned long num_statements; struct node **statements; } compount; struct node_data_binop { struct node *lhs; struct node *rhs; } binop; struct node_data_enum { char *name; char is_decl; unsigned long num_members; struct enum_member { int value; char *name; } *members; } enum_; } data; }; struct node *parse_enum_decl(int *p, struct token *tokens, struct error *error); struct node *parse_function(int *p, struct token *tokens, struct error *error); struct node *parse_type(int *p, struct token *tokens, struct error *error); struct node *parse_toplevel(int *p, struct token *tokens, struct error *error); struct node *parse(int *p, struct token *tokens, struct error *error) { int num_elements = 0; struct node **elements = NULL; while (tokens[*p].kind != TOK_END) { struct node *el = parse_toplevel(p, tokens, error); if (!el) return NULL; num_elements += 1; elements = realloc_failsafe(elements, num_elements * sizeof(struct node *)); elements[num_elements - 1] = el; } struct node *node = realloc_failsafe(NULL, sizeof(struct node)); node->kind = NO_TR_UNIT; node->data.tr_unit.elements = elements; node->data.tr_unit.num_elements = num_elements; return node; } struct node *parse_toplevel(int *p, struct token *tokens, struct error *error) { struct node *out; out = parse_type(p, tokens, error); if (out) return out; out = parse_function(p, tokens, error); if (out) return out; return NULL; } struct node *parse_type(int *p, struct token *tokens, struct error *error) { struct node *out; out = parse_enum_decl(p, tokens, error); if (out) return out; if (tokens[*p].kind == TOK_IDENTIFIER) { struct node *node = realloc_failsafe(NULL, sizeof(struct node)); node->kind = NO_TYPE_PRIMITIVE; node->data.primitive_type = PRIMTY_I32; } error->message = "type expected"; error->position = tokens[*p].position; return NULL; } struct node *parse_function(int *p, struct token *tokens, struct error *error) { struct node *return_type = parse_type(p, tokens, error); if (!return_type) return NULL; if (tokens[*p].kind != TOK_IDENTIFIER) return NULL; char *name = strdup_failsafe(tokens[*p].data.identifier); *p += 1; if (tokens[*p].kind != TOK_SEPERATOR || tokens[*p].data.seperator != SEP_LPAREN) return NULL; *p += 1; int num_arguments = 0; struct function_argument *arguments = NULL; while (1) { if (tokens[*p].kind == TOK_END) return NULL; if (tokens[*p].kind == TOK_SEPERATOR && tokens[*p].data.seperator == SEP_RPAREN) break; struct node *type = parse_type(p, tokens, error); if (tokens[*p].kind != TOK_IDENTIFIER) return NULL; char *name = strdup_failsafe(tokens->data.identifier); *p += 1; if (tokens[*p].kind == TOK_SEPERATOR && tokens[*p].data.seperator == SEP_COMMA) *p += 1; num_arguments += 1; arguments = realloc_failsafe( arguments, num_arguments * sizeof(struct function_argument)); arguments[num_arguments - 1].name = name; arguments[num_arguments - 1].type = type; } struct node *node = realloc_failsafe(NULL, sizeof(struct node)); node->kind = NO_FUNCTION; node->data.function.return_type = return_type; node->data.function.name = name; return node; } struct node *parse_enum_decl(int *p, struct token *tokens, struct error *error) { if (tokens[*p].kind != TOK_KEYWORD || tokens[*p].data.keyword != KW_ENUM) { error->message = "expected enum keyword"; error->position = tokens[*p].position; return NULL; } *p += 1; char *ident = NULL; if (tokens[*p].kind == TOK_IDENTIFIER) { ident = tokens[*p].data.identifier; *p += 1; } char is_decl = 0; struct enum_member *members = NULL; unsigned long num_members = 0; if (tokens[*p].kind == TOK_SEPERATOR && tokens[*p].data.seperator == SEP_LCURLY) { *p += 1; is_decl = 1; int value = 0; while (1) { if (tokens[*p].kind == TOK_SEPERATOR && tokens[*p].data.seperator == SEP_RCURLY) break; if (tokens[*p].kind == TOK_END) { error->message = "unexpected eof in enum decl"; error->position = tokens[*p].position; return NULL; } if (tokens[*p].kind == TOK_SEPERATOR && tokens[*p].data.seperator == SEP_COMMA) { *p += 1; continue; } if (tokens[*p].kind != TOK_IDENTIFIER) { error->message = "expected identifier in enum decl"; error->position = tokens[*p].position; return NULL; } char *m_ident = tokens[*p].data.identifier; if (tokens[*p].kind == TOK_OPERATOR && tokens[*p].data.operator== OP_ASSIGN) { *p += 1; if (tokens[*p].kind == TOK_END) { error->message = "unexpected eof in enum decl"; error->position = tokens[*p].position; return NULL; } if (tokens[*p].kind != TOK_CONSTANT || tokens[*p].data.constant_kind != CONST_INT) { error->message = "expected integer constant as enum value"; error->position = tokens[*p].position; return NULL; } value = tokens[*p].data.constant_int_value; *p += 1; } num_members++; members = realloc_failsafe(members, sizeof(struct enum_member) * num_members); struct enum_member *new_member = &members[num_members - 1]; new_member->value = value++; new_member->name = strdup_failsafe(m_ident); *p += 1; } } else if (tokens[*p].kind == TOK_SEPERATOR && tokens[*p].data.seperator == SEP_SEMICOLON) { } else { error->message = "expected semicolon or enum variants"; error->position = tokens[*p].position; return NULL; } struct node *node = realloc_failsafe(NULL, sizeof(struct node)); node->kind = NO_ENUM; node->data.enum_.is_decl = is_decl; node->data.enum_.name = strdup_failsafe(ident); node->data.enum_.num_members = num_members; node->data.enum_.members = members; return node; } #ifdef DEBUG void indent(int k) { for (int i = 0; i < k; i++) { printf(" "); } } void debug_node(struct node *node, int k) { switch (node->kind) { case NO_ENUM: indent(k); printf("NO_ENUM\n"); indent(k + 1); printf("name=%s\n", node->data.enum_.name); indent(k + 1); printf("is_decl=%i\n", node->data.enum_.is_decl); if (node->data.enum_.is_decl) { indent(k + 1); printf("num_members=%li\n", node->data.enum_.num_members); indent(k + 1); printf("members\n"); for (unsigned long i = 0; i < node->data.enum_.num_members; i++) { indent(k + 2); printf("%s=%i\n", node->data.enum_.members[i].name, node->data.enum_.members[i].value); } } break; case NO_BINOP: indent(k); printf("NO_BINOP\n"); indent(k + 1); printf("lhs: "); debug_node(node->data.binop.lhs, k + 2); printf("\n"); indent(k + 1); printf("rhs: "); debug_node(node->data.binop.rhs, k + 2); printf("\n"); break; case NO_MEMBER_ACCESS: printf("NO_MEMBER_ACCESS\n"); break; case NO_FUNCTION: printf("NO_FUNCTION\n"); break; case NO_STRUCT: printf("NO_STRUCT\n"); break; case NO_UNION: printf("NO_UNION\n"); break; case NO_UNOP: printf("NO_UNOP\n"); break; case NO_TR_UNIT: indent(k); printf("NO_TR_UNIT\n"); for (unsigned long i = 0; i < node->data.tr_unit.num_elements; i++) debug_node(node->data.tr_unit.elements[i], k + 1); default: break; } } #endif struct token_buffer { struct token tokens[4]; struct token_iter *iter; }; struct token_buffer token_buffer_new(struct token_iter *iter) { struct token_buffer buf; for (int i = 0; i < 4; i++) { buf.tokens[i] = token_iter_next(iter); if (buf.tokens[i].kind == TOK_END) break; } buf.iter = iter; return buf; } struct token token_buffer_next(struct token_buffer *buf) { struct token out = buf->tokens[0]; for (int i = 0; i < 3; i++) buf->tokens[i] = buf->tokens[i + 1]; buf->tokens[3] = token_iter_next(buf->iter); return out; } struct token *token_buffer_peek(struct token_buffer *buf, int off) { if (off < 4) return &buf->tokens[off]; fprintf(stderr, "peek too far\n"); exit(3); } int peek_type_size(struct token_buffer *buf) { struct token *a = token_buffer_peek(buf, 0); if (a->kind == TOK_KEYWORD && (a->data.keyword == KW_STRUCT || a->data.keyword == KW_ENUM || a->data.keyword == KW_UNION)) { struct token *b = token_buffer_peek(buf, 1); if (b->kind == TOK_IDENTIFIER) { return 2; } else { return -1; } } else if (a->kind == TOK_IDENTIFIER) { return 1; } else { return -1; } } void toplevel(struct token_buffer *buf) { int ts = peek_type_size(buf); if (ts >= 0) { struct token *a = token_buffer_peek(buf, ts); struct token *b = token_buffer_peek(buf, ts + 1); if (a->kind == TOK_SEPERATOR && a->data.seperator == SEP_LCURLY) { printf("type\n"); // likely type definition } else if (a->kind == TOK_IDENTIFIER && b->kind == TOK_SEPERATOR && b->data.seperator == SEP_LPAREN) { printf("function\n"); // likely function } else if (a->kind == TOK_IDENTIFIER && b->kind == TOK_OPERATOR && b->data.operator== OP_ASSIGN) { printf("global\n"); // likely global variable } } } int token_step(struct token_iter *iter, struct token *a) { token_free(*a); *a = token_iter_next(iter); return a->kind == TOK_END || a->kind == TOK_ERROR; } void toplevel2(struct token_iter *iter) { struct token a = token_iter_next(iter); if (a.kind == TOK_KEYWORD) { switch (a.data.keyword) { case KW_ENUM: a = token_iter_next(iter); if (a.kind == TOK_IDENTIFIER) { char *type_name = strdup(a.data.identifier); if (token_step(iter, &a)) return; } break; default: printf("unhandled keyword: "); token_print(a); exit(1); } } } #ifdef TEST char test_clean = 1; void assert(char cond, char *label) { if (!cond) { fprintf(stderr, "FAIL %s\n", label); test_clean = 0; } } void assert_eq(int a, int b, char *label) { if (a != b) { fprintf(stderr, "FAIL %s (%i == %i)\n", label, a, b); test_clean = 0; } } void test() { printf("sizeof(struct token) = %li\n", sizeof(struct token)); printf("sizeof(struct token_iter) = %li\n", sizeof(struct token_iter)); printf("sizeof(struct token_buffer) = %li\n", sizeof(struct token_buffer)); assert_eq(strlen("sizeof"), 6, "strlen 1"); assert_eq(strlen(""), 0, "strlen 2"); assert_eq(strlen("a"), 1, "strlen 3"); assert(is_alpha('a'), "alpha 1"); assert(is_alpha('z'), "alpha 2"); assert(is_alpha('k'), "alpha 3"); // TODO test tokenize } #endif int main(int argc, char **argv) { #ifdef TEST test(); return test_clean ? 0 : 1; #endif if (argc < 3) { fprintf(stderr, "USAGE:\n\tattocc \n"); return 1; } char *input = argv[1]; char *output = argv[2]; int input_fd = open(input, O_RDONLY | O_CLOEXEC); if (input_fd < 0) { perror("cannot open input"); return 1; } int output_fd = open(output, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, 0640); if (output_fd < 0) { perror("cannot open input"); return 1; } int source_len = 0; char *source = NULL; int size; char buffer[4096]; while ((size = read(input_fd, &buffer, 4096))) { if (size < 0) { perror("cannot read source"); return 1; } source_len += size; source = realloc_failsafe(source, source_len + 1); for (int i = 0; i < size; i++) source[source_len - size + i] = buffer[i]; } source[source_len] = '\0'; struct linemap linemap; struct token_iter iter; // struct token_buffer buf; struct token tok; linemap = linemap_new(source); iter = token_iter_new(source); // buf = token_buffer_new(&iter); // toplevel(&buf); // toplevel2(&iter); while (1) { tok = token_iter_next(&iter); #ifdef DEBUG token_print(tok); #endif if (tok.kind == TOK_END) break; token_free(tok); }; return 0; }