summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--attocc.c134
1 files changed, 122 insertions, 12 deletions
diff --git a/attocc.c b/attocc.c
index d390900..74ae7e0 100644
--- a/attocc.c
+++ b/attocc.c
@@ -1,3 +1,20 @@
+/*
+ attocc - A minimal C compiler.
+ Copyright (C) 2024 metamuffin
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as
+ published by the Free Software Foundation, either version 3 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -15,6 +32,14 @@ static char *KEYWORDS[] = {
static char *SEPERATORS = "()[]{};,:"; // separator characters; NOTE(review): how this string is scanned is not visible in this hunk
+const int NUM_OPERATORS = 36; // number of entries in OPERATORS (4 rows of 9)
+static char *OPERATORS[] = { // operator spellings; the tokenizer stores the matching index as the token's operator id, so order must mirror enum operator
+ "sizeof", "<<", ">>", "+=", "-=", "*=", "/=", "%=", "<<=", // NOTE(review): "<<" (index 1) precedes "<<=" (index 8); a scan that stops at the first match in index order never selects "<<="
+ ">>=", "&=", "|=", "^=", "++", "--", "==", "!=", "<=", // NOTE(review): likewise ">>" (index 2) precedes ">>=" (index 9)
+ ">=", "||", "&&", "+", "-", "*", "/", "%", "<", // "/" is also the first character of both comment openers
+ ">", "~", "&", "|", "^", "!", "&", "*", "=", // the second "&" and "*" fill the OP_POINTER_REF / OP_POINTER_DEREF slots and spell the same as OP_BITWISE_AND / OP_MUL
+};
+
enum keyword {
kW_AUTO,
KW_BREAK,
@@ -51,11 +76,42 @@ enum keyword {
};
enum operator{ // operator ids; each value is the index of its spelling in the OPERATORS table
- OP_ADD,
+ OP_SIZEOF, // len=6 ("sizeof")
+ OP_SHIFT_LEFT, // len=2 ("<<")
+ OP_SHIFT_RIGHT, // ">>"
+ OP_ADD_ASSIGN, // "+="
+ OP_SUB_ASSIGN, // "-="
+ OP_MUL_ASSIGN, // "*="
+ OP_DIV_ASSIGN, // "/="
+ OP_MOD_ASSIGN, // "%="
+ OP_SHIFT_LEFT_ASSIGN, // len=3 ("<<="), even though it sits inside the len=2 group
+ OP_SHIFT_RIGHT_ASSIGN, // len=3 (">>=")
+ OP_BITWISE_AND_ASSIGN, // len=2 again from here ("&=")
+ OP_BITWISE_OR_ASSIGN, // "|="
+ OP_BITWISE_XOR_ASSIGN, // "^="
+ OP_INCREMENT, // "++"
+ OP_DECREMENT, // "--"
+ OP_EQUAL, // "=="
+ OP_NOT_EQUAL, // "!="
+ OP_LESS_EQUAL, // "<="
+ OP_GREATER_EQUAL, // ">="
+ OP_LOGICAL_OR, // "||"
+ OP_LOGICAL_AND, // "&&"
+ OP_ADD, // len=1 ("+")
OP_SUB, // "-"
OP_MUL, // "*"
OP_DIV, // "/"
OP_MOD, // "%"
+ OP_LESS, // "<"
+ OP_GREATER, // ">"
+ OP_BITWISE_NOT, // "~"
+ OP_BITWISE_AND, // "&"
+ OP_BITWISE_OR, // "|"
+ OP_BITWISE_XOR, // "^"
+ OP_LOGICAL_NOT, // "!"
+ OP_POINTER_REF, // "&" again: same spelling as OP_BITWISE_AND, so text alone cannot tell them apart
+ OP_POINTER_DEREF, // "*" again: same spelling as OP_MUL
+ OP_ASSIGN, // "="
};
enum seperator {
@@ -93,13 +149,19 @@ struct token {
union token_data data;
};
+// Character classification helpers for the tokenizer; each returns 1 or 0.
+// is_numeric: true for the decimal digits '0'..'9'.
+char is_numeric(char c) {
+ if (c < '0')
+ return 0;
+ return c <= '9';
+}
+// is_alpha: true for the letters 'a'..'z' and 'A'..'Z' only ('_' is not included).
+char is_alpha(char c) {
+ char lower = ('a' <= c && c <= 'z');
+ char upper = ('A' <= c && c <= 'Z');
+ return lower || upper;
+}
+// is_alphanumeric: true when c is a letter or a decimal digit.
+char is_alphanumeric(char c) {
+ if (is_alpha(c))
+ return 1;
+ return is_numeric(c);
+}
+
struct token *tokenize(char *source) {
char *end = source + strlen(source);
char *p = source;
- int num_tokens = 0;
+ unsigned long num_tokens = 0;
struct token *tokens = NULL;
while (*p) {
- int remaining = end - p;
+ unsigned long remaining = end - p;
// printf("tok %c\n", p[0]);
//* whitespace
@@ -124,32 +186,53 @@ struct token *tokenize(char *source) {
}
//* operators
+ char match = 0;
+ for (int i = 0; i < NUM_OPERATORS; i++) {
+ char *op = OPERATORS[i];
+ if (remaining >= strlen(op)) {
+ if (strncmp(op, p, strlen(op)) == 0) {
+ num_tokens += 1;
+ tokens = realloc(tokens, sizeof(struct token) * num_tokens);
+ if (!tokens) {
+ fprintf(stderr, "realloc failed\n");
+ return NULL;
+ }
+ struct token *new_token = &tokens[num_tokens - 1];
+ new_token->kind = TOK_OPERATOR;
+ new_token->data.operator= i;
+
+ p += strlen(op);
+ match = 1;
+ break;
+ }
+ }
+ }
+ if (match) {
+ continue;
+ }
//* comments
if (remaining >= 2) {
if (p[0] == '/' && p[1] == '/') {
p += 2;
- char c;
- while ((c = *p++) && c != '\n')
+ for (; *p && *p != '\n'; p++) // line comment: stop on '\n' or NUL without stepping past it
;
+ p += (*p == '\n'); // consume the newline that ended the comment, if there is one
continue;
} else if (p[0] == '/' && p[1] == '*') {
p += 2;
- char c, d;
- while ((d = c, c = *p++) && c == '/' && d == '*')
+ for (; *p && !(p[0] == '*' && p[1] == '/'); p++) // block comment: advance until p sits on the closing "*/"
;
+ if (*p) // found "*/": skip it; an unterminated comment leaves p on the NUL
+ p += 2;
continue;
} else if (p[0] == '#') {
p += 1;
- char c;
- while ((c = *p++) && c != '\n')
+ for (; *p && *p != '\n'; p++) // '#' lines are skipped like line comments
;
+ p += (*p == '\n');
continue;
}
}
//* keyword
- char match = 0;
+ match = 0;
for (int i = 0; i < NUM_KEYWORDS; i++) {
char *kw = KEYWORDS[i];
if (remaining >= strlen(kw)) {
@@ -175,6 +258,34 @@ struct token *tokenize(char *source) {
continue;
}
+ //* identifier: a letter followed by any run of letters and digits
+ if (is_alpha(p[0])) {
+ char *ident_start = p;
+ p++;
+ // Stop ON the first character that is not part of the identifier (or
+ // on the NUL terminator) so the main loop still gets to tokenize it.
+ while (is_alphanumeric(*p))
+ p++;
+ int ident_len = p - ident_start;
+ char *ident_str = malloc(ident_len + 1);
+ if (!ident_str) {
+ fprintf(stderr, "malloc failed\n");
+ free(tokens);
+ return NULL;
+ }
+ // Copy from the start of the identifier; p already points past it.
+ for (int i = 0; i < ident_len; i++)
+ ident_str[i] = ident_start[i];
+ ident_str[ident_len] = '\0';
+
+ // Grow through a temporary so nothing is lost if realloc fails.
+ struct token *grown = realloc(tokens, sizeof(struct token) * (num_tokens + 1));
+ if (!grown) {
+ fprintf(stderr, "realloc failed\n");
+ free(ident_str);
+ free(tokens);
+ return NULL;
+ }
+ tokens = grown;
+ num_tokens += 1;
+ struct token *new_token = &tokens[num_tokens - 1];
+ new_token->kind = TOK_IDENTIFIER;
+ new_token->data.identifier = ident_str; // the token owns this heap string
+
+ continue;
+ }
+
+
fprintf(stderr, "unknown token at %li\n", p - source);
printf("%s", p);
return NULL;
@@ -255,9 +366,8 @@ int main(int argc, char **argv) {
return 1;
}
- for (int i = 0; i < size; i++) {
+ for (int i = 0; i < size; i++)
source[source_len - size + i] = buffer[i];
- }
}
source[source_len] = '\0';