diff options
author | metamuffin <metamuffin@disroot.org> | 2024-06-15 00:58:02 +0200 |
---|---|---|
committer | metamuffin <metamuffin@disroot.org> | 2024-06-15 00:58:02 +0200 |
commit | b73e0a0a8cf4ad38defd789c137fe74b5dd1d496 (patch) | |
tree | 5490b75f8bb07dc0a63fac833d3da955527ccadb /attocc.c | |
parent | b45a8709d263a87f55fedbc62fac1468320fe035 (diff) | |
download | attocc-b73e0a0a8cf4ad38defd789c137fe74b5dd1d496.tar attocc-b73e0a0a8cf4ad38defd789c137fe74b5dd1d496.tar.bz2 attocc-b73e0a0a8cf4ad38defd789c137fe74b5dd1d496.tar.zst |
lex operators and idents
Diffstat (limited to 'attocc.c')
-rw-r--r-- | attocc.c | 134 |
1 file changed, 122 insertions, 12 deletions
@@ -1,3 +1,20 @@ +/* + attocc - A minimal C compiler. + Copyright (C) 2024 metamuffin + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. +*/ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -15,6 +32,14 @@ static char *KEYWORDS[] = { static char *SEPERATORS = "()[]{};,:"; +const int NUM_OPERATORS = 36; +static char *OPERATORS[] = { + "sizeof", "<<", ">>", "+=", "-=", "*=", "/=", "%=", "<<=", + ">>=", "&=", "|=", "^=", "++", "--", "==", "!=", "<=", + ">=", "||", "&&", "+", "-", "*", "/", "%", "<", + ">", "~", "&", "|", "^", "!", "&", "*", "=", +}; + enum keyword { kW_AUTO, KW_BREAK, @@ -51,11 +76,42 @@ enum keyword { }; enum operator{ - OP_ADD, + OP_SIZEOF, // len=6 + OP_SHIFT_LEFT, // len=2 + OP_SHIFT_RIGHT, + OP_ADD_ASSIGN, + OP_SUB_ASSIGN, + OP_MUL_ASSIGN, + OP_DIV_ASSIGN, + OP_MOD_ASSIGN, + OP_SHIFT_LEFT_ASSIGN, + OP_SHIFT_RIGHT_ASSIGN, + OP_BITWISE_AND_ASSIGN, + OP_BITWISE_OR_ASSIGN, + OP_BITWISE_XOR_ASSIGN, + OP_INCREMENT, + OP_DECREMENT, + OP_EQUAL, + OP_NOT_EQUAL, + OP_LESS_EQUAL, + OP_GREATER_EQUAL, + OP_LOGICAL_OR, + OP_LOGICAL_AND, + OP_ADD, // len=1 OP_SUB, OP_MUL, OP_DIV, OP_MOD, + OP_LESS, + OP_GREATER, + OP_BITWISE_NOT, + OP_BITWISE_AND, + OP_BITWISE_OR, + OP_BITWISE_XOR, + OP_LOGICAL_NOT, + OP_POINTER_REF, + OP_POINTER_DEREF, + OP_ASSIGN, }; enum seperator { @@ -93,13 +149,19 @@ struct token { union token_data data; }; +char is_numeric(char c) { 
return (c >= '0' && c <= '9'); } +char is_alpha(char c) { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); +} +char is_alphanumeric(char c) { return is_alpha(c) || is_numeric(c); } + struct token *tokenize(char *source) { char *end = source + strlen(source); char *p = source; - int num_tokens = 0; + unsigned long num_tokens = 0; struct token *tokens = NULL; while (*p) { - int remaining = end - p; + unsigned long remaining = end - p; // printf("tok %c\n", p[0]); //* whitespace @@ -124,32 +186,53 @@ struct token *tokenize(char *source) { } //* operators + char match = 0; + for (int i = 0; i < NUM_OPERATORS; i++) { + char *op = OPERATORS[i]; + if (remaining >= strlen(op)) { + if (strncmp(op, p, strlen(op)) == 0) { + num_tokens += 1; + tokens = realloc(tokens, sizeof(struct token) * num_tokens); + if (!tokens) { + fprintf(stderr, "realloc failed\n"); + return NULL; + } + struct token *new_token = &tokens[num_tokens - 1]; + new_token->kind = TOK_OPERATOR; + new_token->data.operator= i; + + p += strlen(op); + match = 1; + break; + } + } + } + if (match) { + continue; + } //* comments if (remaining >= 2) { if (p[0] == '/' && p[1] == '/') { p += 2; - char c; - while ((c = *p++) && c != '\n') + for (char c; (c = *p++) && c != '\n';) ; continue; } else if (p[0] == '/' && p[1] == '*') { p += 2; - char c, d; - while ((d = c, c = *p++) && c == '/' && d == '*') + for (char c, d = '\0'; (d = c, c = *p++) && c == '/' && d == '*';) ; continue; } else if (p[0] == '#') { p += 1; - char c; - while ((c = *p++) && c != '\n') + for (char c; (c = *p++) && c != '\n';) ; continue; } } //* keyword - char match = 0; + match = 0; for (int i = 0; i < NUM_KEYWORDS; i++) { char *kw = KEYWORDS[i]; if (remaining >= strlen(kw)) { @@ -175,6 +258,34 @@ struct token *tokenize(char *source) { continue; } + if (is_alpha(p[0])) { + char *ident_start = p; + p++; + for (char c; (c = *p++) && is_alphanumeric(c);) + ; + int ident_len = p - ident_start - 1; + char *ident_str = malloc(ident_len + 1); 
+ if (!ident_str) { + fprintf(stderr, "malloc failed\n"); + return NULL; + } + for (int i = 0; i < ident_len; i++) + ident_str[i] = p[i]; + ident_str[ident_len] = '\0'; + + num_tokens += 1; + tokens = realloc(tokens, sizeof(struct token) * num_tokens); + if (!tokens) { + fprintf(stderr, "realloc failed\n"); + return NULL; + } + struct token *new_token = &tokens[num_tokens - 1]; + new_token->kind = TOK_IDENTIFIER; + new_token->data.identifier = ident_str; + + continue; + } + fprintf(stderr, "unknown token at %li\n", p - source); printf("%s", p); return NULL; @@ -255,9 +366,8 @@ int main(int argc, char **argv) { return 1; } - for (int i = 0; i < size; i++) { + for (int i = 0; i < size; i++) source[source_len - size + i] = buffer[i]; - } } source[source_len] = '\0'; |