summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author metamuffin <metamuffin@disroot.org> 2024-06-15 01:22:11 +0200
committer metamuffin <metamuffin@disroot.org> 2024-06-15 01:22:11 +0200
commit fdedfa4b7e08263ce00e5c0b42b98ab249d5582b (patch)
tree 2999cec0cb50ae883562e909b0b8526d2609f446
parent b73e0a0a8cf4ad38defd789c137fe74b5dd1d496 (diff)
download attocc-fdedfa4b7e08263ce00e5c0b42b98ab249d5582b.tar
attocc-fdedfa4b7e08263ce00e5c0b42b98ab249d5582b.tar.bz2
attocc-fdedfa4b7e08263ce00e5c0b42b98ab249d5582b.tar.zst
properly parse seps
-rw-r--r-- attocc.c 165
1 files changed, 129 insertions, 36 deletions
diff --git a/attocc.c b/attocc.c
index 74ae7e0..57c934a 100644
--- a/attocc.c
+++ b/attocc.c
@@ -39,9 +39,60 @@ static char *OPERATORS[] = {
">=", "||", "&&", "+", "-", "*", "/", "%", "<",
">", "~", "&", "|", "^", "!", "&", "*", "=",
};
// Printable names for operator tokens. debug_tokens indexes this table with
// the operator value stored in a token, so the entry order matters; it
// appears to mirror the order of the OPERATORS spelling table (keep the two
// in sync when adding an operator).
// The table is read-only: both the pointers and the strings are const.
static const char *const OPERATOR_NAMES[] = {
  // sizeof and shifts
  "OP_SIZEOF", "OP_SHIFT_LEFT", "OP_SHIFT_RIGHT",
  // compound assignment
  "OP_ADD_ASSIGN", "OP_SUB_ASSIGN", "OP_MUL_ASSIGN", "OP_DIV_ASSIGN",
  "OP_MOD_ASSIGN", "OP_SHIFT_LEFT_ASSIGN", "OP_SHIFT_RIGHT_ASSIGN",
  "OP_BITWISE_AND_ASSIGN", "OP_BITWISE_OR_ASSIGN", "OP_BITWISE_XOR_ASSIGN",
  // increment / decrement
  "OP_INCREMENT", "OP_DECREMENT",
  // two-character comparison and logical operators
  "OP_EQUAL", "OP_NOT_EQUAL", "OP_LESS_EQUAL", "OP_GREATER_EQUAL",
  "OP_LOGICAL_OR", "OP_LOGICAL_AND",
  // single-character arithmetic and comparison
  "OP_ADD", "OP_SUB", "OP_MUL", "OP_DIV", "OP_MOD", "OP_LESS", "OP_GREATER",
  // single-character bitwise and logical
  "OP_BITWISE_NOT", "OP_BITWISE_AND", "OP_BITWISE_OR", "OP_BITWISE_XOR",
  "OP_LOGICAL_NOT",
  // pointer operators and plain assignment
  "OP_POINTER_REF", "OP_POINTER_DEREF", "OP_ASSIGN",
};
// Printable names for keyword tokens. debug_tokens indexes this table with
// the keyword value stored in a token, so entries must stay in the same
// order as the members of enum keyword (KW_AUTO first).
// The table is read-only: both the pointers and the strings are const.
static const char *const KEYWORD_NAMES[] = {
  "KW_AUTO",     "KW_BREAK",    "KW_CASE",     "KW_CHAR",
  "KW_CONST",    "KW_CONTINUE", "KW_DEFAULT",  "KW_DO",
  "KW_DOUBLE",   "KW_ELSE",     "KW_ENUM",     "KW_EXTERN",
  "KW_FLOAT",    "KW_FOR",      "KW_GOTO",     "KW_IF",
  "KW_INT",      "KW_LONG",     "KW_REGISTER", "KW_RETURN",
  "KW_SHORT",    "KW_SIGNED",   "KW_SIZEOF",   "KW_STATIC",
  "KW_STRUCT",   "KW_SWITCH",   "KW_TYPEDEF",  "KW_UNION",
  "KW_UNSIGNED", "KW_VOID",     "KW_VOLATILE", "KW_WHILE",
};
// Printable names for separator tokens. debug_tokens indexes this table with
// the separator index stored in a token, so the order must match the
// SEPERATORS character table.
// NOTE(review): the last entry was spelled "SEP_COLOR"; corrected to
// "SEP_COLON" on the assumption that it names ':' -- confirm against
// enum seperator.
// The table is read-only: both the pointers and the strings are const.
static const char *const SEPERATOR_NAMES[] = {
  "SEP_LPAREN",    "SEP_RPAREN",
  "SEP_LSQUARE",   "SEP_RSQUARE",
  "SEP_LCURLY",    "SEP_RCURLY",
  "SEP_SEMICOLON", "SEP_COMMA",
  "SEP_DOT",       "SEP_COLON",
};
enum keyword {
- kW_AUTO,
+ KW_AUTO,
KW_BREAK,
KW_CASE,
KW_CHAR,
@@ -130,7 +181,7 @@ enum seperator {
enum token_kind {
TOK_IDENTIFIER,
TOK_KEYWORD,
- TOK_CONSTANT,
+ TOK_INT_CONSTANT,
TOK_SEPERATOR,
TOK_OPERATOR,
TOK_END,
@@ -149,7 +200,12 @@ struct token {
union token_data data;
};
// Returns 1 when c is an ASCII decimal digit ('0'..'9'), otherwise 0.
char is_numeric(char c) {
  if (c < '0')
    return 0;
  return c <= '9';
}
// Returns 1 when c is an octal digit ('0'..'7'), otherwise 0.
char is_octal(char c) {
  if (c < '0')
    return 0;
  return c <= '7';
}
// Returns 1 when c is a hexadecimal digit (0-9, A-F or a-f), otherwise 0.
char is_hexadecial(char c) {
  if (c >= '0' && c <= '9')
    return 1;
  if (c >= 'A' && c <= 'F')
    return 1;
  if (c >= 'a' && c <= 'f')
    return 1;
  return 0;
}
// Returns 1 when c is an ASCII letter (A-Z or a-z), otherwise 0.
char is_alpha(char c) {
  if (c >= 'a' && c <= 'z')
    return 1;
  if (c >= 'A' && c <= 'Z')
    return 1;
  return 0;
}
@@ -162,7 +218,6 @@ struct token *tokenize(char *source) {
struct token *tokens = NULL;
while (*p) {
unsigned long remaining = end - p;
- // printf("tok %c\n", p[0]);
//* whitespace
if (p[0] == ' ' || p[0] == '\t' || p[0] == '\n') {
@@ -170,9 +225,31 @@ struct token *tokenize(char *source) {
continue;
}
+ //* comments
+ if (remaining >= 2) {
+ if (p[0] == '/' && p[1] == '/') {
+ p += 2;
+ for (char c; (c = *p++) && c != '\n';)
+ ;
+ continue;
+ } else if (p[0] == '/' && p[1] == '*') {
+ p += 2;
+ for (char c, d = '\0'; (d = c, c = *p++) && c == '/' && d == '*';)
+ ;
+ continue;
+ } else if (p[0] == '#') {
+ p += 1;
+ for (char c; (c = *p++) && c != '\n';)
+ ;
+ continue;
+ }
+ }
+
//* seperators
+ char match = 0;
for (int i = 0; SEPERATORS[i]; i++) {
if (p[0] == SEPERATORS[i]) {
+ p++;
num_tokens += 1;
tokens = realloc(tokens, sizeof(struct token) * num_tokens);
if (!tokens) {
@@ -182,11 +259,15 @@ struct token *tokenize(char *source) {
struct token *new_token = &tokens[num_tokens - 1];
new_token->kind = TOK_SEPERATOR;
new_token->data.seperator = i;
+ match = 1;
+ break;
}
}
+ if (match)
+ continue;
//* operators
- char match = 0;
+ match = 0;
for (int i = 0; i < NUM_OPERATORS; i++) {
char *op = OPERATORS[i];
if (remaining >= strlen(op)) {
@@ -207,29 +288,8 @@ struct token *tokenize(char *source) {
}
}
}
- if (match) {
+ if (match)
continue;
- }
-
- //* comments
- if (remaining >= 2) {
- if (p[0] == '/' && p[1] == '/') {
- p += 2;
- for (char c; (c = *p++) && c != '\n';)
- ;
- continue;
- } else if (p[0] == '/' && p[1] == '*') {
- p += 2;
- for (char c, d = '\0'; (d = c, c = *p++) && c == '/' && d == '*';)
- ;
- continue;
- } else if (p[0] == '#') {
- p += 1;
- for (char c; (c = *p++) && c != '\n';)
- ;
- continue;
- }
- }
//* keyword
match = 0;
@@ -254,7 +314,40 @@ struct token *tokenize(char *source) {
}
}
}
- if (match) {
+ if (match)
+ continue;
+
+ if (is_numeric(p[0])) {
+ int value = 0;
+ if (remaining >= 2 && p[1] == 'x') {
+ p += 2;
+ for (char c; (c = *p++) && is_hexadecial(c);) {
+ value *= 0x10;
+ value += c <= '9' ? c - '0' : 10 + (c <= 'F' ? c - 'A' : c - 'a');
+ }
+ } else if (p[0] == '0') {
+ p += 1;
+ for (char c; (c = *p++) && is_octal(c);) {
+ value *= 010;
+ value += c - '0';
+ }
+ } else {
+ for (char c; (c = *p++) && is_numeric(c);) {
+ value *= 10;
+ value += c - '0';
+ }
+ }
+ p--;
+
+ num_tokens += 1;
+ tokens = realloc(tokens, sizeof(struct token) * num_tokens);
+ if (!tokens) {
+ fprintf(stderr, "realloc failed\n");
+ return NULL;
+ }
+ struct token *new_token = &tokens[num_tokens - 1];
+ new_token->kind = TOK_INT_CONSTANT;
+ new_token->data.constant_value = value;
continue;
}
@@ -263,6 +356,7 @@ struct token *tokenize(char *source) {
p++;
for (char c; (c = *p++) && is_alphanumeric(c);)
;
+ p--;
int ident_len = p - ident_start - 1;
char *ident_str = malloc(ident_len + 1);
if (!ident_str) {
@@ -270,7 +364,7 @@ struct token *tokenize(char *source) {
return NULL;
}
for (int i = 0; i < ident_len; i++)
- ident_str[i] = p[i];
+ ident_str[i] = ident_start[i];
ident_str[ident_len] = '\0';
num_tokens += 1;
@@ -287,7 +381,6 @@ struct token *tokenize(char *source) {
}
fprintf(stderr, "unknown token at %li\n", p - source);
- printf("%s", p);
return NULL;
}
@@ -307,19 +400,19 @@ void debug_tokens(struct token *tokens) {
for (int i = 0; tokens[i].kind != TOK_END; i++) {
switch (tokens[i].kind) {
case TOK_IDENTIFIER:
- printf("TOK_IDENTIFIER, ");
+ printf("TOK_IDENTIFIER:%s, ", tokens[i].data.identifier);
break;
case TOK_KEYWORD:
- printf("TOK_KEYWORD, ");
+ printf("TOK_KEYWORD:%s, ", KEYWORD_NAMES[tokens[i].data.keyword]);
break;
- case TOK_CONSTANT:
- printf("TOK_CONSTANT, ");
+ case TOK_INT_CONSTANT:
+ printf("TOK_CONSTANT:%i, ", tokens[i].data.constant_value);
break;
case TOK_OPERATOR:
- printf("TOK_OPERATOR, ");
+ printf("TOK_OPERATOR:%s, ", OPERATOR_NAMES[tokens[i].data.operator] );
break;
case TOK_SEPERATOR:
- printf("TOK_SEPERATOR, ");
+ printf("TOK_SEPERATOR:%s, ", SEPERATOR_NAMES[tokens[i].data.seperator]);
break;
case TOK_END:
break;