summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--attocc.c279
1 files changed, 132 insertions, 147 deletions
diff --git a/attocc.c b/attocc.c
index 86fbd3f..785c5d1 100644
--- a/attocc.c
+++ b/attocc.c
@@ -194,6 +194,7 @@ enum token_kind {
TOK_CONSTANT,
TOK_SEPERATOR,
TOK_OPERATOR,
+ TOK_ERROR,
};
enum constant_kind {
@@ -215,6 +216,7 @@ union token_data {
};
enum seperator seperator;
enum operator operator;
+ char *error_message;
};
struct token {
@@ -312,34 +314,40 @@ struct error {
unsigned long position;
};
-unsigned long *build_linemap(char *source) {
+struct linemap {
+ unsigned long *lines;
+};
+
+struct linemap linemap_new(char *source) {
+ struct linemap lm;
unsigned long i = 0;
- unsigned long *lines = realloc_failsafe(NULL, sizeof(unsigned long));
+ lm.lines = realloc_failsafe(NULL, sizeof(unsigned long));
int num_lines = 1;
- lines[0] = 0;
+ lm.lines[0] = 0;
while (source[i]) {
if (source[i] == '\n') {
num_lines += 1;
- lines = realloc_failsafe(lines, num_lines * sizeof(unsigned long));
- lines[num_lines - 1] = i;
+ lm.lines = realloc_failsafe(lm.lines, num_lines * sizeof(unsigned long));
+ lm.lines[num_lines - 1] = i;
}
i += 1;
}
- lines = realloc_failsafe(lines, (num_lines + 1) * sizeof(unsigned long));
- lines[num_lines] = 0xffffffff;
- return lines;
+ lm.lines =
+ realloc_failsafe(lm.lines, (num_lines + 1) * sizeof(unsigned long));
+ lm.lines[num_lines] = 0xffffffff;
+ return lm;
}
-int find_line(unsigned long *linemap, unsigned long position) {
+int linemap_find(struct linemap *lm, unsigned long position) {
int line = 0;
- while (linemap[line] < position)
+ while (lm->lines[line] < position)
line += 1;
return line;
}
-void print_error(struct error error, char *filename, unsigned long *linemap) {
- int line = find_line(linemap, error.position);
- int column = error.position - linemap[line];
+void print_error(struct error error, char *filename, struct linemap *linemap) {
+ int line = linemap_find(linemap, error.position);
+ int column = error.position - linemap->lines[line];
if (!error.message)
error.message = "<no message>";
printf("error: %s\n", error.message);
@@ -352,14 +360,37 @@ struct token *token_push(struct token **tokens, unsigned long *num_tokens) {
return &(*tokens)[*num_tokens - 1];
}
-struct token *tokenize(char *source) {
- char *end = source + strlen(source);
- char *p = source;
- unsigned long num_tokens = 0;
- struct token *tokens = NULL;
+struct token_iter {
+ char *end;
+ char *start;
+ char *p;
+};
+
+struct token_iter token_iter_new(char *source) {
+ struct token_iter iter;
+ iter.end = source + strlen(source);
+ iter.start = source;
+ iter.p = source;
+ return iter;
+}
+
+struct token t_error(char *message) {
+ struct token t;
+ t.kind = TOK_ERROR;
+ t.data.error_message = message;
+ return t;
+}
+
+struct token token_iter_next(struct token_iter *iter) {
+ char *p = iter->p;
+ unsigned long remaining = iter->end - p;
+ unsigned long position = p - iter->start;
+
+ struct token tok;
+ tok.kind = TOK_END;
+
while (*p) {
- unsigned long remaining = end - p;
- unsigned long position = p - source;
+ tok.position = position;
//* whitespace
if (p[0] == ' ' || p[0] == '\t' || p[0] == '\n') {
@@ -392,18 +423,14 @@ struct token *tokenize(char *source) {
for (int i = 0; SEPERATORS[i]; i++) {
if (p[0] == SEPERATORS[i]) {
p++;
- struct token *new_token = token_push(&tokens, &num_tokens);
- if (!new_token)
- return NULL;
- new_token->kind = TOK_SEPERATOR;
- new_token->data.seperator = i;
- new_token->position = position;
+ tok.kind = TOK_SEPERATOR;
+ tok.data.seperator = i;
match = 1;
break;
}
}
if (match)
- continue;
+ break;
//* operators
match = 0;
@@ -411,12 +438,8 @@ struct token *tokenize(char *source) {
char *op = OPERATORS[i];
if (remaining >= strlen(op)) {
if (strncmp(op, p, strlen(op)) == 0) {
- struct token *new_token = token_push(&tokens, &num_tokens);
- if (!new_token)
- return NULL;
- new_token->kind = TOK_OPERATOR;
- new_token->data.operator= i;
- new_token->position = position;
+ tok.kind = TOK_OPERATOR;
+ tok.data.operator= i;
p += strlen(op);
match = 1;
@@ -425,7 +448,7 @@ struct token *tokenize(char *source) {
}
}
if (match)
- continue;
+ break;
//* keyword
match = 0;
@@ -433,13 +456,9 @@ struct token *tokenize(char *source) {
char *kw = KEYWORDS[i];
if (remaining >= strlen(kw) + 1) {
if (strncmp(kw, p, strlen(kw)) == 0 && !is_ident(p[strlen(kw)])) {
- struct token *new_token = token_push(&tokens, &num_tokens);
- if (!new_token)
- return NULL;
- new_token->kind = TOK_KEYWORD;
- new_token->data.keyword = i;
- new_token->position = position;
+ tok.kind = TOK_KEYWORD;
+ tok.data.keyword = i;
p += strlen(kw);
match = 1;
break;
@@ -447,7 +466,7 @@ struct token *tokenize(char *source) {
}
}
if (match)
- continue;
+ break;
//* number constant
if (is_numeric(p[0])) {
@@ -472,14 +491,10 @@ struct token *tokenize(char *source) {
}
p--;
- struct token *new_token = token_push(&tokens, &num_tokens);
- if (!new_token)
- return NULL;
- new_token->kind = TOK_CONSTANT;
- new_token->data.constant_kind = CONST_INT;
- new_token->data.constant_int_value = value;
- new_token->position = position;
- continue;
+ tok.kind = TOK_CONSTANT;
+ tok.data.constant_kind = CONST_INT;
+ tok.data.constant_int_value = value;
+ break;
}
//* string constant
@@ -498,35 +513,31 @@ struct token *tokenize(char *source) {
str = realloc_failsafe(str, str_len);
str[str_len - 1] = '\0';
- struct token *new_token = token_push(&tokens, &num_tokens);
- new_token->kind = TOK_CONSTANT;
- new_token->data.constant_kind = CONST_STR;
- new_token->data.constant_str_value = str;
- new_token->position = position;
- continue;
+ tok.kind = TOK_CONSTANT;
+ tok.data.constant_kind = CONST_STR;
+ tok.data.constant_str_value = str;
+ break;
}
//* char constant
if (p[0] == '\'') {
if (!*p++)
- return NULL;
+ return t_error("eof");
char chr = p[0];
if (p[0] == '\\') {
if (!*p++)
- return NULL;
+ return t_error("eof");
chr = map_escape(p[0]);
}
if (!*p++)
- return NULL;
+ return t_error("eof");
if (*p++ != '\'')
- return fprintf(stderr, "expected '\n"), NULL;
+ return t_error("expected '\n");
- struct token *new_token = token_push(&tokens, &num_tokens);
- new_token->kind = TOK_CONSTANT;
- new_token->data.constant_kind = CONST_CHAR;
- new_token->data.constant_char_value = chr;
- new_token->position = position;
- continue;
+ tok.kind = TOK_CONSTANT;
+ tok.data.constant_kind = CONST_CHAR;
+ tok.data.constant_char_value = chr;
+ break;
}
//* identifier
@@ -541,83 +552,69 @@ struct token *tokenize(char *source) {
ident_str[i] = ident_start[i];
ident_str[ident_len] = '\0';
- struct token *new_token = token_push(&tokens, &num_tokens);
- new_token->kind = TOK_IDENTIFIER;
- new_token->data.identifier = ident_str;
- new_token->position = position;
- continue;
+ tok.kind = TOK_IDENTIFIER;
+ tok.data.identifier = ident_str;
+ break;
}
- fprintf(stderr, "unknown token at %li\n", p - source);
- return NULL;
+ return t_error("unknown token");
}
- struct token *new_token = token_push(&tokens, &num_tokens);
- new_token->kind = TOK_END;
- new_token->position = 0;
- return tokens;
+ iter->p = p;
+ return tok;
}
-void free_tokens(struct token *tokens) {
- for (int i = 0; tokens[i].kind != TOK_END; i++) {
- switch (tokens[i].kind) {
- case TOK_IDENTIFIER:
- free(tokens[i].data.identifier);
+void token_free(struct token tok) {
+ switch (tok.kind) {
+ case TOK_IDENTIFIER:
+ free(tok.data.identifier);
+ break;
+ case TOK_CONSTANT:
+ switch (tok.data.constant_kind) {
+ case CONST_STR:
+ free(tok.data.constant_str_value);
break;
- case TOK_CONSTANT:
- switch (tokens[i].data.constant_kind) {
- case CONST_STR:
- free(tokens[i].data.constant_str_value);
- break;
- default:
- break;
- }
default:
break;
}
+ default:
+ break;
}
- free(tokens);
}
#ifdef DEBUG
-void debug_tokens(struct token *tokens) {
- for (int i = 0; tokens[i].kind != TOK_END; i++) {
- printf("%4lu ", tokens[i].position);
- switch (tokens[i].kind) {
- case TOK_IDENTIFIER:
- printf("TOK_IDENTIFIER:%s, ", tokens[i].data.identifier);
- break;
- case TOK_KEYWORD:
- printf("TOK_KEYWORD:%s, ", KEYWORD_NAMES[tokens[i].data.keyword]);
- break;
- case TOK_CONSTANT:
- switch (tokens[i].data.constant_kind) {
- case CONST_INT:
- printf("TOK_CONSTANT:CONST_INT:%i, ",
- tokens[i].data.constant_int_value);
- break;
- case CONST_STR:
- printf("TOK_CONSTANT:CONST_STR:%s, ",
- tokens[i].data.constant_str_value);
- break;
- case CONST_CHAR:
- printf("TOK_CONSTANT:CONST_CHAR:%c, ",
- tokens[i].data.constant_char_value);
- break;
- }
- break;
- case TOK_OPERATOR:
- printf("TOK_OPERATOR:%s, ", OPERATOR_NAMES[tokens[i].data.operator] );
+void token_print(struct token tok) {
+ printf("%4lu ", tok.position);
+ switch (tok.kind) {
+ case TOK_IDENTIFIER:
+ printf("TOK_IDENTIFIER:%s, ", tok.data.identifier);
+ break;
+ case TOK_KEYWORD:
+ printf("TOK_KEYWORD:%s, ", KEYWORD_NAMES[tok.data.keyword]);
+ break;
+ case TOK_CONSTANT:
+ switch (tok.data.constant_kind) {
+ case CONST_INT:
+ printf("TOK_CONSTANT:CONST_INT:%i, ", tok.data.constant_int_value);
break;
- case TOK_SEPERATOR:
- printf("TOK_SEPERATOR:%s, ", SEPERATOR_NAMES[tokens[i].data.seperator]);
+ case CONST_STR:
+ printf("TOK_CONSTANT:CONST_STR:%s, ", tok.data.constant_str_value);
break;
- case TOK_END:
+ case CONST_CHAR:
+ printf("TOK_CONSTANT:CONST_CHAR:%c, ", tok.data.constant_char_value);
break;
}
- printf("\n");
+ break;
+ case TOK_OPERATOR:
+ printf("TOK_OPERATOR:%s, ", OPERATOR_NAMES[tok.data.operator] );
+ break;
+ case TOK_SEPERATOR:
+ printf("TOK_SEPERATOR:%s, ", SEPERATOR_NAMES[tok.data.seperator]);
+ break;
+ case TOK_END:
+ break;
}
- printf("TOK_END\n");
+ printf("\n");
}
#endif
@@ -1016,36 +1013,24 @@ int main(int argc, char **argv) {
}
source[source_len] = '\0';
-#ifdef DEBUG
- printf("%i bytes loaded\n", source_len);
- printf("\n=== TOKENIZE ===\n");
-#endif
+ struct linemap linemap;
+ struct token_iter iter;
+ struct token tok;
- struct token *tokens = tokenize(source);
- if (!tokens)
- return 1;
+ linemap = linemap_new(source);
+ iter = token_iter_new(source);
-#ifdef DEBUG
- debug_tokens(tokens);
- printf("\n=== PARSE ===\n");
-#endif
-
- int p = 0;
- struct error error;
- error.message = NULL;
- error.position = 0;
- struct node *node = parse(&p, tokens, &error);
- if (!node) {
- unsigned long *linemap = build_linemap(source);
- print_error(error, input, linemap);
- return 1;
- }
+ while (1) {
+ tok = token_iter_next(&iter);
+ if (tok.kind == TOK_END)
+ break;
#ifdef DEBUG
- debug_node(node, 0);
+ token_print(tok);
#endif
- free_tokens(tokens);
+ token_free(tok);
+ };
return 0;
}