diff options
Diffstat (limited to 'attocc.c')
-rw-r--r-- | attocc.c | 124 |
1 file changed, 110 insertions, 14 deletions
@@ -4,8 +4,8 @@
 #include <string.h>
 #include <unistd.h>
 
-#define NUM_KEYWORDS 32
-static char *KEYWORDS[NUM_KEYWORDS] = {
+const int NUM_KEYWORDS = 32;
+static char *KEYWORDS[] = {
     "auto", "break", "case", "char", "const", "continue",
     "default", "do", "double", "else", "enum", "extern",
     "float", "for", "goto", "if", "int", "long",
@@ -13,6 +13,8 @@ static char *KEYWORDS[NUM_KEYWORDS] = {
     "struct", "switch", "typedef", "union", "unsigned", "void",
     "volatile", "while"};
 
+static char *SEPERATORS = "()[]{};,:";
+
 enum keyword {
   kW_AUTO,
   KW_BREAK,
@@ -47,6 +49,7 @@ enum keyword {
   KW_VOLATILE,
   KW_WHILE,
 };
+
 enum operator{
   OP_ADD,
   OP_SUB,
@@ -54,19 +57,37 @@ enum operator{
   OP_DIV,
   OP_MOD,
 };
+
+enum seperator {
+  SEP_LPAREN,
+  SEP_RPAREN,
+  SEP_LSQUARE,
+  SEP_RSQUARE,
+  SEP_LCURLY,
+  SEP_RCURLY,
+  SEP_SEMICOLON,
+  SEP_COMMA,
+  SEP_DOT,
+  SEP_COLOR,
+};
+
 enum token_kind {
-  IDENTIFIER,
-  KEYWORD,
-  CONSTANT,
-  OPERATOR,
-  END,
+  TOK_IDENTIFIER,
+  TOK_KEYWORD,
+  TOK_CONSTANT,
+  TOK_SEPERATOR,
+  TOK_OPERATOR,
+  TOK_END,
 };
+
 union token_data {
   char *identifier;
   enum keyword keyword;
   int constant_value;
+  enum seperator seperator;
   enum operator operator;
 };
+
 struct token {
   enum token_kind kind;
   union token_data data;
@@ -76,9 +97,33 @@ struct token *tokenize(char *source) {
   char *end = source + strlen(source);
   char *p = source;
   int num_tokens = 0;
-  struct token *tokens;
+  struct token *tokens = NULL;
   while (*p) {
     int remaining = end - p;
+    // printf("tok %c\n", p[0]);
+
+    //* whitespace
+    if (p[0] == ' ' || p[0] == '\t' || p[0] == '\n') {
+      p++;
+      continue;
+    }
+
+    //* seperators
+    for (int i = 0; SEPERATORS[i]; i++) {
+      if (p[0] == SEPERATORS[i]) {
+        num_tokens += 1;
+        tokens = realloc(tokens, sizeof(struct token) * num_tokens);
+        if (!tokens) {
+          fprintf(stderr, "realloc failed\n");
+          return NULL;
+        }
+        struct token *new_token = &tokens[num_tokens - 1];
+        new_token->kind = TOK_SEPERATOR;
+        new_token->data.seperator = i;
+      }
+    }
+
+    //* operators
 
     //* comments
     if (remaining >= 2) {
@@ -94,37 +139,84 @@ struct token *tokenize(char *source) {
         while ((d = c, c = *p++) && c == '/' && d == '*')
           ;
         continue;
+      } else if (p[0] == '#') {
+        p += 1;
+        char c;
+        while ((c = *p++) && c != '\n')
+          ;
+        continue;
       }
     }
 
     //* keyword
+    char match = 0;
     for (int i = 0; i < NUM_KEYWORDS; i++) {
       char *kw = KEYWORDS[i];
       if (remaining >= strlen(kw)) {
-        if (strcmp(kw, p) == 0) {
+        if (strncmp(kw, p, strlen(kw)) == 0) {
           num_tokens += 1;
-          tokens = realloc(tokens, sizeof(struct token) * num_tokens);
           if (!tokens) {
             fprintf(stderr, "realloc failed\n");
             return NULL;
           }
           struct token *new_token = &tokens[num_tokens - 1];
-          new_token->kind = KEYWORD;
+          new_token->kind = TOK_KEYWORD;
           new_token->data.keyword = i;
+          // printf("kw match %i %li\n", i, strlen(kw));
           p += strlen(kw);
-          continue;
+          match = 1;
+          break;
         }
       }
     }
+    if (match) {
+      continue;
+    }
+
+    fprintf(stderr, "unknown token at %li\n", p - source);
+    printf("%s", p);
+    return NULL;
+  }
 
-    fprintf(stderr, "unknown token\n");
+  num_tokens += 1;
+  tokens = realloc(tokens, sizeof(struct token) * num_tokens);
+  if (!tokens) {
+    fprintf(stderr, "realloc failed\n");
     return NULL;
   }
+  struct token *new_token = &tokens[num_tokens - 1];
+  new_token->kind = TOK_END;
+  return tokens;
 }
 
+void debug_tokens(struct token *tokens) {
+  for (int i = 0; tokens[i].kind != TOK_END; i++) {
+    switch (tokens[i].kind) {
+    case TOK_IDENTIFIER:
+      printf("TOK_IDENTIFIER, ");
+      break;
+    case TOK_KEYWORD:
+      printf("TOK_KEYWORD, ");
+      break;
+    case TOK_CONSTANT:
+      printf("TOK_CONSTANT, ");
+      break;
+    case TOK_OPERATOR:
+      printf("TOK_OPERATOR, ");
+      break;
+    case TOK_SEPERATOR:
+      printf("TOK_SEPERATOR, ");
+      break;
+    case TOK_END:
+      break;
+    }
+  }
+  printf("TOK_END\n");
+}
+
 int main(int argc, char **argv) {
   if (argc < 3) {
     fprintf(stderr, "USAGE:\n\tattocc <input> <output>\n");
@@ -169,7 +261,11 @@ int main(int argc, char **argv) {
   }
   source[source_len] = '\0';
 
-  printf("%s", source);
+  struct token *tokens = tokenize(source);
+  if (!tokens)
+    return 1;
+  debug_tokens(tokens);
+  free(tokens);
 
   return 0;
 }