#include #include #include #include #include const int NUM_KEYWORDS = 32; static char *KEYWORDS[] = { "auto", "break", "case", "char", "const", "continue", "default", "do", "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", "register", "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while"}; static char *SEPERATORS = "()[]{};,:"; enum keyword { kW_AUTO, KW_BREAK, KW_CASE, KW_CHAR, KW_CONST, KW_CONTINUE, KW_DEFAULT, KW_DO, KW_DOUBLE, KW_ELSE, KW_ENUM, KW_EXTERN, KW_FLOAT, KW_FOR, KW_GOTO, KW_IF, KW_INT, KW_LONG, KW_REGISTER, KW_RETURN, KW_SHORT, KW_SIGNED, KW_SIZEOF, KW_STATIC, KW_STRUCT, KW_SWITCH, KW_TYPEDEF, KW_UNION, KW_UNSIGNED, KW_VOID, KW_VOLATILE, KW_WHILE, }; enum operator{ OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD, }; enum seperator { SEP_LPAREN, SEP_RPAREN, SEP_LSQUARE, SEP_RSQUARE, SEP_LCURLY, SEP_RCURLY, SEP_SEMICOLON, SEP_COMMA, SEP_DOT, SEP_COLOR, }; enum token_kind { TOK_IDENTIFIER, TOK_KEYWORD, TOK_CONSTANT, TOK_SEPERATOR, TOK_OPERATOR, TOK_END, }; union token_data { char *identifier; enum keyword keyword; int constant_value; enum seperator seperator; enum operator operator; }; struct token { enum token_kind kind; union token_data data; }; struct token *tokenize(char *source) { char *end = source + strlen(source); char *p = source; int num_tokens = 0; struct token *tokens = NULL; while (*p) { int remaining = end - p; // printf("tok %c\n", p[0]); //* whitespace if (p[0] == ' ' || p[0] == '\t' || p[0] == '\n') { p++; continue; } //* seperators for (int i = 0; SEPERATORS[i]; i++) { if (p[0] == SEPERATORS[i]) { num_tokens += 1; tokens = realloc(tokens, sizeof(struct token) * num_tokens); if (!tokens) { fprintf(stderr, "realloc failed\n"); return NULL; } struct token *new_token = &tokens[num_tokens - 1]; new_token->kind = TOK_SEPERATOR; new_token->data.seperator = i; } } //* operators //* comments if (remaining >= 2) { if (p[0] == '/' && p[1] == '/') { p += 2; char c; while ((c = *p++) && c != '\n') ; continue; } else if (p[0] == '/' && p[1] == '*') { p += 2; char c, d; while ((d = c, c = *p++) && c == '/' && d == '*') ; continue; } else if (p[0] == '#') { p += 1; char c; while ((c = *p++) && c != '\n') ; continue; } } //* keyword char match = 0; for (int i = 0; i < NUM_KEYWORDS; i++) { char *kw = KEYWORDS[i]; if (remaining >= strlen(kw)) { if (strncmp(kw, p, strlen(kw)) == 0) { num_tokens += 1; tokens = realloc(tokens, sizeof(struct token) * num_tokens); if (!tokens) { fprintf(stderr, "realloc failed\n"); return NULL; } struct token *new_token = &tokens[num_tokens - 1]; new_token->kind = TOK_KEYWORD; new_token->data.keyword = i; // printf("kw match %i %li\n", i, strlen(kw)); p += strlen(kw); match = 1; break; } } } if (match) { continue; } fprintf(stderr, "unknown token at %li\n", p - source); printf("%s", p); return NULL; } num_tokens += 1; tokens = realloc(tokens, sizeof(struct token) * num_tokens); if (!tokens) { fprintf(stderr, "realloc failed\n"); return NULL; } struct token *new_token = &tokens[num_tokens - 1]; new_token->kind = TOK_END; return tokens; } void debug_tokens(struct token *tokens) { for (int i = 0; tokens[i].kind != TOK_END; i++) { switch (tokens[i].kind) { case TOK_IDENTIFIER: printf("TOK_IDENTIFIER, "); break; case TOK_KEYWORD: printf("TOK_KEYWORD, "); break; case TOK_CONSTANT: printf("TOK_CONSTANT, "); break; case TOK_OPERATOR: printf("TOK_OPERATOR, "); break; case TOK_SEPERATOR: printf("TOK_SEPERATOR, "); break; case TOK_END: break; } } printf("TOK_END\n"); } int main(int argc, char **argv) { if (argc < 3) { fprintf(stderr, "USAGE:\n\tattocc \n"); return 1; } char *input = argv[1]; char *output = argv[2]; int input_fd = open(input, O_RDONLY | O_CLOEXEC); if (input_fd < 0) { perror("cannot open input"); return 1; } int output_fd = open(output, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, 0640); if (output_fd < 0) { perror("cannot open input"); return 1; } int source_len = 0; char *source = NULL; int size; char buffer[4096]; while ((size = read(input_fd, &buffer, 4096))) { if (size < 0) { perror("cannot read source"); return 1; } source_len += size; source = realloc(source, source_len + 1); if (!source) { fprintf(stderr, "malloc failed\n"); return 1; } for (int i = 0; i < size; i++) { source[source_len - size + i] = buffer[i]; } } source[source_len] = '\0'; struct token *tokens = tokenize(source); if (!tokens) return 1; debug_tokens(tokens); free(tokens); return 0; }