#include #include #include #define NUM_KEYWORDS 32 static char *KEYWORDS[NUM_KEYWORDS] = { "auto", "break", "case", "char", "const", "continue", "default", "do", "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", "register", "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while"}; enum keyword { kW_AUTO, KW_BREAK, KW_CASE, KW_CHAR, KW_CONST, KW_CONTINUE, KW_DEFAULT, KW_DO, KW_DOUBLE, KW_ELSE, KW_ENUM, KW_EXTERN, KW_FLOAT, KW_FOR, KW_GOTO, KW_IF, KW_INT, KW_LONG, KW_REGISTER, KW_RETURN, KW_SHORT, KW_SIGNED, KW_SIZEOF, KW_STATIC, KW_STRUCT, KW_SWITCH, KW_TYPEDEF, KW_UNION, KW_UNSIGNED, KW_VOID, KW_VOLATILE, KW_WHILE, }; enum operator{ OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD, }; enum token_kind { IDENTIFIER, KEYWORD, CONSTANT, OPERATOR, END, }; union token_data { char *identifier; enum keyword keyword; int constant_value; enum operator operator; }; struct token { enum token_kind kind; union token_data data; }; struct token *tokenize(char *source) { char *end = source + strlen(source); char *p = source; int num_tokens = 0; struct token *tokens; while (*p) { int remaining = end - p; //* comments if (remaining >= 2) { if (p[0] == '/' && p[1] == '/') { p += 2; while (*p) { if (*p++ == '\n') { break; } } continue; } else if (p[0] == '/' && p[1] == '*') { p += 2; char last = '\0'; while (*p) { if (*p == '/' && last == '*') { p++; break; } last = *p; p++; } continue; } } //* keyword for (int i = 0; i < NUM_KEYWORDS; i++) { char *kw = KEYWORDS[i]; if (remaining >= strlen(kw)) { if (strcmp(kw, p) == 0) { num_tokens += 1; tokens = realloc(tokens, sizeof(struct token) * num_tokens); if (!tokens) { fprintf(stderr, "realloc failed"); return NULL; } struct token *new_token = &tokens[num_tokens - 1]; new_token->kind = KEYWORD; new_token->data.keyword = i; p += strlen(kw); continue; } } } fprintf(stderr, "unknown token\n"); return NULL; } return tokens; } int main() {}