diff options
-rw-r--r-- | attocc.c | 146 |
1 files changed, 125 insertions, 21 deletions
@@ -37,7 +37,7 @@ static char *OPERATORS[] = { "sizeof", "<<", ">>", "+=", "-=", "*=", "/=", "%=", "<<=", ">>=", "&=", "|=", "^=", "++", "--", "==", "!=", "<=", ">=", "||", "&&", "+", "-", "*", "/", "%", "<", - ">", "~", "&", "|", "^", "!", "&", "*", "=", + ">", "~", "&", "|", "^", "!", "=", ".", "?", }; #ifdef DEBUG @@ -65,19 +65,19 @@ static char *OPERATOR_NAMES[] = { "OP_LOGICAL_AND", "OP_ADD", "OP_SUB", - "OP_MUL", + "OP_MUL_OR_POINTER", "OP_DIV", "OP_MOD", "OP_LESS", "OP_GREATER", "OP_BITWISE_NOT", - "OP_BITWISE_AND", + "OP_BITWISE_AND_OR_REF", "OP_BITWISE_OR", "OP_BITWISE_XOR", "OP_LOGICAL_NOT", - "OP_POINTER_REF", - "OP_POINTER_DEREF", "OP_ASSIGN", + "OP_MEMBER_ACCESS", + "OP_TERNARY", }; static char *KEYWORD_NAMES[] = { "KW_AUTO", "KW_BREAK", "KW_CASE", "KW_CHAR", "KW_CONST", @@ -153,19 +153,19 @@ enum operator{ OP_LOGICAL_AND, OP_ADD, // len=1 OP_SUB, - OP_MUL, + OP_MUL_OR_POINTER, OP_DIV, OP_MOD, OP_LESS, OP_GREATER, OP_BITWISE_NOT, - OP_BITWISE_AND, + OP_BITWISE_AND_OR_REF, OP_BITWISE_OR, OP_BITWISE_XOR, OP_LOGICAL_NOT, - OP_POINTER_REF, - OP_POINTER_DEREF, OP_ASSIGN, + OP_MEMBER_ACCESS, + OP_TERNARY, }; enum seperator { @@ -184,16 +184,29 @@ enum seperator { enum token_kind { TOK_IDENTIFIER, TOK_KEYWORD, - TOK_INT_CONSTANT, + TOK_CONSTANT, TOK_SEPERATOR, TOK_OPERATOR, TOK_END, }; +enum constant_kind { + CONST_INT, + CONST_STR, + CONST_CHAR, +}; + union token_data { char *identifier; enum keyword keyword; - int constant_value; + struct { + enum constant_kind constant_kind; + union { + int constant_int_value; + char constant_char_value; + char *constant_str_value; + }; + }; enum seperator seperator; enum operator operator; }; @@ -213,6 +226,26 @@ char is_alpha(char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); } char is_alphanumeric(char c) { return is_alpha(c) || is_numeric(c); } +char is_ident(char c) { return is_alphanumeric(c) || c == '_'; } + +char map_escape(char c) { + switch (c) { + case 'n': + return '\n'; + case 't': + return '\t'; + case 'b': + return '\b'; + case 'r': + return '\r'; + case '\'': + return '\''; + case '"': + return '"'; + default: + return '\0'; + } +} struct token *tokenize(char *source) { char *end = source + strlen(source); @@ -237,7 +270,7 @@ struct token *tokenize(char *source) { continue; } else if (p[0] == '/' && p[1] == '*') { p += 2; - for (char c, d = '\0'; (d = c, c = *p++) && c == '/' && d == '*';) + for (char c, d = '\0'; (d = c, c = *p++) && !(c == '/' && d == '*');) ; continue; } else if (p[0] == '#') { @@ -310,7 +343,6 @@ struct token *tokenize(char *source) { new_token->kind = TOK_KEYWORD; new_token->data.keyword = i; - // printf("kw match %i %li\n", i, strlen(kw)); p += strlen(kw); match = 1; break; @@ -320,6 +352,7 @@ struct token *tokenize(char *source) { if (match) continue; + //* number constant if (is_numeric(p[0])) { int value = 0; if (remaining >= 2 && p[1] == 'x') { @@ -349,15 +382,72 @@ struct token *tokenize(char *source) { return NULL; } struct token *new_token = &tokens[num_tokens - 1]; - new_token->kind = TOK_INT_CONSTANT; - new_token->data.constant_value = value; + new_token->kind = TOK_CONSTANT; + new_token->data.constant_kind = CONST_INT; + new_token->data.constant_int_value = value; continue; } - if (is_alpha(p[0])) { - char *ident_start = p; + //* string constant + if (p[0] == '"') { p++; - for (char c; (c = *p++) && is_alphanumeric(c);) + char *str = NULL; + int str_len = 0; + for (char c; (c = *p++) && c != '"';) { + // TODO escape + str_len++; + str = realloc(str, str_len); + if (!str) { + fprintf(stderr, "realloc failed\n"); + return NULL; + } + str[str_len - 1] = c; + } + + num_tokens += 1; + tokens = realloc(tokens, sizeof(struct token) * num_tokens); + if (!tokens) { + fprintf(stderr, "realloc failed\n"); + return NULL; + } + struct token *new_token = &tokens[num_tokens - 1]; + new_token->kind = TOK_IDENTIFIER; + new_token->data.constant_kind = CONST_STR; + new_token->data.constant_str_value = str; + continue; + } + + //* char constant + if (p[0] == '\'') { + if (!*p++) + return NULL; + char chr = p[0]; + if (p[0] == '\\') { + if (!*p++) + return NULL; + chr = map_escape(p[0]); + } + if (!*p++) + return NULL; + if (*p++ != '\'') + return fprintf(stderr, "expected '\n"), NULL; + + num_tokens += 1; + tokens = realloc(tokens, sizeof(struct token) * num_tokens); + if (!tokens) + return fprintf(stderr, "realloc failed\n"), NULL; + + struct token *new_token = &tokens[num_tokens - 1]; + new_token->kind = TOK_IDENTIFIER; + new_token->data.constant_kind = CONST_STR; + new_token->data.constant_char_value = chr; + continue; + } + + //* identifier + if (is_ident(p[0]) && !is_numeric(p[0])) { + char *ident_start = p; + for (char c; (c = *p++) && is_ident(c);) ; p--; int ident_len = p - ident_start - 1; @@ -404,13 +494,27 @@ void debug_tokens(struct token *tokens) { for (int i = 0; tokens[i].kind != TOK_END; i++) { switch (tokens[i].kind) { case TOK_IDENTIFIER: - printf("TOK_IDENTIFIER:%s, ", tokens[i].data.identifier); + // printf("TOK_IDENTIFIER:%s, ", tokens[i].data.identifier); + printf("TOK_IDENTIFIER, "); break; case TOK_KEYWORD: printf("TOK_KEYWORD:%s, ", KEYWORD_NAMES[tokens[i].data.keyword]); break; - case TOK_INT_CONSTANT: - printf("TOK_CONSTANT:%i, ", tokens[i].data.constant_value); + case TOK_CONSTANT: + switch (tokens[i].data.constant_kind) { + case CONST_INT: + printf("TOK_CONSTANT:CONST_INT:%i, ", + tokens[i].data.constant_int_value); + break; + case CONST_STR: + printf("TOK_CONSTANT:CONST_STR:%s, ", + tokens[i].data.constant_str_value); + break; + case CONST_CHAR: + printf("TOK_CONSTANT:CONST_CHAR:%c, ", + tokens[i].data.constant_char_value); + break; + } break; case TOK_OPERATOR: printf("TOK_OPERATOR:%s, ", OPERATOR_NAMES[tokens[i].data.operator] ); |