summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- attocc.c 146
1 files changed, 125 insertions, 21 deletions
diff --git a/attocc.c b/attocc.c
index 7d4abf4..49ca7a2 100644
--- a/attocc.c
+++ b/attocc.c
@@ -37,7 +37,7 @@ static char *OPERATORS[] = {
"sizeof", "<<", ">>", "+=", "-=", "*=", "/=", "%=", "<<=",
">>=", "&=", "|=", "^=", "++", "--", "==", "!=", "<=",
">=", "||", "&&", "+", "-", "*", "/", "%", "<",
- ">", "~", "&", "|", "^", "!", "&", "*", "=",
+ ">", "~", "&", "|", "^", "!", "=", ".", "?",
};
#ifdef DEBUG
@@ -65,19 +65,19 @@ static char *OPERATOR_NAMES[] = {
"OP_LOGICAL_AND",
"OP_ADD",
"OP_SUB",
- "OP_MUL",
+ "OP_MUL_OR_POINTER",
"OP_DIV",
"OP_MOD",
"OP_LESS",
"OP_GREATER",
"OP_BITWISE_NOT",
- "OP_BITWISE_AND",
+ "OP_BITWISE_AND_OR_REF",
"OP_BITWISE_OR",
"OP_BITWISE_XOR",
"OP_LOGICAL_NOT",
- "OP_POINTER_REF",
- "OP_POINTER_DEREF",
"OP_ASSIGN",
+ "OP_MEMBER_ACCESS",
+ "OP_TERNARY",
};
static char *KEYWORD_NAMES[] = {
"KW_AUTO", "KW_BREAK", "KW_CASE", "KW_CHAR", "KW_CONST",
@@ -153,19 +153,19 @@ enum operator{
OP_LOGICAL_AND,
OP_ADD, // len=1
OP_SUB,
- OP_MUL,
+ OP_MUL_OR_POINTER,
OP_DIV,
OP_MOD,
OP_LESS,
OP_GREATER,
OP_BITWISE_NOT,
- OP_BITWISE_AND,
+ OP_BITWISE_AND_OR_REF,
OP_BITWISE_OR,
OP_BITWISE_XOR,
OP_LOGICAL_NOT,
- OP_POINTER_REF,
- OP_POINTER_DEREF,
OP_ASSIGN,
+ OP_MEMBER_ACCESS,
+ OP_TERNARY,
};
enum seperator {
@@ -184,16 +184,29 @@ enum seperator {
enum token_kind {
TOK_IDENTIFIER,
TOK_KEYWORD,
- TOK_INT_CONSTANT,
+ TOK_CONSTANT, // any literal; the constant_kind field of token_data says which sort
TOK_SEPERATOR,
TOK_OPERATOR,
TOK_END,
};
+enum constant_kind { // tag selecting the valid value member of a constant token
+ CONST_INT, // integer literal; value is stored in constant_int_value
+ CONST_STR, // string literal; value is stored in constant_str_value
+ CONST_CHAR, // character literal; value is stored in constant_char_value
+};
+
union token_data {
char *identifier;
enum keyword keyword;
- int constant_value;
+ struct { // payload of a constant token: a kind tag plus the matching value (anonymous member, C11)
+ enum constant_kind constant_kind;
+ union { // only the member selected by constant_kind is meaningful
+ int constant_int_value;
+ char constant_char_value;
+ char *constant_str_value;
+ };
+ };
enum seperator seperator;
enum operator operator;
};
@@ -213,6 +226,26 @@ char is_alpha(char c) {
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
char is_alphanumeric(char c) { return is_alpha(c) || is_numeric(c); }
+// Nonzero when c may appear inside an identifier: anything accepted by
+// is_alphanumeric, or an underscore.
+char is_ident(char c) {
+  if (is_alphanumeric(c))
+    return 1;
+  return c == '_';
+}
+
+// Map the character that follows a backslash in a char or string literal to
+// the character the escape sequence denotes.
+//
+// Handles the simple escape sequences of C (\n \t \b \r \a \f \v \\ \' \" \?)
+// and \0. Any other character yields '\0', so callers cannot distinguish an
+// unknown escape from an explicit \0.
+char map_escape(char c) {
+  switch (c) {
+  case 'n':
+    return '\n';
+  case 't':
+    return '\t';
+  case 'b':
+    return '\b';
+  case 'r':
+    return '\r';
+  case 'a':
+    return '\a';
+  case 'f':
+    return '\f';
+  case 'v':
+    return '\v';
+  case '\\':
+    // Without this case '\\' fell through to the default and became NUL
+    return '\\';
+  case '\'':
+    return '\'';
+  case '"':
+    return '"';
+  case '?':
+    return '?';
+  case '0':
+    return '\0';
+  default:
+    return '\0';
+  }
+}
struct token *tokenize(char *source) {
char *end = source + strlen(source);
@@ -237,7 +270,7 @@ struct token *tokenize(char *source) {
continue;
} else if (p[0] == '/' && p[1] == '*') {
p += 2;
- for (char c, d = '\0'; (d = c, c = *p++) && c == '/' && d == '*';)
+ // Skip to just past the closing "*/". c starts initialised so the first
+ // `d = c` does not read an indeterminate value, and p is only advanced over
+ // non-NUL characters so an unterminated comment leaves p on the terminator.
+ for (char c = '\0', d = '\0';
+      (d = c, c = *p) && (p++, !(c == '/' && d == '*'));)
;
continue;
} else if (p[0] == '#') {
@@ -310,7 +343,6 @@ struct token *tokenize(char *source) {
new_token->kind = TOK_KEYWORD;
new_token->data.keyword = i;
- // printf("kw match %i %li\n", i, strlen(kw));
p += strlen(kw);
match = 1;
break;
@@ -320,6 +352,7 @@ struct token *tokenize(char *source) {
if (match)
continue;
+ //* number constant
if (is_numeric(p[0])) {
int value = 0;
if (remaining >= 2 && p[1] == 'x') {
@@ -349,15 +382,72 @@ struct token *tokenize(char *source) {
return NULL;
}
struct token *new_token = &tokens[num_tokens - 1];
- new_token->kind = TOK_INT_CONSTANT;
- new_token->data.constant_value = value;
+ new_token->kind = TOK_CONSTANT;
+ new_token->data.constant_kind = CONST_INT;
+ new_token->data.constant_int_value = value;
continue;
}
- if (is_alpha(p[0])) {
- char *ident_start = p;
+ //* string constant
+ if (p[0] == '"') {
+   p++; // skip the opening quote
+   // The buffer always has one spare byte for the terminator, so "" becomes
+   // an empty string rather than a NULL pointer.
+   int str_len = 0;
+   char *str = malloc(1);
+   if (!str) {
+     fprintf(stderr, "malloc failed\n");
+     return NULL;
+   }
+   for (;;) {
+     char c = *p;
+     if (!c) {
+       // Source ended before the closing quote; p stays on the terminator
+       free(str);
+       return fprintf(stderr, "expected \"\n"), NULL;
+     }
+     p++;
+     if (c == '"')
+       break;
+     // Translate escapes so that \" does not end the literal early
+     if (c == '\\' && *p)
+       c = map_escape(*p++);
+     // Grow through a temporary so the old buffer is not lost on failure
+     char *grown = realloc(str, str_len + 2);
+     if (!grown) {
+       free(str);
+       fprintf(stderr, "realloc failed\n");
+       return NULL;
+     }
+     str = grown;
+     str[str_len++] = c;
+   }
+   // Terminate: the value is later printed with %s
+   str[str_len] = '\0';
+
+   num_tokens += 1;
+   tokens = realloc(tokens, sizeof(struct token) * num_tokens);
+   if (!tokens) {
+     free(str);
+     fprintf(stderr, "realloc failed\n");
+     return NULL;
+   }
+   struct token *new_token = &tokens[num_tokens - 1];
+   // A string literal is a constant, not an identifier
+   new_token->kind = TOK_CONSTANT;
+   new_token->data.constant_kind = CONST_STR;
+   new_token->data.constant_str_value = str;
+   continue;
+ }
+
+ //* char constant
+ if (p[0] == '\'') {
+   p++; // skip the opening quote
+   char chr = p[0];
+   if (chr == '\\') {
+     p++; // skip the backslash; p[0] is now the escaped character
+     chr = map_escape(p[0]);
+   }
+   // p[0] is still the (possibly escaped) source character: NUL here means
+   // the input ended inside the literal
+   if (!*p++)
+     return NULL;
+   if (*p++ != '\'')
+     return fprintf(stderr, "expected '\n"), NULL;
+
+   num_tokens += 1;
+   tokens = realloc(tokens, sizeof(struct token) * num_tokens);
+   if (!tokens)
+     return fprintf(stderr, "realloc failed\n"), NULL;
+
+   struct token *new_token = &tokens[num_tokens - 1];
+   // Tag as a char constant so the value is read back from
+   // constant_char_value (it was previously tagged TOK_IDENTIFIER/CONST_STR)
+   new_token->kind = TOK_CONSTANT;
+   new_token->data.constant_kind = CONST_CHAR;
+   new_token->data.constant_char_value = chr;
+   continue;
+ }
+
+ //* identifier
+ if (is_ident(p[0]) && !is_numeric(p[0])) {
+ char *ident_start = p;
+ for (char c; (c = *p++) && is_ident(c);)
;
p--;
int ident_len = p - ident_start - 1;
@@ -404,13 +494,27 @@ void debug_tokens(struct token *tokens) {
for (int i = 0; tokens[i].kind != TOK_END; i++) {
switch (tokens[i].kind) {
case TOK_IDENTIFIER:
- printf("TOK_IDENTIFIER:%s, ", tokens[i].data.identifier);
+ // printf("TOK_IDENTIFIER:%s, ", tokens[i].data.identifier);
+ printf("TOK_IDENTIFIER, ");
break;
case TOK_KEYWORD:
printf("TOK_KEYWORD:%s, ", KEYWORD_NAMES[tokens[i].data.keyword]);
break;
- case TOK_INT_CONSTANT:
- printf("TOK_CONSTANT:%i, ", tokens[i].data.constant_value);
+ case TOK_CONSTANT:
+ switch (tokens[i].data.constant_kind) {
+ case CONST_INT:
+ printf("TOK_CONSTANT:CONST_INT:%i, ",
+ tokens[i].data.constant_int_value);
+ break;
+ case CONST_STR:
+ printf("TOK_CONSTANT:CONST_STR:%s, ",
+ tokens[i].data.constant_str_value);
+ break;
+ case CONST_CHAR:
+ printf("TOK_CONSTANT:CONST_CHAR:%c, ",
+ tokens[i].data.constant_char_value);
+ break;
+ }
break;
case TOK_OPERATOR:
printf("TOK_OPERATOR:%s, ", OPERATOR_NAMES[tokens[i].data.operator] );