summary refs log tree commit diff
diff options
context:
space:
mode:
author metamuffin <metamuffin@disroot.org> 2024-06-15 00:18:06 +0200
committer metamuffin <metamuffin@disroot.org> 2024-06-15 00:18:06 +0200
commit b45a8709d263a87f55fedbc62fac1468320fe035 (patch)
tree 30cc7c5b3dbb340590a784b4489ab634c101b0c8
parent 1a847e6f3f1727571d6c185de52b418cc5746f4f (diff)
download attocc-b45a8709d263a87f55fedbc62fac1468320fe035.tar
attocc-b45a8709d263a87f55fedbc62fac1468320fe035.tar.bz2
attocc-b45a8709d263a87f55fedbc62fac1468320fe035.tar.zst
parse separators
-rw-r--r-- .gitignore 1
-rw-r--r-- attocc.c 124
2 files changed, 111 insertions, 14 deletions
diff --git a/.gitignore b/.gitignore
index 5cc1205..d2886f5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
/attocc
+/test*.c
diff --git a/attocc.c b/attocc.c
index 9ff5ea4..d390900 100644
--- a/attocc.c
+++ b/attocc.c
@@ -4,8 +4,8 @@
#include <string.h>
#include <unistd.h>
-#define NUM_KEYWORDS 32
-static char *KEYWORDS[NUM_KEYWORDS] = {
+const int NUM_KEYWORDS = 32;
+static char *KEYWORDS[] = {
"auto", "break", "case", "char", "const", "continue",
"default", "do", "double", "else", "enum", "extern",
"float", "for", "goto", "if", "int", "long",
@@ -13,6 +13,8 @@ static char *KEYWORDS[NUM_KEYWORDS] = {
"struct", "switch", "typedef", "union", "unsigned", "void",
"volatile", "while"};
+static char *SEPERATORS = "()[]{};,:";
+
enum keyword {
kW_AUTO,
KW_BREAK,
@@ -47,6 +49,7 @@ enum keyword {
KW_VOLATILE,
KW_WHILE,
};
+
enum operator{
OP_ADD,
OP_SUB,
@@ -54,19 +57,37 @@ enum operator{
OP_DIV,
OP_MOD,
};
+
+enum seperator {
+ SEP_LPAREN,
+ SEP_RPAREN,
+ SEP_LSQUARE,
+ SEP_RSQUARE,
+ SEP_LCURLY,
+ SEP_RCURLY,
+ SEP_SEMICOLON,
+ SEP_COMMA,
+ SEP_DOT,
+ SEP_COLOR,
+};
+
enum token_kind {
- IDENTIFIER,
- KEYWORD,
- CONSTANT,
- OPERATOR,
- END,
+ TOK_IDENTIFIER,
+ TOK_KEYWORD,
+ TOK_CONSTANT,
+ TOK_SEPERATOR,
+ TOK_OPERATOR,
+ TOK_END,
};
+
union token_data {
char *identifier;
enum keyword keyword;
int constant_value;
+ enum seperator seperator;
enum operator operator;
};
+
struct token {
enum token_kind kind;
union token_data data;
@@ -76,9 +97,33 @@ struct token *tokenize(char *source) {
char *end = source + strlen(source);
char *p = source;
int num_tokens = 0;
- struct token *tokens;
+ struct token *tokens = NULL;
while (*p) {
int remaining = end - p;
+ // printf("tok %c\n", p[0]);
+
+ //* whitespace
+ if (p[0] == ' ' || p[0] == '\t' || p[0] == '\n') {
+ p++;
+ continue;
+ }
+
+ //* seperators
+ for (int i = 0; SEPERATORS[i]; i++) {
+ if (p[0] == SEPERATORS[i]) {
+ num_tokens += 1;
+ tokens = realloc(tokens, sizeof(struct token) * num_tokens);
+ if (!tokens) {
+ fprintf(stderr, "realloc failed\n");
+ return NULL;
+ }
+ struct token *new_token = &tokens[num_tokens - 1];
+ new_token->kind = TOK_SEPERATOR;
+ new_token->data.seperator = i;
+ }
+ }
+
+ //* operators
//* comments
if (remaining >= 2) {
@@ -94,37 +139,84 @@ struct token *tokenize(char *source) {
while ((d = c, c = *p++) && c == '/' && d == '*')
;
continue;
+ } else if (p[0] == '#') {
+ p += 1;
+ char c;
+ while ((c = *p++) && c != '\n')
+ ;
+ continue;
}
}
//* keyword
+ char match = 0;
for (int i = 0; i < NUM_KEYWORDS; i++) {
char *kw = KEYWORDS[i];
if (remaining >= strlen(kw)) {
- if (strcmp(kw, p) == 0) {
+ if (strncmp(kw, p, strlen(kw)) == 0) {
num_tokens += 1;
-
tokens = realloc(tokens, sizeof(struct token) * num_tokens);
if (!tokens) {
fprintf(stderr, "realloc failed\n");
return NULL;
}
struct token *new_token = &tokens[num_tokens - 1];
- new_token->kind = KEYWORD;
+ new_token->kind = TOK_KEYWORD;
new_token->data.keyword = i;
+ // printf("kw match %i %li\n", i, strlen(kw));
p += strlen(kw);
- continue;
+ match = 1;
+ break;
}
}
}
+ if (match) {
+ continue;
+ }
+
+ fprintf(stderr, "unknown token at %li\n", p - source);
+ printf("%s", p);
+ return NULL;
+ }
- fprintf(stderr, "unknown token\n");
+ num_tokens += 1;
+ tokens = realloc(tokens, sizeof(struct token) * num_tokens);
+ if (!tokens) {
+ fprintf(stderr, "realloc failed\n");
return NULL;
}
+ struct token *new_token = &tokens[num_tokens - 1];
+ new_token->kind = TOK_END;
+
return tokens;
}
+void debug_tokens(struct token *tokens) {
+ for (int i = 0; tokens[i].kind != TOK_END; i++) {
+ switch (tokens[i].kind) {
+ case TOK_IDENTIFIER:
+ printf("TOK_IDENTIFIER, ");
+ break;
+ case TOK_KEYWORD:
+ printf("TOK_KEYWORD, ");
+ break;
+ case TOK_CONSTANT:
+ printf("TOK_CONSTANT, ");
+ break;
+ case TOK_OPERATOR:
+ printf("TOK_OPERATOR, ");
+ break;
+ case TOK_SEPERATOR:
+ printf("TOK_SEPERATOR, ");
+ break;
+ case TOK_END:
+ break;
+ }
+ }
+ printf("TOK_END\n");
+}
+
int main(int argc, char **argv) {
if (argc < 3) {
fprintf(stderr, "USAGE:\n\tattocc <input> <output>\n");
@@ -169,7 +261,11 @@ int main(int argc, char **argv) {
}
source[source_len] = '\0';
- printf("%s", source);
+ struct token *tokens = tokenize(source);
+ if (!tokens)
+ return 1;
+ debug_tokens(tokens);
+ free(tokens);
return 0;
}