slash

slash is a simple type-oriented programming language
Log | Files | Refs | README | LICENSE

commit 20a5e899de62e071df698db1b8b12a676ef25764
parent 063eecc545062e319001d61c9cc2308636b4e6a9
Author: Mario Rosell R. Martinez <mario@mariorosell.es>
Date:   Mon,  6 Apr 2026 20:18:01 +0200

lex: stack for tokens

Diffstat:
M lex.c | 296 ++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
M lex.h | 126 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
2 files changed, 237 insertions(+), 185 deletions(-)

diff --git a/lex.c b/lex.c @@ -9,16 +9,25 @@ #include "lex.h" #include "stat.h" +#define MAX_STACK_SIZE 100 + +typedef struct { + tok_t stack[MAX_STACK_SIZE]; + int top; +} token_stack_t; + +token_stack_t tokstack; + static const int nkeyws = sizeof(kws) / sizeof(kws[0]); toktype_t lu_kw(const char *str) { - for (int i = 0; i < nkeyws; i++) { - if (strcmp(str, kws[i].name) == 0) - return kws[i].type; - } - return TOK_IDENT; + for (int i = 0; i < nkeyws; i++) { + if (strcmp(str, kws[i].name) == 0) + return kws[i].type; + } + return TOK_IDENT; } static int cur = EOF; @@ -27,169 +36,208 @@ static int next = EOF; void lex_init(void) { - cur = fgetc(stat.i_fstream); - next = fgetc(stat.i_fstream); + cur = fgetc(stat.i_fstream); + next = fgetc(stat.i_fstream); + tokstack.top = -1; } int peek(void) { - return cur; + return cur; } int peek2(void) { - return next; + return next; } int consume(void) { - int c = cur; - cur = next; - next = fgetc(stat.i_fstream); - return c; + int c = cur; + cur = next; + next = fgetc(stat.i_fstream); + return c; } static void skip_ws_c(void) { - while (1) { - /* whitespace */ - while (isspace(peek())) - consume(); - - /* line comment */ - if (peek() == '/' && peek2() == '/') { - consume(); /* / */ - consume(); /* / */ - while (peek() != '\n' && peek() != EOF) - consume(); - continue; - } - - /* block comment */ - if (peek() == '/' && peek2() == '*') { - consume(); /* / */ - consume(); /* * */ - - while (peek() != EOF) { - if (peek() == '*' && peek2() == '/') { - consume(); /* * */ - consume(); /* / */ - break; - } - consume(); - } - continue; - } - - break; - } + while (1) { + /* whitespace */ + while (isspace(peek())) + consume(); + + /* line comment */ + if (peek() == '/' && peek2() == '/') { + consume(); /* / */ + consume(); /* / */ + while (peek() != '\n' && peek() != EOF) + consume(); + continue; + } + + /* block comment */ + if (peek() == '/' && peek2() == '*') { + consume(); /* / */ + consume(); /* * */ + + while (peek() != EOF) { + if 
(peek() == '*' && peek2() == '/') { + consume(); /* * */ + consume(); /* / */ + break; + } + consume(); + } + continue; + } + + break; + } } tok_t id(void) { - tok_t t = {0}; - int len = 0; - int c; + tok_t t = {0}; + int len = 0; + int c; + + while ((c = peek()) != EOF && (isalnum(c) || c == '_')) { + if (len < (int)sizeof(t.lexeme) - 1) + t.lexeme[len++] = (char)consume(); + else + consume(); + } - while ((c = peek()) != EOF && (isalnum(c) || c == '_')) { - if (len < (int)sizeof(t.lexeme) - 1) - t.lexeme[len++] = (char)consume(); - else - consume(); - } + t.lexeme[len] = '\0'; + t.type = lu_kw(t.lexeme); - t.lexeme[len] = '\0'; - t.type = lu_kw(t.lexeme); + return t; +} - return t; +void +pushtok(tok_t t) +{ + if (tokstack.top < MAX_STACK_SIZE - 1) { + tokstack.stack[++tokstack.top] = t; + } else { + fprintf(stderr, "slash: token stack overflow\n"); + } +} + +tok_t +poptok(void) +{ + if (tokstack.top >= 0) { + return tokstack.stack[tokstack.top--]; + } else { + fprintf(stderr, "slash: token stack underflow\n"); + tok_t empty_token = {0}; + return empty_token; + } +} + +tok_t +peekstack(void) +{ + if (tokstack.top >= 0) { + return tokstack.stack[tokstack.top]; + } else { + fprintf(stderr, "slash: token stack is empty\n"); + tok_t empty_token = {0}; + return empty_token; + } } tok_t nexttok(void) { - tok_t t = {0}; - int c; - - skip_ws_c(); - - c = peek(); - - /* EOF */ - if (c == EOF) { - t.type = TOK_EOF; - return t; - } - - /* identifier or keyword */ - if (isalpha(c) || c == '_') - return id(); - - consume(); - - switch (c) { - case '{': - t.type = TOK_LBRACE; - break; - case '}': - t.type = TOK_RBRACE; - break; - case '(': - t.type = TOK_LPAREN; - break; - case ')': - t.type = TOK_RPAREN; - break; - case ',': - t.type = TOK_COMMA; - break; - case '.': - t.type = TOK_PERIOD; - break; - - default: - t.type = TOK_UNKNOWN; - t.lexeme[0] = (char)c; - t.lexeme[1] = '\0'; - break; - } - - return t; + tok_t t = {0}; + int c; + + skip_ws_c(); + + c = peek(); + + /* EOF 
*/ + if (c == EOF) { + t.type = TOK_EOF; + pushtok(t); + return t; + } + + /* identifier or keyword */ + if (isalpha(c) || c == '_') { + t = id(); + pushtok(t); + return t; + } + + consume(); + + switch (c) { + case '{': + t.type = TOK_LBRACE; + break; + case '}': + t.type = TOK_RBRACE; + break; + case '(': + t.type = TOK_LPAREN; + break; + case ')': + t.type = TOK_RPAREN; + break; + case ',': + t.type = TOK_COMMA; + break; + case '.': + t.type = TOK_PERIOD; + break; + default: + t.type = TOK_UNKNOWN; + t.lexeme[0] = (char)c; + t.lexeme[1] = '\0'; + break; + } + + pushtok(t); + return t; } const char * tokname(toktype_t t) { - switch (t) { - case TOK_CLASS: return "CLASS"; - case TOK_FC: return "FC"; - case TOK_AS: return "AS"; + switch (t) { + case TOK_CLASS: return "CLASS"; + case TOK_FC: return "FC"; + case TOK_AS: return "AS"; - case TOK_F32: return "F32"; - case TOK_F64: return "F64"; + case TOK_F32: return "F32"; + case TOK_F64: return "F64"; - case TOK_I16: return "I16"; - case TOK_I32: return "I32"; + case TOK_I16: return "I16"; + case TOK_I32: return "I32"; - case TOK_U16: return "U16"; - case TOK_U64: return "U64"; + case TOK_U16: return "U16"; + case TOK_U64: return "U64"; - case TOK_IDENT: return "IDENT"; + case TOK_IDENT: return "IDENT"; - case TOK_PERIOD: return "PERIOD"; - case TOK_LBRACE: return "LBRACE"; - case TOK_RBRACE: return "RBRACE"; - case TOK_LPAREN: return "LPAREN"; - case TOK_RPAREN: return "RPAREN"; - case TOK_COMMA: return "COMMA"; + case TOK_PERIOD: return "PERIOD"; + case TOK_LBRACE: return "LBRACE"; + case TOK_RBRACE: return "RBRACE"; + case TOK_LPAREN: return "LPAREN"; + case TOK_RPAREN: return "RPAREN"; + case TOK_COMMA: return "COMMA"; - case TOK_EOF: return "EOF"; + case TOK_EOF: return "EOF"; - default: return "UNKNOWN"; - } + default: return "UNKNOWN"; + } } diff --git a/lex.h b/lex.h @@ -12,44 +12,44 @@ #define MAX_PATH_SIZE 1024 enum toktype { - TOK_EOF, - - TOK_CLASS, - TOK_FC, - TOK_IMPORT, - TOK_AS, - - TOK_F32, - TOK_F64, 
- TOK_I8, - TOK_I16, - TOK_I32, - TOK_I64, - TOK_U8, - TOK_U16, - TOK_U32, - TOK_U64, - - TOK_IDENT, - - TOK_LBRACE, - TOK_RBRACE, - TOK_LPAREN, - TOK_RPAREN, - TOK_COMMA, - TOK_PERIOD, - - TOK_UNKNOWN + TOK_EOF, + + TOK_CLASS, + TOK_FC, + TOK_IMPORT, + TOK_AS, + + TOK_F32, + TOK_F64, + TOK_I8, + TOK_I16, + TOK_I32, + TOK_I64, + TOK_U8, + TOK_U16, + TOK_U32, + TOK_U64, + + TOK_IDENT, + + TOK_LBRACE, + TOK_RBRACE, + TOK_LPAREN, + TOK_RPAREN, + TOK_COMMA, + TOK_PERIOD, + + TOK_UNKNOWN }; struct tok { - enum toktype type; - char lexeme[64]; + enum toktype type; + char lexeme[64]; }; struct keyword { - const char *name; - enum toktype type; + const char *name; + enum toktype type; }; typedef enum toktype toktype_t; @@ -57,44 +57,48 @@ typedef struct tok tok_t; typedef struct keyword keyword_t; static const keyword_t kws[] = { - {"import", TOK_IMPORT}, - {"class", TOK_CLASS}, - {"fc", TOK_FC}, - {"as", TOK_AS}, - - {"f32", TOK_F32}, - {"f64", TOK_F64}, - - {"i8", TOK_I8}, - {"i16", TOK_I16}, - {"i32", TOK_I32}, - {"i64", TOK_I64}, - - {"u8", TOK_U8}, - {"u16", TOK_U16}, - {"u32", TOK_U32}, - {"u64", TOK_U64}, + {"import", TOK_IMPORT}, + {"class", TOK_CLASS}, + {"fc", TOK_FC}, + {"as", TOK_AS}, + + {"f32", TOK_F32}, + {"f64", TOK_F64}, + + {"i8", TOK_I8}, + {"i16", TOK_I16}, + {"i32", TOK_I32}, + {"i64", TOK_I64}, + + {"u8", TOK_U8}, + {"u16", TOK_U16}, + {"u32", TOK_U32}, + {"u64", TOK_U64}, }; /* state defined in lex.c */ -extern const char *src; -extern size_t i; +extern const char *src; +extern size_t i; -toktype_t lu_kw(const char *str); +toktype_t lu_kw(const char *str); -int peek(void); -int consume(void); +int peek(void); +int consume(void); -void s_ws(void); -void s_lc(void); -void s_bc(void); +void s_ws(void); +void s_lc(void); +void s_bc(void); -void lex_init(void); +void lex_init(void); -tok_t id(void); -tok_t nexttok(void); +tok_t id(void); +tok_t nexttok(void); -const char *tokname(toktype_t t); +const char *tokname(toktype_t t); + +void push_token(tok_t 
t); +tok_t pop_token(void); +tok_t peek_stack(void); #endif /* LEX_H */