commit 20a5e899de62e071df698db1b8b12a676ef25764
parent 063eecc545062e319001d61c9cc2308636b4e6a9
Author: Mario Rosell R. Martinez <mario@mariorosell.es>
Date: Mon, 6 Apr 2026 20:18:01 +0200
lex: stack for tokens
Diffstat:
| M | lex.c | | | 296 | ++++++++++++++++++++++++++++++++++++++++++++++--------------------------------- |
| M | lex.h | | | 126 | +++++++++++++++++++++++++++++++++++++++++-------------------------------------- |
2 files changed, 237 insertions(+), 185 deletions(-)
diff --git a/lex.c b/lex.c
@@ -9,16 +9,25 @@
#include "lex.h"
#include "stat.h"
+#define MAX_STACK_SIZE 100
+
+/*
+ * Fixed-capacity LIFO of tokens.  `top` is the index of the most recently
+ * pushed entry, or -1 when the stack holds nothing.
+ */
+typedef struct {
+	tok_t stack[MAX_STACK_SIZE];
+	int top;
+} token_stack_t;
+
+/*
+ * Starts out empty.  Without the explicit initializer `top` would be
+ * zero-initialized to 0, and poptok()/peekstack() would report a bogus
+ * token in slot 0 if they ran before lex_init().  File-local, like the
+ * other lexer state in this file.
+ */
+static token_stack_t tokstack = { .top = -1 };
+
static const int nkeyws = sizeof(kws) / sizeof(kws[0]);
toktype_t
lu_kw(const char *str)
{
- for (int i = 0; i < nkeyws; i++) {
- if (strcmp(str, kws[i].name) == 0)
- return kws[i].type;
- }
- return TOK_IDENT;
+ for (int i = 0; i < nkeyws; i++) {
+ if (strcmp(str, kws[i].name) == 0)
+ return kws[i].type;
+ }
+ return TOK_IDENT;
}
static int cur = EOF;
@@ -27,169 +36,208 @@ static int next = EOF;
void
lex_init(void)
{
- cur = fgetc(stat.i_fstream);
- next = fgetc(stat.i_fstream);
+ cur = fgetc(stat.i_fstream);
+ next = fgetc(stat.i_fstream);
+ tokstack.top = -1;
}
int
peek(void)
{
- return cur;
+ return cur;
}
int
peek2(void)
{
- return next;
+ return next;
}
int
consume(void)
{
- int c = cur;
- cur = next;
- next = fgetc(stat.i_fstream);
- return c;
+ int c = cur;
+ cur = next;
+ next = fgetc(stat.i_fstream);
+ return c;
}
static void
skip_ws_c(void)
{
- while (1) {
- /* whitespace */
- while (isspace(peek()))
- consume();
-
- /* line comment */
- if (peek() == '/' && peek2() == '/') {
- consume(); /* / */
- consume(); /* / */
- while (peek() != '\n' && peek() != EOF)
- consume();
- continue;
- }
-
- /* block comment */
- if (peek() == '/' && peek2() == '*') {
- consume(); /* / */
- consume(); /* * */
-
- while (peek() != EOF) {
- if (peek() == '*' && peek2() == '/') {
- consume(); /* * */
- consume(); /* / */
- break;
- }
- consume();
- }
- continue;
- }
-
- break;
- }
+ while (1) {
+ /* whitespace */
+ while (isspace(peek()))
+ consume();
+
+ /* line comment */
+ if (peek() == '/' && peek2() == '/') {
+ consume(); /* / */
+ consume(); /* / */
+ while (peek() != '\n' && peek() != EOF)
+ consume();
+ continue;
+ }
+
+ /* block comment */
+ if (peek() == '/' && peek2() == '*') {
+ consume(); /* / */
+ consume(); /* * */
+
+ while (peek() != EOF) {
+ if (peek() == '*' && peek2() == '/') {
+ consume(); /* * */
+ consume(); /* / */
+ break;
+ }
+ consume();
+ }
+ continue;
+ }
+
+ break;
+ }
}
tok_t
id(void)
{
- tok_t t = {0};
- int len = 0;
- int c;
+ tok_t t = {0};
+ int len = 0;
+ int c;
+
+ while ((c = peek()) != EOF && (isalnum(c) || c == '_')) {
+ if (len < (int)sizeof(t.lexeme) - 1)
+ t.lexeme[len++] = (char)consume();
+ else
+ consume();
+ }
- while ((c = peek()) != EOF && (isalnum(c) || c == '_')) {
- if (len < (int)sizeof(t.lexeme) - 1)
- t.lexeme[len++] = (char)consume();
- else
- consume();
- }
+ t.lexeme[len] = '\0';
+ t.type = lu_kw(t.lexeme);
- t.lexeme[len] = '\0';
- t.type = lu_kw(t.lexeme);
+ return t;
+}
- return t;
+/*
+ * Push `t` onto the token stack.  If the stack already holds
+ * MAX_STACK_SIZE tokens, `t` is not stored and a diagnostic is written
+ * to stderr instead.
+ */
+void
+pushtok(tok_t t)
+{
+	/* full: report and leave the stack untouched */
+	if (tokstack.top >= MAX_STACK_SIZE - 1) {
+		fprintf(stderr, "slash: token stack overflow\n");
+		return;
+	}
+
+	tokstack.top++;
+	tokstack.stack[tokstack.top] = t;
+}
+
+/*
+ * Remove and return the token on top of the stack.  On an empty stack a
+ * diagnostic is written to stderr and an all-zero token is returned.
+ */
+tok_t
+poptok(void)
+{
+	tok_t result = {0};
+
+	if (tokstack.top < 0) {
+		fprintf(stderr, "slash: token stack underflow\n");
+		return result;
+	}
+
+	result = tokstack.stack[tokstack.top];
+	tokstack.top--;
+	return result;
+}
+
+/*
+ * Return a copy of the token on top of the stack without removing it.
+ * On an empty stack a diagnostic is written to stderr and an all-zero
+ * token is returned.
+ */
+tok_t
+peekstack(void)
+{
+	tok_t result = {0};
+
+	if (tokstack.top < 0) {
+		fprintf(stderr, "slash: token stack is empty\n");
+		return result;
+	}
+
+	result = tokstack.stack[tokstack.top];
+	return result;
}
tok_t
nexttok(void)
{
- tok_t t = {0};
- int c;
-
- skip_ws_c();
-
- c = peek();
-
- /* EOF */
- if (c == EOF) {
- t.type = TOK_EOF;
- return t;
- }
-
- /* identifier or keyword */
- if (isalpha(c) || c == '_')
- return id();
-
- consume();
-
- switch (c) {
- case '{':
- t.type = TOK_LBRACE;
- break;
- case '}':
- t.type = TOK_RBRACE;
- break;
- case '(':
- t.type = TOK_LPAREN;
- break;
- case ')':
- t.type = TOK_RPAREN;
- break;
- case ',':
- t.type = TOK_COMMA;
- break;
- case '.':
- t.type = TOK_PERIOD;
- break;
-
- default:
- t.type = TOK_UNKNOWN;
- t.lexeme[0] = (char)c;
- t.lexeme[1] = '\0';
- break;
- }
-
- return t;
+ tok_t t = {0};
+ int c;
+
+ skip_ws_c();
+
+ c = peek();
+
+ /* EOF */
+ if (c == EOF) {
+ t.type = TOK_EOF;
+ pushtok(t);
+ return t;
+ }
+
+ /* identifier or keyword */
+ if (isalpha(c) || c == '_') {
+ t = id();
+ pushtok(t);
+ return t;
+ }
+
+ consume();
+
+ switch (c) {
+ case '{':
+ t.type = TOK_LBRACE;
+ break;
+ case '}':
+ t.type = TOK_RBRACE;
+ break;
+ case '(':
+ t.type = TOK_LPAREN;
+ break;
+ case ')':
+ t.type = TOK_RPAREN;
+ break;
+ case ',':
+ t.type = TOK_COMMA;
+ break;
+ case '.':
+ t.type = TOK_PERIOD;
+ break;
+ default:
+ t.type = TOK_UNKNOWN;
+ t.lexeme[0] = (char)c;
+ t.lexeme[1] = '\0';
+ break;
+ }
+
+ pushtok(t);
+ return t;
}
const char *
tokname(toktype_t t)
{
- switch (t) {
- case TOK_CLASS: return "CLASS";
- case TOK_FC: return "FC";
- case TOK_AS: return "AS";
+ switch (t) {
+ case TOK_CLASS: return "CLASS";
+ case TOK_FC: return "FC";
+ case TOK_AS: return "AS";
- case TOK_F32: return "F32";
- case TOK_F64: return "F64";
+ case TOK_F32: return "F32";
+ case TOK_F64: return "F64";
- case TOK_I16: return "I16";
- case TOK_I32: return "I32";
+ case TOK_I16: return "I16";
+ case TOK_I32: return "I32";
- case TOK_U16: return "U16";
- case TOK_U64: return "U64";
+ case TOK_U16: return "U16";
+ case TOK_U64: return "U64";
- case TOK_IDENT: return "IDENT";
+ case TOK_IDENT: return "IDENT";
- case TOK_PERIOD: return "PERIOD";
- case TOK_LBRACE: return "LBRACE";
- case TOK_RBRACE: return "RBRACE";
- case TOK_LPAREN: return "LPAREN";
- case TOK_RPAREN: return "RPAREN";
- case TOK_COMMA: return "COMMA";
+ case TOK_PERIOD: return "PERIOD";
+ case TOK_LBRACE: return "LBRACE";
+ case TOK_RBRACE: return "RBRACE";
+ case TOK_LPAREN: return "LPAREN";
+ case TOK_RPAREN: return "RPAREN";
+ case TOK_COMMA: return "COMMA";
- case TOK_EOF: return "EOF";
+ case TOK_EOF: return "EOF";
- default: return "UNKNOWN";
- }
+ default: return "UNKNOWN";
+ }
}
diff --git a/lex.h b/lex.h
@@ -12,44 +12,44 @@
#define MAX_PATH_SIZE 1024
enum toktype {
- TOK_EOF,
-
- TOK_CLASS,
- TOK_FC,
- TOK_IMPORT,
- TOK_AS,
-
- TOK_F32,
- TOK_F64,
- TOK_I8,
- TOK_I16,
- TOK_I32,
- TOK_I64,
- TOK_U8,
- TOK_U16,
- TOK_U32,
- TOK_U64,
-
- TOK_IDENT,
-
- TOK_LBRACE,
- TOK_RBRACE,
- TOK_LPAREN,
- TOK_RPAREN,
- TOK_COMMA,
- TOK_PERIOD,
-
- TOK_UNKNOWN
+ TOK_EOF,
+
+ TOK_CLASS,
+ TOK_FC,
+ TOK_IMPORT,
+ TOK_AS,
+
+ TOK_F32,
+ TOK_F64,
+ TOK_I8,
+ TOK_I16,
+ TOK_I32,
+ TOK_I64,
+ TOK_U8,
+ TOK_U16,
+ TOK_U32,
+ TOK_U64,
+
+ TOK_IDENT,
+
+ TOK_LBRACE,
+ TOK_RBRACE,
+ TOK_LPAREN,
+ TOK_RPAREN,
+ TOK_COMMA,
+ TOK_PERIOD,
+
+ TOK_UNKNOWN
};
struct tok {
- enum toktype type;
- char lexeme[64];
+ enum toktype type;
+ char lexeme[64];
};
struct keyword {
- const char *name;
- enum toktype type;
+ const char *name;
+ enum toktype type;
};
typedef enum toktype toktype_t;
@@ -57,44 +57,48 @@ typedef struct tok tok_t;
typedef struct keyword keyword_t;
static const keyword_t kws[] = {
- {"import", TOK_IMPORT},
- {"class", TOK_CLASS},
- {"fc", TOK_FC},
- {"as", TOK_AS},
-
- {"f32", TOK_F32},
- {"f64", TOK_F64},
-
- {"i8", TOK_I8},
- {"i16", TOK_I16},
- {"i32", TOK_I32},
- {"i64", TOK_I64},
-
- {"u8", TOK_U8},
- {"u16", TOK_U16},
- {"u32", TOK_U32},
- {"u64", TOK_U64},
+ {"import", TOK_IMPORT},
+ {"class", TOK_CLASS},
+ {"fc", TOK_FC},
+ {"as", TOK_AS},
+
+ {"f32", TOK_F32},
+ {"f64", TOK_F64},
+
+ {"i8", TOK_I8},
+ {"i16", TOK_I16},
+ {"i32", TOK_I32},
+ {"i64", TOK_I64},
+
+ {"u8", TOK_U8},
+ {"u16", TOK_U16},
+ {"u32", TOK_U32},
+ {"u64", TOK_U64},
};
/* state defined in lex.c */
-extern const char *src;
-extern size_t i;
+extern const char *src;
+extern size_t i;
-toktype_t lu_kw(const char *str);
+toktype_t lu_kw(const char *str);
-int peek(void);
-int consume(void);
+int peek(void);
+int consume(void);
-void s_ws(void);
-void s_lc(void);
-void s_bc(void);
+void s_ws(void);
+void s_lc(void);
+void s_bc(void);
-void lex_init(void);
+void lex_init(void);
-tok_t id(void);
-tok_t nexttok(void);
+tok_t id(void);
+tok_t nexttok(void);
-const char *tokname(toktype_t t);
+const char *tokname(toktype_t t);
+
+/*
+ * Token stack, defined in lex.c.  pushtok() stores a token, poptok()
+ * removes and returns the top one, peekstack() returns the top one
+ * without removing it.
+ */
+void pushtok(tok_t t);
+tok_t poptok(void);
+tok_t peekstack(void);
#endif /* LEX_H */