commit 2c5b5b85922af2473f3e8a3f1edfe67ec13d759e
parent bf8674c31ca8a6dfd0f92d1996d09e22db3c3b20
Author: Mario Rosell R. Martinez <mario@mariorosell.es>
Date: Sat, 4 Apr 2026 13:53:05 +0200
stat, lex, pipeline: add pipeline, improve lexer, and store i_fstream
Since I/O is kinda expensive, we now store a two-byte lookahead buffer instead
of ungetc'ing every time.
Also added a pipeline.c file so main can remain like six lines of code, LOL. Oh,
and now the FILE* of the opened i_infile is stored in an i_fstream member of the
stat struct.
Diffstat:
| M | Makefile | | | 2 | +- |
| M | lex.c | | | 129 | +++++++++++++++++++++++++++++++++++++++++++++++-------------------------------- |
| M | lex.h | | | 6 | ++++-- |
| A | pipeline.c | | | 65 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | pipeline.h | | | 11 | +++++++++++ |
| M | slash.c | | | 2 | ++ |
| M | stat.h | | | 2 | ++ |
7 files changed, 162 insertions(+), 55 deletions(-)
diff --git a/Makefile b/Makefile
@@ -2,7 +2,7 @@
# Makefile -- build system
#
-SRCS := slash.c cli.c stat.c lex.c
+SRCS := slash.c cli.c stat.c lex.c pipeline.c
OBJS := ${SRCS:.c=.o}
CC ?= cc
diff --git a/lex.c b/lex.c
@@ -7,15 +7,15 @@
#include <ctype.h>
#include "lex.h"
+#include "stat.h"
-static const int nkeyws = sizeof(kws) / sizeof(kws[0]);
+static const int nkeyws = sizeof(kws) / sizeof(kws[0]);
toktype_t
lu_kw(const char *str)
{
- int i;
+ int i = 0;
- i = 0;
while (i < nkeyws) {
if (strcmp(str, kws[i].name) == 0)
return kws[i].type;
@@ -24,20 +24,35 @@ lu_kw(const char *str)
return TOK_IDENT;
}
-/* state */
-const char *src;
-size_t i = 0;
+static int cur = EOF;
+static int next = EOF;
+
+void
+lex_init(void)
+{
+ cur = fgetc(stat.i_fstream);
+ next = fgetc(stat.i_fstream);
+}
char
peek(void)
{
- return src[i];
+ return (char)cur;
+}
+
+char
+peek2(void)
+{
+ return (char)next;
}
char
consume(void)
{
- return src[i++];
+ int c = cur;
+ cur = next;
+ next = fgetc(stat.i_fstream);
+ return (char)c;
}
void
@@ -57,13 +72,12 @@ s_lc(void)
void
s_bc(void)
{
- consume();
- consume();
+ /* we already consumed / and * before calling this */
- while (peek()) {
- if (peek() == '*' && src[i + 1] == '/') {
- consume();
- consume();
+ while (peek() != EOF) {
+ if (peek() == '*' && peek2() == '/') {
+ consume(); /* '*' */
+ consume(); /* '/' */
break;
}
consume();
@@ -73,10 +87,8 @@ s_bc(void)
tok_t
id(void)
{
- tok_t t;
- int len;
-
- len = 0;
+ tok_t t;
+ int len = 0;
while (isalnum(peek()) || peek() == '_') {
if (len < 63)
@@ -93,31 +105,34 @@ id(void)
tok_t
nexttok(void)
{
- tok_t t;
- char c;
+ tok_t t;
+ char c;
s_ws();
t.lexeme[0] = '\0';
c = peek();
- if (c == '\0') {
+ if (c == '\0' || c == EOF) {
t.type = TOK_EOF;
return t;
}
if (c == '/') {
consume();
+
if (peek() == '/') {
consume();
s_lc();
return nexttok();
}
+
if (peek() == '*') {
consume();
s_bc();
return nexttok();
}
+
t.type = TOK_UNKNOWN;
return t;
}
@@ -127,22 +142,32 @@ nexttok(void)
consume();
- if (c == '{')
+ switch (c) {
+ case '{':
t.type = TOK_LBRACE;
- else if (c == '}')
+ break;
+ case '}':
t.type = TOK_RBRACE;
- else if (c == '(')
+ break;
+ case '(':
t.type = TOK_LPAREN;
- else if (c == ')')
+ break;
+ case ')':
t.type = TOK_RPAREN;
- else if (c == ',')
+ break;
+ case ',':
t.type = TOK_COMMA;
- else if (c == ';')
+ break;
+ case ';':
t.type = TOK_SEMI;
- else if (c == '.')
+ break;
+ case '.':
t.type = TOK_DOT;
- else
+ break;
+ default:
t.type = TOK_UNKNOWN;
+ break;
+ }
return t;
}
@@ -151,29 +176,29 @@ const char *
tokname(toktype_t t)
{
switch (t) {
- case TOK_CLASS: return "CLASS";
- case TOK_FC: return "FC";
- case TOK_AS: return "AS";
-
- case TOK_F32: return "F32";
- case TOK_F64: return "F64";
-
- case TOK_I16: return "I16";
- case TOK_I32: return "I32";
-
- case TOK_U16: return "U16";
- case TOK_U64: return "U64";
-
- case TOK_IDENT: return "IDENT";
- case TOK_DOT: return "DOT";
- case TOK_LBRACE: return "LBRACE";
- case TOK_RBRACE: return "RBRACE";
- case TOK_LPAREN: return "LPAREN";
- case TOK_RPAREN: return "RPAREN";
- case TOK_COMMA: return "COMMA";
- case TOK_SEMI: return "SEMI";
- case TOK_EOF: return "EOF";
- default: return "UNKNOWN";
+ case TOK_CLASS: return "CLASS";
+ case TOK_FC: return "FC";
+ case TOK_AS: return "AS";
+
+ case TOK_F32: return "F32";
+ case TOK_F64: return "F64";
+
+ case TOK_I16: return "I16";
+ case TOK_I32: return "I32";
+
+ case TOK_U16: return "U16";
+ case TOK_U64: return "U64";
+
+ case TOK_IDENT: return "IDENT";
+ case TOK_DOT: return "DOT";
+ case TOK_LBRACE: return "LBRACE";
+ case TOK_RBRACE: return "RBRACE";
+ case TOK_LPAREN: return "LPAREN";
+ case TOK_RPAREN: return "RPAREN";
+ case TOK_COMMA: return "COMMA";
+ case TOK_SEMI: return "SEMI";
+ case TOK_EOF: return "EOF";
+ default: return "UNKNOWN";
}
}
diff --git a/lex.h b/lex.h
@@ -40,8 +40,8 @@ enum toktype {
};
struct tok {
- toktype type;
- char lexeme[64];
+ enum toktype type;
+ char lexeme[64];
};
struct keyword {
@@ -86,6 +86,8 @@ void s_ws(void);
void s_lc(void);
void s_bc(void);
+void lex_init(void);
+
tok_t id(void);
tok_t nexttok(void);
diff --git a/pipeline.c b/pipeline.c
@@ -0,0 +1,65 @@
+/*
+ * pipeline.c -- compilation pipeline
+ */
+
+#include <stdio.h>
+
+#include "lex.h"
+#include "pipeline.h"
+#include "stat.h"
+
+static void ofile(void);
+static void lex(void);
+static void end(void);
+
+/* open stat.i_infile -> stat.i_fstream */
+static void
+ofile(void)
+{
+ stat.i_fstream = fopen(stat.i_infile, "r"); /* RO mode */
+
+ if (!stat.i_fstream) {
+ perror("opening input");
+ end();
+ }
+}
+
+static void
+lex(void)
+{
+ tok_t tk;
+
+ /* 1. init */
+ lex_init();
+
+ /* 2. lup */
+ do {
+ tk = nexttok();
+
+ /* debug: can be removed:
+ * puts(tokname(t.type));
+ * if (tk.type == TOK_IDENT)
+ * printf("(%s)", t.lexeme);
+ *
+ * printf("\n");
+ */
+ (void)tk;
+ } while (tk.type != TOK_EOF);
+}
+
+static void
+end(void)
+{
+ fclose(stat.i_fstream); /* don't leave the fd open */
+}
+
+void
+compile(void)
+{
+ ofile();
+
+ lex();
+
+ end();
+}
+
diff --git a/pipeline.h b/pipeline.h
@@ -0,0 +1,11 @@
+/*
+ * pipeline.h -- compile() fwddef
+ */
+
+#ifndef PIPELINE_H
+#define PIPELINE_H
+
+/* run the full compilation pipeline */
+void compile(void);
+
+#endif /* PIPELINE_H */
diff --git a/slash.c b/slash.c
@@ -6,6 +6,7 @@
#include <stdlib.h>
#include "cli.h"
+#include "pipeline.h"
#include "stat.h"
int
@@ -13,6 +14,7 @@ main(int argc, char** argv)
{
initstat();
cli(argc, argv);
+ compile();
return EXIT_SUCCESS;
}
diff --git a/stat.h b/stat.h
@@ -6,11 +6,13 @@
#define STAT_H
#include <stdbool.h>
+#include <stdio.h>
struct stat {
char s_outf[512]; /* output file */
char s_asmf[512]; /* ASM output file */
char i_infile[512]; /* src file */
+ FILE* i_fstream; /* file stream */
bool mod_pipe; /* if using pipes for communication over tempfiles */
};