slash

slash is a simple type-oriented programming language
Log | Files | Refs | README | LICENSE

commit 2c5b5b85922af2473f3e8a3f1edfe67ec13d759e
parent bf8674c31ca8a6dfd0f92d1996d09e22db3c3b20
Author: Mario Rosell R. Martinez <mario@mariorosell.es>
Date:   Sat,  4 Apr 2026 13:53:05 +0200

stat, lex, pipeline: add pipeline, improve lexer, and store i_fstream

Since I/O is kinda expensive, we now store a two-byte lookahead buffer instead
of ungetc'ing every time.

Also added a pipeline.c file so main can remain like six lines of code, LOL. Oh,
and now the FILE* of the opened i_infile is stored in an i_fstream member of the
stat struct.

Diffstat:
MMakefile | 2+-
Mlex.c | 129+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
Mlex.h | 6++++--
Apipeline.c | 65+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apipeline.h | 11+++++++++++
Mslash.c | 2++
Mstat.h | 2++
7 files changed, 162 insertions(+), 55 deletions(-)

diff --git a/Makefile b/Makefile @@ -2,7 +2,7 @@ # Makefile -- build system # -SRCS := slash.c cli.c stat.c lex.c +SRCS := slash.c cli.c stat.c lex.c pipeline.c OBJS := ${SRCS:.c=.o} CC ?= cc diff --git a/lex.c b/lex.c @@ -7,15 +7,15 @@ #include <ctype.h> #include "lex.h" +#include "stat.h" -static const int nkeyws = sizeof(kws) / sizeof(kws[0]); +static const int nkeyws = sizeof(kws) / sizeof(kws[0]); toktype_t lu_kw(const char *str) { - int i; + int i = 0; - i = 0; while (i < nkeyws) { if (strcmp(str, kws[i].name) == 0) return kws[i].type; @@ -24,20 +24,35 @@ lu_kw(const char *str) return TOK_IDENT; } -/* state */ -const char *src; -size_t i = 0; +static int cur = EOF; +static int next = EOF; + +void +lex_init(void) +{ + cur = fgetc(stat.i_fstream); + next = fgetc(stat.i_fstream); +} char peek(void) { - return src[i]; + return (char)cur; +} + +char +peek2(void) +{ + return (char)next; } char consume(void) { - return src[i++]; + int c = cur; + cur = next; + next = fgetc(stat.i_fstream); + return (char)c; } void @@ -57,13 +72,12 @@ s_lc(void) void s_bc(void) { - consume(); - consume(); + /* we already consumed / and * before calling this */ - while (peek()) { - if (peek() == '*' && src[i + 1] == '/') { - consume(); - consume(); + while (peek() != EOF) { + if (peek() == '*' && peek2() == '/') { + consume(); /* '*' */ + consume(); /* '/' */ break; } consume(); @@ -73,10 +87,8 @@ s_bc(void) tok_t id(void) { - tok_t t; - int len; - - len = 0; + tok_t t; + int len = 0; while (isalnum(peek()) || peek() == '_') { if (len < 63) @@ -93,31 +105,34 @@ id(void) tok_t nexttok(void) { - tok_t t; - char c; + tok_t t; + char c; s_ws(); t.lexeme[0] = '\0'; c = peek(); - if (c == '\0') { + if (c == '\0' || c == EOF) { t.type = TOK_EOF; return t; } if (c == '/') { consume(); + if (peek() == '/') { consume(); s_lc(); return nexttok(); } + if (peek() == '*') { consume(); s_bc(); return nexttok(); } + t.type = TOK_UNKNOWN; return t; } @@ -127,22 +142,32 @@ nexttok(void) consume(); - if 
(c == '{') + switch (c) { + case '{': t.type = TOK_LBRACE; - else if (c == '}') + break; + case '}': t.type = TOK_RBRACE; - else if (c == '(') + break; + case '(': t.type = TOK_LPAREN; - else if (c == ')') + break; + case ')': t.type = TOK_RPAREN; - else if (c == ',') + break; + case ',': t.type = TOK_COMMA; - else if (c == ';') + break; + case ';': t.type = TOK_SEMI; - else if (c == '.') + break; + case '.': t.type = TOK_DOT; - else + break; + default: t.type = TOK_UNKNOWN; + break; + } return t; } @@ -151,29 +176,29 @@ const char * tokname(toktype_t t) { switch (t) { - case TOK_CLASS: return "CLASS"; - case TOK_FC: return "FC"; - case TOK_AS: return "AS"; - - case TOK_F32: return "F32"; - case TOK_F64: return "F64"; - - case TOK_I16: return "I16"; - case TOK_I32: return "I32"; - - case TOK_U16: return "U16"; - case TOK_U64: return "U64"; - - case TOK_IDENT: return "IDENT"; - case TOK_DOT: return "DOT"; - case TOK_LBRACE: return "LBRACE"; - case TOK_RBRACE: return "RBRACE"; - case TOK_LPAREN: return "LPAREN"; - case TOK_RPAREN: return "RPAREN"; - case TOK_COMMA: return "COMMA"; - case TOK_SEMI: return "SEMI"; - case TOK_EOF: return "EOF"; - default: return "UNKNOWN"; + case TOK_CLASS: return "CLASS"; + case TOK_FC: return "FC"; + case TOK_AS: return "AS"; + + case TOK_F32: return "F32"; + case TOK_F64: return "F64"; + + case TOK_I16: return "I16"; + case TOK_I32: return "I32"; + + case TOK_U16: return "U16"; + case TOK_U64: return "U64"; + + case TOK_IDENT: return "IDENT"; + case TOK_DOT: return "DOT"; + case TOK_LBRACE: return "LBRACE"; + case TOK_RBRACE: return "RBRACE"; + case TOK_LPAREN: return "LPAREN"; + case TOK_RPAREN: return "RPAREN"; + case TOK_COMMA: return "COMMA"; + case TOK_SEMI: return "SEMI"; + case TOK_EOF: return "EOF"; + default: return "UNKNOWN"; } } diff --git a/lex.h b/lex.h @@ -40,8 +40,8 @@ enum toktype { }; struct tok { - toktype type; - char lexeme[64]; + enum toktype type; + char lexeme[64]; }; struct keyword { @@ -86,6 +86,8 @@ void 
s_ws(void); void s_lc(void); void s_bc(void); +void lex_init(void); + tok_t id(void); tok_t nexttok(void); diff --git a/pipeline.c b/pipeline.c @@ -0,0 +1,65 @@ +/* + * pipeline.c -- compilation pipeline + */ + +#include <stdio.h> + +#include "lex.h" +#include "pipeline.h" +#include "stat.h" + +static void ofile(void); +static void lex(void); +static void end(void); + +/* open stat.i_infile -> stat.i_fstream */ +static void +ofile(void) +{ + stat.i_fstream = fopen(stat.i_infile, "r"); /* RO mode */ + + if (!stat.i_fstream) { + perror("opening input"); + end(); + } +} + +static void +lex(void) +{ + tok_t tk; + + /* 1. init */ + lex_init(); + + /* 2. lup */ + do { + tk = nexttok(); + + /* debug: can be removed: + * puts(tokname(t.type)); + * if (tk.type == TOK_IDENT) + * printf("(%s)", t.lexeme); + * + * printf("\n"); + */ + (void)tk; + } while (tk.type != TOK_EOF); +} + +static void +end(void) +{ + fclose(stat.i_fstream); /* don't leave the fd open */ +} + +void +compile(void) +{ + ofile(); + + lex(); + + end(); +} + diff --git a/pipeline.h b/pipeline.h @@ -0,0 +1,11 @@ +/* + * pipeline.h -- compile() fwddef + */ + +#ifndef PIPELINE_H +#define PIPELINE_H + +/* run the full compilation pipeline */ +void compile(void); + +#endif /* PIPELINE_H */ diff --git a/slash.c b/slash.c @@ -6,6 +6,7 @@ #include <stdlib.h> #include "cli.h" +#include "pipeline.h" #include "stat.h" int @@ -13,6 +14,7 @@ main(int argc, char** argv) { initstat(); cli(argc, argv); + compile(); return EXIT_SUCCESS; } diff --git a/stat.h b/stat.h @@ -6,11 +6,13 @@ #define STAT_H #include <stdbool.h> +#include <stdio.h> struct stat { char s_outf[512]; /* output file */ char s_asmf[512]; /* ASM output file */ char i_infile[512]; /* src file */ + FILE* i_fstream; /* file stream */ bool mod_pipe; /* if using pipes for communication over tempfiles */ };