slash

slash is a simple type-oriented programming language
Log | Files | Refs | README | LICENSE

lex.c (4269B)


      1 /*
      2  * lex.c -- simple enough dfa-based lexer
      3  */
      4 
      5 #include <stdio.h>
      6 #include <string.h>
      7 #include <ctype.h>
      8 
      9 #include "lex.h"
     10 #include "stat.h"
     11 
     12 #define MAX_STACK_SIZE 100 /* capacity of the token stack, in tokens */
     13 
        /*
         * Fixed-capacity stack of tokens.
         *
         * top indexes the most recently pushed entry of stack[];
         * lex_init() sets it to -1, which the stack routines in this
         * file treat as "empty".
         */
     14 typedef struct {
     15     tok_t stack[MAX_STACK_SIZE]; /* token storage, filled from index 0 upward */
     16     int top;                     /* index of the newest token, -1 when empty */
     17 } token_stack_t;
     18 
        /* the stack instance used by pushtok(), poptok() and peekstack() */
     19 token_stack_t tokstack;
     20 
     21 static const int nkeyws = sizeof(kws) / sizeof(kws[0]);
     22 
     23 toktype_t
     24 lu_kw(const char *str)
     25 {
     26     for (int i = 0; i < nkeyws; i++) {
     27         if (strcmp(str, kws[i].name) == 0)
     28             return kws[i].type;
     29     }
     30     return TOK_IDENT;
     31 }
     32 
     33 static int cur = EOF;
     34 static int next = EOF;
     35 
     36 void
     37 lex_init(void)
     38 {
     39     cur = fgetc(stat.i_fstream);
     40     next = fgetc(stat.i_fstream);
     41     tokstack.top = -1;
     42 }
     43 
     44 int
     45 peek(void)
     46 {
     47     return cur;
     48 }
     49 
     50 int
     51 peek2(void)
     52 {
     53     return next;
     54 }
     55 
     56 int
     57 consume(void)
     58 {
     59     int c = cur;
     60     cur = next;
     61     next = fgetc(stat.i_fstream);
     62     return c;
     63 }
     64 
     65 static void
     66 skip_ws_c(void)
     67 {
     68     while (1) {
     69         /* whitespace */
     70         while (isspace(peek()))
     71             consume();
     72 
     73         /* line comment */
     74         if (peek() == '/' && peek2() == '/') {
     75             consume(); /* / */
     76             consume(); /* / */
     77             while (peek() != '\n' && peek() != EOF)
     78                 consume();
     79             continue;
     80         }
     81 
     82         /* block comment */
     83         if (peek() == '/' && peek2() == '*') {
     84             consume(); /* / */
     85             consume(); /* * */
     86 
     87             while (peek() != EOF) {
     88                 if (peek() == '*' && peek2() == '/') {
     89                     consume(); /* * */
     90                     consume(); /* / */
     91                     break;
     92                 }
     93                 consume();
     94             }
     95             continue;
     96         }
     97 
     98         break;
     99     }
    100 }
    101 
    102 tok_t
    103 id(void)
    104 {
    105     tok_t t = {0};
    106     int len = 0;
    107     int c;
    108 
    109     while ((c = peek()) != EOF && (isalnum(c) || c == '_')) {
    110         if (len < (int)sizeof(t.lexeme) - 1)
    111             t.lexeme[len++] = (char)consume();
    112         else
    113             consume();
    114     }
    115 
    116     t.lexeme[len] = '\0';
    117     t.type = lu_kw(t.lexeme);
    118 
    119     return t;
    120 }
    121 
    122 void
    123 pushtok(tok_t t)
    124 {
    125     if (tokstack.top < MAX_STACK_SIZE - 1) {
    126         tokstack.stack[++tokstack.top] = t;
    127     } else {
    128         fprintf(stderr, "slash: token stack overflow\n");
    129     }
    130 }
    131 
    132 tok_t
    133 poptok(void)
    134 {
    135     if (tokstack.top >= 0) {
    136         return tokstack.stack[tokstack.top--];
    137     } else {
    138         fprintf(stderr, "slash: token stack underflow\n");
    139         tok_t empty_token = {0};
    140         return empty_token;
    141     }
    142 }
    143 
    144 tok_t
    145 peekstack(void)
    146 {
    147     if (tokstack.top >= 0) {
    148         return tokstack.stack[tokstack.top];
    149     } else {
    150         fprintf(stderr, "slash: token stack is empty\n");
    151         tok_t empty_token = {0};
    152         return empty_token;
    153     }
    154 }
    155 
    156 tok_t
    157 nexttok(void)
    158 {
    159     tok_t t = {0};
    160     int c;
    161 
    162     skip_ws_c();
    163 
    164     c = peek();
    165 
    166     /* EOF */
    167     if (c == EOF) {
    168         t.type = TOK_EOF;
    169         pushtok(t);
    170         return t;
    171     }
    172 
    173     /* identifier or keyword */
    174     if (isalpha(c) || c == '_') {
    175         t = id();
    176         pushtok(t);
    177         return t;
    178     }
    179 
    180     consume();
    181 
    182     switch (c) {
    183     case '{':
    184         t.type = TOK_LBRACE;
    185         break;
    186     case '}':
    187         t.type = TOK_RBRACE;
    188         break;
    189     case '(':
    190         t.type = TOK_LPAREN;
    191         break;
    192     case ')':
    193         t.type = TOK_RPAREN;
    194         break;
    195     case ',':
    196         t.type = TOK_COMMA;
    197         break;
    198     case '.':
    199         t.type = TOK_PERIOD;
    200         break;
    201     default:
    202         t.type = TOK_UNKNOWN;
    203         t.lexeme[0] = (char)c;
    204         t.lexeme[1] = '\0';
    205         break;
    206     }
    207 
    208     pushtok(t);
    209     return t;
    210 }
    211 
    212 const char *
    213 tokname(toktype_t t)
    214 {
    215     switch (t) {
    216     case TOK_CLASS:  return "CLASS";
    217     case TOK_FC:     return "FC";
    218     case TOK_AS:     return "AS";
    219 
    220     case TOK_F32:    return "F32";
    221     case TOK_F64:    return "F64";
    222 
    223     case TOK_I16:    return "I16";
    224     case TOK_I32:    return "I32";
    225 
    226     case TOK_U16:    return "U16";
    227     case TOK_U64:    return "U64";
    228 
    229     case TOK_IDENT:  return "IDENT";
    230 
    231     case TOK_PERIOD: return "PERIOD";
    232     case TOK_LBRACE: return "LBRACE";
    233     case TOK_RBRACE: return "RBRACE";
    234     case TOK_LPAREN: return "LPAREN";
    235     case TOK_RPAREN: return "RPAREN";
    236     case TOK_COMMA:  return "COMMA";
    237 
    238     case TOK_EOF:    return "EOF";
    239 
    240     default:         return "UNKNOWN";
    241     }
    242 }
    243