lex.c (4269B)
1 /* 2 * lex.c -- simple enough dfa-based parser 3 */ 4 5 #include <stdio.h> 6 #include <string.h> 7 #include <ctype.h> 8 9 #include "lex.h" 10 #include "stat.h" 11 12 #define MAX_STACK_SIZE 100 13 14 typedef struct { 15 tok_t stack[MAX_STACK_SIZE]; 16 int top; 17 } token_stack_t; 18 19 token_stack_t tokstack; 20 21 static const int nkeyws = sizeof(kws) / sizeof(kws[0]); 22 23 toktype_t 24 lu_kw(const char *str) 25 { 26 for (int i = 0; i < nkeyws; i++) { 27 if (strcmp(str, kws[i].name) == 0) 28 return kws[i].type; 29 } 30 return TOK_IDENT; 31 } 32 33 static int cur = EOF; 34 static int next = EOF; 35 36 void 37 lex_init(void) 38 { 39 cur = fgetc(stat.i_fstream); 40 next = fgetc(stat.i_fstream); 41 tokstack.top = -1; 42 } 43 44 int 45 peek(void) 46 { 47 return cur; 48 } 49 50 int 51 peek2(void) 52 { 53 return next; 54 } 55 56 int 57 consume(void) 58 { 59 int c = cur; 60 cur = next; 61 next = fgetc(stat.i_fstream); 62 return c; 63 } 64 65 static void 66 skip_ws_c(void) 67 { 68 while (1) { 69 /* whitespace */ 70 while (isspace(peek())) 71 consume(); 72 73 /* line comment */ 74 if (peek() == '/' && peek2() == '/') { 75 consume(); /* / */ 76 consume(); /* / */ 77 while (peek() != '\n' && peek() != EOF) 78 consume(); 79 continue; 80 } 81 82 /* block comment */ 83 if (peek() == '/' && peek2() == '*') { 84 consume(); /* / */ 85 consume(); /* * */ 86 87 while (peek() != EOF) { 88 if (peek() == '*' && peek2() == '/') { 89 consume(); /* * */ 90 consume(); /* / */ 91 break; 92 } 93 consume(); 94 } 95 continue; 96 } 97 98 break; 99 } 100 } 101 102 tok_t 103 id(void) 104 { 105 tok_t t = {0}; 106 int len = 0; 107 int c; 108 109 while ((c = peek()) != EOF && (isalnum(c) || c == '_')) { 110 if (len < (int)sizeof(t.lexeme) - 1) 111 t.lexeme[len++] = (char)consume(); 112 else 113 consume(); 114 } 115 116 t.lexeme[len] = '\0'; 117 t.type = lu_kw(t.lexeme); 118 119 return t; 120 } 121 122 void 123 pushtok(tok_t t) 124 { 125 if (tokstack.top < MAX_STACK_SIZE - 1) { 126 tokstack.stack[++tokstack.top] = t; 127 } else { 128 fprintf(stderr, "slash: token stack overflow\n"); 129 } 130 } 131 132 tok_t 133 poptok(void) 134 { 135 if (tokstack.top >= 0) { 136 return tokstack.stack[tokstack.top--]; 137 } else { 138 fprintf(stderr, "slash: token stack underflow\n"); 139 tok_t empty_token = {0}; 140 return empty_token; 141 } 142 } 143 144 tok_t 145 peekstack(void) 146 { 147 if (tokstack.top >= 0) { 148 return tokstack.stack[tokstack.top]; 149 } else { 150 fprintf(stderr, "slash: token stack is empty\n"); 151 tok_t empty_token = {0}; 152 return empty_token; 153 } 154 } 155 156 tok_t 157 nexttok(void) 158 { 159 tok_t t = {0}; 160 int c; 161 162 skip_ws_c(); 163 164 c = peek(); 165 166 /* EOF */ 167 if (c == EOF) { 168 t.type = TOK_EOF; 169 pushtok(t); 170 return t; 171 } 172 173 /* identifier or keyword */ 174 if (isalpha(c) || c == '_') { 175 t = id(); 176 pushtok(t); 177 return t; 178 } 179 180 consume(); 181 182 switch (c) { 183 case '{': 184 t.type = TOK_LBRACE; 185 break; 186 case '}': 187 t.type = TOK_RBRACE; 188 break; 189 case '(': 190 t.type = TOK_LPAREN; 191 break; 192 case ')': 193 t.type = TOK_RPAREN; 194 break; 195 case ',': 196 t.type = TOK_COMMA; 197 break; 198 case '.': 199 t.type = TOK_PERIOD; 200 break; 201 default: 202 t.type = TOK_UNKNOWN; 203 t.lexeme[0] = (char)c; 204 t.lexeme[1] = '\0'; 205 break; 206 } 207 208 pushtok(t); 209 return t; 210 } 211 212 const char * 213 tokname(toktype_t t) 214 { 215 switch (t) { 216 case TOK_CLASS: return "CLASS"; 217 case TOK_FC: return "FC"; 218 case TOK_AS: return "AS"; 219 220 case TOK_F32: return "F32"; 221 case TOK_F64: return "F64"; 222 223 case TOK_I16: return "I16"; 224 case TOK_I32: return "I32"; 225 226 case TOK_U16: return "U16"; 227 case TOK_U64: return "U64"; 228 229 case TOK_IDENT: return "IDENT"; 230 231 case TOK_PERIOD: return "PERIOD"; 232 case TOK_LBRACE: return "LBRACE"; 233 case TOK_RBRACE: return "RBRACE"; 234 case TOK_LPAREN: return "LPAREN"; 235 case TOK_RPAREN: return "RPAREN"; 236 case TOK_COMMA: return "COMMA"; 237 238 case TOK_EOF: return "EOF"; 239 240 default: return "UNKNOWN"; 241 } 242 } 243