143 lines
4.4 KiB
C
143 lines
4.4 KiB
C
/*
 * lexer.c - Unlambda lexer
 *
 * Copyright (C) 2020 CismonX <admin@cismon.net>
 *
 * This file is part of U6a.
 *
 * U6a is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * U6a is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with U6a. If not, see <https://www.gnu.org/licenses/>.
 */
|
|
|
|
#include "lexer.h"
|
|
#include "logging.h"
|
|
|
|
#include <stdlib.h>
|
|
#include <stddef.h>
|
|
#include <inttypes.h>
|
|
#include <ctype.h>
|
|
|
|
/*
 * READ_CH() - read the character operand of the token currently being lexed
 * (the `x` of `.x` and `?x`).
 *
 * This is a statement fragment, not an expression: it must be expanded
 * directly inside the `switch` of u6a_lex(), and it relies on that function's
 * locals `input_stream`, `fn`, `ch`, `tokens`, `len` and on its `lex_failed`
 * label.
 *
 *  - EOF:                      report an unexpected-EOF error, jump to `lex_failed`.
 *  - not printable, not '\n':  report an unprintable-character error, jump to
 *                              `lex_failed`.
 *  - otherwise:                store the character in the current token and
 *                              `break` out of the enclosing switch.
 *
 * The body is intentionally NOT wrapped in do { } while (0): that wrapper
 * would swallow the final `break`, which has to leave the caller's switch.
 */
#define READ_CH()                                   \
    ch = fgetc(input_stream);                       \
    if (UNLIKELY(ch == EOF)) {                      \
        u6a_err_unexpected_eof(err_lex, fn);        \
        goto lex_failed;                            \
    }                                               \
    if (UNLIKELY(!isprint(ch) && ch != '\n')) {     \
        u6a_err_unprintable_ch(err_lex, ch);        \
        goto lex_failed;                            \
    }                                               \
    tokens[len].ch = ch;                            \
    break
|
|
|
|
/* Stage tag passed to the u6a_err_* reporters for errors raised while lexing. */
static const char *err_lex  = "lex error";
/* Stage tag passed to u6a_info_verbose() for informational lexer messages. */
static const char *info_lex = "lex";
|
|
|
|
bool
|
|
u6a_lex(FILE* restrict input_stream, struct u6a_token** token_arr, uint32_t* token_len) {
|
|
uint32_t token_arr_size = U6A_TOKEN_INIT_LEN;
|
|
struct u6a_token* tokens = malloc(token_arr_size * sizeof(struct u6a_token));
|
|
if (UNLIKELY(tokens == NULL)) {
|
|
u6a_err_bad_alloc(err_lex, token_arr_size * sizeof(struct u6a_token));
|
|
return false;
|
|
}
|
|
int fn, ch;
|
|
uint32_t len = 0;
|
|
while (true) {
|
|
fn = fgetc(input_stream);
|
|
if (fn == '#') {
|
|
do {
|
|
fn = fgetc(input_stream);
|
|
} while (fn != EOF && fn != '\n');
|
|
}
|
|
if (UNLIKELY(fn == EOF)) {
|
|
break;
|
|
}
|
|
if (isspace(fn)) {
|
|
continue;
|
|
}
|
|
if (UNLIKELY(len >= token_arr_size)) {
|
|
token_arr_size *= 2;
|
|
if (token_arr_size > U6A_TOKEN_MAX_LEN) {
|
|
u6a_err_bad_alloc(err_lex, token_arr_size);
|
|
goto lex_failed;
|
|
}
|
|
struct u6a_token* new_tokens = realloc(tokens, token_arr_size * sizeof(struct u6a_token));
|
|
if (new_tokens == NULL) {
|
|
u6a_err_bad_alloc(err_lex, token_arr_size * sizeof(struct u6a_token));
|
|
goto lex_failed;
|
|
}
|
|
tokens = new_tokens;
|
|
}
|
|
switch (fn) {
|
|
case '`':
|
|
tokens[len].fn = u6a_tf_app;
|
|
break;
|
|
case 'S':
|
|
case 's':
|
|
tokens[len].fn = u6a_tf_s;
|
|
break;
|
|
case 'K':
|
|
case 'k':
|
|
tokens[len].fn = u6a_tf_k;
|
|
break;
|
|
case 'I':
|
|
case 'i':
|
|
tokens[len].fn = u6a_tf_i;
|
|
break;
|
|
case 'V':
|
|
case 'v':
|
|
tokens[len].fn = u6a_tf_v;
|
|
break;
|
|
case '.':
|
|
tokens[len].fn = u6a_tf_out;
|
|
READ_CH();
|
|
case 'R':
|
|
case 'r':
|
|
tokens[len] = U6A_TOKEN(u6a_tf_out, '\n');
|
|
break;
|
|
case 'C':
|
|
case 'c':
|
|
tokens[len].fn = u6a_tf_c;
|
|
break;
|
|
case 'D':
|
|
case 'd':
|
|
tokens[len].fn = u6a_tf_d;
|
|
break;
|
|
case '?':
|
|
tokens[len].fn = u6a_tf_cmp;
|
|
READ_CH();
|
|
case '@':
|
|
tokens[len].fn = u6a_tf_in;
|
|
break;
|
|
case '|':
|
|
tokens[len].fn = u6a_tf_pipe;
|
|
break;
|
|
case 'E':
|
|
case 'e':
|
|
tokens[len].fn = u6a_tf_e;
|
|
break;
|
|
default:
|
|
u6a_err_bad_ch(err_lex, fn);
|
|
lex_failed:
|
|
free(tokens);
|
|
return false;
|
|
}
|
|
++len;
|
|
}
|
|
*token_arr = tokens;
|
|
*token_len = len;
|
|
u6a_info_verbose(info_lex, "completed, %" PRIu32 " tokens total", len);
|
|
return true;
|
|
}
|