u6a/src/lexer.c

143 lines
4.4 KiB
C
Raw Normal View History

/*
 * lexer.c - Unlambda lexer
 *
 * Copyright (C) 2020 CismonX <admin@cismon.net>
 *
 * This file is part of U6a.
 *
 * U6a is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * U6a is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with U6a. If not, see <https://www.gnu.org/licenses/>.
 */
#include "lexer.h"
#include "logging.h"
#include <stdlib.h>
#include <stddef.h>
#include <inttypes.h>
#include <ctype.h>
/*
 * READ_CH() - read the single-character operand that follows `.` or `?`.
 *
 * Intended to be expanded directly inside the `switch` of u6a_lex(). It uses
 * that function's `input_stream`, `tokens`, `len`, `fn`, `ch` and its
 * `lex_failed` label, and it never falls through to the next statement:
 *   - fgetc() returned EOF     -> u6a_err_unexpected_eof(), goto lex_failed
 *   - not printable, not '\n'  -> u6a_err_unprintable_ch(), goto lex_failed
 *   - otherwise                -> store the character in tokens[len].ch and
 *                                 `break` out of the enclosing switch
 *
 * The expansion ends in a bare `break` (the call site supplies the `;`) and
 * is deliberately not wrapped in do { } while (0): such a wrapper would
 * capture the `break` instead of letting it leave the switch.
 */
#define READ_CH()                                              \
    ch = fgetc(input_stream);                                  \
    if (UNLIKELY(ch == EOF)) {                                 \
        u6a_err_unexpected_eof(err_lex, fn);                   \
        goto lex_failed;                                       \
    }                                                          \
    if (UNLIKELY(!isprint(ch) && ch != '\n')) {                \
        u6a_err_unprintable_ch(err_lex, ch);                   \
        goto lex_failed;                                       \
    }                                                          \
    tokens[len].ch = ch;                                       \
    break
/* Tag string passed as the first argument to the u6a_err_* calls made by the lexer. */
static const char* err_lex = "lex error";
/* Tag string passed to u6a_info_verbose() when the lexer reports completion. */
static const char* info_lex = "lex";
/*
 * u6a_lex - split Unlambda source text into an array of tokens.
 *
 * input_stream  stream the program is read from, one character at a time,
 *               until fgetc() returns EOF
 * token_arr     on success, receives the token array; it is allocated here
 *               with malloc()/realloc() and is not freed by this function
 * token_len     on success, receives the number of tokens stored
 *
 * Whitespace between tokens is skipped and `#` starts a comment that runs to
 * the end of the line. Letter functions are accepted in either case. `.` and
 * `?` consume one further character as their operand (see READ_CH), and
 * `r` is stored as an output token carrying '\n'.
 *
 * Returns true on success. On failure (allocation failure, token array
 * growing past U6A_TOKEN_MAX_LEN, unknown function character, or a bad or
 * missing operand after `.` / `?`) an error is reported through u6a_err_*,
 * the token buffer is freed, the output parameters are left untouched and
 * false is returned.
 */
bool
u6a_lex(FILE* restrict input_stream, struct u6a_token** token_arr, uint32_t* token_len) {
    uint32_t token_arr_size = U6A_TOKEN_INIT_LEN;
    struct u6a_token* tokens = malloc(token_arr_size * sizeof(*tokens));
    if (UNLIKELY(tokens == NULL)) {
        u6a_err_bad_alloc(err_lex, token_arr_size * sizeof(*tokens));
        return false;
    }
    int fn;         /* current function character */
    int ch;         /* operand character, written by READ_CH() */
    uint32_t len = 0;
    while (true) {
        fn = fgetc(input_stream);
        if (fn == '#') {
            /* Comment: discard everything up to the newline (or EOF). */
            do {
                fn = fgetc(input_stream);
            } while (fn != EOF && fn != '\n');
        }
        /*
         * NOTE(review): fgetc() also returns EOF on a read error; that case
         * is not distinguished here and ends lexing like a normal EOF.
         */
        if (UNLIKELY(fn == EOF)) {
            break;
        }
        if (isspace(fn)) {
            continue;
        }
        if (UNLIKELY(len >= token_arr_size)) {
            /*
             * Test against the cap before doubling so the 32-bit capacity
             * cannot wrap around; for integers `size > MAX / 2` accepts and
             * rejects exactly the same sizes as `size * 2 > MAX`.
             */
            if (token_arr_size > (U6A_TOKEN_MAX_LEN) / 2) {
                /* Report bytes, like the other u6a_err_bad_alloc() calls. */
                u6a_err_bad_alloc(err_lex, (size_t)token_arr_size * 2 * sizeof(*tokens));
                goto lex_failed;
            }
            token_arr_size *= 2;
            /* Keep the old pointer until realloc() succeeds so it can still be freed. */
            struct u6a_token* new_tokens = realloc(tokens, token_arr_size * sizeof(*tokens));
            if (new_tokens == NULL) {
                u6a_err_bad_alloc(err_lex, token_arr_size * sizeof(*tokens));
                goto lex_failed;
            }
            tokens = new_tokens;
        }
        switch (fn) {
            case '`':
                tokens[len].fn = u6a_tf_app;
                break;
            case 'S':
            case 's':
                tokens[len].fn = u6a_tf_s;
                break;
            case 'K':
            case 'k':
                tokens[len].fn = u6a_tf_k;
                break;
            case 'I':
            case 'i':
                tokens[len].fn = u6a_tf_i;
                break;
            case 'V':
            case 'v':
                tokens[len].fn = u6a_tf_v;
                break;
            case '.':
                tokens[len].fn = u6a_tf_out;
                /* No fallthrough: READ_CH() either breaks or jumps to lex_failed. */
                READ_CH();
            case 'R':
            case 'r':
                tokens[len] = U6A_TOKEN(u6a_tf_out, '\n');
                break;
            case 'C':
            case 'c':
                tokens[len].fn = u6a_tf_c;
                break;
            case 'D':
            case 'd':
                tokens[len].fn = u6a_tf_d;
                break;
            case '?':
                tokens[len].fn = u6a_tf_cmp;
                /* No fallthrough: READ_CH() either breaks or jumps to lex_failed. */
                READ_CH();
            case '@':
                tokens[len].fn = u6a_tf_in;
                break;
            case '|':
                tokens[len].fn = u6a_tf_pipe;
                break;
            case 'E':
            case 'e':
                tokens[len].fn = u6a_tf_e;
                break;
            default:
                u6a_err_bad_ch(err_lex, fn);
                goto lex_failed;
        }
        ++len;
    }
    *token_arr = tokens;
    *token_len = len;
    u6a_info_verbose(info_lex, "completed, %" PRIu32 " tokens total", len);
    return true;

lex_failed:
    /* Single cleanup path for every error raised after the initial allocation. */
    free(tokens);
    return false;
}