u6a/src/lexer.c

/*
 * lexer.c - Unlambda lexer
 * 
 * Copyright (C) 2020  CismonX <admin@cismon.net>
 *
 * This file is part of U6a.
 *
 * U6a is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * U6a is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with U6a.  If not, see <https://www.gnu.org/licenses/>.
 */

#include "lexer.h"
#include "logging.h"

#include <stdlib.h>
#include <stddef.h>
#include <inttypes.h>
#include <ctype.h>

/*
 * Read the operand character of a two-character token and store it in the
 * token currently being built.
 *
 * This macro is only meaningful when expanded directly inside the `switch`
 * of u6a_lex(); it uses that function's locals `ch`, `fn`, `len`, `tokens`
 * and `input_stream`, and it always leaves the current `case`:
 *   - end of input          -> reports an unexpected EOF, jumps to lex_failed;
 *   - neither printable nor '\n'
 *                           -> reports an unprintable character, jumps to
 *                              lex_failed;
 *   - otherwise             -> stores the character in tokens[len].ch and
 *                              `break`s out of the enclosing switch.
 * It is intentionally NOT wrapped in do { } while (0): the trailing `break`
 * has to bind to the caller's switch statement.
 */
#define READ_CH()                                    \
    ch = fgetc(input_stream);                        \
    if (UNLIKELY(ch == EOF)) {                       \
        u6a_err_unexpected_eof(err_lex, fn);         \
        goto lex_failed;                             \
    }                                                \
    if (UNLIKELY(!isprint(ch) && ch != '\n')) {      \
        u6a_err_unprintable_ch(err_lex, ch);         \
        goto lex_failed;                             \
    }                                                \
    tokens[len].ch = ch;                             \
    break

/*
 * Stage labels passed to the logging helpers by the lexer: `err_lex` tags
 * error reports, `info_lex` tags the verbose completion message.
 * Both the strings and the pointers themselves are read-only.
 */
static const char* const err_lex = "lex error";
static const char* const info_lex = "lex";

bool
u6a_lex(FILE* restrict input_stream, struct u6a_token** token_arr, uint32_t* token_len) {
    uint32_t token_arr_size = U6A_TOKEN_INIT_LEN;
    struct u6a_token* tokens = malloc(token_arr_size * sizeof(struct u6a_token));
    if (UNLIKELY(tokens == NULL)) {
        u6a_err_bad_alloc(err_lex, token_arr_size * sizeof(struct u6a_token));
        return false;
    }
    int fn, ch;
    uint32_t len = 0;
    while (true) {
        fn = fgetc(input_stream);
        if (fn == '#') {
            do {
                fn = fgetc(input_stream);
            } while (fn != EOF && fn != '\n');
        }
        if (UNLIKELY(fn == EOF)) {
            break;
        }
        if (isspace(fn)) {
            continue;
        }
        if (UNLIKELY(len >= token_arr_size)) {
            token_arr_size *= 2;
            if (token_arr_size > U6A_TOKEN_MAX_LEN) {
                u6a_err_bad_alloc(err_lex, token_arr_size);
                goto lex_failed;
            }
            struct u6a_token* new_tokens = realloc(tokens, token_arr_size * sizeof(struct u6a_token));
            if (new_tokens == NULL) {
                u6a_err_bad_alloc(err_lex, token_arr_size * sizeof(struct u6a_token));
                goto lex_failed;
            }
            tokens = new_tokens;
        }
        switch (fn) {
            case '`':
                tokens[len].fn = u6a_tf_app;
                break;
            case 'S':
            case 's':
                tokens[len].fn = u6a_tf_s;
                break;
            case 'K':
            case 'k':
                tokens[len].fn = u6a_tf_k;
                break;
            case 'I':
            case 'i':
                tokens[len].fn = u6a_tf_i;
                break;
            case 'V':
            case 'v':
                tokens[len].fn = u6a_tf_v;
                break;
            case '.':
                tokens[len].fn = u6a_tf_out;
                READ_CH();
            case 'R':
            case 'r':
                tokens[len] = U6A_TOKEN(u6a_tf_out, '\n');
                break;
            case 'C':
            case 'c':
                tokens[len].fn = u6a_tf_c;
                break;
            case 'D':
            case 'd':
                tokens[len].fn = u6a_tf_d;
                break;
            case '?':
                tokens[len].fn = u6a_tf_cmp;
                READ_CH();
            case '@':
                tokens[len].fn = u6a_tf_in;
                break;
            case '|':
                tokens[len].fn = u6a_tf_pipe;
                break;
            case 'E':
            case 'e':
                tokens[len].fn = u6a_tf_e;
                break;
            default:
                u6a_err_bad_ch(err_lex, fn);
                lex_failed:
                free(tokens);
                return false;
        }
        ++len;
    }
    *token_arr = tokens;
    *token_len = len;
    u6a_info_verbose(info_lex, "completed, %" PRIu32 " tokens total", len);
    return true;
}
initial commit 2020-01-30 10:11:10 +00:00			`/*`
			`* lexer.c - Unlambda lexer`
			`*`
			`* Copyright (C) 2020 CismonX <admin@cismon.net>`
			`*`
Update copyright notice 2020-10-10 19:50:45 +00:00			`* This file is part of U6a.`
			`*`
			`* U6a is free software: you can redistribute it and/or modify`
initial commit 2020-01-30 10:11:10 +00:00			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation, either version 3 of the License, or`
			`* (at your option) any later version.`
			`*`
Update copyright notice 2020-10-10 19:50:45 +00:00			`* U6a is distributed in the hope that it will be useful,`
initial commit 2020-01-30 10:11:10 +00:00			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License`
Update copyright notice 2020-10-10 19:50:45 +00:00			`* along with U6a. If not, see <https://www.gnu.org/licenses/>.`
initial commit 2020-01-30 10:11:10 +00:00			`*/`

			`#include "lexer.h"`
			`#include "logging.h"`

			`#include <stdlib.h>`
			`#include <stddef.h>`
			`#include <inttypes.h>`
			`#include <ctype.h>`

			`#define READ_CH() \`
			`ch = fgetc(input_stream); \`
			`if (UNLIKELY(ch == EOF)) { \`
			`u6a_err_unexpected_eof(err_lex, fn); \`
			`goto lex_failed; \`
			`} \`
			`if (LIKELY(isprint(ch) \|\| ch == '\n')) { \`
			`tokens[len].ch = ch; \`
			`break; \`
			`} \`
			`u6a_err_unprintable_ch(err_lex, ch); \`
			`goto lex_failed`

			`static const char* err_lex = "lex error";`
			`static const char* info_lex = "lex";`

			`bool`
			`u6a_lex(FILE* restrict input_stream, struct u6a_token** token_arr, uint32_t* token_len) {`
			`uint32_t token_arr_size = U6A_TOKEN_INIT_LEN;`
			`struct u6a_token* tokens = malloc(token_arr_size * sizeof(struct u6a_token));`
			`if (UNLIKELY(tokens == NULL)) {`
			`u6a_err_bad_alloc(err_lex, token_arr_size * sizeof(struct u6a_token));`
			`return false;`
			`}`
			`int fn, ch;`
			`uint32_t len = 0;`
			`while (true) {`
			`fn = fgetc(input_stream);`
			`if (fn == '#') {`
			`do {`
			`fn = fgetc(input_stream);`
			`} while (fn != EOF && fn != '\n');`
			`}`
			`if (UNLIKELY(fn == EOF)) {`
			`break;`
			`}`
			`if (isspace(fn)) {`
			`continue;`
			`}`
			`if (UNLIKELY(len >= token_arr_size)) {`
			`token_arr_size *= 2;`
			`if (token_arr_size > U6A_TOKEN_MAX_LEN) {`
			`u6a_err_bad_alloc(err_lex, token_arr_size);`
			`goto lex_failed;`
			`}`
			`struct u6a_token* new_tokens = realloc(tokens, token_arr_size * sizeof(struct u6a_token));`
			`if (new_tokens == NULL) {`
			`u6a_err_bad_alloc(err_lex, token_arr_size * sizeof(struct u6a_token));`
			`goto lex_failed;`
			`}`
			`tokens = new_tokens;`
			`}`
			`switch (fn) {`
			case '`':
			`tokens[len].fn = u6a_tf_app;`
			`break;`
			`case 'S':`
			`case 's':`
			`tokens[len].fn = u6a_tf_s;`
			`break;`
			`case 'K':`
			`case 'k':`
			`tokens[len].fn = u6a_tf_k;`
			`break;`
			`case 'I':`
			`case 'i':`
			`tokens[len].fn = u6a_tf_i;`
			`break;`
			`case 'V':`
			`case 'v':`
			`tokens[len].fn = u6a_tf_v;`
			`break;`
			`case '.':`
			`tokens[len].fn = u6a_tf_out;`
			`READ_CH();`
			`case 'R':`
			`case 'r':`
			`tokens[len] = U6A_TOKEN(u6a_tf_out, '\n');`
			`break;`
			`case 'C':`
			`case 'c':`
			`tokens[len].fn = u6a_tf_c;`
			`break;`
			`case 'D':`
			`case 'd':`
			`tokens[len].fn = u6a_tf_d;`
			`break;`
			`case '?':`
			`tokens[len].fn = u6a_tf_cmp;`
			`READ_CH();`
			`case '@':`
			`tokens[len].fn = u6a_tf_in;`
			`break;`
			`case '\|':`
			`tokens[len].fn = u6a_tf_pipe;`
			`break;`
			`case 'E':`
			`case 'e':`
			`tokens[len].fn = u6a_tf_e;`
			`break;`
			`default:`
			`u6a_err_bad_ch(err_lex, fn);`
			`lex_failed:`
			`free(tokens);`
			`return false;`
			`}`
			`++len;`
			`}`
			`*token_arr = tokens;`
			`*token_len = len;`
			`u6a_info_verbose(info_lex, "completed, %" PRIu32 " tokens total", len);`
			`return true;`
			`}`