From 0ab477710a4173d4b59a716c7fe13138890aab83 Mon Sep 17 00:00:00 2001 From: CismonX Date: Sun, 3 May 2020 20:54:34 +0800 Subject: [PATCH] bugfix; add mnemonics dumping support --- man/u6ac.1 | 3 ++ src/Makefile.am | 2 +- src/codegen.c | 29 +++++++++--- src/codegen.h | 2 +- src/dump.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++ src/dump.h | 37 +++++++++++++++ src/logging.c | 10 ++-- src/mnemonic.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++ src/mnemonic.h | 37 +++++++++++++++ src/u6ac.c | 8 +++- 10 files changed, 353 insertions(+), 14 deletions(-) create mode 100644 src/dump.c create mode 100644 src/dump.h create mode 100644 src/mnemonic.c create mode 100644 src/mnemonic.h diff --git a/man/u6ac.1 b/man/u6ac.1 index 580afd2..54c2762 100644 --- a/man/u6ac.1 +++ b/man/u6ac.1 @@ -23,6 +23,9 @@ Compile-time optimization level. \fB\-O0\fR: Turn off optimization. \fB\-O1\fR(d \fB\-\-syntax\-only\fR Only check for lexical and syntactic correctness of the source file, and skips bytecode generation. .TP +\fB\-S\fR +Produce mnemonic pseudo-instructions instead of bytecode. +.TP \fB\-v\fR, \fB\-\-verbose\fR Print extra debug messages to \fBSTDOUT\fR. When this option is enabled, \fIout\-file\fR should not be \fBSTDOUT\fR. 
.TP diff --git a/src/Makefile.am b/src/Makefile.am index 8efc76e..14a5647 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -2,7 +2,7 @@ AUTOMAKE_OPTIONS = dejagnu bin_PROGRAMS = u6ac u6a -u6ac_SOURCES = logging.c lexer.c parser.c codegen.c u6ac.c +u6ac_SOURCES = logging.c lexer.c parser.c codegen.c u6ac.c mnemonic.c dump.c u6a_SOURCES = logging.c vm_stack.c vm_pool.c runtime.c u6a.c TEST_DIR = ${srcdir}/../tests diff --git a/src/codegen.c b/src/codegen.c index 4f38f33..542b8d5 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -20,6 +20,7 @@ #include "codegen.h" #include "logging.h" #include "vm_defs.h" +#include "dump.h" #include #include @@ -37,6 +38,7 @@ static FILE* output_stream; static const char* file_name; static bool optimize_const; +static bool dump_mnemonics; static const char* err_codegen = "codegen error"; static const char* info_codegen = "codegen"; @@ -65,6 +67,9 @@ write_bc_header(FILE* restrict output_stream, uint32_t text_len, uint32_t rodata bool u6a_write_prefix(const char* prefix_string) { + if (dump_mnemonics) { + return true; + } if (prefix_string == NULL) { return true; } @@ -78,10 +83,11 @@ u6a_write_prefix(const char* prefix_string) { } void -u6a_codegen_init(FILE* output_stream_, const char* file_name_, bool optimize_const_) { +u6a_codegen_init(FILE* output_stream_, const char* file_name_, bool optimize_const_, bool dump_mnemonics_) { output_stream = output_stream_; file_name = file_name_; optimize_const = optimize_const_; + dump_mnemonics = dump_mnemonics_; } bool @@ -194,13 +200,22 @@ u6a_codegen(struct u6a_ast_node* ast_arr, uint32_t ast_len) { } } } - uint32_t write_len; - if (UNLIKELY(!write_bc_header(output_stream, text_len, rodata_len))) { - write_len = sizeof(struct u6a_bc_header); - goto codegen_failed; + uint32_t write_len = 0; + if (UNLIKELY(dump_mnemonics)) { + if (UNLIKELY(!u6a_dump_mnemonics(output_stream, text_buffer, text_len))) { + goto codegen_failed; + } + if (UNLIKELY(!u6a_dump_data(output_stream, rodata_buffer, 
rodata_len))) { + goto codegen_failed; + } + } else { + if (UNLIKELY(!write_bc_header(output_stream, text_len, rodata_len))) { + write_len = sizeof(struct u6a_bc_header); + goto codegen_failed; + } + WRITE_SECION(text_buffer, sizeof(struct u6a_vm_ins), text_len, output_stream); + WRITE_SECION(rodata_buffer, sizeof(char), rodata_len, output_stream); } - WRITE_SECION(text_buffer, sizeof(struct u6a_vm_ins), text_len, output_stream); - WRITE_SECION(rodata_buffer, sizeof(char), rodata_len, output_stream); free(bc_buffer); free(stack); u6a_info_verbose(info_codegen, "completed, text: %" PRIu32 ", rodata: %" PRIu32, text_len, rodata_len); diff --git a/src/codegen.h b/src/codegen.h index 8f2bf0c..a3783e9 100644 --- a/src/codegen.h +++ b/src/codegen.h @@ -27,7 +27,7 @@ #include void -u6a_codegen_init(FILE* output_stream, const char* file_name, bool optimize_const); +u6a_codegen_init(FILE* output_stream, const char* file_name, bool optimize_const, bool dump_mnemonics); bool u6a_write_prefix(const char* prefix_string); diff --git a/src/dump.c b/src/dump.c new file mode 100644 index 0000000..91c6e62 --- /dev/null +++ b/src/dump.c @@ -0,0 +1,121 @@ +/* + * dump.c - dump utility + * + * Copyright (C) 2020 CismonX + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */
+
+#include "common.h"
+#include "dump.h"
+#include "mnemonic.h"
+
+#include <arpa/inet.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Write formatted output to `os`.  If fprintf() reports an error, the
+ * *enclosing function* returns false immediately.
+ *
+ * The do/while(0) wrapper makes the macro expand to exactly one statement, so
+ * it stays correct inside unbraced if/else bodies.  At least one variadic
+ * argument is required (callers print fixed text through "%s" or "%c").
+ */
+#define fprintf_check(os, format, ...)               \
+    do {                                             \
+        if (fprintf(os, format, __VA_ARGS__) < 0) {  \
+            return false;                            \
+        }                                            \
+    } while (0)
+
+/*
+ * Print one instruction as a line of mnemonic text.
+ *
+ * The line starts with `offset` as 8 hex digits, followed by the opcode
+ * mnemonic.  When the opcode carries U6A_VM_OP_EXTENTED, the extended opcode
+ * mnemonic is appended in angle brackets.  The operand part depends on the
+ * opcode:
+ *   - U6A_VM_OP_OFFSET set: the offset operand, passed through ntohl();
+ *   - u6a_vo_app: both function operands separated by a comma, each followed
+ *     by its character mnemonic when U6A_VM_FN_CHAR is set on that operand;
+ *   - anything else: no operand.
+ *
+ * Returns false as soon as a write to `output_stream` fails, true otherwise.
+ */
+static inline bool
+write_mnemonic_ins(FILE* restrict output_stream, uint32_t offset, struct u6a_vm_ins ins) {
+    fprintf_check(output_stream, "%08x: ", offset);
+    const char* op = u6a_mnemonic_op(ins.opcode);
+    if (ins.opcode & U6A_VM_OP_EXTENTED) {
+        const char* op_ex = u6a_mnemonic_op_ex(ins.opcode_ex);
+        int op_len = strlen(op);
+        fprintf_check(output_stream, "%-*s<%-5s>%-*s", op_len, op, op_ex, 3 - op_len, "");
+    } else {
+        fprintf_check(output_stream, "%-10s", op);
+    }
+    if (ins.opcode & U6A_VM_OP_OFFSET) {
+        fprintf_check(output_stream, " 0x%08x\n", ntohl(ins.operand.offset));
+    } else if (ins.opcode == u6a_vo_app) {
+        const char* fn_1 = u6a_mnemonic_fn(ins.operand.fn.first.fn);
+        int fn_1_len = strlen(fn_1);
+        if (ins.operand.fn.first.fn & U6A_VM_FN_CHAR) {
+            const char* ch_1 = u6a_mnemonic_ch(ins.operand.fn.first.ch);
+            int fn_ch_1_len = fn_1_len + strlen(ch_1);
+            fprintf_check(output_stream, " %-*s%s,%-*s", fn_1_len, fn_1, ch_1, 5 - fn_ch_1_len, "");
+        } else {
+            fprintf_check(output_stream, " %-*s,%-*s", fn_1_len, fn_1, 5 - fn_1_len, "");
+        }
+        const char* fn_2 = u6a_mnemonic_fn(ins.operand.fn.second.fn);
+        if (ins.operand.fn.second.fn & U6A_VM_FN_CHAR) {
+            const char* ch_2 = u6a_mnemonic_ch(ins.operand.fn.second.ch);
+            int fn_2_len = strlen(fn_2);
+            fprintf_check(output_stream, " %-*s%-4s\n", fn_2_len, fn_2, ch_2);
+        } else {
+            fprintf_check(output_stream, " %s\n", fn_2);
+        }
+    } else {
+        fprintf_check(output_stream, " %c", '\n');
+    }
+    return true;
+}
+
+/*
+ * Hex-dump `length` bytes of `data`, U6A_HEXDUMP_BYTES_PER_LINE bytes per line.
+ *
+ * Each line holds the offset of its first byte (8 hex digits), the bytes as
+ * two-digit hex values grouped in pairs, and the matching slice of
+ * `formatted` (the caller's printable rendering of the same bytes).  A short
+ * final line is padded with blanks so that the text column stays aligned.
+ *
+ * `formatted` must be at least `length` bytes long.  It does not have to be
+ * NUL-terminated: every slice is printed with an explicit precision.
+ *
+ * Returns false as soon as a write to `output_stream` fails, true otherwise.
+ */
+static inline bool
+u6a_hexdump(FILE* restrict output_stream, const char* data, const char* formatted, uint32_t length) {
+    /* Read through unsigned char so that bytes >= 0x80 are not sign-extended when promoted for "%02x". */
+    const unsigned char* bytes = (const unsigned char*)data;
+    uint32_t count;
+    /* Advancing by `count` (never more than what is left) keeps `idx` from overshooting `length`. */
+    for (uint32_t idx = 0; idx < length; idx += count) {
+        uint32_t remaining = length - idx;
+        count = remaining < U6A_HEXDUMP_BYTES_PER_LINE ? remaining : U6A_HEXDUMP_BYTES_PER_LINE;
+        fprintf_check(output_stream, "%08x: ", (unsigned int)idx);
+        for (uint32_t col = 0; col < U6A_HEXDUMP_BYTES_PER_LINE; ++col) {
+            if (col < count) {
+                fprintf_check(output_stream, "%02x", (unsigned int)bytes[idx + col]);
+            } else {
+                /* Past the end of the data: keep the column width. */
+                fprintf_check(output_stream, "%s", "  ");
+            }
+            if (col % 2 == 1) {
+                /* One blank after every pair of bytes. */
+                fprintf_check(output_stream, "%c", ' ');
+            }
+        }
+        /* The precision bounds the read: `formatted` carries no terminator. */
+        fprintf_check(output_stream, " %.*s\n", (int)count, formatted + idx);
+    }
+    return true;
+}
+
+/*
+ * Dump `length` instructions starting at `data` as a ".text" section, one
+ * line per instruction, followed by an empty line.  The index of each
+ * instruction is printed as its offset.
+ *
+ * Returns false as soon as a write to `output_stream` fails, true otherwise.
+ */
+bool
+u6a_dump_mnemonics(FILE* restrict output_stream, struct u6a_vm_ins* data, uint32_t length) {
+    fprintf_check(output_stream, "%s\n", ".text");
+    for (uint32_t idx = 0; idx < length; ++idx) {
+        if (UNLIKELY(!write_mnemonic_ins(output_stream, idx, data[idx]))) {
+            return false;
+        }
+    }
+    fprintf_check(output_stream, "%c", '\n');
+    return true;
+}
+
+/*
+ * Dump `length` bytes starting at `data` as a ".rodata" section in hexdump
+ * form.  Bytes for which isprint() is false are shown as '.' in the text
+ * column.
+ *
+ * Returns false if a write to `output_stream` fails or if the temporary
+ * buffer cannot be allocated, true otherwise.
+ */
+bool
+u6a_dump_data(FILE* restrict output_stream, const char* data, uint32_t length) {
+    fprintf_check(output_stream, "%s\n", ".rodata");
+    if (length == 0) {
+        /* Nothing to dump; also avoids mistaking a NULL from malloc(0) for a failure. */
+        return true;
+    }
+    char* formatted_data = malloc(length);
+    if (UNLIKELY(formatted_data == NULL)) {
+        return false;
+    }
+    for (uint32_t idx = 0; idx < length; ++idx) {
+        /* <ctype.h> functions are only defined for unsigned char values and EOF. */
+        unsigned char byte = (unsigned char)data[idx];
+        formatted_data[idx] = isprint(byte) ? (char)byte : '.';
+    }
+    bool result = u6a_hexdump(output_stream, data, formatted_data, length);
+    free(formatted_data);
+    return result;
+}
diff --git a/src/dump.h b/src/dump.h
new file mode 100644
index 0000000..f84c83a
--- /dev/null
+++ b/src/dump.h
@@ -0,0 +1,37 @@
+/*
+ * dump.h - dump utility definitions
+ *
+ * Copyright (C) 2020 CismonX
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef U6A_DUMP_H_
+#define U6A_DUMP_H_
+
+#include "vm_defs.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define U6A_HEXDUMP_BYTES_PER_LINE 16
+
+bool
+u6a_dump_mnemonics(FILE* restrict output_stream, struct u6a_vm_ins* data, uint32_t length);
+
+bool
+u6a_dump_data(FILE* restrict output_stream, const char* data, uint32_t length);
+
+#endif
diff --git a/src/logging.c b/src/logging.c
index 9f4c855..0c27bf2 100644
--- a/src/logging.c
+++ b/src/logging.c
@@ -52,7 +52,7 @@ u6a_err_unexpected_eof(const char* stage, int after) {
 
 U6A_COLD void
 u6a_err_unprintable_ch(const char* stage, int got) {
-    fprintf(stderr, "%s[%s]: printable character or '\\n' expected, 0x%02X given.\n", prog_name, stage, got);
+    fprintf(stderr, "%s[%s]: printable character or '\\n' expected, 0x%02x given.\n", prog_name, stage, got);
 }
 
 U6A_COLD void
@@ -62,7 +62,7 @@ u6a_err_bad_ch(const char* stage, int got) {
     } else if (LIKELY(got == '\n')) {
         fprintf(stderr, E_UNRECOGNIZABLE_CHAR "'\\n'.\n", prog_name, stage);
     } else {
-        fprintf(stderr,
E_UNRECOGNIZABLE_CHAR "0x%02X.\n", prog_name, stage, got); + fprintf(stderr, E_UNRECOGNIZABLE_CHAR "0x%02x.\n", prog_name, stage, got); } } @@ -73,7 +73,11 @@ u6a_err_bad_syntax(const char* stage) { U6A_COLD void u6a_err_write_failed(const char* stage, size_t bytes, const char* filename) { - fprintf(stderr, "%s: [%s] failed writing %zu bytes to %s.\n", prog_name, stage, bytes, filename); + if (bytes > 0) { + fprintf(stderr, "%s: [%s] failed writing %zu bytes to %s.\n", prog_name, stage, bytes, filename); + } else { + fprintf(stderr, "%s: [%s] failed writing data to %s.\n", prog_name, stage, filename); + } } U6A_COLD void diff --git a/src/mnemonic.c b/src/mnemonic.c new file mode 100644 index 0000000..1fb864f --- /dev/null +++ b/src/mnemonic.c @@ -0,0 +1,118 @@ +/* + * mnemonic.c - Unlambda mnemonics + * + * Copyright (C) 2020 CismonX + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */
+
+#include "mnemonic.h"
+#include "vm_defs.h"
+
+/*
+ * Mnemonic text for a VM opcode.
+ * Any value not listed below ends up in U6A_NOT_REACHED().
+ */
+const char*
+u6a_mnemonic_op(uint8_t op) {
+    switch (op) {
+        case u6a_vo_app: return "APP";
+        case u6a_vo_la:  return "LA";
+        case u6a_vo_sa:  return "SA";
+        case u6a_vo_del: return "DEL";
+        case u6a_vo_lc:  return "LC";
+        case u6a_vo_xch: return "XCH";
+        default:
+            U6A_NOT_REACHED();
+    }
+}
+
+/*
+ * Mnemonic text for an extended opcode.
+ * Any value not listed below ends up in U6A_NOT_REACHED().
+ */
+const char*
+u6a_mnemonic_op_ex(uint8_t op_ex) {
+    switch (op_ex) {
+        case u6a_vo_ex_print: return "print";
+        default:
+            U6A_NOT_REACHED();
+    }
+}
+
+/*
+ * Mnemonic text for a function code.
+ * The three u6a_vf_d1_* codes share one mnemonic.
+ * Any value not listed below ends up in U6A_NOT_REACHED().
+ */
+const char*
+u6a_mnemonic_fn(uint8_t fn) {
+    switch (fn) {
+        case u6a_vf_placeholder_: return "acc";
+        case u6a_vf_k:            return "k";
+        case u6a_vf_s:            return "s";
+        case u6a_vf_i:            return "i";
+        case u6a_vf_v:            return "v";
+        case u6a_vf_c:            return "c";
+        case u6a_vf_d:            return "d";
+        case u6a_vf_e:            return "e";
+        case u6a_vf_in:           return "@";
+        case u6a_vf_pipe:         return "|";
+        case u6a_vf_out:          return ".";
+        case u6a_vf_cmp:          return "?";
+        case u6a_vf_k1:           return "`k";
+        case u6a_vf_s1:           return "`s";
+        case u6a_vf_s2:           return "``s";
+        case u6a_vf_c1:           return "`c";
+        case u6a_vf_d1_s:
+        case u6a_vf_d1_c:
+        case u6a_vf_d1_d:
+            return "`d";
+        case u6a_vf_j:            return "~j";
+        case u6a_vf_f:            return "~f";
+        case u6a_vf_p:            return "~p";
+        default:
+            U6A_NOT_REACHED();
+    }
+}
+
+/*
+ * Mnemonic text for a character operand.
+ *
+ * Characters with codes 33 to 126 map to a one-character string taken from a
+ * packed table of two-byte entries (the character followed by a NUL).
+ * Space and newline are handled separately; every other value ends up in
+ * U6A_NOT_REACHED().
+ */
+const char*
+u6a_mnemonic_ch(uint8_t ch) {
+    /* Entry N starts at byte 2 * N and describes character code 33 + N. */
+    static const char* packed_table =
+        "!\0\"\0#\0$\0%\0&\0'\0(\0)\0*\0+\0,\0-\0.\0/\0000\0001\0002\0003\0004\0005\0006\0007\0008\0009\0"
+        ":\0;\0<\0=\0>\0?\0@\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U\0V\0W\0X\0Y\0Z\0"
+        "[\0\\\0]\0^\0_\0`\0a\0b\0c\0d\0e\0f\0g\0h\0i\0j\0k\0l\0m\0n\0o\0p\0q\0r\0s\0t\0u\0v\0w\0x\0y\0z\0{\0|\0}\0~";
+    /*
+     * NOTE(review): both literals below are empty in this copy of the patch.
+     * Presumably each held a distinct placeholder that was lost when the
+     * patch text was extracted - confirm against the repository.
+     */
+    if (ch == ' ') {
+        return "";
+    }
+    if (ch == '\n') {
+        return "";
+    }
+    if (ch > 32 && ch < 127) {
+        return packed_table + 2 * (ch - 33);
+    }
+    U6A_NOT_REACHED();
+}
diff --git a/src/mnemonic.h b/src/mnemonic.h
new file mode 100644
index 0000000..376bc9c
--- /dev/null
+++ b/src/mnemonic.h @@ -0,0 +1,37 @@ +/* + * mnemonic.h - Unlambda mnemonics definitions + * + * Copyright (C) 2020 CismonX + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef U6A_MNEMONIC_H_ +#define U6A_MNEMONIC_H_ + +#include + +const char* +u6a_mnemonic_op(uint8_t op); + +const char* +u6a_mnemonic_op_ex(uint8_t op_ex); + +const char* +u6a_mnemonic_fn(uint8_t fn); + +const char* +u6a_mnemonic_ch(uint8_t ch); + +#endif diff --git a/src/u6ac.c b/src/u6ac.c index 702444c..c58c354 100644 --- a/src/u6ac.c +++ b/src/u6ac.c @@ -39,6 +39,7 @@ struct arg_options { char* output_file_prefix; char* output_file_name; bool optimize_const; + bool dump_mnemonics; bool print_only; }; @@ -75,7 +76,7 @@ process_options(struct arg_options* options, int argc, char** argv) { bool verbose = false; char optimize_level = '1'; while (true) { - int result = getopt_long(argc, argv, "o:O::vHV", long_opts, NULL); + int result = getopt_long(argc, argv, "o:O::SvHV", long_opts, NULL); if (result == -1) { break; } @@ -94,6 +95,9 @@ process_options(struct arg_options* options, int argc, char** argv) { } options->output_file_prefix = optarg ? 
optarg : "#!/usr/bin/env u6a\n"; break; + case 'S': + options->dump_mnemonics = true; + break; case 'v': verbose = true; break; @@ -208,7 +212,7 @@ main(int argc, char** argv) { if (UNLIKELY(options.output_file == NULL)) { goto terminate; } - u6a_codegen_init(options.output_file, options.output_file_name, options.optimize_const); + u6a_codegen_init(options.output_file, options.output_file_name, options.optimize_const, options.dump_mnemonics); if (UNLIKELY(!u6a_write_prefix(options.output_file_prefix))) { exit_code = EC_ERR_CODEGEN; goto terminate;