/** * ctlseqs.c - helper library for terminal control sequences * * Copyright (C) 2020 CismonX * * This file is part of the ctlseqs library. * * This library is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this library. If not, see . */ #ifdef HAVE_CONFIG_H # include "config.h" #endif // HAVE_CONFIG_H #include "ctlseqs.h" #include #include #include #include #include #include #include #include #include #ifdef HAVE___BUILTIN_EXPECT # define CTLSEQS_LIKELY(expr) __builtin_expect(!!(expr), 1) # define CTLSEQS_UNLIKELY(expr) __builtin_expect(!!(expr), 0) #else # define CTLSEQS_LIKELY(expr) (expr) # define CTLSEQS_UNLIKELY(expr) (expr) #endif // HAVE___BUILTIN_EXPECT #ifdef HAVE___BUILTIN_UNREACHABLE # define CTLSEQS_UNREACHABLE() __builtin_unreachable() #else # define CTLSEQS_UNREACHABLE() #endif // HAVE___BUILTIN_UNREACHABLE #ifdef HAVE_FUNC_ATTRIBUTE_COLD # define CTLSEQS_COLD __attribute__((cold)) #else # define CTLSEQS_COLD #endif // HAVE_FUNC_ATTRIBUTE_COLD #ifdef HAVE_FUNC_ATTRIBUTE_HOT # define CTLSEQS_HOT __attribute__((hot)) #else # define CTLSEQS_HOT #endif // HAVE_FUNC_ATTRIBUTE_HOT #ifndef CTLSEQS_TRIE_INIT_SIZE # define CTLSEQS_TRIE_INIT_SIZE 16 #endif // !CTLSEQS_TRIE_INIT_SIZE #define CTLSEQS_VALUE_STR(stop_cond) \ for (cnt = 0; ; ++cnt) { \ num = seq_val[cnt]; \ if (stop_cond) { \ break; \ } \ } \ buf[0].num = cnt; \ buf[1].str = seq_val; \ *seq = seq_val + cnt; \ *buf_offset += 2; \ return true #define CTLSEQS_VALUE_NUM(base) \ errno = 0; \ num = strtoul(seq_val, seq, base); \ if (errno != 0 || seq_val == *seq) { \ return false; \ } \ buf[0].num = num; \ *buf_offset += 1; \ return true enum ctlseqs_placeholder { ctlseqs_ph_begin = 0x0e, ctlseqs_ph_num = ctlseqs_ph_begin, ctlseqs_ph_nums, ctlseqs_ph_str, ctlseqs_ph_cmdstr, ctlseqs_ph_csi_param, ctlseqs_ph_csi_intmd, ctlseqs_ph_hexnum, ctlseqs_ph_chrstr, ctlseqs_ph_end, }; enum ctlseqs_state { ctlseqs_state_none, ctlseqs_state_esc, ctlseqs_state_csi, ctlseqs_state_csi_intmd, ctlseqs_state_cmdstr, ctlseqs_state_ss, ctlseqs_state_ctlstr, ctlseqs_state_str_end, ctlseqs_state_done, ctlseqs_state_err, }; struct ctlseqs_trie_node { ssize_t value; int placeholder; struct ctlseqs_trie_node *next; struct ctlseqs_trie_node *children[128]; }; struct ctlseqs_match_ctx { ssize_t value; struct ctlseqs_trie_node const *node; char *buf; }; struct ctlseqs_matcher { struct ctlseqs_trie_node root; struct ctlseqs_trie_node *node_pool; size_t node_pool_size; struct ctlseqs_match_ctx *match_stack; }; struct ctlseqs_reader { union ctlseqs_value *buffer; size_t readlen; struct pollfd pollfd; bool no_poll; bool retain_partial; char *rbuf; size_t buf_start; size_t buf_end; enum ctlseqs_state state; }; CTLSEQS_HOT static int ctlseqs_poll(struct pollfd *pollfd, int timeout) { int nevents = poll(pollfd, 1, timeout); if (nevents == -1) { return errno == EINTR ? CTLSEQS_INTR : CTLSEQS_ERROR; } if (nevents == 0) { return CTLSEQS_TIMEOUT; } if (CTLSEQS_LIKELY(pollfd->revents & POLLIN)) { return CTLSEQS_OK; } return pollfd->revents & POLLHUP ? CTLSEQS_EOF : CTLSEQS_ERROR; } CTLSEQS_HOT static int ctlseqs_do_read(struct ctlseqs_reader *reader) { ssize_t nbytes = read(reader->pollfd.fd, reader->rbuf + reader->buf_start, reader->readlen - reader->buf_start); if (CTLSEQS_UNLIKELY(nbytes == -1)) { if (errno == EAGAIN || errno == EWOULDBLOCK) { return CTLSEQS_TIMEOUT; } return errno == EINTR ? CTLSEQS_INTR : CTLSEQS_ERROR; } if (CTLSEQS_UNLIKELY(nbytes == 0)) { return CTLSEQS_EOF; } reader->buf_end += nbytes; return CTLSEQS_OK; } CTLSEQS_HOT static enum ctlseqs_state ctlseqs_state_transit(enum ctlseqs_state state, char ch) { switch (state) { case ctlseqs_state_none: return ch == 0x1b ? ctlseqs_state_esc : ctlseqs_state_err; case ctlseqs_state_esc: switch (ch) { case '[': return ctlseqs_state_csi; case 'N': case 'O': return ctlseqs_state_ss; case 'P': case ']': case '_': case '^': return ctlseqs_state_cmdstr; case 'X': return ctlseqs_state_ctlstr; default: return (ch >= ' ' && ch <= '~') ? ctlseqs_state_done : ctlseqs_state_err; } case ctlseqs_state_csi: if (ch >= '0' && ch <= '?') { return state; } return (ch >= ' ' && ch <= '/') ? ctlseqs_state_csi_intmd : ctlseqs_state_err; case ctlseqs_state_csi_intmd: if (ch >= ' ' && ch <= '/') { return state; } return (ch >= '@' && ch <= '~') ? ctlseqs_state_done : ctlseqs_state_err; case ctlseqs_state_cmdstr: if (ch == 0x1b) { return ctlseqs_state_str_end; } else if (ch < 0x08 || ch > '~' || (ch > 0x0d && ch < ' ')) { return ctlseqs_state_err; } else { return state; } case ctlseqs_state_ss: return (ch >= ' ' && ch <= '~') ? ctlseqs_state_done : ctlseqs_state_err; case ctlseqs_state_ctlstr: return ch == 0x1b ? ctlseqs_state_str_end : state; case ctlseqs_state_str_end: return ch == '\\' ? ctlseqs_state_done : ctlseqs_state_err; default: CTLSEQS_UNREACHABLE(); return state; } } CTLSEQS_HOT bool ctlseqs_fetch(char **seq, int type, union ctlseqs_value *buf, size_t *buf_offset) { unsigned long cnt, num; char *seq_val = *seq; buf += *buf_offset; switch (type) { case ctlseqs_ph_num: CTLSEQS_VALUE_NUM(10); case ctlseqs_ph_nums: for (cnt = 1; ; ++cnt) { num = strtoul(seq_val, seq, 10); if (errno != 0 || seq_val == *seq) { return false; } buf[cnt].num = num; if (seq_val[0] == ';') { ++seq_val; } else { break; } } buf[0].num = cnt; *buf_offset += 1; return true; case ctlseqs_ph_str: CTLSEQS_VALUE_STR(num < ' ' || num > '~'); case ctlseqs_ph_cmdstr: CTLSEQS_VALUE_STR(num < 0x08 || num > '~' || (num > 0x0d && num < ' ')); case ctlseqs_ph_csi_param: CTLSEQS_VALUE_STR(num < '0' || num > '?'); case ctlseqs_ph_csi_intmd: CTLSEQS_VALUE_STR(num < ' ' || num > '/'); case ctlseqs_ph_hexnum: CTLSEQS_VALUE_NUM(16); case ctlseqs_ph_chrstr: CTLSEQS_VALUE_STR(num > 0x7f); default: CTLSEQS_UNREACHABLE(); return false; } } CTLSEQS_HOT static ssize_t ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher) { static struct ctlseqs_trie_node const *old_node, empty_node = { 0 }; struct ctlseqs_match_ctx match_ctx = { .node = matcher == NULL ? &empty_node : &matcher->root, .buf = reader->rbuf + reader->buf_start + 1, }; ssize_t match_stack_top = -1; size_t buffer_offset = 0; while (true) { match_ctx.value = match_ctx.node->value; if (match_ctx.value == -1) { // Match character as-is. match_ctx.node = match_ctx.node->children[(unsigned)match_ctx.buf++[0]]; if (match_ctx.node == NULL) { break; } } else if (match_ctx.value < -1) { // Match placeholder. start_match: old_node = match_ctx.node; match_ctx.node = match_ctx.node->children[-match_ctx.value]; // If other placeholders exist on the same level, save them // to the stack for future retry. if (match_ctx.node->next != NULL) { matcher->match_stack[++match_stack_top] = (struct ctlseqs_match_ctx) { .value = match_ctx.node->next->placeholder, .node = old_node, .buf = match_ctx.buf, }; } if (!ctlseqs_fetch(&match_ctx.buf, -match_ctx.value, reader->buffer, &buffer_offset)) { break; } } else { // Successfully matched. return match_ctx.value; } } if (match_stack_top >= 0) { match_ctx = matcher->match_stack[match_stack_top--]; goto start_match; } return CTLSEQS_NOMATCH; } CTLSEQS_HOT static ssize_t ctlseqs_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher) { ssize_t retval = CTLSEQS_PARTIAL; char const *buf = reader->rbuf + reader->buf_start; size_t idx, len = reader->buf_end - reader->buf_start; enum ctlseqs_state state = reader->state; for (idx = 0; idx < len; ++idx) { state = ctlseqs_state_transit(state, buf[idx]); if (state == ctlseqs_state_err) { retval = CTLSEQS_NOSEQ; for (; idx < len; ++idx) { if (buf[idx] == 0x1b) { break; } } reader->buffer[0].num = idx; reader->buffer[1].str = buf; break; } if (state == ctlseqs_state_done) { retval = ctlseqs_match_pattern(reader, matcher); break; } } reader->state = state; reader->buf_start += idx; if (reader->buf_start == reader->buf_end) { reader->buf_start = 0; reader->buf_end = 0; } return retval; } struct ctlseqs_matcher * ctlseqs_matcher_init() { struct ctlseqs_matcher *matcher = malloc(sizeof(struct ctlseqs_matcher)); if (CTLSEQS_LIKELY(matcher != NULL)) { *matcher = (struct ctlseqs_matcher) { .node_pool_size = 0 }; } return matcher; } int ctlseqs_matcher_setopt(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_opts const *options) { size_t node_pool_idx = 0, max_format_size = 0; if (matcher->node_pool == NULL) { matcher->node_pool_size = sizeof(struct ctlseqs_trie_node) * CTLSEQS_TRIE_INIT_SIZE; matcher->node_pool = malloc(matcher->node_pool_size); if (CTLSEQS_UNLIKELY(matcher->node_pool == NULL)) { return CTLSEQS_NOMEM; } } matcher->root = (struct ctlseqs_trie_node) { .value = -1 }; for (size_t i = 0; i < options->npatterns; ++i) { char const *pattern = options->patterns[i]; struct ctlseqs_trie_node *node = &matcher->root; for (size_t j = 1; ; ++j) { int ch = pattern[j]; if (ch == '\0') { node->value = i; if (j > max_format_size) { max_format_size = j; } break; } struct ctlseqs_trie_node *old_node = node; node = node->children[ch]; if (node != NULL) { continue; } if (CTLSEQS_UNLIKELY(++node_pool_idx >= matcher->node_pool_size)) { struct ctlseqs_trie_node *new_pool = realloc(matcher->node_pool, matcher->node_pool_size * 2); if (CTLSEQS_UNLIKELY(new_pool == NULL)) { return CTLSEQS_NOMEM; } matcher->node_pool = new_pool; matcher->node_pool_size *= 2; } old_node->children[ch] = node = &matcher->node_pool[node_pool_idx]; *node = (struct ctlseqs_trie_node) { .value = -1 }; if (ch < ctlseqs_ph_begin || ch >= ctlseqs_ph_end) { continue; } // Node with multiple placeholders contains negated offset of child // which is the head of the linked list. if (old_node->value < -1) { node->next = old_node->children[-old_node->value]; node->next->value = old_node->value; } old_node->value = -ch; } } size_t stack_size = sizeof(struct ctlseqs_match_ctx) * max_format_size; struct ctlseqs_match_ctx *new_stack = realloc(matcher->match_stack, stack_size); if (CTLSEQS_UNLIKELY(new_stack == NULL)) { return CTLSEQS_NOMEM; } matcher->match_stack = new_stack; return CTLSEQS_OK; } CTLSEQS_COLD void ctlseqs_matcher_free(struct ctlseqs_matcher *matcher) { if (matcher != NULL) { free(matcher->node_pool); free(matcher); } } struct ctlseqs_reader * ctlseqs_reader_init() { struct ctlseqs_reader *reader = malloc(sizeof(struct ctlseqs_reader)); if (CTLSEQS_LIKELY(reader != NULL)) { *reader = (struct ctlseqs_reader) { 0 }; } return reader; } int ctlseqs_reader_setopt(struct ctlseqs_reader *reader, struct ctlseqs_reader_opts const *options) { reader->buffer = options->buffer; size_t readlen = options->maxlen; if (reader->readlen != readlen) { char *rbuf; if (reader->rbuf == NULL) { rbuf = malloc(readlen); } else { rbuf = realloc(reader->rbuf, readlen); } if (rbuf == NULL) { return CTLSEQS_NOMEM; } reader->readlen = readlen; reader->rbuf = rbuf; if (reader->buf_end > reader->readlen) { reader->buf_end = reader->readlen; } } reader->pollfd = (struct pollfd) { .fd = options->fd, .events = POLLIN }; reader->no_poll = options->flags & CTLSEQS_READER_NO_POLL; reader->retain_partial = options->flags & CTLSEQS_READER_RETAIN_PARTIAL; return CTLSEQS_OK; } CTLSEQS_HOT ssize_t ctlseqs_read(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher, int timeout) { if (reader->buf_start != 0) { ssize_t match_result = ctlseqs_match(reader, matcher); if (match_result >= 0) { return match_result; } } if (!reader->no_poll) { int poll_result = ctlseqs_poll(&reader->pollfd, timeout); if (poll_result < 0) { return poll_result; } } int read_result = ctlseqs_do_read(reader); if (CTLSEQS_UNLIKELY(read_result < 0)) { return read_result; } return ctlseqs_match(reader, matcher); } CTLSEQS_COLD void ctlseqs_reader_free(struct ctlseqs_reader *reader) { if (reader != NULL) { free(reader->rbuf); free(reader); } }