/** * ctlseqs.c - helper library for terminal control sequences * * Copyright (C) 2020 CismonX * * This file is part of the ctlseqs library. * * ctlseqs is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * ctlseqs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ctlseqs. If not, see . */ #ifdef HAVE_CONFIG_H # include "config.h" #endif // HAVE_CONFIG_H #include "ctlseqs.h" #include #include #include #include #include #include #include #include #ifdef HAVE___BUILTIN_EXPECT # define CTLSEQS_LIKELY(expr) __builtin_expect(!!(expr), 1) # define CTLSEQS_UNLIKELY(expr) __builtin_expect(!!(expr), 0) #else # define CTLSEQS_LIKELY(expr) (expr) # define CTLSEQS_UNLIKELY(expr) (expr) #endif // HAVE___BUILTIN_EXPECT #ifdef HAVE___BUILTIN_UNREACHABLE # define CTLSEQS_UNREACHABLE() __builtin_unreachable() #else # define CTLSEQS_UNREACHABLE() #endif // HAVE___BUILTIN_UNREACHABLE #ifdef HAVE_FUNC_ATTRIBUTE_COLD # define CTLSEQS_COLD __attribute__((cold)) #else # define CTLSEQS_COLD #endif // HAVE_FUNC_ATTRIBUTE_COLD #ifdef HAVE_FUNC_ATTRIBUTE_HOT # define CTLSEQS_HOT __attribute__((hot)) #else # define CTLSEQS_HOT #endif // HAVE_FUNC_ATTRIBUTE_HOT #ifndef CTLSEQS_TRIE_INIT_SIZE # define CTLSEQS_TRIE_INIT_SIZE 16 #endif // !CTLSEQS_TRIE_INIT_SIZE #define CTLSEQS_VALUE_STR(stop_cond) \ for (cnt = 0; ; ++cnt) { \ num = seq_val[cnt]; \ if (stop_cond) { \ break; \ } \ } \ buf_val[0].num = cnt; \ buf_val[1].str = seq_val; \ *seq = seq_val + cnt; \ *buf += 2; \ return true #define CTLSEQS_VALUE_NUM(base) \ errno = 0; \ num = strtoul(seq_val, seq, base); \ if (errno || seq_val == *seq) { \ return false; \ } \ buf_val[0].num = num; \ ++*buf; \ return true enum ctlseqs_placeholder { ctlseqs_ph_begin = 0x0e, ctlseqs_ph_num = ctlseqs_ph_begin, ctlseqs_ph_nums, ctlseqs_ph_str, ctlseqs_ph_cmdstr, ctlseqs_ph_csi_param, ctlseqs_ph_csi_intmd, ctlseqs_ph_hexnum, ctlseqs_ph_chrstr, ctlseqs_ph_end, }; enum ctlseqs_state { ctlseqs_state_none, ctlseqs_state_esc, ctlseqs_state_csi, ctlseqs_state_csi_intmd, ctlseqs_state_cmdstr, ctlseqs_state_ss, ctlseqs_state_ctlstr, ctlseqs_state_str_end, ctlseqs_state_done, ctlseqs_state_err, }; struct ctlseqs_trie_node { ssize_t value; ssize_t placeholder; struct ctlseqs_trie_node *next; struct ctlseqs_trie_node *children[128]; }; struct ctlseqs_match_ctx { ssize_t value; struct ctlseqs_trie_node const *node; char *rbuf; union ctlseqs_value *result; }; struct ctlseqs_matcher { struct ctlseqs_trie_node root; struct ctlseqs_trie_node *node_pool; size_t node_pool_size; struct ctlseqs_match_ctx *match_stack; }; struct ctlseqs_reader { union ctlseqs_value *buffer; size_t readlen; struct pollfd pollfd; char *rbuf; size_t buf_start; size_t buf_end; size_t last_idx; enum ctlseqs_state state; bool no_poll; }; CTLSEQS_HOT static inline int ctlseqs_poll(struct pollfd *pollfd, int timeout) { int nevents = poll(pollfd, 1, timeout); if (nevents == -1) { return errno == EINTR ? CTLSEQS_INTR : CTLSEQS_ERROR; } if (nevents == 0) { return CTLSEQS_TIMEOUT; } if (CTLSEQS_LIKELY(pollfd->revents & POLLIN)) { return CTLSEQS_OK; } return pollfd->revents & POLLHUP ? CTLSEQS_EOF : CTLSEQS_ERROR; } CTLSEQS_HOT static inline int ctlseqs_do_read(struct ctlseqs_reader *reader) { size_t offset = reader->buf_start + reader->last_idx; ssize_t nbytes = read(reader->pollfd.fd, reader->rbuf + offset, reader->readlen - offset); if (CTLSEQS_UNLIKELY(nbytes == -1)) { if (errno == EAGAIN || errno == EWOULDBLOCK) { return CTLSEQS_TIMEOUT; } return errno == EINTR ? CTLSEQS_INTR : CTLSEQS_ERROR; } if (CTLSEQS_UNLIKELY(nbytes == 0)) { return CTLSEQS_EOF; } reader->buf_end += nbytes; return CTLSEQS_OK; } CTLSEQS_HOT static enum ctlseqs_state ctlseqs_state_transition(enum ctlseqs_state state, char ch) { switch (state) { case ctlseqs_state_none: return ch == 0x1b ? ctlseqs_state_esc : ctlseqs_state_err; case ctlseqs_state_esc: switch (ch) { case '[': return ctlseqs_state_csi; case 'N': case 'O': return ctlseqs_state_ss; case 'P': case ']': case '_': case '^': return ctlseqs_state_cmdstr; case 'X': return ctlseqs_state_ctlstr; default: return (ch >= ' ' && ch <= '~') ? ctlseqs_state_done : ctlseqs_state_err; } case ctlseqs_state_csi: if (ch >= '0' && ch <= '?') { return state; } else if (ch >= ' ' && ch <= '/') { return ctlseqs_state_csi_intmd; } return (ch >= '@' && ch <= '~') ? ctlseqs_state_done : ctlseqs_state_err; case ctlseqs_state_csi_intmd: if (ch >= ' ' && ch <= '/') { return state; } return (ch >= '@' && ch <= '~') ? ctlseqs_state_done : ctlseqs_state_err; case ctlseqs_state_cmdstr: if (ch == 0x1b) { return ctlseqs_state_str_end; } else if (ch < 0x08 || ch > '~' || (ch > 0x0d && ch < ' ')) { return ctlseqs_state_err; } return state; case ctlseqs_state_ss: return (ch >= ' ' && ch <= '~') ? ctlseqs_state_done : ctlseqs_state_err; case ctlseqs_state_ctlstr: return ch == 0x1b ? ctlseqs_state_str_end : state; case ctlseqs_state_str_end: return ch == '\\' ? ctlseqs_state_done : ctlseqs_state_err; default: CTLSEQS_UNREACHABLE(); return state; } } CTLSEQS_HOT static bool ctlseqs_fetch(char **seq, int type, union ctlseqs_value **buf) { unsigned long cnt, num; char *seq_val = *seq; union ctlseqs_value *buf_val = *buf; switch (type) { case ctlseqs_ph_num: CTLSEQS_VALUE_NUM(10); case ctlseqs_ph_nums: for (cnt = 1; ; ++cnt) { errno = 0; num = strtoul(seq_val, seq, 10); if (errno || seq_val == *seq) { return false; } buf_val[cnt].num = num; if ((*seq)[0] != ';') { break; } seq_val = *seq + 1; } buf_val[0].num = cnt; *buf += cnt + 1; return true; case ctlseqs_ph_str: CTLSEQS_VALUE_STR(num < ' ' || num > '~'); case ctlseqs_ph_cmdstr: CTLSEQS_VALUE_STR(num < 0x08 || num > '~' || (num > 0x0d && num < ' ')); case ctlseqs_ph_csi_param: CTLSEQS_VALUE_STR(num < '0' || num > '?'); case ctlseqs_ph_csi_intmd: CTLSEQS_VALUE_STR(num < ' ' || num > '/'); case ctlseqs_ph_hexnum: CTLSEQS_VALUE_NUM(16); case ctlseqs_ph_chrstr: CTLSEQS_VALUE_STR(num > 0x7f); default: CTLSEQS_UNREACHABLE(); return false; } } CTLSEQS_HOT static ssize_t ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher) { struct ctlseqs_trie_node const *old_node, empty_node = { 0 }; struct ctlseqs_match_ctx match_ctx = { .node = matcher == NULL ? &empty_node : &matcher->root, .rbuf = reader->rbuf + reader->buf_start + 1, .result = reader->buffer, }; ssize_t match_stack_top = -1; while (true) { match_ctx.value = match_ctx.node->value; if (match_ctx.value == -1) { match_character: match_ctx.node = match_ctx.node->children[(unsigned)match_ctx.rbuf++[0]]; if (match_ctx.node == NULL) { break; } } else if (match_ctx.value < -1) { match_placeholder: old_node = match_ctx.node; match_ctx.node = match_ctx.node->children[-match_ctx.value]; struct ctlseqs_trie_node *next_node = match_ctx.node->next; matcher->match_stack[++match_stack_top] = (struct ctlseqs_match_ctx) { .value = next_node == NULL ? 0 : -next_node->placeholder, .node = old_node, .rbuf = match_ctx.rbuf, .result = match_ctx.result, }; if (!ctlseqs_fetch(&match_ctx.rbuf, -match_ctx.value, &match_ctx.result)) { break; } } else { // Successfully matched. return match_ctx.value; } } if (match_stack_top >= 0) { match_ctx = matcher->match_stack[match_stack_top--]; if (match_ctx.value == 0) { goto match_character; } else { goto match_placeholder; } } return CTLSEQS_NOMATCH; } CTLSEQS_HOT static ssize_t ctlseqs_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher) { ssize_t retval = CTLSEQS_PARTIAL; char const *buf = reader->rbuf + reader->buf_start; size_t idx, len = reader->buf_end - reader->buf_start; enum ctlseqs_state state = reader->state; for (idx = reader->last_idx; idx < len; ++idx) { state = ctlseqs_state_transition(state, buf[idx]); if (state == ctlseqs_state_err) { // Anything before next ESC is definitely not a control sequence. for (; idx < len; ++idx) { if (buf[idx] == 0x1b) { break; } } retval = CTLSEQS_NOSEQ; break; } if (state == ctlseqs_state_done) { retval = ctlseqs_match_pattern(reader, matcher); ++idx; break; } } if (retval < 0) { reader->buffer[0].num = idx; reader->buffer[1].str = buf; } if (retval == CTLSEQS_PARTIAL) { reader->last_idx = idx; if (CTLSEQS_UNLIKELY(reader->buf_start + idx == reader->readlen)) { // Buffer is full but a match is still pending. // This may happen when the reader's maxlen option is not large enough to hold a sequence, // or when the the sequences are produced faster than consumed. if (reader->buf_start > reader->readlen / 2) { memcpy(reader->rbuf, reader->rbuf + reader->buf_start, idx); reader->buf_start = 0; reader->buf_end = idx; } else { // We could memmove() here, but having a buffer no larger than twice the size of a sequence // is hardly what a normal program would desire. retval = CTLSEQS_NOMEM; } } } else { reader->buf_start += idx; reader->last_idx = 0; if (reader->buf_start == reader->buf_end) { reader->buf_start = 0; reader->buf_end = 0; } } reader->state = state >= ctlseqs_state_done ? ctlseqs_state_none : state; return retval; } struct ctlseqs_matcher * ctlseqs_matcher_init() { struct ctlseqs_matcher *matcher = malloc(sizeof(struct ctlseqs_matcher)); if (CTLSEQS_LIKELY(matcher != NULL)) { *matcher = (struct ctlseqs_matcher) { .node_pool_size = 0 }; } return matcher; } int ctlseqs_matcher_config(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_options const *options) { size_t node_pool_idx = 0, max_format_size = 0; if (matcher->node_pool == NULL) { matcher->node_pool_size = sizeof(struct ctlseqs_trie_node) * CTLSEQS_TRIE_INIT_SIZE; matcher->node_pool = malloc(matcher->node_pool_size); if (CTLSEQS_UNLIKELY(matcher->node_pool == NULL)) { return CTLSEQS_NOMEM; } } matcher->root = (struct ctlseqs_trie_node) { .value = -1 }; for (size_t i = 0; i < options->npatterns; ++i) { char const *pattern = options->patterns[i]; struct ctlseqs_trie_node *node = &matcher->root; // We assume that pattern[0] is always ESC. for (size_t j = 1; ; ++j) { int ch = pattern[j]; if (ch == '\0') { node->value = i; if (j > max_format_size) { max_format_size = j; } break; } struct ctlseqs_trie_node *old_node = node; node = node->children[ch]; if (node != NULL) { continue; } if (CTLSEQS_UNLIKELY(++node_pool_idx >= matcher->node_pool_size)) { struct ctlseqs_trie_node *new_pool = realloc(matcher->node_pool, matcher->node_pool_size * 2); if (CTLSEQS_UNLIKELY(new_pool == NULL)) { return CTLSEQS_NOMEM; } matcher->node_pool = new_pool; matcher->node_pool_size *= 2; } old_node->children[ch] = node = &matcher->node_pool[node_pool_idx]; *node = (struct ctlseqs_trie_node) { .value = -1, // Value -1 indicates that there's no match on current node. .placeholder = ch < ctlseqs_ph_begin || ch >= ctlseqs_ph_end ? 0 : ch, }; if (node->placeholder == 0) { continue; } if (old_node->value < -1) { // Node with multiple placeholders contains negated offset of the child node // which is the head of the linked list. node->next = old_node->children[-old_node->value]; } old_node->value = -ch; } } size_t stack_size = sizeof(struct ctlseqs_match_ctx) * max_format_size; struct ctlseqs_match_ctx *new_stack = realloc(matcher->match_stack, stack_size); if (CTLSEQS_UNLIKELY(new_stack == NULL)) { return CTLSEQS_NOMEM; } matcher->match_stack = new_stack; return CTLSEQS_OK; } CTLSEQS_COLD void ctlseqs_matcher_free(struct ctlseqs_matcher *matcher) { if (matcher != NULL) { free(matcher->node_pool); free(matcher); } } struct ctlseqs_reader * ctlseqs_reader_init() { struct ctlseqs_reader *reader = malloc(sizeof(struct ctlseqs_reader)); if (CTLSEQS_LIKELY(reader != NULL)) { *reader = (struct ctlseqs_reader) { .pollfd.events = POLLIN, }; } return reader; } int ctlseqs_reader_config(struct ctlseqs_reader *reader, struct ctlseqs_reader_options const *options) { size_t readlen = options->maxlen; if (reader->readlen != readlen) { char *rbuf; if (readlen < reader->buf_end) { return CTLSEQS_ERROR; } rbuf = realloc(reader->rbuf, readlen); if (rbuf == NULL) { return CTLSEQS_NOMEM; } reader->readlen = readlen; reader->rbuf = rbuf; if (reader->buf_end > reader->readlen) { reader->buf_end = reader->readlen; } } reader->buffer = options->buffer; reader->pollfd.fd = options->fd; reader->no_poll = options->flags & CTLSEQS_READER_NO_POLL; return CTLSEQS_OK; } CTLSEQS_HOT ssize_t ctlseqs_read(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher, int timeout) { ssize_t result; // Whether we have read more than we could match in the preview call. if (reader->state == ctlseqs_state_none && reader->buf_start != 0) { result = ctlseqs_match(reader, matcher); if (result != CTLSEQS_PARTIAL) { return result; } } if (!reader->no_poll) { result = ctlseqs_poll(&reader->pollfd, timeout); if (result < 0) { return result; } } result = ctlseqs_do_read(reader); if (CTLSEQS_UNLIKELY(result < 0)) { return reader->state == ctlseqs_state_none ? result : CTLSEQS_PARTIAL; } return ctlseqs_match(reader, matcher); } void ctlseqs_purge(struct ctlseqs_reader *reader, size_t nbytes) { if (CTLSEQS_UNLIKELY(nbytes == 0)) { return; } reader->buf_start += nbytes; if (reader->buf_start >= reader->buf_end) { reader->buf_start = 0; reader->buf_end = 0; } reader->last_idx = 0; reader->state = ctlseqs_state_none; } CTLSEQS_COLD void ctlseqs_reader_free(struct ctlseqs_reader *reader) { if (reader != NULL) { free(reader->rbuf); free(reader); } }