/** * ctlseqs.c - helper library for control sequences * * Copyright (C) 2020,2021 CismonX * * This file is part of the ctlseqs library. * * ctlseqs is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * ctlseqs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ctlseqs. If not, see . */ #ifdef HAVE_CONFIG_H # include "config.h" #endif // HAVE_CONFIG_H #include "ctlseqs.h" #include #include #include #include #include #include #include #ifdef HAVE___BUILTIN_EXPECT # define ctlseqs_likely(expr) __builtin_expect(!!(expr), 1) # define ctlseqs_unlikely(expr) __builtin_expect(!!(expr), 0) #else # define ctlseqs_likely(expr) (expr) # define ctlseqs_unlikely(expr) (expr) #endif // HAVE___BUILTIN_EXPECT #ifdef HAVE___BUILTIN_UNREACHABLE # define ctlseqs_unreachable() __builtin_unreachable() #else # define ctlseqs_unreachable() #endif // HAVE___BUILTIN_UNREACHABLE #ifdef HAVE_FUNC_ATTRIBUTE_COLD # define ctlseqs_cold __attribute__((cold)) #else # define ctlseqs_cold #endif // HAVE_FUNC_ATTRIBUTE_COLD #ifdef HAVE_FUNC_ATTRIBUTE_HOT # define ctlseqs_hot __attribute__((hot)) #else # define ctlseqs_hot #endif // HAVE_FUNC_ATTRIBUTE_HOT #ifndef CTLSEQS_TRIE_NODE_POOL_INIT_SIZE # define CTLSEQS_TRIE_NODE_POOL_INIT_SIZE 16 #endif // !CTLSEQS_TRIE_NODE_POOL_INIT_SIZE #ifndef CTLSEQS_TRIE_NODE_POOL_MAX_NUM # define CTLSEQS_TRIE_NODE_POOL_MAX_NUM 8 #endif // !CTLSEQS_TRIE_NODE_POOL_MAX_NUM #define CTLSEQS_VALUE_STR(stop_cond) \ for (cnt = 0; ; ++cnt) { \ num = seq[cnt]; \ if (stop_cond) { \ break; \ } \ } \ buf_val[0].len = cnt; \ buf_val[1].str = seq; \ *buf += 2; \ return seq + cnt #define CTLSEQS_VALUE_NUM(base) \ errno = 0; \ num = strtoul(seq, &endptr, base); \ if (errno || seq == endptr) { \ return NULL; \ } \ buf_val[0].num = num; \ ++*buf; \ return endptr enum ctlseqs_placeholder { ctlseqs_ph_begin = 0x0e, ctlseqs_ph_num = ctlseqs_ph_begin, ctlseqs_ph_nums, ctlseqs_ph_str, ctlseqs_ph_cmdstr, ctlseqs_ph_csi_param, ctlseqs_ph_csi_intmd, ctlseqs_ph_hexnum, ctlseqs_ph_chrstr, ctlseqs_ph_end, }; enum ctlseqs_state { ctlseqs_state_none, ctlseqs_state_esc, ctlseqs_state_csi, ctlseqs_state_csi_intmd, ctlseqs_state_cmdstr, ctlseqs_state_ss, ctlseqs_state_ctlstr, ctlseqs_state_str_end, ctlseqs_state_done, ctlseqs_state_err, }; struct ctlseqs_trie_node { ssize_t value; ssize_t placeholder; struct ctlseqs_trie_node *next; struct ctlseqs_trie_node *children[128]; }; struct ctlseqs_match_args { char const *seq; size_t seq_len; size_t offset; union ctlseqs_value *result; size_t result_idx; enum ctlseqs_state state; bool save_seq; }; struct ctlseqs_match_ctx { ssize_t value; struct ctlseqs_trie_node const *node; char const *seq; union ctlseqs_value *result; }; struct ctlseqs_matcher { struct ctlseqs_trie_node root; struct ctlseqs_trie_node *node_pools[CTLSEQS_TRIE_NODE_POOL_MAX_NUM]; size_t pool_idx; size_t pool_size; size_t match_stack_size; }; struct ctlseqs_reader { union ctlseqs_value *result; size_t readlen; struct pollfd pollfd; char *rbuf; size_t buf_start; size_t buf_end; size_t last_idx; enum ctlseqs_state state; bool no_poll; bool save_matched; }; ctlseqs_hot static inline int ctlseqs_poll( struct pollfd *pollfd, int timeout ) { int nevents = poll(pollfd, 1, timeout); switch (nevents) { case 0: return CTLSEQS_TIMEOUT; case 1: if (ctlseqs_likely(pollfd->revents & POLLIN)) { return CTLSEQS_OK; } else if (pollfd->revents & POLLHUP) { return CTLSEQS_EOF; } else { return CTLSEQS_ERROR; } default: if (errno == EINTR) { return CTLSEQS_INTR; } else { return CTLSEQS_ERROR; } } } ctlseqs_hot static inline int ctlseqs_do_read( struct ctlseqs_reader *reader ) { size_t offset = reader->buf_start + reader->last_idx; ssize_t nbytes = read( reader->pollfd.fd, reader->rbuf + offset, reader->readlen - offset ); switch (nbytes) { case -1: if (errno == EAGAIN || errno == EWOULDBLOCK) { return CTLSEQS_TIMEOUT; } else if (errno == EINTR) { return CTLSEQS_INTR; } else { return CTLSEQS_ERROR; } case 0: return CTLSEQS_EOF; default: reader->buf_end += nbytes; return CTLSEQS_OK; } } ctlseqs_hot static enum ctlseqs_state ctlseqs_state_transition( enum ctlseqs_state state, char ch ) { switch (state) { case ctlseqs_state_none: if (ch == 0x1b) { return ctlseqs_state_esc; } else { return ctlseqs_state_err; } case ctlseqs_state_esc: switch (ch) { case '[': return ctlseqs_state_csi; case 'N': case 'O': return ctlseqs_state_ss; case 'P': case ']': case '_': case '^': return ctlseqs_state_cmdstr; case 'X': return ctlseqs_state_ctlstr; default: if (ch >= ' ' && ch <= '~') { return ctlseqs_state_done; } else { return ctlseqs_state_err; } } case ctlseqs_state_csi: if (ch >= '0' && ch <= '?') { return state; } else if (ch >= ' ' && ch <= '/') { return ctlseqs_state_csi_intmd; } else if (ch >= '@' && ch <= '~') { return ctlseqs_state_done; } else { return ctlseqs_state_err; } case ctlseqs_state_csi_intmd: if (ch >= ' ' && ch <= '/') { return state; } else if (ch >= '@' && ch <= '~') { return ctlseqs_state_done; } else { return ctlseqs_state_err; } case ctlseqs_state_cmdstr: if (ch == 0x1b) { return ctlseqs_state_str_end; } else if (ch < 0x08 || ch > '~' || (ch > 0x0d && ch < ' ')) { return ctlseqs_state_err; } else { return state; } case ctlseqs_state_ss: if (ch >= ' ' && ch <= '~') { return ctlseqs_state_done; } else { return ctlseqs_state_err; } case ctlseqs_state_ctlstr: if (ch == 0x1b) { return ctlseqs_state_str_end; } else { return state; } case ctlseqs_state_str_end: if (ch == '\\') { return ctlseqs_state_done; } else { return ctlseqs_state_err; } default: ctlseqs_unreachable(); return state; } } ctlseqs_hot static char const * ctlseqs_fetch_value( char const *seq, int type, union ctlseqs_value **buf ) { size_t cnt; unsigned long num; char *endptr = NULL; union ctlseqs_value *buf_val = *buf; switch (type) { case ctlseqs_ph_num: CTLSEQS_VALUE_NUM(10); case ctlseqs_ph_nums: for (cnt = 1; ; ++cnt) { errno = 0; num = strtoul(seq, &endptr, 10); if (errno || seq == endptr) { return NULL; } buf_val[cnt].num = num; if (endptr[0] != ';') { break; } seq = endptr + 1; } buf_val[0].len = cnt; *buf += cnt + 1; return endptr; case ctlseqs_ph_str: CTLSEQS_VALUE_STR(num < ' ' || num > '~'); case ctlseqs_ph_cmdstr: CTLSEQS_VALUE_STR( num < 0x08 || num > '~' || (num > 0x0d && num < ' ')); case ctlseqs_ph_csi_param: CTLSEQS_VALUE_STR(num < '0' || num > '?'); case ctlseqs_ph_csi_intmd: CTLSEQS_VALUE_STR(num < ' ' || num > '/'); case ctlseqs_ph_hexnum: CTLSEQS_VALUE_NUM(16); case ctlseqs_ph_chrstr: CTLSEQS_VALUE_STR(num > 0x7f); default: ctlseqs_unreachable(); return NULL; } } ctlseqs_hot static ssize_t ctlseqs_match_pattern( struct ctlseqs_matcher const *matcher, struct ctlseqs_match_args const *args ) { struct ctlseqs_trie_node const *old_node, empty_node = { 0 }; struct ctlseqs_match_ctx match_stack[matcher->match_stack_size]; struct ctlseqs_match_ctx match_ctx = { .node = matcher == NULL ? &empty_node : &matcher->root, .seq = args->seq + 1, .result = args->result + (args->save_seq ? 2 : 0), }; ssize_t match_stack_top = -1; while (true) { match_ctx.value = match_ctx.node->value; if (match_ctx.value == -1) { match_character: match_ctx.node = match_ctx.node->children[(unsigned)match_ctx.seq++[0]]; if (match_ctx.node == NULL) { break; } } else if (match_ctx.value < -1) { match_placeholder: old_node = match_ctx.node; match_ctx.node = match_ctx.node->children[-match_ctx.value]; struct ctlseqs_trie_node *next_node = match_ctx.node->next; match_stack[++match_stack_top] = (struct ctlseqs_match_ctx) { .value = next_node == NULL ? 0 : -next_node->placeholder, .node = old_node, .seq = match_ctx.seq, .result = match_ctx.result, }; match_ctx.seq = ctlseqs_fetch_value( match_ctx.seq, -match_ctx.value, &match_ctx.result ); if (match_ctx.seq == NULL) { break; } } else { // Successfully matched. return match_ctx.value; } } if (match_stack_top >= 0) { match_ctx = match_stack[match_stack_top--]; if (match_ctx.value == 0) { goto match_character; } else { goto match_placeholder; } } return CTLSEQS_NOMATCH; } ctlseqs_hot static inline ssize_t ctlseqs_do_match( struct ctlseqs_matcher const *matcher, struct ctlseqs_match_args *args ) { ssize_t retval = CTLSEQS_PARTIAL; char const *seq = args->seq; size_t idx, len = args->seq_len; enum ctlseqs_state state = args->state; for (idx = args->offset; idx < len; ++idx) { state = ctlseqs_state_transition(state, seq[idx]); if (state == ctlseqs_state_err) { // Anything before next ESC is definitely not a control sequence. for (; idx < len; ++idx) { if (seq[idx] == 0x1b) { break; } } retval = CTLSEQS_NOSEQ; break; } if (state == ctlseqs_state_done) { retval = ctlseqs_match_pattern(matcher, args); ++idx; break; } } if (retval < 0 || args->save_seq) { args->result[0].len = idx; args->result[1].str = seq; } args->result_idx = idx; args->state = state; return retval; } ctlseqs_hot static ssize_t ctlseqs_reader_match( struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher ) { struct ctlseqs_match_args args = { .seq = reader->rbuf + reader->buf_start, .seq_len = reader->buf_end - reader->buf_start, .offset = reader->last_idx, .result = reader->result, .state = reader->state, .save_seq = reader->save_matched, }; ssize_t retval = ctlseqs_do_match(matcher, &args); if (retval == CTLSEQS_PARTIAL) { reader->last_idx = args.result_idx; if (ctlseqs_unlikely( reader->buf_start + args.result_idx == reader->readlen )) { // Buffer is full but a match is still pending. // This may happen when the reader's maxlen option is not // large enough to hold a sequence, // or when the the sequences are produced faster than consumed. if (reader->buf_start > reader->readlen / 2) { memcpy( reader->rbuf, reader->rbuf + reader->buf_start, args.result_idx ); reader->buf_start = 0; reader->buf_end = args.result_idx; } else { // We could memmove() here, but having a buffer no larger than // twice the size of a sequence is hardly what a normal program // would desire. retval = CTLSEQS_NOMEM; } } } else { reader->buf_start += args.result_idx; reader->last_idx = 0; if (reader->buf_start == reader->buf_end) { reader->buf_start = 0; reader->buf_end = 0; } } if (args.state >= ctlseqs_state_done) { reader->state = ctlseqs_state_none; } else { reader->state = args.state; } return retval; } struct ctlseqs_matcher * ctlseqs_matcher_init() { struct ctlseqs_matcher *mem = malloc(sizeof(struct ctlseqs_matcher) + sizeof(struct ctlseqs_trie_node) * CTLSEQS_TRIE_NODE_POOL_INIT_SIZE); if (ctlseqs_likely(mem != NULL)) { *mem = (struct ctlseqs_matcher) { .node_pools = { (struct ctlseqs_trie_node *)(mem + 1) }, .pool_size = CTLSEQS_TRIE_NODE_POOL_INIT_SIZE }; } return mem; } int ctlseqs_matcher_config( struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_options const *options ) { size_t node_idx = 0, max_format_size = 0; struct ctlseqs_trie_node *node_pool = matcher->node_pools[matcher->pool_idx]; matcher->root = (struct ctlseqs_trie_node) { .value = -1 }; for (size_t i = 0; i < options->npatterns; ++i) { char const *pattern = options->patterns[i]; struct ctlseqs_trie_node *node = &matcher->root; // We assume that pattern[0] is always ESC. for (size_t j = 1; ; ++j) { int ch = pattern[j]; if (ch == '\0') { node->value = i; if (j > max_format_size) { max_format_size = j; } break; } struct ctlseqs_trie_node *old_node = node; node = node->children[ch]; if (node != NULL) { continue; } if (ctlseqs_unlikely(++node_idx >= matcher->pool_size)) { if (ctlseqs_unlikely( matcher->pool_idx >= CTLSEQS_TRIE_NODE_POOL_MAX_NUM - 1 )) { return CTLSEQS_NOMEM; } node_pool = malloc( sizeof(struct ctlseqs_trie_node) * matcher->pool_size * 2 ); if (ctlseqs_unlikely(node_pool == NULL)) { return CTLSEQS_NOMEM; } node_idx = 0; matcher->node_pools[++matcher->pool_idx] = node_pool; matcher->pool_size *= 2; } old_node->children[ch] = node = node_pool + node_idx; ssize_t placeholder; if (ch < ctlseqs_ph_begin || ch >= ctlseqs_ph_end) { placeholder = 0; } else { placeholder = ch; } *node = (struct ctlseqs_trie_node) { // Value -1 indicates that there's no match on current node. .value = -1, .placeholder = placeholder, }; if (placeholder == 0) { continue; } if (old_node->value < -1) { // Node with multiple placeholders contains negated offset of // the child node which is the head of the linked list. node->next = old_node->children[-old_node->value]; } old_node->value = -ch; } } matcher->match_stack_size = max_format_size; return CTLSEQS_OK; } ctlseqs_hot ssize_t ctlseqs_match( struct ctlseqs_matcher const *matcher, char const *str, size_t str_len, union ctlseqs_value *result ) { struct ctlseqs_match_args args = { .seq = str, .seq_len = str_len, .result = result, .save_seq = true, }; ssize_t retval; try_match: retval = ctlseqs_do_match(matcher, &args); if (retval == CTLSEQS_NOSEQ) { size_t result_len = args.result[0].len; if (result_len < str_len) { args.seq += result_len; args.seq_len -= result_len; goto try_match; } } return retval; } ctlseqs_cold void ctlseqs_matcher_free( struct ctlseqs_matcher *matcher ) { if (ctlseqs_likely(matcher == NULL)) { return; } for (size_t idx = 1; idx <= matcher->pool_idx; ++idx) { free(matcher->node_pools[idx]); } free(matcher); } struct ctlseqs_reader * ctlseqs_reader_init() { struct ctlseqs_reader *reader = malloc(sizeof(struct ctlseqs_reader)); if (ctlseqs_likely(reader != NULL)) { *reader = (struct ctlseqs_reader) { .pollfd.events = POLLIN }; } return reader; } int ctlseqs_reader_config( struct ctlseqs_reader *reader, struct ctlseqs_reader_options const *options ) { size_t const readlen = options->maxlen; if (reader->readlen != readlen) { if (readlen < reader->buf_end) { return CTLSEQS_ERROR; } char *rbuf = realloc(reader->rbuf, readlen); if (rbuf == NULL) { return CTLSEQS_NOMEM; } reader->readlen = readlen; reader->rbuf = rbuf; if (reader->buf_end > reader->readlen) { reader->buf_end = reader->readlen; } } reader->result = options->result; reader->pollfd.fd = options->fd; reader->no_poll = options->flags & CTLSEQS_READER_NO_POLL; reader->save_matched = options->flags & CTLSEQS_READER_SAVE_MATCHED_SEQS; return CTLSEQS_OK; } ctlseqs_hot ssize_t ctlseqs_read( struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher, int timeout ) { ssize_t result; // Whether we have read more than we could match in the preview call. if (reader->state == ctlseqs_state_none && reader->buf_start != 0) { result = ctlseqs_reader_match(reader, matcher); if (result != CTLSEQS_PARTIAL) { return result; } } if (!reader->no_poll) { result = ctlseqs_poll(&reader->pollfd, timeout); if (result < 0) { return result; } } result = ctlseqs_do_read(reader); if (ctlseqs_unlikely(result < 0)) { if (reader->state == ctlseqs_state_none) { return result; } else { return CTLSEQS_PARTIAL; } } return ctlseqs_reader_match(reader, matcher); } void ctlseqs_purge( struct ctlseqs_reader *reader, size_t nbytes ) { if (ctlseqs_unlikely(nbytes == 0)) { return; } reader->buf_start += nbytes; if (reader->buf_start >= reader->buf_end) { reader->buf_start = 0; reader->buf_end = 0; } reader->last_idx = 0; reader->state = ctlseqs_state_none; } ctlseqs_cold void ctlseqs_reader_free( struct ctlseqs_reader *reader ) { if (ctlseqs_unlikely(reader == NULL)) { return; } free(reader->rbuf); free(reader); }