ctlseqs/src/ctlseqs.c

613 lines
20 KiB
C

/**
* ctlseqs.c - helper library for terminal control sequences
*
* Copyright (C) 2020,2021 CismonX <admin@cismon.net>
*
* This file is part of the ctlseqs library.
*
* ctlseqs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ctlseqs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ctlseqs. If not, see <https://www.gnu.org/licenses/>.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif // HAVE_CONFIG_H
#include "ctlseqs.h"
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <poll.h>
#include <unistd.h>
// Branch-prediction hints. When the build configuration reports that
// __builtin_expect is available, the hints are forwarded to the compiler;
// otherwise both macros evaluate to the bare expression.
#ifdef HAVE___BUILTIN_EXPECT
# define CTLSEQS_LIKELY(expr) __builtin_expect(!!(expr), 1)
# define CTLSEQS_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
#else
# define CTLSEQS_LIKELY(expr) (expr)
# define CTLSEQS_UNLIKELY(expr) (expr)
#endif // HAVE___BUILTIN_EXPECT
// Marks a code path that must never be executed. Expands to nothing when
// __builtin_unreachable is not available, so callers still need a fallback
// statement after it.
#ifdef HAVE___BUILTIN_UNREACHABLE
# define CTLSEQS_UNREACHABLE() __builtin_unreachable()
#else
# define CTLSEQS_UNREACHABLE()
#endif // HAVE___BUILTIN_UNREACHABLE
// Function attributes for rarely-executed (cold) and frequently-executed (hot)
// functions; empty when the corresponding attribute is not supported.
#ifdef HAVE_FUNC_ATTRIBUTE_COLD
# define CTLSEQS_COLD __attribute__((cold))
#else
# define CTLSEQS_COLD
#endif // HAVE_FUNC_ATTRIBUTE_COLD
#ifdef HAVE_FUNC_ATTRIBUTE_HOT
# define CTLSEQS_HOT __attribute__((hot))
#else
# define CTLSEQS_HOT
#endif // HAVE_FUNC_ATTRIBUTE_HOT
// Number of trie nodes in the first node pool allocated by ctlseqs_matcher_init().
// Each additional pool doubles the previous size. May be overridden at build time.
#ifndef CTLSEQS_TRIE_NODE_POOL_INIT_SIZE
# define CTLSEQS_TRIE_NODE_POOL_INIT_SIZE 16
#endif // !CTLSEQS_TRIE_NODE_POOL_INIT_SIZE
// Maximum number of node pools a matcher may own (size of its node_pools array).
// May be overridden at build time.
#ifndef CTLSEQS_TRIE_NODE_POOL_MAX_NUM
# define CTLSEQS_TRIE_NODE_POOL_MAX_NUM 8
#endif // !CTLSEQS_TRIE_NODE_POOL_MAX_NUM
// Helper for ctlseqs_fetch_value(): extracts a string value.
//
// Scans `seq` byte by byte, storing each byte in `num`, until `stop_cond`
// (an expression over `num`) becomes true. The number of accepted bytes is
// stored in buf_val[0].len and the start of the string in buf_val[1].str;
// the caller's result cursor `*buf` is advanced by two slots.
//
// The expansion ends in a `return` statement (without a trailing semicolon)
// that yields the position right after the string, so it must be the last
// thing in its switch case. It relies on the locals `cnt`, `num`, `seq`,
// `buf_val` and `buf` being in scope. The scan itself has no length limit;
// it stops only when `stop_cond` is satisfied.
#define CTLSEQS_VALUE_STR(stop_cond) \
for (cnt = 0; ; ++cnt) { \
num = seq[cnt]; \
if (stop_cond) { \
break; \
} \
} \
buf_val[0].len = cnt; \
buf_val[1].str = seq; \
*buf += 2; \
return seq + cnt
// Helper for ctlseqs_fetch_value(): extracts a single unsigned number.
//
// Parses `seq` with strtoul() in the given `base`. If strtoul() sets errno or
// consumes nothing, the enclosing function returns NULL. Otherwise the value
// is stored in buf_val[0].num, `*buf` is advanced by one slot, and the
// enclosing function returns the position after the number.
//
// Like CTLSEQS_VALUE_STR, the expansion ends in a `return` and relies on the
// locals `num`, `seq`, `endptr`, `buf_val` and `buf` being in scope.
#define CTLSEQS_VALUE_NUM(base) \
errno = 0; \
num = strtoul(seq, &endptr, base); \
if (errno || seq == endptr) { \
return NULL; \
} \
buf_val[0].num = num; \
++*buf; \
return endptr
// Placeholder codes that may appear as bytes inside a pattern string.
// A pattern byte in the range [ctlseqs_ph_begin, ctlseqs_ph_end) is treated
// as a placeholder by ctlseqs_matcher_config(); how each kind is parsed is
// defined in ctlseqs_fetch_value().
enum ctlseqs_placeholder {
// First placeholder code (inclusive lower bound of the range).
ctlseqs_ph_begin = 0x0e,
// A single number parsed in base 10.
ctlseqs_ph_num = ctlseqs_ph_begin,
// One or more base-10 numbers separated by ';'.
ctlseqs_ph_nums,
// A run of bytes in the range ' ' to '~'.
ctlseqs_ph_str,
// A run of bytes in the ranges 0x08 to 0x0d and ' ' to '~'.
ctlseqs_ph_cmdstr,
// A run of bytes in the range '0' to '?'.
ctlseqs_ph_csi_param,
// A run of bytes in the range ' ' to '/'.
ctlseqs_ph_csi_intmd,
// A single number parsed in base 16.
ctlseqs_ph_hexnum,
// A run of bytes not greater than 0x7f.
ctlseqs_ph_chrstr,
// One past the last placeholder code (exclusive upper bound of the range).
ctlseqs_ph_end,
};
// States of the scanner implemented by ctlseqs_state_transition().
enum ctlseqs_state {
// Nothing consumed yet; only ESC (0x1b) is accepted.
ctlseqs_state_none,
// ESC has been consumed; the next byte selects the kind of sequence.
ctlseqs_state_esc,
// After "ESC [": consuming bytes '0'..'?', waiting for intermediate or final byte.
ctlseqs_state_csi,
// After "ESC [" and at least one byte ' '..'/': waiting for more of them or a final byte.
ctlseqs_state_csi_intmd,
// After "ESC P", "ESC ]", "ESC _" or "ESC ^": string body restricted to
// 0x08..0x0d and ' '..'~', ended by ESC.
ctlseqs_state_cmdstr,
// After "ESC N" or "ESC O": exactly one byte ' '..'~' follows.
ctlseqs_state_ss,
// After "ESC X": string body accepting any byte, ended by ESC.
ctlseqs_state_ctlstr,
// ESC seen inside a string body; only '\\' completes the sequence.
ctlseqs_state_str_end,
// A complete sequence has been recognized.
ctlseqs_state_done,
// The input is not a valid sequence.
ctlseqs_state_err,
};
// A node of the pattern trie built by ctlseqs_matcher_config().
struct ctlseqs_trie_node {
// -1: no pattern ends here and there are no placeholder children.
// < -1: negated index (placeholder code) of the child that heads the list
// of placeholder children of this node.
// >= 0: index of the pattern that ends at this node.
ssize_t value;
// Placeholder code this node was created for, or 0 for a literal byte.
ssize_t placeholder;
// Next placeholder sibling under the same parent, or NULL.
struct ctlseqs_trie_node *next;
// Child nodes, indexed by pattern byte.
struct ctlseqs_trie_node *children[128];
};
// Input and output of one ctlseqs_do_match() call.
struct ctlseqs_match_args {
// Start of the data to scan.
char const *seq;
// Number of bytes available at `seq`.
size_t seq_len;
// Index into `seq` at which scanning starts (bytes before it were already scanned).
size_t offset;
// Buffer receiving extracted values.
union ctlseqs_value *result;
// Output: index in `seq` at which scanning stopped.
size_t result_idx;
// In: scanner state to resume from. Out: scanner state when scanning stopped.
enum ctlseqs_state state;
// When true, result[0].len / result[1].str always describe the scanned bytes,
// and extracted placeholder values are stored starting at result[2].
bool save_seq;
};
// Position of the trie walk in ctlseqs_match_pattern(). Copies of it are
// pushed on the matcher's match stack so that the walk can backtrack.
struct ctlseqs_match_ctx {
// Value of the current node; in a saved context, the negated code of the
// next placeholder to try, or 0 to try a literal byte instead.
ssize_t value;
// Current trie node.
struct ctlseqs_trie_node const *node;
// Next input byte to consume.
char const *seq;
// Next free slot in the result buffer.
union ctlseqs_value *result;
};
// A compiled set of patterns.
struct ctlseqs_matcher {
// Root of the pattern trie (stands for the leading ESC of every pattern).
struct ctlseqs_trie_node root;
// Arrays of trie nodes; slot 0 is allocated by ctlseqs_matcher_init(),
// further slots by ctlseqs_matcher_config() as needed.
struct ctlseqs_trie_node *node_pools[CTLSEQS_TRIE_NODE_POOL_MAX_NUM];
// Index of the most recently allocated pool in `node_pools`.
size_t pool_idx;
// Number of nodes in the pool at `pool_idx`.
size_t pool_size;
// Backtracking stack used by ctlseqs_match_pattern(); sized by
// ctlseqs_matcher_config() from the longest pattern.
struct ctlseqs_match_ctx *match_stack;
};
// State of a buffered reader that scans a file descriptor for control sequences.
struct ctlseqs_reader {
// Caller-supplied buffer receiving match results.
union ctlseqs_value *result;
// Capacity of `rbuf` in bytes (the `maxlen` reader option).
size_t readlen;
// Descriptor to read from, prepared for polling with POLLIN.
struct pollfd pollfd;
// Read buffer.
char *rbuf;
// Offset in `rbuf` of the first byte not yet reported to the caller.
size_t buf_start;
// Offset in `rbuf` one past the last byte read.
size_t buf_end;
// Number of bytes after `buf_start` that have already been scanned as part
// of a still-incomplete sequence.
size_t last_idx;
// Scanner state carried over between calls while a sequence is incomplete.
enum ctlseqs_state state;
// When true, ctlseqs_read() does not poll before reading.
bool no_poll;
// When true, the matched sequence itself is saved in the result buffer.
bool save_matched;
};
/**
 * Wait for input on a single descriptor.
 *
 * @param pollfd   Descriptor to wait on; its `revents` field is filled by poll().
 * @param timeout  Timeout passed unchanged to poll().
 *
 * @return CTLSEQS_OK when input is available, CTLSEQS_TIMEOUT when poll()
 *         reported no events, CTLSEQS_INTR when poll() failed with EINTR,
 *         CTLSEQS_EOF when POLLHUP is reported without POLLIN, and
 *         CTLSEQS_ERROR otherwise.
 */
CTLSEQS_HOT static inline int
ctlseqs_poll(struct pollfd *pollfd, int timeout)
{
    switch (poll(pollfd, 1, timeout)) {
    case -1:
        if (errno == EINTR) {
            return CTLSEQS_INTR;
        }
        return CTLSEQS_ERROR;
    case 0:
        return CTLSEQS_TIMEOUT;
    default:
        break;
    }

    // Readable input takes precedence over a hang-up reported at the same time.
    if (CTLSEQS_LIKELY(pollfd->revents & POLLIN)) {
        return CTLSEQS_OK;
    }
    if (pollfd->revents & POLLHUP) {
        return CTLSEQS_EOF;
    }
    return CTLSEQS_ERROR;
}
/**
 * Read more bytes from the reader's descriptor into its buffer.
 *
 * Data is stored at offset `buf_start + last_idx` of the buffer, and at most
 * as many bytes as remain up to `readlen` are requested.
 *
 * @param reader  Reader to fill.
 *
 * @return CTLSEQS_OK after at least one byte was read (`buf_end` is advanced),
 *         CTLSEQS_EOF when read() returned 0, CTLSEQS_TIMEOUT on
 *         EAGAIN/EWOULDBLOCK, CTLSEQS_INTR on EINTR, CTLSEQS_ERROR on any
 *         other failure.
 */
CTLSEQS_HOT static inline int
ctlseqs_do_read(struct ctlseqs_reader *reader)
{
    size_t const write_pos = reader->buf_start + reader->last_idx;
    char *const dest = reader->rbuf + write_pos;
    size_t const room = reader->readlen - write_pos;

    ssize_t const got = read(reader->pollfd.fd, dest, room);
    if (CTLSEQS_UNLIKELY(got == -1)) {
        int const err = errno;
        if (err == EINTR) {
            return CTLSEQS_INTR;
        }
        // A non-blocking descriptor with nothing to read is reported as a timeout.
        return (err == EAGAIN || err == EWOULDBLOCK) ? CTLSEQS_TIMEOUT : CTLSEQS_ERROR;
    }
    if (CTLSEQS_UNLIKELY(got == 0)) {
        return CTLSEQS_EOF;
    }

    reader->buf_end += got;
    return CTLSEQS_OK;
}
/**
 * Advance the control-sequence scanner by one input byte.
 *
 * @param state  Current scanner state; must be one of the states before
 *               ctlseqs_state_done (the terminal states have no transitions).
 * @param ch     Next input byte.
 *
 * @return The state after consuming `ch`: ctlseqs_state_done when the byte
 *         completes a sequence, ctlseqs_state_err when it cannot be part of
 *         one, or an intermediate state otherwise.
 */
CTLSEQS_HOT static enum ctlseqs_state
ctlseqs_state_transition(enum ctlseqs_state state, char ch)
{
    bool const is_esc = (ch == 0x1b);
    bool const is_printable = (ch >= ' ' && ch <= '~');
    bool const is_intermediate = (ch >= ' ' && ch <= '/');
    bool const is_final = (ch >= '@' && ch <= '~');

    switch (state) {
    case ctlseqs_state_none:
        if (is_esc) {
            return ctlseqs_state_esc;
        }
        return ctlseqs_state_err;

    case ctlseqs_state_esc:
        if (ch == '[') {
            return ctlseqs_state_csi;
        }
        if (ch == 'N' || ch == 'O') {
            return ctlseqs_state_ss;
        }
        if (ch == 'P' || ch == ']' || ch == '_' || ch == '^') {
            return ctlseqs_state_cmdstr;
        }
        if (ch == 'X') {
            return ctlseqs_state_ctlstr;
        }
        // Any other printable byte completes a two-byte escape sequence.
        return is_printable ? ctlseqs_state_done : ctlseqs_state_err;

    case ctlseqs_state_csi:
        if (ch >= '0' && ch <= '?') {
            // Parameter byte: stay in the same state.
            return state;
        }
        if (is_intermediate) {
            return ctlseqs_state_csi_intmd;
        }
        return is_final ? ctlseqs_state_done : ctlseqs_state_err;

    case ctlseqs_state_csi_intmd:
        if (is_intermediate) {
            return state;
        }
        return is_final ? ctlseqs_state_done : ctlseqs_state_err;

    case ctlseqs_state_cmdstr:
        // ESC must be tested first: it lies in the otherwise rejected range.
        if (is_esc) {
            return ctlseqs_state_str_end;
        }
        if ((ch >= 0x08 && ch <= 0x0d) || is_printable) {
            return state;
        }
        return ctlseqs_state_err;

    case ctlseqs_state_ss:
        return is_printable ? ctlseqs_state_done : ctlseqs_state_err;

    case ctlseqs_state_ctlstr:
        // Every byte except ESC is accepted as part of the string body.
        return is_esc ? ctlseqs_state_str_end : state;

    case ctlseqs_state_str_end:
        if (ch == '\\') {
            return ctlseqs_state_done;
        }
        return ctlseqs_state_err;

    default:
        CTLSEQS_UNREACHABLE();
        return state;
    }
}
/**
 * Extract the value of one placeholder from the input.
 *
 * @param seq   Position in the input at which the value starts.
 * @param type  Placeholder kind (one of the enum ctlseqs_placeholder codes).
 * @param buf   In/out cursor into the result buffer. On success it is advanced
 *              past the slots that were written.
 *
 * @return Position in the input right after the extracted value, or NULL if a
 *         numeric value could not be parsed.
 *
 * Slots written per kind:
 *  - num / hexnum: one slot holding the number.
 *  - nums: one slot holding the count, followed by one slot per number.
 *  - string kinds: one slot holding the length, one slot holding the pointer
 *    to the first byte (the string is not copied and not NUL-terminated).
 *
 * Each CTLSEQS_VALUE_NUM / CTLSEQS_VALUE_STR use below expands to code that
 * returns from this function, so the cases never fall through.
 */
CTLSEQS_HOT static char const *
ctlseqs_fetch_value(char const *seq, int type, union ctlseqs_value **buf)
{
size_t cnt;
unsigned long num;
char *endptr = NULL;
union ctlseqs_value *buf_val = *buf;
switch (type) {
case ctlseqs_ph_num:
CTLSEQS_VALUE_NUM(10);
case ctlseqs_ph_nums:
// Slot 0 is reserved for the count, so the numbers start at slot 1.
for (cnt = 1; ; ++cnt) {
errno = 0;
num = strtoul(seq, &endptr, 10);
if (errno || seq == endptr) {
return NULL;
}
buf_val[cnt].num = num;
// The list continues only while numbers are separated by ';'.
if (endptr[0] != ';') {
break;
}
seq = endptr + 1;
}
buf_val[0].len = cnt;
*buf += cnt + 1;
return endptr;
case ctlseqs_ph_str:
// Stop at the first byte outside ' '..'~'.
CTLSEQS_VALUE_STR(num < ' ' || num > '~');
case ctlseqs_ph_cmdstr:
// Stop at the first byte outside 0x08..0x0d and ' '..'~'.
CTLSEQS_VALUE_STR(num < 0x08 || num > '~' || (num > 0x0d && num < ' '));
case ctlseqs_ph_csi_param:
// Stop at the first byte outside '0'..'?'.
CTLSEQS_VALUE_STR(num < '0' || num > '?');
case ctlseqs_ph_csi_intmd:
// Stop at the first byte outside ' '..'/'.
CTLSEQS_VALUE_STR(num < ' ' || num > '/');
case ctlseqs_ph_hexnum:
CTLSEQS_VALUE_NUM(16);
case ctlseqs_ph_chrstr:
// Stop at the first byte greater than 0x7f.
CTLSEQS_VALUE_STR(num > 0x7f);
default:
CTLSEQS_UNREACHABLE();
return NULL;
}
}
/**
 * Match a complete control sequence against the matcher's pattern trie.
 *
 * The walk starts at the byte following the leading ESC of `args->seq`.
 * At every node the placeholder children are tried first (most recently
 * added first), then the literal child for the next input byte. Each
 * placeholder attempt pushes a context on `matcher->match_stack`, so that the
 * next alternative can be tried if the rest of the pattern fails to match.
 *
 * @param matcher  Pattern set. If NULL, a zero-initialized node is used as the
 *                 root, whose value 0 is returned immediately.
 * @param args     Sequence to match and result buffer. Extracted values are
 *                 written from result[2] when `save_seq` is set, otherwise
 *                 from result[0].
 *
 * @return Index of the matching pattern, or CTLSEQS_NOMATCH.
 */
CTLSEQS_HOT static ssize_t
ctlseqs_match_pattern(struct ctlseqs_matcher const *matcher, struct ctlseqs_match_args const *args)
{
    struct ctlseqs_trie_node const *old_node, empty_node = { 0 };
    struct ctlseqs_match_ctx match_ctx = {
        .node = matcher == NULL ? &empty_node : &matcher->root,
        .seq = args->seq + 1,
        .result = args->result + (args->save_seq ? 2 : 0),
    };
    size_t const nchildren = sizeof empty_node.children / sizeof empty_node.children[0];
    ssize_t match_stack_top = -1;
    unsigned char ch;
    while (true) {
        match_ctx.value = match_ctx.node->value;
        if (match_ctx.value == -1) {
          match_character:
            // Bytes >= 0x80 can occur inside "ESC X" strings, and a plain
            // `char` may be negative. Such bytes have no child slot, so they
            // are treated as a failed match instead of indexing out of bounds.
            ch = (unsigned char)*match_ctx.seq++;
            match_ctx.node = ch < nchildren ? match_ctx.node->children[ch] : NULL;
            if (match_ctx.node == NULL) {
                break;
            }
        } else if (match_ctx.value < -1) {
          match_placeholder:
            old_node = match_ctx.node;
            match_ctx.node = match_ctx.node->children[-match_ctx.value];
            struct ctlseqs_trie_node *next_node = match_ctx.node->next;
            // Remember what to try at `old_node` if this placeholder does not
            // lead to a match: the next placeholder sibling, or (value 0) the
            // literal byte.
            matcher->match_stack[++match_stack_top] = (struct ctlseqs_match_ctx) {
                .value = next_node == NULL ? 0 : -next_node->placeholder,
                .node = old_node,
                .seq = match_ctx.seq,
                .result = match_ctx.result,
            };
            match_ctx.seq = ctlseqs_fetch_value(match_ctx.seq, -match_ctx.value, &match_ctx.result);
            if (match_ctx.seq == NULL) {
                break;
            }
        } else {
            // Successfully matched.
            return match_ctx.value;
        }
    }
    // The current path failed; resume from the most recent saved alternative.
    // The jumps re-enter the loop above, so this point is reached again on
    // every further failure until the stack is empty.
    if (match_stack_top >= 0) {
        match_ctx = matcher->match_stack[match_stack_top--];
        if (match_ctx.value == 0) {
            goto match_character;
        } else {
            goto match_placeholder;
        }
    }
    return CTLSEQS_NOMATCH;
}
CTLSEQS_HOT static inline ssize_t
ctlseqs_do_match(struct ctlseqs_matcher const *matcher, struct ctlseqs_match_args *args)
{
ssize_t retval = CTLSEQS_PARTIAL;
char const *seq = args->seq;
size_t idx, len = args->seq_len;
enum ctlseqs_state state = args->state;
for (idx = args->offset; idx < len; ++idx) {
state = ctlseqs_state_transition(state, seq[idx]);
if (state == ctlseqs_state_err) {
// Anything before next ESC is definitely not a control sequence.
for (; idx < len; ++idx) {
if (seq[idx] == 0x1b) {
break;
}
}
retval = CTLSEQS_NOSEQ;
break;
}
if (state == ctlseqs_state_done) {
retval = ctlseqs_match_pattern(matcher, args);
++idx;
break;
}
}
if (retval < 0 || args->save_seq) {
args->result[0].len = idx;
args->result[1].str = seq;
}
args->result_idx = idx;
args->state = state;
return retval;
}
/**
 * Run the matcher over the unread part of the reader's buffer and update the
 * buffer bookkeeping according to the outcome.
 *
 * @param reader   Reader whose buffer is scanned.
 * @param matcher  Pattern set to match against.
 *
 * @return The result of ctlseqs_do_match(), or CTLSEQS_NOMEM when a sequence
 *         is still incomplete, the buffer is full, and the pending data cannot
 *         be moved to the front of the buffer.
 */
CTLSEQS_HOT static ssize_t
ctlseqs_reader_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher)
{
    struct ctlseqs_match_args args = {
        .seq = reader->rbuf + reader->buf_start,
        .seq_len = reader->buf_end - reader->buf_start,
        .offset = reader->last_idx,
        .result = reader->result,
        .state = reader->state,
        .save_seq = reader->save_matched,
    };
    ssize_t status = ctlseqs_do_match(matcher, &args);
    size_t const scanned = args.result_idx;

    if (status != CTLSEQS_PARTIAL) {
        // The scanned span has been reported; drop it from the buffer.
        reader->buf_start += scanned;
        reader->last_idx = 0;
        if (reader->buf_start == reader->buf_end) {
            reader->buf_start = 0;
            reader->buf_end = 0;
        }
    } else {
        reader->last_idx = scanned;
        bool const buffer_full = (reader->buf_start + scanned == reader->readlen);
        if (CTLSEQS_UNLIKELY(buffer_full)) {
            // The buffer is full while a match is still pending. This may
            // happen when the reader's maxlen option is too small to hold a
            // sequence, or when sequences are produced faster than consumed.
            if (reader->buf_start <= reader->readlen / 2) {
                // A memmove() would work here, but a buffer no larger than
                // twice the size of a sequence is hardly what a normal
                // program would desire.
                status = CTLSEQS_NOMEM;
            } else {
                // The pending bytes occupy less than half of the buffer and
                // start in its second half, so source and destination do not
                // overlap.
                memcpy(reader->rbuf, reader->rbuf + reader->buf_start, scanned);
                reader->buf_start = 0;
                reader->buf_end = scanned;
            }
        }
    }

    // Terminal states are not carried over to the next call.
    reader->state = args.state < ctlseqs_state_done ? args.state : ctlseqs_state_none;
    return status;
}
/**
 * Allocate a matcher together with its first trie node pool.
 *
 * @return The new matcher, with no patterns configured, or NULL if either
 *         allocation failed. Release it with ctlseqs_matcher_free().
 */
struct ctlseqs_matcher *
ctlseqs_matcher_init()
{
    struct ctlseqs_trie_node *first_pool = malloc(sizeof *first_pool * CTLSEQS_TRIE_NODE_POOL_INIT_SIZE);
    struct ctlseqs_matcher *self = malloc(sizeof *self);

    if (CTLSEQS_LIKELY(first_pool != NULL && self != NULL)) {
        // Every member not named here (including match_stack) starts out zeroed.
        *self = (struct ctlseqs_matcher) {
            .node_pools = { first_pool },
            .pool_size = CTLSEQS_TRIE_NODE_POOL_INIT_SIZE
        };
        return self;
    }

    // At most one of the two allocations succeeded; free(NULL) is a no-op.
    free(first_pool);
    free(self);
    return NULL;
}
int
ctlseqs_matcher_config(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_options const *options)
{
size_t node_idx = 0, max_format_size = 0;
struct ctlseqs_trie_node *node_pool = matcher->node_pools[matcher->pool_idx];
matcher->root = (struct ctlseqs_trie_node) { .value = -1 };
for (size_t i = 0; i < options->npatterns; ++i) {
char const *pattern = options->patterns[i];
struct ctlseqs_trie_node *node = &matcher->root;
// We assume that pattern[0] is always ESC.
for (size_t j = 1; ; ++j) {
int ch = pattern[j];
if (ch == '\0') {
node->value = i;
if (j > max_format_size) {
max_format_size = j;
}
break;
}
struct ctlseqs_trie_node *old_node = node;
node = node->children[ch];
if (node != NULL) {
continue;
}
if (CTLSEQS_UNLIKELY(++node_idx >= matcher->pool_size)) {
if (CTLSEQS_UNLIKELY(++matcher->pool_idx >= CTLSEQS_TRIE_NODE_POOL_MAX_NUM)) {
return CTLSEQS_NOMEM;
}
node_pool = malloc(sizeof(struct ctlseqs_trie_node) * matcher->pool_size * 2);
if (CTLSEQS_UNLIKELY(node_pool == NULL)) {
return CTLSEQS_NOMEM;
}
node_idx = 0;
matcher->node_pools[matcher->pool_idx] = node_pool;
matcher->pool_size *= 2;
}
old_node->children[ch] = node = node_pool + node_idx;
*node = (struct ctlseqs_trie_node) {
.value = -1, // Value -1 indicates that there's no match on current node.
.placeholder = ch < ctlseqs_ph_begin || ch >= ctlseqs_ph_end ? 0 : ch,
};
if (node->placeholder == 0) {
continue;
}
if (old_node->value < -1) {
// Node with multiple placeholders contains negated offset of the child node
// which is the head of the linked list.
node->next = old_node->children[-old_node->value];
}
old_node->value = -ch;
}
}
size_t stack_size = sizeof(struct ctlseqs_match_ctx) * max_format_size;
struct ctlseqs_match_ctx *new_stack = realloc(matcher->match_stack, stack_size);
if (CTLSEQS_UNLIKELY(new_stack == NULL)) {
return CTLSEQS_NOMEM;
}
matcher->match_stack = new_stack;
return CTLSEQS_OK;
}
/**
 * Find and match the first control sequence in a string.
 *
 * Leading bytes that do not form a control sequence are skipped: whenever the
 * scan reports CTLSEQS_NOSEQ with input left over, scanning restarts from a
 * clean state at the position where it stopped.
 *
 * @param matcher  Pattern set to match against.
 * @param str      Input data.
 * @param str_len  Number of bytes in `str`.
 * @param result   Buffer receiving the results; result[0].len and
 *                 result[1].str describe the span scanned by the last attempt.
 *
 * @return Index of the matching pattern, or a negative CTLSEQS_* status from
 *         the last scan attempt (CTLSEQS_NOSEQ when the whole input was
 *         consumed without finding a sequence).
 */
CTLSEQS_HOT ssize_t
ctlseqs_match(struct ctlseqs_matcher const *matcher, char const *str, size_t str_len, union ctlseqs_value *result)
{
    struct ctlseqs_match_args args = {
        .seq = str,
        .seq_len = str_len,
        .result = result,
        .save_seq = true,
    };
    while (true) {
        ssize_t retval = ctlseqs_do_match(matcher, &args);
        if (retval != CTLSEQS_NOSEQ) {
            return retval;
        }
        size_t skipped = args.result[0].len;
        // Stop when nothing is left to scan. A zero-length skip cannot make
        // progress either, so it also ends the search.
        if (skipped == 0 || skipped >= args.seq_len) {
            return retval;
        }
        args.seq += skipped;
        args.seq_len -= skipped;
        // The failed attempt left the scanner in its error state, which has
        // no outgoing transitions; every retry must start afresh.
        args.state = ctlseqs_state_none;
    }
}
/**
 * Release a matcher and everything it owns.
 *
 * @param matcher  Matcher returned by ctlseqs_matcher_init(), or NULL (in
 *                 which case nothing happens).
 */
CTLSEQS_COLD void
ctlseqs_matcher_free(struct ctlseqs_matcher *matcher)
{
    if (matcher == NULL) {
        return;
    }
    // pool_idx normally names the last allocated pool, but never trust it to
    // index past the end of the array.
    size_t const npools_max = sizeof matcher->node_pools / sizeof matcher->node_pools[0];
    for (size_t idx = 0; idx < npools_max && idx <= matcher->pool_idx; ++idx) {
        free(matcher->node_pools[idx]);
    }
    free(matcher->match_stack);
    free(matcher);
}
/**
 * Allocate a reader.
 *
 * @return A zero-initialized reader whose poll descriptor is set up to wait
 *         for POLLIN, or NULL if allocation failed. Release it with
 *         ctlseqs_reader_free().
 */
struct ctlseqs_reader *
ctlseqs_reader_init()
{
    struct ctlseqs_reader *self = malloc(sizeof *self);
    if (CTLSEQS_UNLIKELY(self == NULL)) {
        return NULL;
    }
    *self = (struct ctlseqs_reader) { .pollfd.events = POLLIN };
    return self;
}
/**
 * Apply options to a reader.
 *
 * The read buffer is resized when `options->maxlen` differs from the current
 * capacity. Shrinking below the amount of data currently buffered is refused.
 *
 * @param reader   Reader to configure.
 * @param options  Buffer size, result buffer, file descriptor and flags.
 *
 * @return CTLSEQS_OK on success; CTLSEQS_ERROR if the new size cannot hold
 *         the buffered data; CTLSEQS_NOMEM if the buffer could not be
 *         resized. On failure the reader is left unchanged.
 */
int
ctlseqs_reader_config(struct ctlseqs_reader *reader, struct ctlseqs_reader_options const *options)
{
    size_t readlen = options->maxlen;
    if (reader->readlen != readlen) {
        if (readlen < reader->buf_end) {
            return CTLSEQS_ERROR;
        }
        if (readlen == 0) {
            // Do not pass a zero size to realloc(): it may free the buffer
            // and return NULL, which would look like an allocation failure
            // and leave a dangling pointer behind.
            free(reader->rbuf);
            reader->rbuf = NULL;
        } else {
            char *rbuf = realloc(reader->rbuf, readlen);
            if (rbuf == NULL) {
                return CTLSEQS_NOMEM;
            }
            reader->rbuf = rbuf;
        }
        reader->readlen = readlen;
    }
    reader->result = options->result;
    reader->pollfd.fd = options->fd;
    reader->no_poll = options->flags & CTLSEQS_READER_NO_POLL;
    reader->save_matched = options->flags & CTLSEQS_READER_SAVE_MATCHED_SEQS;
    return CTLSEQS_OK;
}
/**
 * Read from the reader's descriptor and match the next control sequence.
 *
 * Data left over in the buffer from a previous call is matched first; the
 * descriptor is only read when that data does not yield a result.
 *
 * @param reader   Reader to read from.
 * @param matcher  Pattern set to match against.
 * @param timeout  Poll timeout; unused when the reader is configured not to poll.
 *
 * @return The result of matching, or the status of the failed poll or read.
 *         A read failure in the middle of a sequence is reported as
 *         CTLSEQS_PARTIAL.
 */
CTLSEQS_HOT ssize_t
ctlseqs_read(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher, int timeout)
{
    // Did the previous call read more than it could match?
    bool const has_leftover = reader->state == ctlseqs_state_none && reader->buf_start != 0;
    if (has_leftover) {
        ssize_t pending = ctlseqs_reader_match(reader, matcher);
        if (pending != CTLSEQS_PARTIAL) {
            return pending;
        }
    }

    if (!reader->no_poll) {
        int poll_status = ctlseqs_poll(&reader->pollfd, timeout);
        if (poll_status < 0) {
            return poll_status;
        }
    }

    int read_status = ctlseqs_do_read(reader);
    if (CTLSEQS_UNLIKELY(read_status < 0)) {
        if (reader->state == ctlseqs_state_none) {
            return read_status;
        }
        // A sequence is still pending; keep reporting it as incomplete.
        return CTLSEQS_PARTIAL;
    }
    return ctlseqs_reader_match(reader, matcher);
}
/**
 * Discard buffered data and reset the scanner.
 *
 * @param reader  Reader whose buffer is purged.
 * @param nbytes  Number of bytes to drop from the front of the unread data.
 *                If this reaches or passes the end of the buffered data, the
 *                buffer becomes empty. A value of 0 leaves the reader
 *                untouched.
 */
void
ctlseqs_purge(struct ctlseqs_reader *reader, size_t nbytes)
{
    if (CTLSEQS_LIKELY(nbytes != 0)) {
        size_t const new_start = reader->buf_start + nbytes;
        if (new_start < reader->buf_end) {
            reader->buf_start = new_start;
        } else {
            reader->buf_start = 0;
            reader->buf_end = 0;
        }
        // Any partially scanned sequence is abandoned.
        reader->last_idx = 0;
        reader->state = ctlseqs_state_none;
    }
}
/**
 * Release a reader and its read buffer.
 *
 * @param reader  Reader returned by ctlseqs_reader_init(), or NULL (in which
 *                case nothing happens).
 */
CTLSEQS_COLD void
ctlseqs_reader_free(struct ctlseqs_reader *reader)
{
    if (reader == NULL) {
        return;
    }
    free(reader->rbuf);
    free(reader);
}