ctlseqs/src/ctlseqs.c

503 lines
16 KiB
C
Raw Normal View History

2020-11-17 06:24:43 +00:00
/**
* ctlseqs.c - helper library for terminal control sequences
*
* Copyright (C) 2020 CismonX <admin@cismon.net>
*
* This file is part of the ctlseqs library.
*
* This library is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this library. If not, see <https://www.gnu.org/licenses/>.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif // HAVE_CONFIG_H
#include "ctlseqs.h"
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <poll.h>
#include <unistd.h>
// Branch prediction hints. When the build configuration defines
// HAVE___BUILTIN_EXPECT these expand to __builtin_expect(); otherwise they
// evaluate the expression unchanged.
#ifdef HAVE___BUILTIN_EXPECT
# define CTLSEQS_LIKELY(expr) __builtin_expect(!!(expr), 1)
# define CTLSEQS_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
#else
# define CTLSEQS_LIKELY(expr) (expr)
# define CTLSEQS_UNLIKELY(expr) (expr)
#endif // HAVE___BUILTIN_EXPECT
// Marks a code path that must never execute. Expands to nothing when
// __builtin_unreachable() is unavailable, so callers still provide a
// fallback statement after it.
#ifdef HAVE___BUILTIN_UNREACHABLE
# define CTLSEQS_UNREACHABLE() __builtin_unreachable()
#else
# define CTLSEQS_UNREACHABLE()
#endif // HAVE___BUILTIN_UNREACHABLE
// Function attributes for rarely executed (cold) and frequently executed
// (hot) functions; empty when the attribute is not supported.
#ifdef HAVE_FUNC_ATTRIBUTE_COLD
# define CTLSEQS_COLD __attribute__((cold))
#else
# define CTLSEQS_COLD
#endif // HAVE_FUNC_ATTRIBUTE_COLD
#ifdef HAVE_FUNC_ATTRIBUTE_HOT
# define CTLSEQS_HOT __attribute__((hot))
#else
# define CTLSEQS_HOT
#endif // HAVE_FUNC_ATTRIBUTE_HOT
// Initial number of trie nodes in a matcher's node pool; may be overridden
// at build time.
#ifndef CTLSEQS_TRIE_INIT_SIZE
# define CTLSEQS_TRIE_INIT_SIZE 16
#endif // !CTLSEQS_TRIE_INIT_SIZE
// Helper for ctlseqs_fetch(): extract a run of bytes as a string value.
//
// Relies on the caller's locals `cnt`, `num`, `seq_val`, `buf`, `seq` and
// `buf_offset`. Scans forward from `seq_val`, loading each byte into `num`,
// until `stop_cond` (an expression over `num`) becomes true. Stores the run
// length in buf[0].num and its start in buf[1].str, advances *seq past the
// run, bumps *buf_offset by 2 and returns true from the calling function.
//
// NOTE(review): the scan has no explicit length limit; it depends on a byte
// satisfying `stop_cond` being present in the input.
#define CTLSEQS_VALUE_STR(stop_cond) \
for (cnt = 0; ; ++cnt) { \
num = seq_val[cnt]; \
if (stop_cond) { \
break; \
} \
} \
buf[0].num = cnt; \
buf[1].str = seq_val; \
*seq = seq_val + cnt; \
*buf_offset += 2; \
return true
// Helper for ctlseqs_fetch(): extract one unsigned number in the given base.
//
// Relies on the caller's locals `num`, `seq_val`, `buf`, `seq` and
// `buf_offset`. Parses with strtoul(), which also stores the end position
// in *seq. Returns false from the calling function when strtoul() sets
// errno or consumes no characters; otherwise stores the number in
// buf[0].num, bumps *buf_offset by 1 and returns true.
#define CTLSEQS_VALUE_NUM(base) \
errno = 0; \
num = strtoul(seq_val, seq, base); \
if (errno != 0 || seq_val == *seq) { \
return false; \
} \
buf[0].num = num; \
*buf_offset += 1; \
return true
/**
 * Placeholder codes. A pattern byte in [ctlseqs_ph_begin, ctlseqs_ph_end)
 * is treated as a placeholder when the trie is built, and the same code
 * selects the extraction rule in ctlseqs_fetch().
 */
enum ctlseqs_placeholder {
    /* First placeholder code (inclusive lower bound of the range). */
    ctlseqs_ph_begin     = 0x0e,
    /* Single decimal number. */
    ctlseqs_ph_num       = 0x0e,
    /* List of decimal numbers. */
    ctlseqs_ph_nums      = 0x0f,
    /* Run of bytes in ' '..'~'. */
    ctlseqs_ph_str       = 0x10,
    /* Run of bytes in 0x08..0x0d or ' '..'~'. */
    ctlseqs_ph_cmdstr    = 0x11,
    /* Run of bytes in '0'..'?'. */
    ctlseqs_ph_csi_param = 0x12,
    /* Run of bytes in ' '..'/'. */
    ctlseqs_ph_csi_intmd = 0x13,
    /* Single hexadecimal number. */
    ctlseqs_ph_hexnum    = 0x14,
    /* Run of bytes up to the first value above 0x7f. */
    ctlseqs_ph_chrstr    = 0x15,
    /* One past the last placeholder code (exclusive upper bound). */
    ctlseqs_ph_end       = 0x16,
};
/**
 * States of the byte-wise control sequence recognizer implemented by
 * ctlseqs_state_transit().
 */
enum ctlseqs_state {
    ctlseqs_state_none      = 0, /* Nothing recognized yet; expecting ESC. */
    ctlseqs_state_esc       = 1, /* ESC seen; next byte selects the sequence kind. */
    ctlseqs_state_csi       = 2, /* After ESC '['; reading bytes '0'..'?'. */
    ctlseqs_state_csi_intmd = 3, /* Reading bytes ' '..'/' of a CSI sequence. */
    ctlseqs_state_cmdstr    = 4, /* After ESC 'P', ']', '_' or '^'. */
    ctlseqs_state_ss        = 5, /* After ESC 'N' or 'O'; one more byte expected. */
    ctlseqs_state_ctlstr    = 6, /* After ESC 'X'. */
    ctlseqs_state_str_end   = 7, /* ESC seen inside a string; expecting '\\'. */
    ctlseqs_state_done      = 8, /* A complete sequence was recognized. */
    ctlseqs_state_err       = 9, /* The input is not a valid sequence. */
};
// One node of the pattern trie used for matching control sequences.
struct ctlseqs_trie_node {
// Node kind as interpreted by ctlseqs_match_pattern():
//   -1   : the next input byte is looked up literally in `children`;
//   < -1 : negated placeholder code selecting the child to try first;
//   >= 0 : index of the pattern that ends at this node.
ssize_t value;
// Read by ctlseqs_match_pattern() when it queues a sibling placeholder
// for a later retry.
int placeholder;
// Next node in the list of placeholder children sharing the same parent.
struct ctlseqs_trie_node *next;
// Child nodes, indexed by byte value.
struct ctlseqs_trie_node *children[128];
};
// Snapshot of a position in the matching process. Used both as the current
// cursor of ctlseqs_match_pattern() and as an entry of the retry stack.
struct ctlseqs_match_ctx {
// Value that decides the next step (see struct ctlseqs_trie_node's `value`).
ssize_t value;
// Trie node the step is taken from.
struct ctlseqs_trie_node const *node;
// Current position in the raw input.
char *buf;
};
// A compiled set of patterns, stored as a trie.
struct ctlseqs_matcher {
// Root of the trie; embedded here rather than taken from the pool.
struct ctlseqs_trie_node root;
// Heap-allocated storage for all non-root nodes.
struct ctlseqs_trie_node *node_pool;
// Capacity of `node_pool` as tracked by ctlseqs_matcher_setopt().
size_t node_pool_size;
// Retry stack used by ctlseqs_match_pattern(); (re)allocated by
// ctlseqs_matcher_setopt().
struct ctlseqs_match_ctx *match_stack;
};
// State of a control sequence reader bound to one file descriptor.
struct ctlseqs_reader {
// Output array for extracted values; taken from the reader options.
union ctlseqs_value *buffer;
// Size in bytes of `rbuf`; taken from the `maxlen` reader option.
size_t readlen;
// Descriptor and event mask passed to poll().
struct pollfd pollfd;
// When set, ctlseqs_read() skips the poll() step before reading.
bool no_poll;
// Stored from the CTLSEQS_READER_RETAIN_PARTIAL flag; not read by the
// code in this file.
bool retain_partial;
// Raw input buffer.
char *rbuf;
// Offsets into `rbuf` delimiting the bytes that are buffered but not yet
// consumed: [buf_start, buf_end).
size_t buf_start;
size_t buf_end;
// Recognizer state carried over between calls to ctlseqs_match().
enum ctlseqs_state state;
};
/**
 * Wait for the descriptor described by `pollfd` to become readable.
 *
 * @param pollfd  Entry passed to poll(); its `revents` is inspected afterwards.
 * @param timeout Timeout forwarded unchanged to poll().
 * @return CTLSEQS_OK when POLLIN is reported, CTLSEQS_TIMEOUT when poll()
 *         reports no events, CTLSEQS_INTR when poll() fails with EINTR,
 *         CTLSEQS_EOF when POLLHUP is reported without POLLIN, and
 *         CTLSEQS_ERROR in every other case.
 */
CTLSEQS_HOT static int
ctlseqs_poll(struct pollfd *pollfd, int timeout)
{
    switch (poll(pollfd, 1, timeout)) {
    case -1:
        if (errno == EINTR) {
            return CTLSEQS_INTR;
        }
        return CTLSEQS_ERROR;
    case 0:
        return CTLSEQS_TIMEOUT;
    default:
        break;
    }
    // At least one event was reported; readable data takes precedence
    // over a hang-up.
    if (CTLSEQS_LIKELY(pollfd->revents & POLLIN)) {
        return CTLSEQS_OK;
    }
    if (pollfd->revents & POLLHUP) {
        return CTLSEQS_EOF;
    }
    return CTLSEQS_ERROR;
}
/**
 * Read more bytes from the reader's descriptor into its raw buffer.
 *
 * New data is appended at `buf_end`, directly after the bytes that are
 * already buffered, and `buf_end` is advanced by the amount read. Appending
 * there (rather than at `buf_start`) keeps unconsumed input intact and keeps
 * `buf_end` within the bytes actually written.
 *
 * NOTE(review): when the buffer is completely full, read() is asked for
 * zero bytes and its zero return is reported as CTLSEQS_EOF.
 *
 * @param reader Reader whose buffer is filled.
 * @return CTLSEQS_OK when at least one byte was read, CTLSEQS_EOF when
 *         read() returns 0, CTLSEQS_TIMEOUT on EAGAIN/EWOULDBLOCK,
 *         CTLSEQS_INTR on EINTR, CTLSEQS_ERROR on any other failure.
 */
CTLSEQS_HOT static int
ctlseqs_do_read(struct ctlseqs_reader *reader)
{
    ssize_t nbytes = read(reader->pollfd.fd, reader->rbuf + reader->buf_end, reader->readlen - reader->buf_end);
    if (CTLSEQS_UNLIKELY(nbytes == -1)) {
        if (errno == EAGAIN || errno == EWOULDBLOCK) {
            return CTLSEQS_TIMEOUT;
        }
        return errno == EINTR ? CTLSEQS_INTR : CTLSEQS_ERROR;
    }
    if (CTLSEQS_UNLIKELY(nbytes == 0)) {
        return CTLSEQS_EOF;
    }
    reader->buf_end += nbytes;
    return CTLSEQS_OK;
}
/**
 * Advance the control sequence recognizer by one input byte.
 *
 * @param state Current state; must not be ctlseqs_state_done or
 *              ctlseqs_state_err (those are terminal and not handled here).
 * @param ch    Next input byte.
 * @return The state after consuming `ch`; ctlseqs_state_done when `ch`
 *         completes a sequence, ctlseqs_state_err when `ch` is not valid
 *         in the current state.
 */
CTLSEQS_HOT static enum ctlseqs_state
ctlseqs_state_transit(enum ctlseqs_state state, char ch)
{
    switch (state) {
    case ctlseqs_state_none:
        return ch == 0x1b ? ctlseqs_state_esc : ctlseqs_state_err;
    case ctlseqs_state_esc:
        switch (ch) {
        case '[':
            return ctlseqs_state_csi;
        case 'N':
        case 'O':
            return ctlseqs_state_ss;
        case 'P':
        case ']':
        case '_':
        case '^':
            return ctlseqs_state_cmdstr;
        case 'X':
            return ctlseqs_state_ctlstr;
        default:
            // Any other printable byte completes a two-byte escape sequence.
            return (ch >= ' ' && ch <= '~') ? ctlseqs_state_done : ctlseqs_state_err;
        }
    case ctlseqs_state_csi:
        if (ch >= '0' && ch <= '?') {
            return state;
        }
        if (ch >= ' ' && ch <= '/') {
            return ctlseqs_state_csi_intmd;
        }
        // Intermediate bytes are optional: a final byte may directly follow
        // the parameter bytes (e.g. ESC [ 0 m).
        return (ch >= '@' && ch <= '~') ? ctlseqs_state_done : ctlseqs_state_err;
    case ctlseqs_state_csi_intmd:
        if (ch >= ' ' && ch <= '/') {
            return state;
        }
        return (ch >= '@' && ch <= '~') ? ctlseqs_state_done : ctlseqs_state_err;
    case ctlseqs_state_cmdstr:
        if (ch == 0x1b) {
            return ctlseqs_state_str_end;
        } else if (ch < 0x08 || ch > '~' || (ch > 0x0d && ch < ' ')) {
            return ctlseqs_state_err;
        } else {
            return state;
        }
    case ctlseqs_state_ss:
        return (ch >= ' ' && ch <= '~') ? ctlseqs_state_done : ctlseqs_state_err;
    case ctlseqs_state_ctlstr:
        // Every byte is accepted until ESC starts the terminator.
        return ch == 0x1b ? ctlseqs_state_str_end : state;
    case ctlseqs_state_str_end:
        return ch == '\\' ? ctlseqs_state_done : ctlseqs_state_err;
    default:
        CTLSEQS_UNREACHABLE();
        return state;
    }
}
/**
 * Extract the value of one placeholder from the input.
 *
 * @param seq        In: position where the value starts. Out: position
 *                   after the extracted value.
 * @param type       Placeholder code (enum ctlseqs_placeholder).
 * @param buf        Output array of values.
 * @param buf_offset In: index of the first free slot in `buf`. Out: advanced
 *                   by the number of slots written.
 * @return true on success; false when a number was expected but could not
 *         be parsed, or when `type` is not a known placeholder.
 *
 * Slots written on success:
 *  - num, hexnum: one slot holding the number;
 *  - nums: one slot holding the count N, followed by N slots with the
 *    ';'-separated numbers;
 *  - string-like placeholders: one slot holding the length, one slot
 *    holding a pointer to the first byte.
 */
CTLSEQS_HOT bool
ctlseqs_fetch(char **seq, int type, union ctlseqs_value *buf, size_t *buf_offset)
{
    unsigned long cnt, num;
    char *seq_val = *seq;
    buf += *buf_offset;
    switch (type) {
    case ctlseqs_ph_num:
        CTLSEQS_VALUE_NUM(10);
    case ctlseqs_ph_nums:
        for (cnt = 1; ; ++cnt) {
            // strtoul() only sets errno on failure, so clear it first.
            errno = 0;
            num = strtoul(seq_val, seq, 10);
            if (errno != 0 || seq_val == *seq) {
                return false;
            }
            buf[cnt].num = num;
            // Continue after the parsed number; another number follows
            // only when the separator is present.
            seq_val = *seq;
            if (seq_val[0] != ';') {
                break;
            }
            ++seq_val;
        }
        buf[0].num = cnt;
        // One slot for the count plus one per number.
        *buf_offset += cnt + 1;
        return true;
    case ctlseqs_ph_str:
        CTLSEQS_VALUE_STR(num < ' ' || num > '~');
    case ctlseqs_ph_cmdstr:
        CTLSEQS_VALUE_STR(num < 0x08 || num > '~' || (num > 0x0d && num < ' '));
    case ctlseqs_ph_csi_param:
        CTLSEQS_VALUE_STR(num < '0' || num > '?');
    case ctlseqs_ph_csi_intmd:
        CTLSEQS_VALUE_STR(num < ' ' || num > '/');
    case ctlseqs_ph_hexnum:
        CTLSEQS_VALUE_NUM(16);
    case ctlseqs_ph_chrstr:
        CTLSEQS_VALUE_STR(num > 0x7f);
    default:
        CTLSEQS_UNREACHABLE();
        return false;
    }
}
/**
 * Match the sequence at the start of the reader's pending input against
 * the matcher's pattern trie.
 *
 * Matching starts one byte after `buf_start` (the first byte of the
 * sequence is not part of the trie). Literal nodes consume one input byte
 * each; placeholder nodes extract values into `reader->buffer` via
 * ctlseqs_fetch(). When a node has several placeholder children, the
 * alternatives are pushed onto the matcher's retry stack and tried after a
 * failure.
 *
 * NOTE(review): with a NULL matcher the zero-initialized `empty_node` has
 * value 0, which is returned as a successful match of pattern 0 — confirm
 * this is intended.
 * NOTE(review): `buffer_offset` is not rewound when a retry starts.
 *
 * @param reader  Reader holding the input and the output buffer.
 * @param matcher Matcher to use, or NULL.
 * @return Index of the matched pattern, or CTLSEQS_NOMATCH.
 */
CTLSEQS_HOT static ssize_t
ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher)
{
    static struct ctlseqs_trie_node const empty_node = { 0 };
    // Plain local: it is always assigned at `start_match` before being read,
    // so it needs no static storage.
    struct ctlseqs_trie_node const *old_node;
    struct ctlseqs_match_ctx match_ctx = {
        .node = matcher == NULL ? &empty_node : &matcher->root,
        .buf = reader->rbuf + reader->buf_start + 1,
    };
    ssize_t match_stack_top = -1;
    size_t buffer_offset = 0;
    while (true) {
        match_ctx.value = match_ctx.node->value;
        if (match_ctx.value == -1) {
            // Match character as-is. Go through unsigned char and check the
            // range, so bytes outside the child table cannot index past it.
            unsigned char const ch = (unsigned char)*match_ctx.buf++;
            size_t const nchildren = sizeof match_ctx.node->children / sizeof match_ctx.node->children[0];
            match_ctx.node = ch < nchildren ? match_ctx.node->children[ch] : NULL;
            if (match_ctx.node == NULL) {
                break;
            }
        } else if (match_ctx.value < -1) {
            // Match placeholder.
        start_match:
            old_node = match_ctx.node;
            match_ctx.node = match_ctx.node->children[-match_ctx.value];
            // If other placeholders exist on the same level, save them
            // to the stack for future retry.
            if (match_ctx.node->next != NULL) {
                matcher->match_stack[++match_stack_top] = (struct ctlseqs_match_ctx) {
                    .value = match_ctx.node->next->placeholder,
                    .node = old_node,
                    .buf = match_ctx.buf,
                };
            }
            if (!ctlseqs_fetch(&match_ctx.buf, -match_ctx.value, reader->buffer, &buffer_offset)) {
                break;
            }
        } else {
            // Successfully matched.
            return match_ctx.value;
        }
    }
    // The current path failed; resume from the most recently saved
    // alternative, if any.
    if (match_stack_top >= 0) {
        match_ctx = matcher->match_stack[match_stack_top--];
        goto start_match;
    }
    return CTLSEQS_NOMATCH;
}
CTLSEQS_HOT static ssize_t
ctlseqs_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher)
{
ssize_t retval = CTLSEQS_PARTIAL;
char const *buf = reader->rbuf + reader->buf_start;
size_t idx, len = reader->buf_end - reader->buf_start;
enum ctlseqs_state state = reader->state;
for (idx = 0; idx < len; ++idx) {
state = ctlseqs_state_transit(state, buf[idx]);
if (state == ctlseqs_state_err) {
retval = CTLSEQS_NOSEQ;
for (; idx < len; ++idx) {
if (buf[idx] == 0x1b) {
break;
}
}
reader->buffer[0].num = idx;
reader->buffer[1].str = buf;
break;
}
if (state == ctlseqs_state_done) {
retval = ctlseqs_match_pattern(reader, matcher);
break;
}
}
reader->state = state;
reader->buf_start += idx;
if (reader->buf_start == reader->buf_end) {
reader->buf_start = 0;
reader->buf_end = 0;
}
return retval;
}
/**
 * Allocate a new matcher with all fields zeroed.
 *
 * @return The new matcher, or NULL when the allocation fails.
 */
struct ctlseqs_matcher *
ctlseqs_matcher_init()
{
    struct ctlseqs_matcher *result = malloc(sizeof *result);
    if (CTLSEQS_UNLIKELY(result == NULL)) {
        return NULL;
    }
    *result = (struct ctlseqs_matcher) { 0 };
    return result;
}
int
ctlseqs_matcher_setopt(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_opts const *options)
{
size_t node_pool_idx = 0, max_format_size = 0;
if (matcher->node_pool == NULL) {
matcher->node_pool_size = sizeof(struct ctlseqs_trie_node) * CTLSEQS_TRIE_INIT_SIZE;
matcher->node_pool = malloc(matcher->node_pool_size);
if (CTLSEQS_UNLIKELY(matcher->node_pool == NULL)) {
return CTLSEQS_NOMEM;
}
}
matcher->root = (struct ctlseqs_trie_node) { .value = -1 };
for (size_t i = 0; i < options->npatterns; ++i) {
char const *pattern = options->patterns[i];
struct ctlseqs_trie_node *node = &matcher->root;
for (size_t j = 1; ; ++j) {
int ch = pattern[j];
if (ch == '\0') {
node->value = i;
if (j > max_format_size) {
max_format_size = j;
}
break;
}
struct ctlseqs_trie_node *old_node = node;
node = node->children[ch];
if (node != NULL) {
continue;
}
if (CTLSEQS_UNLIKELY(++node_pool_idx >= matcher->node_pool_size)) {
struct ctlseqs_trie_node *new_pool = realloc(matcher->node_pool, matcher->node_pool_size * 2);
if (CTLSEQS_UNLIKELY(new_pool == NULL)) {
return CTLSEQS_NOMEM;
}
matcher->node_pool = new_pool;
matcher->node_pool_size *= 2;
}
old_node->children[ch] = node = &matcher->node_pool[node_pool_idx];
*node = (struct ctlseqs_trie_node) { .value = -1 };
if (ch < ctlseqs_ph_begin || ch >= ctlseqs_ph_end) {
continue;
}
// Node with multiple placeholders contains negated offset of child
// which is the head of the linked list.
if (old_node->value < -1) {
node->next = old_node->children[-old_node->value];
node->next->value = old_node->value;
}
old_node->value = -ch;
}
}
size_t stack_size = sizeof(struct ctlseqs_match_ctx) * max_format_size;
struct ctlseqs_match_ctx *new_stack = realloc(matcher->match_stack, stack_size);
if (CTLSEQS_UNLIKELY(new_stack == NULL)) {
return CTLSEQS_NOMEM;
}
matcher->match_stack = new_stack;
return CTLSEQS_OK;
}
/**
 * Release a matcher and everything it owns: the node pool and the retry
 * stack allocated by ctlseqs_matcher_setopt().
 *
 * @param matcher Matcher to free; NULL is accepted and ignored.
 */
CTLSEQS_COLD void
ctlseqs_matcher_free(struct ctlseqs_matcher *matcher)
{
    if (matcher != NULL) {
        free(matcher->node_pool);
        free(matcher->match_stack);
        free(matcher);
    }
}
/**
 * Allocate a new reader with all fields zeroed.
 *
 * @return The new reader, or NULL when the allocation fails.
 */
struct ctlseqs_reader *
ctlseqs_reader_init()
{
    struct ctlseqs_reader *result = malloc(sizeof *result);
    if (CTLSEQS_UNLIKELY(result == NULL)) {
        return NULL;
    }
    *result = (struct ctlseqs_reader) { 0 };
    return result;
}
/**
 * Apply `options` to a reader.
 *
 * Sets the output buffer, resizes the raw input buffer to
 * `options->maxlen` bytes when that size changed, and stores the file
 * descriptor and flags.
 *
 * When the raw buffer shrinks, the pending-data window is clamped to the
 * new size. Both offsets are adjusted so that `buf_start` can never end up
 * beyond `buf_end`.
 *
 * @param reader  Reader to configure.
 * @param options Supplies `buffer`, `maxlen`, `fd` and `flags`.
 * @return CTLSEQS_OK on success; CTLSEQS_NOMEM when the raw buffer cannot
 *         be (re)allocated, in which case only the output buffer pointer
 *         has been updated.
 */
int
ctlseqs_reader_setopt(struct ctlseqs_reader *reader, struct ctlseqs_reader_opts const *options)
{
    reader->buffer = options->buffer;
    size_t readlen = options->maxlen;
    if (reader->readlen != readlen) {
        // realloc(NULL, n) behaves like malloc(n), so one call covers both
        // the first allocation and later resizes.
        char *rbuf = realloc(reader->rbuf, readlen);
        if (rbuf == NULL) {
            return CTLSEQS_NOMEM;
        }
        reader->readlen = readlen;
        reader->rbuf = rbuf;
        if (reader->buf_end > reader->readlen) {
            reader->buf_end = reader->readlen;
        }
        // Nothing pending is left inside the new bounds: reset the window.
        if (reader->buf_start >= reader->buf_end) {
            reader->buf_start = 0;
            reader->buf_end = 0;
        }
    }
    reader->pollfd = (struct pollfd) { .fd = options->fd, .events = POLLIN };
    reader->no_poll = options->flags & CTLSEQS_READER_NO_POLL;
    reader->retain_partial = options->flags & CTLSEQS_READER_RETAIN_PARTIAL;
    return CTLSEQS_OK;
}
/**
 * Read and match the next control sequence.
 *
 * Input that is already buffered is processed first. Every conclusive
 * result from it is returned immediately — a matched pattern index as well
 * as CTLSEQS_NOMATCH and CTLSEQS_NOSEQ — and only CTLSEQS_PARTIAL (more
 * input needed) falls through to reading from the descriptor.
 *
 * @param reader  Reader to read from.
 * @param matcher Matcher to use; passed through to the matching step.
 * @param timeout Timeout forwarded to poll(); unused when the reader was
 *                configured with CTLSEQS_READER_NO_POLL.
 * @return The index of the matched pattern, or a negative CTLSEQS_* code
 *         from polling, reading or matching.
 */
CTLSEQS_HOT ssize_t
ctlseqs_read(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher, int timeout)
{
    if (reader->buf_start != reader->buf_end) {
        ssize_t match_result = ctlseqs_match(reader, matcher);
        if (match_result != CTLSEQS_PARTIAL) {
            return match_result;
        }
    }
    if (!reader->no_poll) {
        int poll_result = ctlseqs_poll(&reader->pollfd, timeout);
        if (poll_result < 0) {
            return poll_result;
        }
    }
    int read_result = ctlseqs_do_read(reader);
    if (CTLSEQS_UNLIKELY(read_result < 0)) {
        return read_result;
    }
    return ctlseqs_match(reader, matcher);
}
/**
 * Release a reader together with its raw input buffer.
 *
 * @param reader Reader to free; NULL is accepted and ignored.
 */
CTLSEQS_COLD void
ctlseqs_reader_free(struct ctlseqs_reader *reader)
{
    if (reader == NULL) {
        return;
    }
    char *raw = reader->rbuf;
    free(raw);
    free(reader);
}