ctlseqs/src/ctlseqs.c

711 lines
21 KiB
C

/**
* ctlseqs.c - helper library for control sequences
*
* Copyright (C) 2020,2021 CismonX <admin@cismon.net>
*
* This file is part of the ctlseqs library.
*
* ctlseqs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ctlseqs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ctlseqs. If not, see <https://www.gnu.org/licenses/>.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif // HAVE_CONFIG_H
#include "ctlseqs.h"
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <poll.h>
#include <unistd.h>
// Branch prediction hints. Both evaluate to the truth value of `expr`; when
// __builtin_expect is unavailable they degrade to the plain expression.
// NOTE(review): the HAVE_* feature macros are presumably provided by config.h.
#ifdef HAVE___BUILTIN_EXPECT
# define ctlseqs_likely(expr) __builtin_expect(!!(expr), 1)
# define ctlseqs_unlikely(expr) __builtin_expect(!!(expr), 0)
#else
# define ctlseqs_likely(expr) (expr)
# define ctlseqs_unlikely(expr) (expr)
#endif // HAVE___BUILTIN_EXPECT
// Marks a code path that must never be executed. Expands to nothing when the
// builtin is unavailable, so callers still need a fallback statement after it.
#ifdef HAVE___BUILTIN_UNREACHABLE
# define ctlseqs_unreachable() __builtin_unreachable()
#else
# define ctlseqs_unreachable()
#endif // HAVE___BUILTIN_UNREACHABLE
// Function attribute for rarely executed functions.
#ifdef HAVE_FUNC_ATTRIBUTE_COLD
# define ctlseqs_cold __attribute__((cold))
#else
# define ctlseqs_cold
#endif // HAVE_FUNC_ATTRIBUTE_COLD
// Function attribute for frequently executed functions.
#ifdef HAVE_FUNC_ATTRIBUTE_HOT
# define ctlseqs_hot __attribute__((hot))
#else
# define ctlseqs_hot
#endif // HAVE_FUNC_ATTRIBUTE_HOT
// Number of trie nodes in the first node pool, which is allocated together
// with the matcher itself. Each additional pool doubles the previous size.
#ifndef CTLSEQS_TRIE_NODE_POOL_INIT_SIZE
# define CTLSEQS_TRIE_NODE_POOL_INIT_SIZE 16
#endif // !CTLSEQS_TRIE_NODE_POOL_INIT_SIZE
// Maximum number of node pools a matcher can own (including the first one).
#ifndef CTLSEQS_TRIE_NODE_POOL_MAX_NUM
# define CTLSEQS_TRIE_NODE_POOL_MAX_NUM 8
#endif // !CTLSEQS_TRIE_NODE_POOL_MAX_NUM
// Extract a string value. Only usable inside ctlseqs_fetch_value(): the
// expansion relies on the locals `cnt`, `num`, `seq`, `buf_val` and `buf`,
// and it ends with a `return` statement.
//
// Characters of `seq` are scanned one by one; each is stored in `num` and
// `stop_cond` (an expression over `num`) decides where the string ends.
// The length goes to buf_val[0].len, the start pointer to buf_val[1].str,
// `*buf` advances by two slots, and the pointer to the first character that
// does not belong to the string is returned.
#define CTLSEQS_VALUE_STR(stop_cond) \
for (cnt = 0; ; ++cnt) { \
num = seq[cnt]; \
if (stop_cond) { \
break; \
} \
} \
buf_val[0].len = cnt; \
buf_val[1].str = seq; \
*buf += 2; \
return seq + cnt
// Extract a single unsigned number in the given base. Only usable inside
// ctlseqs_fetch_value(): the expansion relies on the locals `num`, `seq`,
// `endptr`, `buf_val` and `buf`, and it ends with a `return` statement.
//
// Returns NULL when strtoul() sets errno or consumes no characters.
// Otherwise the value goes to buf_val[0].num, `*buf` advances by one slot,
// and the pointer just past the parsed number is returned.
#define CTLSEQS_VALUE_NUM(base) \
errno = 0; \
num = strtoul(seq, &endptr, base); \
if (errno || seq == endptr) { \
return NULL; \
} \
buf_val[0].num = num; \
++*buf; \
return endptr
/**
 * Placeholder codes that may appear as bytes inside a pattern string.
 *
 * A pattern byte in the range [ctlseqs_ph_begin, ctlseqs_ph_end) is treated
 * as a placeholder by ctlseqs_matcher_config(); the code is later passed to
 * ctlseqs_fetch_value() to extract the corresponding value.
 */
enum ctlseqs_placeholder {
// First placeholder code (inclusive lower bound of the range).
ctlseqs_ph_begin = 0x0e,
// One unsigned decimal number.
ctlseqs_ph_num = ctlseqs_ph_begin,
// One or more unsigned decimal numbers separated by ';'.
ctlseqs_ph_nums,
// Run of characters in ' '..'~'.
ctlseqs_ph_str,
// Run of characters in 0x08..0x0d or ' '..'~'.
ctlseqs_ph_cmdstr,
// Run of characters in '0'..'?'.
ctlseqs_ph_csi_param,
// Run of characters in ' '..'/'.
ctlseqs_ph_csi_intmd,
// One unsigned hexadecimal number.
ctlseqs_ph_hexnum,
// Run of characters not greater than 0x7f.
ctlseqs_ph_chrstr,
// Exclusive upper bound of the placeholder range.
ctlseqs_ph_end,
};
/**
 * States of the control sequence recognizer, see ctlseqs_state_transition().
 *
 * The order matters: ctlseqs_reader_match() treats every state that compares
 * greater than or equal to ctlseqs_state_done as terminal.
 */
enum ctlseqs_state {
// No sequence in progress; only ESC (0x1b) is accepted.
ctlseqs_state_none,
// ESC has been seen.
ctlseqs_state_esc,
// After ESC '[': reading bytes in '0'..'?'.
ctlseqs_state_csi,
// Inside a CSI sequence after a byte in ' '..'/' has been seen.
ctlseqs_state_csi_intmd,
// After ESC 'P', ']', '_' or '^': reading a restricted string body.
ctlseqs_state_cmdstr,
// After ESC 'N' or 'O': one more character in ' '..'~' completes it.
ctlseqs_state_ss,
// After ESC 'X': any byte is accepted until the next ESC.
ctlseqs_state_ctlstr,
// ESC seen inside a string; a following '\\' terminates the string.
ctlseqs_state_str_end,
// A complete sequence has been recognized.
ctlseqs_state_done,
// The last byte cannot be part of a control sequence.
ctlseqs_state_err,
};
/**
 * Node of the pattern trie built by ctlseqs_matcher_config().
 */
struct ctlseqs_trie_node {
// >= 0: index of the pattern that ends at this node.
// -1: no pattern ends here and the node has no placeholder children.
// < -1: negated child index of the head of this node's placeholder list.
ssize_t value;
// Placeholder code this node was created for, or 0 for a literal character.
ssize_t placeholder;
// Next placeholder node under the same parent, or NULL.
struct ctlseqs_trie_node *next;
// Child nodes, indexed by pattern byte.
struct ctlseqs_trie_node *children[128];
};
/**
 * Input and output of ctlseqs_do_match().
 */
struct ctlseqs_match_args {
// Start of the data to scan.
char const *seq;
// Number of bytes available at `seq`.
size_t seq_len;
// Index in `seq` at which scanning starts (bytes before it were already
// fed to the state machine).
size_t offset;
// Buffer receiving the matched sequence and the extracted values.
union ctlseqs_value *result;
// Output: index in `seq` at which scanning stopped.
size_t result_idx;
// In: recognizer state to resume from. Out: state after scanning.
enum ctlseqs_state state;
// Whether result[0] and result[1] are reserved for the length and start
// of the scanned data even when a pattern matches.
bool save_seq;
};
/**
 * Cursor of the trie walk in ctlseqs_match_pattern(); also used as the
 * frame type of its backtracking stack.
 */
struct ctlseqs_match_ctx {
// For the cursor: value of the current node. For a saved frame: 0 to retry
// with a literal character, otherwise the negated code of the next
// placeholder alternative to try.
ssize_t value;
// Current trie node (for a saved frame: the node to resume from).
struct ctlseqs_trie_node const *node;
// Next character of the sequence to be matched.
char const *seq;
// Next free slot of the result buffer.
union ctlseqs_value *result;
};
/**
 * Pattern matcher: a trie of patterns plus the memory pools holding its nodes.
 */
struct ctlseqs_matcher {
// Root of the trie; corresponds to the leading ESC of every pattern.
struct ctlseqs_trie_node root;
// Node pools. The first one lives in the same allocation as the matcher,
// the others are allocated on demand by ctlseqs_matcher_config().
struct ctlseqs_trie_node *node_pools[CTLSEQS_TRIE_NODE_POOL_MAX_NUM];
// Index of the most recently added pool.
size_t pool_idx;
// Number of nodes in the most recently added pool.
size_t pool_size;
// Number of frames of the backtracking stack used when matching; set to
// the length of the longest configured pattern.
size_t match_stack_size;
};
/**
 * Buffered reader which extracts control sequences from a file descriptor.
 */
struct ctlseqs_reader {
// Buffer receiving match results (taken from the reader options).
union ctlseqs_value *result;
// Size of `rbuf` in bytes (the `maxlen` reader option).
size_t readlen;
// Descriptor to read from, polled for POLLIN.
struct pollfd pollfd;
// Read buffer.
char *rbuf;
// Offset in `rbuf` of the first byte not yet handed to the caller.
size_t buf_start;
// Offset in `rbuf` just past the last byte read.
size_t buf_end;
// Offset relative to `buf_start` at which scanning resumes when a
// sequence is only partially available.
size_t last_idx;
// Recognizer state carried over between calls.
enum ctlseqs_state state;
// Skip poll() and call read() directly.
bool no_poll;
// Also store matched sequences in the first two result slots.
bool save_matched;
};
/**
 * Wait until the descriptor described by `pollfd` has an event.
 *
 * @param pollfd   Descriptor and requested events.
 * @param timeout  Timeout passed to poll() unchanged.
 * @return CTLSEQS_OK when input is available, CTLSEQS_TIMEOUT when poll()
 *         reports no event, CTLSEQS_EOF on hang-up without input,
 *         CTLSEQS_INTR when interrupted by a signal, CTLSEQS_ERROR otherwise.
 */
ctlseqs_hot static inline int
ctlseqs_poll(
    struct pollfd *pollfd,
    int            timeout
) {
    int const ready = poll(pollfd, 1, timeout);
    if (ready == 0) {
        return CTLSEQS_TIMEOUT;
    }
    if (ready != 1) {
        // poll() failed; only an interrupting signal is reported separately.
        return errno == EINTR ? CTLSEQS_INTR : CTLSEQS_ERROR;
    }
    short const revents = pollfd->revents;
    // Pending input takes precedence over a simultaneous hang-up.
    if (ctlseqs_likely(revents & POLLIN)) {
        return CTLSEQS_OK;
    }
    return (revents & POLLHUP) ? CTLSEQS_EOF : CTLSEQS_ERROR;
}
/**
 * Read more data from the reader's descriptor into its buffer.
 *
 * Data is stored right after the part of the pending sequence that has
 * already been scanned (buf_start + last_idx), using whatever space is left
 * up to `readlen`. On success `buf_end` grows by the number of bytes read.
 *
 * @return CTLSEQS_OK on success, CTLSEQS_EOF when read() returns 0,
 *         CTLSEQS_TIMEOUT when the read would block, CTLSEQS_INTR when
 *         interrupted by a signal, CTLSEQS_ERROR on any other failure.
 */
ctlseqs_hot static inline int
ctlseqs_do_read(
    struct ctlseqs_reader *reader
) {
    size_t const used = reader->buf_start + reader->last_idx;
    char *const dest = reader->rbuf + used;
    size_t const room = reader->readlen - used;
    ssize_t const got = read(reader->pollfd.fd, dest, room);

    if (got == -1) {
        if (errno == EAGAIN || errno == EWOULDBLOCK) {
            return CTLSEQS_TIMEOUT;
        }
        return errno == EINTR ? CTLSEQS_INTR : CTLSEQS_ERROR;
    }
    if (got == 0) {
        return CTLSEQS_EOF;
    }
    reader->buf_end += got;
    return CTLSEQS_OK;
}
/**
 * Advance the control sequence recognizer by one character.
 *
 * @param state  Current state; must not be ctlseqs_state_done or
 *               ctlseqs_state_err.
 * @param ch     Next input character.
 * @return The new state: ctlseqs_state_done once a complete sequence has
 *         been seen, ctlseqs_state_err when `ch` cannot continue the
 *         sequence, otherwise an intermediate state.
 */
ctlseqs_hot static enum ctlseqs_state
ctlseqs_state_transition(
    enum ctlseqs_state state,
    char               ch
) {
    // Character classes shared by several states.
    bool const is_esc = ch == 0x1b;
    bool const is_printable = ch >= ' ' && ch <= '~';
    bool const is_intermediate = ch >= ' ' && ch <= '/';
    bool const is_final = ch >= '@' && ch <= '~';

    switch (state) {
    case ctlseqs_state_none:
        return is_esc ? ctlseqs_state_esc : ctlseqs_state_err;

    case ctlseqs_state_esc:
        if (ch == '[') {
            return ctlseqs_state_csi;
        }
        if (ch == 'N' || ch == 'O') {
            return ctlseqs_state_ss;
        }
        if (ch == 'P' || ch == ']' || ch == '_' || ch == '^') {
            return ctlseqs_state_cmdstr;
        }
        if (ch == 'X') {
            return ctlseqs_state_ctlstr;
        }
        // Any other printable character completes a two-character sequence.
        return is_printable ? ctlseqs_state_done : ctlseqs_state_err;

    case ctlseqs_state_csi:
        if (ch >= '0' && ch <= '?') {
            return state;
        }
        if (is_intermediate) {
            return ctlseqs_state_csi_intmd;
        }
        return is_final ? ctlseqs_state_done : ctlseqs_state_err;

    case ctlseqs_state_csi_intmd:
        if (is_intermediate) {
            return state;
        }
        return is_final ? ctlseqs_state_done : ctlseqs_state_err;

    case ctlseqs_state_cmdstr:
        if (is_esc) {
            return ctlseqs_state_str_end;
        }
        // The string body may contain 0x08..0x0d and ' '..'~' only.
        if ((ch >= 0x08 && ch <= 0x0d) || is_printable) {
            return state;
        }
        return ctlseqs_state_err;

    case ctlseqs_state_ss:
        return is_printable ? ctlseqs_state_done : ctlseqs_state_err;

    case ctlseqs_state_ctlstr:
        return is_esc ? ctlseqs_state_str_end : state;

    case ctlseqs_state_str_end:
        return ch == '\\' ? ctlseqs_state_done : ctlseqs_state_err;

    default:
        ctlseqs_unreachable();
        return state;
    }
}
/**
 * Extract the value of one placeholder from the sequence.
 *
 * @param seq   Position in the sequence where the value starts.
 * @param type  Placeholder code (one of enum ctlseqs_placeholder).
 * @param buf   In: next free slot of the result buffer. Out: advanced past
 *              the slots that were filled (one for a number, two for a
 *              string, count + 1 for a list of numbers).
 * @return Position right after the extracted value, or NULL when a number
 *         was expected but could not be parsed.
 */
ctlseqs_hot static char const *
ctlseqs_fetch_value(
    char const           *seq,
    int                   type,
    union ctlseqs_value **buf
) {
    // These locals are referenced by name from the CTLSEQS_VALUE_* macros,
    // each of which ends with a return statement.
    size_t cnt;
    unsigned long num;
    char *endptr = NULL;
    union ctlseqs_value *buf_val = *buf;

    switch (type) {
    case ctlseqs_ph_num:
        CTLSEQS_VALUE_NUM(10);
    case ctlseqs_ph_hexnum:
        CTLSEQS_VALUE_NUM(16);
    case ctlseqs_ph_nums:
        // Slot 0 holds the count; the numbers follow from slot 1 onwards.
        for (cnt = 0; ; seq = endptr + 1) {
            errno = 0;
            num = strtoul(seq, &endptr, 10);
            if (errno || seq == endptr) {
                return NULL;
            }
            buf_val[++cnt].num = num;
            if (*endptr != ';') {
                break;
            }
        }
        buf_val[0].len = cnt;
        *buf += cnt + 1;
        return endptr;
    case ctlseqs_ph_str:
        CTLSEQS_VALUE_STR(num < ' ' || num > '~');
    case ctlseqs_ph_cmdstr:
        CTLSEQS_VALUE_STR(
            num < 0x08 || num > '~' || (num > 0x0d && num < ' '));
    case ctlseqs_ph_csi_param:
        CTLSEQS_VALUE_STR(num < '0' || num > '?');
    case ctlseqs_ph_csi_intmd:
        CTLSEQS_VALUE_STR(num < ' ' || num > '/');
    case ctlseqs_ph_chrstr:
        CTLSEQS_VALUE_STR(num > 0x7f);
    default:
        ctlseqs_unreachable();
        return NULL;
    }
}
ctlseqs_hot static ssize_t
ctlseqs_match_pattern(
struct ctlseqs_matcher const *matcher,
struct ctlseqs_match_args const *args
) {
struct ctlseqs_trie_node const *old_node, empty_node = { 0 };
struct ctlseqs_match_ctx match_stack[matcher->match_stack_size];
struct ctlseqs_match_ctx match_ctx = {
.node = matcher == NULL ? &empty_node : &matcher->root,
.seq = args->seq + 1,
.result = args->result + (args->save_seq ? 2 : 0),
};
ssize_t match_stack_top = -1;
while (true) {
match_ctx.value = match_ctx.node->value;
if (match_ctx.value == -1) {
match_character:
match_ctx.node
= match_ctx.node->children[(unsigned)match_ctx.seq++[0]];
if (match_ctx.node == NULL) {
break;
}
} else if (match_ctx.value < -1) {
match_placeholder:
old_node = match_ctx.node;
match_ctx.node = match_ctx.node->children[-match_ctx.value];
struct ctlseqs_trie_node *next_node = match_ctx.node->next;
match_stack[++match_stack_top] = (struct ctlseqs_match_ctx) {
.value = next_node == NULL ? 0 : -next_node->placeholder,
.node = old_node,
.seq = match_ctx.seq,
.result = match_ctx.result,
};
match_ctx.seq = ctlseqs_fetch_value(
match_ctx.seq,
-match_ctx.value,
&match_ctx.result
);
if (match_ctx.seq == NULL) {
break;
}
} else {
// Successfully matched.
return match_ctx.value;
}
}
if (match_stack_top >= 0) {
match_ctx = match_stack[match_stack_top--];
if (match_ctx.value == 0) {
goto match_character;
} else {
goto match_placeholder;
}
}
return CTLSEQS_NOMATCH;
}
ctlseqs_hot static inline ssize_t
ctlseqs_do_match(
struct ctlseqs_matcher const *matcher,
struct ctlseqs_match_args *args
) {
ssize_t retval = CTLSEQS_PARTIAL;
char const *seq = args->seq;
size_t idx, len = args->seq_len;
enum ctlseqs_state state = args->state;
for (idx = args->offset; idx < len; ++idx) {
state = ctlseqs_state_transition(state, seq[idx]);
if (state == ctlseqs_state_err) {
// Anything before next ESC is definitely not a control sequence.
for (; idx < len; ++idx) {
if (seq[idx] == 0x1b) {
break;
}
}
retval = CTLSEQS_NOSEQ;
break;
}
if (state == ctlseqs_state_done) {
retval = ctlseqs_match_pattern(matcher, args);
++idx;
break;
}
}
if (retval < 0 || args->save_seq) {
args->result[0].len = idx;
args->result[1].str = seq;
}
args->result_idx = idx;
args->state = state;
return retval;
}
ctlseqs_hot static ssize_t
ctlseqs_reader_match(
struct ctlseqs_reader *reader,
struct ctlseqs_matcher const *matcher
) {
struct ctlseqs_match_args args = {
.seq = reader->rbuf + reader->buf_start,
.seq_len = reader->buf_end - reader->buf_start,
.offset = reader->last_idx,
.result = reader->result,
.state = reader->state,
.save_seq = reader->save_matched,
};
ssize_t retval = ctlseqs_do_match(matcher, &args);
if (retval == CTLSEQS_PARTIAL) {
reader->last_idx = args.result_idx;
if (ctlseqs_unlikely(
reader->buf_start + args.result_idx == reader->readlen
)) {
// Buffer is full but a match is still pending.
// This may happen when the reader's maxlen option is not
// large enough to hold a sequence,
// or when the the sequences are produced faster than consumed.
if (reader->buf_start > reader->readlen / 2) {
memcpy(
reader->rbuf,
reader->rbuf + reader->buf_start,
args.result_idx
);
reader->buf_start = 0;
reader->buf_end = args.result_idx;
} else {
// We could memmove() here, but having a buffer no larger than
// twice the size of a sequence is hardly what a normal program
// would desire.
retval = CTLSEQS_NOMEM;
}
}
} else {
reader->buf_start += args.result_idx;
reader->last_idx = 0;
if (reader->buf_start == reader->buf_end) {
reader->buf_start = 0;
reader->buf_end = 0;
}
}
if (args.state >= ctlseqs_state_done) {
reader->state = ctlseqs_state_none;
} else {
reader->state = args.state;
}
return retval;
}
struct ctlseqs_matcher *
ctlseqs_matcher_init()
{
struct ctlseqs_matcher *mem = malloc(sizeof(struct ctlseqs_matcher) +
sizeof(struct ctlseqs_trie_node) * CTLSEQS_TRIE_NODE_POOL_INIT_SIZE);
if (ctlseqs_likely(mem != NULL)) {
*mem = (struct ctlseqs_matcher) {
.node_pools = { (struct ctlseqs_trie_node *)(mem + 1) },
.pool_size = CTLSEQS_TRIE_NODE_POOL_INIT_SIZE
};
}
return mem;
}
int
ctlseqs_matcher_config(
struct ctlseqs_matcher *matcher,
struct ctlseqs_matcher_options const *options
) {
size_t node_idx = 0, max_format_size = 0;
struct ctlseqs_trie_node *node_pool
= matcher->node_pools[matcher->pool_idx];
matcher->root = (struct ctlseqs_trie_node) { .value = -1 };
for (size_t i = 0; i < options->npatterns; ++i) {
char const *pattern = options->patterns[i];
struct ctlseqs_trie_node *node = &matcher->root;
// We assume that pattern[0] is always ESC.
for (size_t j = 1; ; ++j) {
int ch = pattern[j];
if (ch == '\0') {
node->value = i;
if (j > max_format_size) {
max_format_size = j;
}
break;
}
struct ctlseqs_trie_node *old_node = node;
node = node->children[ch];
if (node != NULL) {
continue;
}
if (ctlseqs_unlikely(++node_idx >= matcher->pool_size)) {
if (ctlseqs_unlikely(
matcher->pool_idx >= CTLSEQS_TRIE_NODE_POOL_MAX_NUM - 1
)) {
return CTLSEQS_NOMEM;
}
node_pool = malloc(
sizeof(struct ctlseqs_trie_node) * matcher->pool_size * 2
);
if (ctlseqs_unlikely(node_pool == NULL)) {
return CTLSEQS_NOMEM;
}
node_idx = 0;
matcher->node_pools[++matcher->pool_idx] = node_pool;
matcher->pool_size *= 2;
}
old_node->children[ch] = node = node_pool + node_idx;
ssize_t placeholder;
if (ch < ctlseqs_ph_begin || ch >= ctlseqs_ph_end) {
placeholder = 0;
} else {
placeholder = ch;
}
*node = (struct ctlseqs_trie_node) {
// Value -1 indicates that there's no match on current node.
.value = -1,
.placeholder = placeholder,
};
if (placeholder == 0) {
continue;
}
if (old_node->value < -1) {
// Node with multiple placeholders contains negated offset of
// the child node which is the head of the linked list.
node->next = old_node->children[-old_node->value];
}
old_node->value = -ch;
}
}
matcher->match_stack_size = max_format_size;
return CTLSEQS_OK;
}
/**
 * Find and match the first control sequence in a string.
 *
 * Data that is not part of a control sequence is skipped: whenever the
 * scanner reports CTLSEQS_NOSEQ and stopped before the end of the data (that
 * is, at an ESC), scanning restarts from that position.
 *
 * @param matcher  Matcher holding the patterns.
 * @param str      Data to scan.
 * @param str_len  Number of bytes at `str`.
 * @param result   Receives the scanned sequence (slots 0 and 1) followed by
 *                 the values extracted by the matching pattern.
 * @return The result of the last ctlseqs_do_match() call: a pattern index,
 *         or a negative CTLSEQS_* code such as CTLSEQS_NOSEQ when no control
 *         sequence was found.
 */
ctlseqs_hot ssize_t
ctlseqs_match(
    struct ctlseqs_matcher const *matcher,
    char const                   *str,
    size_t                        str_len,
    union ctlseqs_value          *result
) {
    struct ctlseqs_match_args args = {
        .seq = str,
        .seq_len = str_len,
        .result = result,
        .save_seq = true,
    };
    ssize_t retval;
    while (true) {
        retval = ctlseqs_do_match(matcher, &args);
        if (retval != CTLSEQS_NOSEQ) {
            break;
        }
        size_t const skipped = args.result[0].len;
        // Stop when everything that was left has been scanned. The zero
        // check guarantees progress on every iteration.
        if (skipped == 0 || skipped >= args.seq_len) {
            break;
        }
        args.seq += skipped;
        args.seq_len -= skipped;
        // The previous attempt ended in the error state, which is not a
        // valid state to resume from; start over at the ESC we stopped at.
        args.state = ctlseqs_state_none;
    }
    return retval;
}
/**
 * Release a matcher and all of its trie node pools.
 *
 * @param matcher  Matcher returned by ctlseqs_matcher_init(); NULL is
 *                 accepted and ignored.
 */
ctlseqs_cold void
ctlseqs_matcher_free(
    struct ctlseqs_matcher *matcher
) {
    // A NULL matcher is the exceptional case, as in ctlseqs_reader_free().
    if (ctlseqs_unlikely(matcher == NULL)) {
        return;
    }
    // Pool 0 is part of the matcher's own allocation, so start from 1.
    for (size_t idx = 1; idx <= matcher->pool_idx; ++idx) {
        free(matcher->node_pools[idx]);
    }
    free(matcher);
}
struct ctlseqs_reader *
ctlseqs_reader_init()
{
struct ctlseqs_reader *reader = malloc(sizeof(struct ctlseqs_reader));
if (ctlseqs_likely(reader != NULL)) {
*reader = (struct ctlseqs_reader) { .pollfd.events = POLLIN };
}
return reader;
}
/**
 * Apply options to a reader, resizing its read buffer when `maxlen` changed.
 *
 * @param reader   Reader to configure.
 * @param options  New settings (`maxlen`, `result`, `fd`, `flags`).
 * @return CTLSEQS_OK on success, CTLSEQS_ERROR when the new buffer size
 *         cannot hold the data currently buffered, CTLSEQS_NOMEM when the
 *         buffer could not be resized. On failure the reader is unchanged.
 */
int
ctlseqs_reader_config(
    struct ctlseqs_reader               *reader,
    struct ctlseqs_reader_options const *options
) {
    size_t const readlen = options->maxlen;
    if (reader->readlen != readlen) {
        if (readlen < reader->buf_end) {
            return CTLSEQS_ERROR;
        }
        if (readlen == 0) {
            // realloc() with size 0 may free the buffer and return NULL,
            // which must not be mistaken for an allocation failure: the old
            // pointer would be left dangling. Release the buffer explicitly.
            free(reader->rbuf);
            reader->rbuf = NULL;
        } else {
            char *rbuf = realloc(reader->rbuf, readlen);
            if (rbuf == NULL) {
                return CTLSEQS_NOMEM;
            }
            reader->rbuf = rbuf;
        }
        reader->readlen = readlen;
    }
    reader->result = options->result;
    reader->pollfd.fd = options->fd;
    reader->no_poll = options->flags & CTLSEQS_READER_NO_POLL;
    reader->save_matched = options->flags & CTLSEQS_READER_SAVE_MATCHED_SEQS;
    return CTLSEQS_OK;
}
/**
 * Read from the reader's descriptor and match the next control sequence.
 *
 * @param reader   Configured reader.
 * @param matcher  Matcher holding the patterns.
 * @param timeout  Timeout passed to poll(); unused when polling is disabled.
 * @return The result of matching the buffered data, or a negative CTLSEQS_*
 *         code from polling or reading. A failed read while a sequence is
 *         pending is reported as CTLSEQS_PARTIAL.
 */
ctlseqs_hot ssize_t
ctlseqs_read(
    struct ctlseqs_reader        *reader,
    struct ctlseqs_matcher const *matcher,
    int                           timeout
) {
    ssize_t result;

    // Whether we have read more than we could match in the previous call.
    bool const has_leftover
        = reader->state == ctlseqs_state_none && reader->buf_start != 0;
    if (has_leftover) {
        result = ctlseqs_reader_match(reader, matcher);
        if (result != CTLSEQS_PARTIAL) {
            return result;
        }
    }

    if (!reader->no_poll) {
        result = ctlseqs_poll(&reader->pollfd, timeout);
        if (result < 0) {
            return result;
        }
    }

    result = ctlseqs_do_read(reader);
    if (ctlseqs_unlikely(result < 0)) {
        // In the middle of a sequence the caller is told to keep waiting.
        return reader->state == ctlseqs_state_none ? result : CTLSEQS_PARTIAL;
    }
    return ctlseqs_reader_match(reader, matcher);
}
/**
 * Discard buffered data and reset the recognizer.
 *
 * @param reader  Reader whose buffer is to be purged.
 * @param nbytes  Number of bytes to drop from the start of the unconsumed
 *                data; 0 leaves the reader untouched. Dropping as much as or
 *                more than is buffered empties the buffer.
 */
void
ctlseqs_purge(
    struct ctlseqs_reader *reader,
    size_t                 nbytes
) {
    if (ctlseqs_unlikely(nbytes == 0)) {
        return;
    }
    size_t const new_start = reader->buf_start + nbytes;
    bool const drained = new_start >= reader->buf_end;
    reader->buf_start = drained ? 0 : new_start;
    if (drained) {
        reader->buf_end = 0;
    }
    // Whatever was pending is gone; start scanning afresh.
    reader->last_idx = 0;
    reader->state = ctlseqs_state_none;
}
/**
 * Release a reader and its read buffer.
 *
 * @param reader  Reader returned by ctlseqs_reader_init(); NULL is accepted
 *                and ignored.
 */
ctlseqs_cold void
ctlseqs_reader_free(
    struct ctlseqs_reader *reader
) {
    if (ctlseqs_likely(reader != NULL)) {
        free(reader->rbuf);
        free(reader);
    }
}