Add `ctlseqs_match()`.
This commit is contained in:
parent
b19ef960a0
commit
5bf70d7b77
133
src/ctlseqs.c
133
src/ctlseqs.c
|
@ -69,25 +69,24 @@
|
|||
|
||||
#define CTLSEQS_VALUE_STR(stop_cond) \
|
||||
for (cnt = 0; ; ++cnt) { \
|
||||
num = seq_val[cnt]; \
|
||||
num = seq[cnt]; \
|
||||
if (stop_cond) { \
|
||||
break; \
|
||||
} \
|
||||
} \
|
||||
buf_val[0].num = cnt; \
|
||||
buf_val[1].str = seq_val; \
|
||||
*seq = seq_val + cnt; \
|
||||
buf_val[1].str = seq; \
|
||||
*buf += 2; \
|
||||
return true
|
||||
return seq + cnt
|
||||
#define CTLSEQS_VALUE_NUM(base) \
|
||||
errno = 0; \
|
||||
num = strtoul(seq_val, seq, base); \
|
||||
if (errno || seq_val == *seq) { \
|
||||
return false; \
|
||||
num = strtoul(seq, &endptr, base); \
|
||||
if (errno || seq == endptr) { \
|
||||
return NULL; \
|
||||
} \
|
||||
buf_val[0].num = num; \
|
||||
++*buf; \
|
||||
return true
|
||||
return endptr
|
||||
|
||||
enum ctlseqs_placeholder {
|
||||
ctlseqs_ph_begin = 0x0e,
|
||||
|
@ -122,10 +121,20 @@ struct ctlseqs_trie_node {
|
|||
struct ctlseqs_trie_node *children[128];
|
||||
};
|
||||
|
||||
struct ctlseqs_match_args {
|
||||
char const *seq;
|
||||
size_t seq_len;
|
||||
size_t offset;
|
||||
union ctlseqs_value *result;
|
||||
size_t result_idx;
|
||||
enum ctlseqs_state state;
|
||||
bool save_seq;
|
||||
};
|
||||
|
||||
struct ctlseqs_match_ctx {
|
||||
ssize_t value;
|
||||
struct ctlseqs_trie_node const *node;
|
||||
char *rbuf;
|
||||
char const *seq;
|
||||
union ctlseqs_value *result;
|
||||
};
|
||||
|
||||
|
@ -238,11 +247,11 @@ ctlseqs_state_transition(enum ctlseqs_state state, char ch)
|
|||
}
|
||||
}
|
||||
|
||||
CTLSEQS_HOT static bool
|
||||
ctlseqs_fetch(char **seq, int type, union ctlseqs_value **buf)
|
||||
CTLSEQS_HOT static char const *
|
||||
ctlseqs_fetch_value(char const *seq, int type, union ctlseqs_value **buf)
|
||||
{
|
||||
unsigned long cnt, num;
|
||||
char *seq_val = *seq;
|
||||
char *endptr = NULL;
|
||||
union ctlseqs_value *buf_val = *buf;
|
||||
switch (type) {
|
||||
case ctlseqs_ph_num:
|
||||
|
@ -250,19 +259,19 @@ ctlseqs_fetch(char **seq, int type, union ctlseqs_value **buf)
|
|||
case ctlseqs_ph_nums:
|
||||
for (cnt = 1; ; ++cnt) {
|
||||
errno = 0;
|
||||
num = strtoul(seq_val, seq, 10);
|
||||
if (errno || seq_val == *seq) {
|
||||
return false;
|
||||
num = strtoul(seq, &endptr, 10);
|
||||
if (errno || seq == endptr) {
|
||||
return NULL;
|
||||
}
|
||||
buf_val[cnt].num = num;
|
||||
if ((*seq)[0] != ';') {
|
||||
if (endptr[0] != ';') {
|
||||
break;
|
||||
}
|
||||
seq_val = *seq + 1;
|
||||
seq = endptr + 1;
|
||||
}
|
||||
buf_val[0].num = cnt;
|
||||
*buf += cnt + 1;
|
||||
return true;
|
||||
return endptr;
|
||||
case ctlseqs_ph_str:
|
||||
CTLSEQS_VALUE_STR(num < ' ' || num > '~');
|
||||
case ctlseqs_ph_cmdstr:
|
||||
|
@ -277,25 +286,25 @@ ctlseqs_fetch(char **seq, int type, union ctlseqs_value **buf)
|
|||
CTLSEQS_VALUE_STR(num > 0x7f);
|
||||
default:
|
||||
CTLSEQS_UNREACHABLE();
|
||||
return false;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
CTLSEQS_HOT static ssize_t
|
||||
ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher)
|
||||
ctlseqs_match_pattern(struct ctlseqs_matcher const *matcher, struct ctlseqs_match_args const *args)
|
||||
{
|
||||
struct ctlseqs_trie_node const *old_node, empty_node = { 0 };
|
||||
struct ctlseqs_match_ctx match_ctx = {
|
||||
.node = matcher == NULL ? &empty_node : &matcher->root,
|
||||
.rbuf = reader->rbuf + reader->buf_start + 1,
|
||||
.result = reader->buffer + (reader->save_matched ? 2 : 0),
|
||||
.seq = args->seq + 1,
|
||||
.result = args->result + (args->save_seq ? 2 : 0),
|
||||
};
|
||||
ssize_t match_stack_top = -1;
|
||||
while (true) {
|
||||
match_ctx.value = match_ctx.node->value;
|
||||
if (match_ctx.value == -1) {
|
||||
match_character:
|
||||
match_ctx.node = match_ctx.node->children[(unsigned)match_ctx.rbuf++[0]];
|
||||
match_ctx.node = match_ctx.node->children[(unsigned)match_ctx.seq++[0]];
|
||||
if (match_ctx.node == NULL) {
|
||||
break;
|
||||
}
|
||||
|
@ -307,10 +316,11 @@ ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher cons
|
|||
matcher->match_stack[++match_stack_top] = (struct ctlseqs_match_ctx) {
|
||||
.value = next_node == NULL ? 0 : -next_node->placeholder,
|
||||
.node = old_node,
|
||||
.rbuf = match_ctx.rbuf,
|
||||
.seq = match_ctx.seq,
|
||||
.result = match_ctx.result,
|
||||
};
|
||||
if (!ctlseqs_fetch(&match_ctx.rbuf, -match_ctx.value, &match_ctx.result)) {
|
||||
match_ctx.seq = ctlseqs_fetch_value(match_ctx.seq, -match_ctx.value, &match_ctx.result);
|
||||
if (match_ctx.seq == NULL) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
|
@ -329,19 +339,19 @@ ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher cons
|
|||
return CTLSEQS_NOMATCH;
|
||||
}
|
||||
|
||||
CTLSEQS_HOT static ssize_t
|
||||
ctlseqs_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher)
|
||||
CTLSEQS_HOT static inline ssize_t
|
||||
ctlseqs_do_match(struct ctlseqs_matcher const *matcher, struct ctlseqs_match_args *args)
|
||||
{
|
||||
ssize_t retval = CTLSEQS_PARTIAL;
|
||||
char const *buf = reader->rbuf + reader->buf_start;
|
||||
size_t idx, len = reader->buf_end - reader->buf_start;
|
||||
enum ctlseqs_state state = reader->state;
|
||||
for (idx = reader->last_idx; idx < len; ++idx) {
|
||||
state = ctlseqs_state_transition(state, buf[idx]);
|
||||
char const *seq = args->seq;
|
||||
size_t idx, len = args->seq_len;
|
||||
enum ctlseqs_state state = args->state;
|
||||
for (idx = args->offset; idx < len; ++idx) {
|
||||
state = ctlseqs_state_transition(state, seq[idx]);
|
||||
if (state == ctlseqs_state_err) {
|
||||
// Anything before next ESC is definitely not a control sequence.
|
||||
for (; idx < len; ++idx) {
|
||||
if (buf[idx] == 0x1b) {
|
||||
if (seq[idx] == 0x1b) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -349,25 +359,42 @@ ctlseqs_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *match
|
|||
break;
|
||||
}
|
||||
if (state == ctlseqs_state_done) {
|
||||
retval = ctlseqs_match_pattern(reader, matcher);
|
||||
retval = ctlseqs_match_pattern(matcher, args);
|
||||
++idx;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (retval < 0 || reader->save_matched) {
|
||||
reader->buffer[0].num = idx;
|
||||
reader->buffer[1].str = buf;
|
||||
if (retval < 0 || args->save_seq) {
|
||||
args->result[0].num = idx;
|
||||
args->result[1].str = seq;
|
||||
}
|
||||
args->result_idx = idx;
|
||||
args->state = state;
|
||||
return retval;
|
||||
}
|
||||
|
||||
CTLSEQS_HOT static ssize_t
|
||||
ctlseqs_reader_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher)
|
||||
{
|
||||
struct ctlseqs_match_args args = {
|
||||
.seq = reader->rbuf + reader->buf_start,
|
||||
.seq_len = reader->buf_end - reader->buf_start,
|
||||
.offset = reader->last_idx,
|
||||
.result = reader->buffer,
|
||||
.state = reader->state,
|
||||
.save_seq = reader->save_matched,
|
||||
};
|
||||
ssize_t retval = ctlseqs_do_match(matcher, &args);
|
||||
if (retval == CTLSEQS_PARTIAL) {
|
||||
reader->last_idx = idx;
|
||||
if (CTLSEQS_UNLIKELY(reader->buf_start + idx == reader->readlen)) {
|
||||
reader->last_idx = args.result_idx;
|
||||
if (CTLSEQS_UNLIKELY(reader->buf_start + args.result_idx == reader->readlen)) {
|
||||
// Buffer is full but a match is still pending.
|
||||
// This may happen when the reader's maxlen option is not large enough to hold a sequence,
|
||||
// or when the sequences are produced faster than consumed.
|
||||
if (reader->buf_start > reader->readlen / 2) {
|
||||
memcpy(reader->rbuf, reader->rbuf + reader->buf_start, idx);
|
||||
memcpy(reader->rbuf, reader->rbuf + reader->buf_start, args.result_idx);
|
||||
reader->buf_start = 0;
|
||||
reader->buf_end = idx;
|
||||
reader->buf_end = args.result_idx;
|
||||
} else {
|
||||
// We could memmove() here, but having a buffer no larger than twice the size of a sequence
|
||||
// is hardly what a normal program would desire.
|
||||
|
@ -375,14 +402,14 @@ ctlseqs_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *match
|
|||
}
|
||||
}
|
||||
} else {
|
||||
reader->buf_start += idx;
|
||||
reader->buf_start += args.result_idx;
|
||||
reader->last_idx = 0;
|
||||
if (reader->buf_start == reader->buf_end) {
|
||||
reader->buf_start = 0;
|
||||
reader->buf_end = 0;
|
||||
}
|
||||
}
|
||||
reader->state = state >= ctlseqs_state_done ? ctlseqs_state_none : state;
|
||||
reader->state = args.state >= ctlseqs_state_done ? ctlseqs_state_none : args.state;
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -464,6 +491,18 @@ ctlseqs_matcher_config(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_o
|
|||
return CTLSEQS_OK;
|
||||
}
|
||||
|
||||
CTLSEQS_HOT ssize_t
|
||||
ctlseqs_match(struct ctlseqs_matcher const *matcher, char const *seq, size_t seq_len, union ctlseqs_value *result)
|
||||
{
|
||||
struct ctlseqs_match_args args = {
|
||||
.seq = seq,
|
||||
.seq_len = seq_len,
|
||||
.result = result,
|
||||
.save_seq = true,
|
||||
};
|
||||
return ctlseqs_do_match(matcher, &args);
|
||||
}
|
||||
|
||||
CTLSEQS_COLD void
|
||||
ctlseqs_matcher_free(struct ctlseqs_matcher *matcher)
|
||||
{
|
||||
|
@ -481,9 +520,7 @@ ctlseqs_reader_init()
|
|||
{
|
||||
struct ctlseqs_reader *reader = malloc(sizeof(struct ctlseqs_reader));
|
||||
if (CTLSEQS_LIKELY(reader != NULL)) {
|
||||
*reader = (struct ctlseqs_reader) {
|
||||
.pollfd.events = POLLIN,
|
||||
};
|
||||
*reader = (struct ctlseqs_reader) { .pollfd.events = POLLIN };
|
||||
}
|
||||
return reader;
|
||||
}
|
||||
|
@ -520,7 +557,7 @@ ctlseqs_read(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matche
|
|||
ssize_t result;
|
||||
// Whether we have read more than we could match in the previous call.
|
||||
if (reader->state == ctlseqs_state_none && reader->buf_start != 0) {
|
||||
result = ctlseqs_match(reader, matcher);
|
||||
result = ctlseqs_reader_match(reader, matcher);
|
||||
if (result != CTLSEQS_PARTIAL) {
|
||||
return result;
|
||||
}
|
||||
|
@ -535,7 +572,7 @@ ctlseqs_read(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matche
|
|||
if (CTLSEQS_UNLIKELY(result < 0)) {
|
||||
return reader->state == ctlseqs_state_none ? result : CTLSEQS_PARTIAL;
|
||||
}
|
||||
return ctlseqs_match(reader, matcher);
|
||||
return ctlseqs_reader_match(reader, matcher);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -406,6 +406,9 @@ ctlseqs_matcher_config(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_o
|
|||
void
|
||||
ctlseqs_matcher_free(struct ctlseqs_matcher *matcher);
|
||||
|
||||
ssize_t
|
||||
ctlseqs_match(struct ctlseqs_matcher const *matcher, char const *seq, size_t seq_len, union ctlseqs_value *result);
|
||||
|
||||
struct ctlseqs_reader *
|
||||
ctlseqs_reader_init();
|
||||
|
||||
|
|
Loading…
Reference in New Issue