Add `ctlseqs_match()`.

This commit is contained in:
CismonX 2020-12-24 14:39:14 +08:00
parent b19ef960a0
commit 5bf70d7b77
Signed by: cismonx
GPG Key ID: 3094873E29A482FB
2 changed files with 88 additions and 48 deletions

View File

@ -69,25 +69,24 @@
#define CTLSEQS_VALUE_STR(stop_cond) \
for (cnt = 0; ; ++cnt) { \
num = seq_val[cnt]; \
num = seq[cnt]; \
if (stop_cond) { \
break; \
} \
} \
buf_val[0].num = cnt; \
buf_val[1].str = seq_val; \
*seq = seq_val + cnt; \
buf_val[1].str = seq; \
*buf += 2; \
return true
return seq + cnt
#define CTLSEQS_VALUE_NUM(base) \
errno = 0; \
num = strtoul(seq_val, seq, base); \
if (errno || seq_val == *seq) { \
return false; \
num = strtoul(seq, &endptr, base); \
if (errno || seq == endptr) { \
return NULL; \
} \
buf_val[0].num = num; \
++*buf; \
return true
return endptr
enum ctlseqs_placeholder {
ctlseqs_ph_begin = 0x0e,
@ -122,10 +121,20 @@ struct ctlseqs_trie_node {
struct ctlseqs_trie_node *children[128];
};
// Arguments (and in/out scan state) passed to ctlseqs_do_match() and ctlseqs_match_pattern().
struct ctlseqs_match_args {
// Start of the bytes to scan. Pattern matching starts reading at seq + 1.
char const *seq;
// Number of bytes of `seq` that the state machine is allowed to scan.
size_t seq_len;
// Index into `seq` at which scanning starts (the reader passes its saved last_idx here).
size_t offset;
// Output buffer. When the match returns a negative value, or `save_seq` is set,
// result[0].num receives the stop index and result[1].str receives `seq`.
union ctlseqs_value *result;
// Output: index into `seq` at which ctlseqs_do_match() stopped scanning.
size_t result_idx;
// In/out: state machine state to start from; overwritten with the state reached when scanning stops.
enum ctlseqs_state state;
// When true, extracted pattern values are written starting at result + 2 instead of result,
// leaving the first two slots for the stop index and sequence pointer.
bool save_seq;
};
struct ctlseqs_match_ctx {
ssize_t value;
struct ctlseqs_trie_node const *node;
char *rbuf;
char const *seq;
union ctlseqs_value *result;
};
@ -238,11 +247,11 @@ ctlseqs_state_transition(enum ctlseqs_state state, char ch)
}
}
CTLSEQS_HOT static bool
ctlseqs_fetch(char **seq, int type, union ctlseqs_value **buf)
CTLSEQS_HOT static char const *
ctlseqs_fetch_value(char const *seq, int type, union ctlseqs_value **buf)
{
unsigned long cnt, num;
char *seq_val = *seq;
char *endptr = NULL;
union ctlseqs_value *buf_val = *buf;
switch (type) {
case ctlseqs_ph_num:
@ -250,19 +259,19 @@ ctlseqs_fetch(char **seq, int type, union ctlseqs_value **buf)
case ctlseqs_ph_nums:
for (cnt = 1; ; ++cnt) {
errno = 0;
num = strtoul(seq_val, seq, 10);
if (errno || seq_val == *seq) {
return false;
num = strtoul(seq, &endptr, 10);
if (errno || seq == endptr) {
return NULL;
}
buf_val[cnt].num = num;
if ((*seq)[0] != ';') {
if (endptr[0] != ';') {
break;
}
seq_val = *seq + 1;
seq = endptr + 1;
}
buf_val[0].num = cnt;
*buf += cnt + 1;
return true;
return endptr;
case ctlseqs_ph_str:
CTLSEQS_VALUE_STR(num < ' ' || num > '~');
case ctlseqs_ph_cmdstr:
@ -277,25 +286,25 @@ ctlseqs_fetch(char **seq, int type, union ctlseqs_value **buf)
CTLSEQS_VALUE_STR(num > 0x7f);
default:
CTLSEQS_UNREACHABLE();
return false;
return NULL;
}
}
CTLSEQS_HOT static ssize_t
ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher)
ctlseqs_match_pattern(struct ctlseqs_matcher const *matcher, struct ctlseqs_match_args const *args)
{
struct ctlseqs_trie_node const *old_node, empty_node = { 0 };
struct ctlseqs_match_ctx match_ctx = {
.node = matcher == NULL ? &empty_node : &matcher->root,
.rbuf = reader->rbuf + reader->buf_start + 1,
.result = reader->buffer + (reader->save_matched ? 2 : 0),
.seq = args->seq + 1,
.result = args->result + (args->save_seq ? 2 : 0),
};
ssize_t match_stack_top = -1;
while (true) {
match_ctx.value = match_ctx.node->value;
if (match_ctx.value == -1) {
match_character:
match_ctx.node = match_ctx.node->children[(unsigned)match_ctx.rbuf++[0]];
match_ctx.node = match_ctx.node->children[(unsigned)match_ctx.seq++[0]];
if (match_ctx.node == NULL) {
break;
}
@ -307,10 +316,11 @@ ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher cons
matcher->match_stack[++match_stack_top] = (struct ctlseqs_match_ctx) {
.value = next_node == NULL ? 0 : -next_node->placeholder,
.node = old_node,
.rbuf = match_ctx.rbuf,
.seq = match_ctx.seq,
.result = match_ctx.result,
};
if (!ctlseqs_fetch(&match_ctx.rbuf, -match_ctx.value, &match_ctx.result)) {
match_ctx.seq = ctlseqs_fetch_value(match_ctx.seq, -match_ctx.value, &match_ctx.result);
if (match_ctx.seq == NULL) {
break;
}
} else {
@ -329,19 +339,19 @@ ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher cons
return CTLSEQS_NOMATCH;
}
CTLSEQS_HOT static ssize_t
ctlseqs_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher)
CTLSEQS_HOT static inline ssize_t
ctlseqs_do_match(struct ctlseqs_matcher const *matcher, struct ctlseqs_match_args *args)
{
ssize_t retval = CTLSEQS_PARTIAL;
char const *buf = reader->rbuf + reader->buf_start;
size_t idx, len = reader->buf_end - reader->buf_start;
enum ctlseqs_state state = reader->state;
for (idx = reader->last_idx; idx < len; ++idx) {
state = ctlseqs_state_transition(state, buf[idx]);
char const *seq = args->seq;
size_t idx, len = args->seq_len;
enum ctlseqs_state state = args->state;
for (idx = args->offset; idx < len; ++idx) {
state = ctlseqs_state_transition(state, seq[idx]);
if (state == ctlseqs_state_err) {
// Anything before next ESC is definitely not a control sequence.
for (; idx < len; ++idx) {
if (buf[idx] == 0x1b) {
if (seq[idx] == 0x1b) {
break;
}
}
@ -349,25 +359,42 @@ ctlseqs_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *match
break;
}
if (state == ctlseqs_state_done) {
retval = ctlseqs_match_pattern(reader, matcher);
retval = ctlseqs_match_pattern(matcher, args);
++idx;
break;
}
}
if (retval < 0 || reader->save_matched) {
reader->buffer[0].num = idx;
reader->buffer[1].str = buf;
if (retval < 0 || args->save_seq) {
args->result[0].num = idx;
args->result[1].str = seq;
}
args->result_idx = idx;
args->state = state;
return retval;
}
CTLSEQS_HOT static ssize_t
ctlseqs_reader_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher)
{
struct ctlseqs_match_args args = {
.seq = reader->rbuf + reader->buf_start,
.seq_len = reader->buf_end - reader->buf_start,
.offset = reader->last_idx,
.result = reader->buffer,
.state = reader->state,
.save_seq = reader->save_matched,
};
ssize_t retval = ctlseqs_do_match(matcher, &args);
if (retval == CTLSEQS_PARTIAL) {
reader->last_idx = idx;
if (CTLSEQS_UNLIKELY(reader->buf_start + idx == reader->readlen)) {
reader->last_idx = args.result_idx;
if (CTLSEQS_UNLIKELY(reader->buf_start + args.result_idx == reader->readlen)) {
// Buffer is full but a match is still pending.
// This may happen when the reader's maxlen option is not large enough to hold a sequence,
// or when the sequences are produced faster than consumed.
if (reader->buf_start > reader->readlen / 2) {
memcpy(reader->rbuf, reader->rbuf + reader->buf_start, idx);
memcpy(reader->rbuf, reader->rbuf + reader->buf_start, args.result_idx);
reader->buf_start = 0;
reader->buf_end = idx;
reader->buf_end = args.result_idx;
} else {
// We could memmove() here, but having a buffer no larger than twice the size of a sequence
// is hardly what a normal program would desire.
@ -375,14 +402,14 @@ ctlseqs_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *match
}
}
} else {
reader->buf_start += idx;
reader->buf_start += args.result_idx;
reader->last_idx = 0;
if (reader->buf_start == reader->buf_end) {
reader->buf_start = 0;
reader->buf_end = 0;
}
}
reader->state = state >= ctlseqs_state_done ? ctlseqs_state_none : state;
reader->state = args.state >= ctlseqs_state_done ? ctlseqs_state_none : args.state;
return retval;
}
@ -464,6 +491,18 @@ ctlseqs_matcher_config(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_o
return CTLSEQS_OK;
}
// Match an in-memory byte sequence against `matcher` without going through a reader.
//
// Scanning starts at index 0 with a zero-initialized state. The sequence is always saved:
// result[0].num receives the index at which scanning stopped and result[1].str receives `seq`,
// so any extracted pattern values are stored from result[2] onwards.
//
// Returns whatever ctlseqs_do_match() returns for these arguments.
CTLSEQS_HOT ssize_t
ctlseqs_match(struct ctlseqs_matcher const *matcher, char const *seq, size_t seq_len, union ctlseqs_value *result)
{
    // Zero everything first so offset, result_idx and state start from 0.
    struct ctlseqs_match_args match_args = { 0 };

    match_args.seq = seq;
    match_args.seq_len = seq_len;
    match_args.result = result;
    match_args.save_seq = true;

    return ctlseqs_do_match(matcher, &match_args);
}
CTLSEQS_COLD void
ctlseqs_matcher_free(struct ctlseqs_matcher *matcher)
{
@ -481,9 +520,7 @@ ctlseqs_reader_init()
{
struct ctlseqs_reader *reader = malloc(sizeof(struct ctlseqs_reader));
if (CTLSEQS_LIKELY(reader != NULL)) {
*reader = (struct ctlseqs_reader) {
.pollfd.events = POLLIN,
};
*reader = (struct ctlseqs_reader) { .pollfd.events = POLLIN };
}
return reader;
}
@ -520,7 +557,7 @@ ctlseqs_read(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matche
ssize_t result;
// Whether we have read more than we could match in the previous call.
if (reader->state == ctlseqs_state_none && reader->buf_start != 0) {
result = ctlseqs_match(reader, matcher);
result = ctlseqs_reader_match(reader, matcher);
if (result != CTLSEQS_PARTIAL) {
return result;
}
@ -535,7 +572,7 @@ ctlseqs_read(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matche
if (CTLSEQS_UNLIKELY(result < 0)) {
return reader->state == ctlseqs_state_none ? result : CTLSEQS_PARTIAL;
}
return ctlseqs_match(reader, matcher);
return ctlseqs_reader_match(reader, matcher);
}
void

View File

@ -406,6 +406,9 @@ ctlseqs_matcher_config(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_o
void
ctlseqs_matcher_free(struct ctlseqs_matcher *matcher);
/*
 * Match the first `seq_len` bytes at `seq` against `matcher`, writing output into `result`.
 * result[0].num is set to the index at which scanning stopped and result[1].str to `seq`;
 * values extracted by a matched pattern follow from result[2].
 * NOTE(review): the required capacity of `result` is not visible from here - confirm against the matcher.
 */
ssize_t
ctlseqs_match(struct ctlseqs_matcher const *matcher, char const *seq, size_t seq_len, union ctlseqs_value *result);
struct ctlseqs_reader *
ctlseqs_reader_init();