Add `ctlseqs_match()`.

This commit is contained in:
CismonX 2020-12-24 14:39:14 +08:00
parent b19ef960a0
commit 5bf70d7b77
Signed by: cismonx
GPG Key ID: 3094873E29A482FB
2 changed files with 88 additions and 48 deletions

View File

@ -69,25 +69,24 @@
#define CTLSEQS_VALUE_STR(stop_cond) \ #define CTLSEQS_VALUE_STR(stop_cond) \
for (cnt = 0; ; ++cnt) { \ for (cnt = 0; ; ++cnt) { \
num = seq_val[cnt]; \ num = seq[cnt]; \
if (stop_cond) { \ if (stop_cond) { \
break; \ break; \
} \ } \
} \ } \
buf_val[0].num = cnt; \ buf_val[0].num = cnt; \
buf_val[1].str = seq_val; \ buf_val[1].str = seq; \
*seq = seq_val + cnt; \
*buf += 2; \ *buf += 2; \
return true return seq + cnt
#define CTLSEQS_VALUE_NUM(base) \ #define CTLSEQS_VALUE_NUM(base) \
errno = 0; \ errno = 0; \
num = strtoul(seq_val, seq, base); \ num = strtoul(seq, &endptr, base); \
if (errno || seq_val == *seq) { \ if (errno || seq == endptr) { \
return false; \ return NULL; \
} \ } \
buf_val[0].num = num; \ buf_val[0].num = num; \
++*buf; \ ++*buf; \
return true return endptr
enum ctlseqs_placeholder { enum ctlseqs_placeholder {
ctlseqs_ph_begin = 0x0e, ctlseqs_ph_begin = 0x0e,
@ -122,10 +121,20 @@ struct ctlseqs_trie_node {
struct ctlseqs_trie_node *children[128]; struct ctlseqs_trie_node *children[128];
}; };
struct ctlseqs_match_args {
char const *seq;
size_t seq_len;
size_t offset;
union ctlseqs_value *result;
size_t result_idx;
enum ctlseqs_state state;
bool save_seq;
};
struct ctlseqs_match_ctx { struct ctlseqs_match_ctx {
ssize_t value; ssize_t value;
struct ctlseqs_trie_node const *node; struct ctlseqs_trie_node const *node;
char *rbuf; char const *seq;
union ctlseqs_value *result; union ctlseqs_value *result;
}; };
@ -238,11 +247,11 @@ ctlseqs_state_transition(enum ctlseqs_state state, char ch)
} }
} }
CTLSEQS_HOT static bool CTLSEQS_HOT static char const *
ctlseqs_fetch(char **seq, int type, union ctlseqs_value **buf) ctlseqs_fetch_value(char const *seq, int type, union ctlseqs_value **buf)
{ {
unsigned long cnt, num; unsigned long cnt, num;
char *seq_val = *seq; char *endptr = NULL;
union ctlseqs_value *buf_val = *buf; union ctlseqs_value *buf_val = *buf;
switch (type) { switch (type) {
case ctlseqs_ph_num: case ctlseqs_ph_num:
@ -250,19 +259,19 @@ ctlseqs_fetch(char **seq, int type, union ctlseqs_value **buf)
case ctlseqs_ph_nums: case ctlseqs_ph_nums:
for (cnt = 1; ; ++cnt) { for (cnt = 1; ; ++cnt) {
errno = 0; errno = 0;
num = strtoul(seq_val, seq, 10); num = strtoul(seq, &endptr, 10);
if (errno || seq_val == *seq) { if (errno || seq == endptr) {
return false; return NULL;
} }
buf_val[cnt].num = num; buf_val[cnt].num = num;
if ((*seq)[0] != ';') { if (endptr[0] != ';') {
break; break;
} }
seq_val = *seq + 1; seq = endptr + 1;
} }
buf_val[0].num = cnt; buf_val[0].num = cnt;
*buf += cnt + 1; *buf += cnt + 1;
return true; return endptr;
case ctlseqs_ph_str: case ctlseqs_ph_str:
CTLSEQS_VALUE_STR(num < ' ' || num > '~'); CTLSEQS_VALUE_STR(num < ' ' || num > '~');
case ctlseqs_ph_cmdstr: case ctlseqs_ph_cmdstr:
@ -277,25 +286,25 @@ ctlseqs_fetch(char **seq, int type, union ctlseqs_value **buf)
CTLSEQS_VALUE_STR(num > 0x7f); CTLSEQS_VALUE_STR(num > 0x7f);
default: default:
CTLSEQS_UNREACHABLE(); CTLSEQS_UNREACHABLE();
return false; return NULL;
} }
} }
CTLSEQS_HOT static ssize_t CTLSEQS_HOT static ssize_t
ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher) ctlseqs_match_pattern(struct ctlseqs_matcher const *matcher, struct ctlseqs_match_args const *args)
{ {
struct ctlseqs_trie_node const *old_node, empty_node = { 0 }; struct ctlseqs_trie_node const *old_node, empty_node = { 0 };
struct ctlseqs_match_ctx match_ctx = { struct ctlseqs_match_ctx match_ctx = {
.node = matcher == NULL ? &empty_node : &matcher->root, .node = matcher == NULL ? &empty_node : &matcher->root,
.rbuf = reader->rbuf + reader->buf_start + 1, .seq = args->seq + 1,
.result = reader->buffer + (reader->save_matched ? 2 : 0), .result = args->result + (args->save_seq ? 2 : 0),
}; };
ssize_t match_stack_top = -1; ssize_t match_stack_top = -1;
while (true) { while (true) {
match_ctx.value = match_ctx.node->value; match_ctx.value = match_ctx.node->value;
if (match_ctx.value == -1) { if (match_ctx.value == -1) {
match_character: match_character:
match_ctx.node = match_ctx.node->children[(unsigned)match_ctx.rbuf++[0]]; match_ctx.node = match_ctx.node->children[(unsigned)match_ctx.seq++[0]];
if (match_ctx.node == NULL) { if (match_ctx.node == NULL) {
break; break;
} }
@ -307,10 +316,11 @@ ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher cons
matcher->match_stack[++match_stack_top] = (struct ctlseqs_match_ctx) { matcher->match_stack[++match_stack_top] = (struct ctlseqs_match_ctx) {
.value = next_node == NULL ? 0 : -next_node->placeholder, .value = next_node == NULL ? 0 : -next_node->placeholder,
.node = old_node, .node = old_node,
.rbuf = match_ctx.rbuf, .seq = match_ctx.seq,
.result = match_ctx.result, .result = match_ctx.result,
}; };
if (!ctlseqs_fetch(&match_ctx.rbuf, -match_ctx.value, &match_ctx.result)) { match_ctx.seq = ctlseqs_fetch_value(match_ctx.seq, -match_ctx.value, &match_ctx.result);
if (match_ctx.seq == NULL) {
break; break;
} }
} else { } else {
@ -329,19 +339,19 @@ ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher cons
return CTLSEQS_NOMATCH; return CTLSEQS_NOMATCH;
} }
CTLSEQS_HOT static ssize_t CTLSEQS_HOT static inline ssize_t
ctlseqs_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher) ctlseqs_do_match(struct ctlseqs_matcher const *matcher, struct ctlseqs_match_args *args)
{ {
ssize_t retval = CTLSEQS_PARTIAL; ssize_t retval = CTLSEQS_PARTIAL;
char const *buf = reader->rbuf + reader->buf_start; char const *seq = args->seq;
size_t idx, len = reader->buf_end - reader->buf_start; size_t idx, len = args->seq_len;
enum ctlseqs_state state = reader->state; enum ctlseqs_state state = args->state;
for (idx = reader->last_idx; idx < len; ++idx) { for (idx = args->offset; idx < len; ++idx) {
state = ctlseqs_state_transition(state, buf[idx]); state = ctlseqs_state_transition(state, seq[idx]);
if (state == ctlseqs_state_err) { if (state == ctlseqs_state_err) {
// Anything before next ESC is definitely not a control sequence. // Anything before next ESC is definitely not a control sequence.
for (; idx < len; ++idx) { for (; idx < len; ++idx) {
if (buf[idx] == 0x1b) { if (seq[idx] == 0x1b) {
break; break;
} }
} }
@ -349,25 +359,42 @@ ctlseqs_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *match
break; break;
} }
if (state == ctlseqs_state_done) { if (state == ctlseqs_state_done) {
retval = ctlseqs_match_pattern(reader, matcher); retval = ctlseqs_match_pattern(matcher, args);
++idx; ++idx;
break; break;
} }
} }
if (retval < 0 || reader->save_matched) { if (retval < 0 || args->save_seq) {
reader->buffer[0].num = idx; args->result[0].num = idx;
reader->buffer[1].str = buf; args->result[1].str = seq;
} }
args->result_idx = idx;
args->state = state;
return retval;
}
CTLSEQS_HOT static ssize_t
ctlseqs_reader_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher)
{
struct ctlseqs_match_args args = {
.seq = reader->rbuf + reader->buf_start,
.seq_len = reader->buf_end - reader->buf_start,
.offset = reader->last_idx,
.result = reader->buffer,
.state = reader->state,
.save_seq = reader->save_matched,
};
ssize_t retval = ctlseqs_do_match(matcher, &args);
if (retval == CTLSEQS_PARTIAL) { if (retval == CTLSEQS_PARTIAL) {
reader->last_idx = idx; reader->last_idx = args.result_idx;
if (CTLSEQS_UNLIKELY(reader->buf_start + idx == reader->readlen)) { if (CTLSEQS_UNLIKELY(reader->buf_start + args.result_idx == reader->readlen)) {
// Buffer is full but a match is still pending. // Buffer is full but a match is still pending.
// This may happen when the reader's maxlen option is not large enough to hold a sequence, // This may happen when the reader's maxlen option is not large enough to hold a sequence,
// or when the sequences are produced faster than consumed. // or when the sequences are produced faster than consumed.
if (reader->buf_start > reader->readlen / 2) { if (reader->buf_start > reader->readlen / 2) {
memcpy(reader->rbuf, reader->rbuf + reader->buf_start, idx); memcpy(reader->rbuf, reader->rbuf + reader->buf_start, args.result_idx);
reader->buf_start = 0; reader->buf_start = 0;
reader->buf_end = idx; reader->buf_end = args.result_idx;
} else { } else {
// We could memmove() here, but having a buffer no larger than twice the size of a sequence // We could memmove() here, but having a buffer no larger than twice the size of a sequence
// is hardly what a normal program would desire. // is hardly what a normal program would desire.
@ -375,14 +402,14 @@ ctlseqs_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *match
} }
} }
} else { } else {
reader->buf_start += idx; reader->buf_start += args.result_idx;
reader->last_idx = 0; reader->last_idx = 0;
if (reader->buf_start == reader->buf_end) { if (reader->buf_start == reader->buf_end) {
reader->buf_start = 0; reader->buf_start = 0;
reader->buf_end = 0; reader->buf_end = 0;
} }
} }
reader->state = state >= ctlseqs_state_done ? ctlseqs_state_none : state; reader->state = args.state >= ctlseqs_state_done ? ctlseqs_state_none : args.state;
return retval; return retval;
} }
@ -464,6 +491,18 @@ ctlseqs_matcher_config(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_o
return CTLSEQS_OK; return CTLSEQS_OK;
} }
CTLSEQS_HOT ssize_t
ctlseqs_match(struct ctlseqs_matcher const *matcher, char const *seq, size_t seq_len, union ctlseqs_value *result)
{
struct ctlseqs_match_args args = {
.seq = seq,
.seq_len = seq_len,
.result = result,
.save_seq = true,
};
return ctlseqs_do_match(matcher, &args);
}
CTLSEQS_COLD void CTLSEQS_COLD void
ctlseqs_matcher_free(struct ctlseqs_matcher *matcher) ctlseqs_matcher_free(struct ctlseqs_matcher *matcher)
{ {
@ -481,9 +520,7 @@ ctlseqs_reader_init()
{ {
struct ctlseqs_reader *reader = malloc(sizeof(struct ctlseqs_reader)); struct ctlseqs_reader *reader = malloc(sizeof(struct ctlseqs_reader));
if (CTLSEQS_LIKELY(reader != NULL)) { if (CTLSEQS_LIKELY(reader != NULL)) {
*reader = (struct ctlseqs_reader) { *reader = (struct ctlseqs_reader) { .pollfd.events = POLLIN };
.pollfd.events = POLLIN,
};
} }
return reader; return reader;
} }
@ -520,7 +557,7 @@ ctlseqs_read(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matche
ssize_t result; ssize_t result;
// Whether we have read more than we could match in the previous call.
if (reader->state == ctlseqs_state_none && reader->buf_start != 0) { if (reader->state == ctlseqs_state_none && reader->buf_start != 0) {
result = ctlseqs_match(reader, matcher); result = ctlseqs_reader_match(reader, matcher);
if (result != CTLSEQS_PARTIAL) { if (result != CTLSEQS_PARTIAL) {
return result; return result;
} }
@ -535,7 +572,7 @@ ctlseqs_read(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matche
if (CTLSEQS_UNLIKELY(result < 0)) { if (CTLSEQS_UNLIKELY(result < 0)) {
return reader->state == ctlseqs_state_none ? result : CTLSEQS_PARTIAL; return reader->state == ctlseqs_state_none ? result : CTLSEQS_PARTIAL;
} }
return ctlseqs_match(reader, matcher); return ctlseqs_reader_match(reader, matcher);
} }
void void

View File

@ -406,6 +406,9 @@ ctlseqs_matcher_config(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_o
void void
ctlseqs_matcher_free(struct ctlseqs_matcher *matcher); ctlseqs_matcher_free(struct ctlseqs_matcher *matcher);
ssize_t
ctlseqs_match(struct ctlseqs_matcher const *matcher, char const *seq, size_t seq_len, union ctlseqs_value *result);
struct ctlseqs_reader * struct ctlseqs_reader *
ctlseqs_reader_init(); ctlseqs_reader_init();