From 5bf70d7b770e647422a9db8bce4de5bbb39f278e Mon Sep 17 00:00:00 2001 From: CismonX Date: Thu, 24 Dec 2020 14:39:14 +0800 Subject: [PATCH] Add `ctlseqs_match()`. --- src/ctlseqs.c | 133 ++++++++++++++++++++++++++++++++------------------ src/ctlseqs.h | 3 ++ 2 files changed, 88 insertions(+), 48 deletions(-) diff --git a/src/ctlseqs.c b/src/ctlseqs.c index f980f00..069ec6d 100644 --- a/src/ctlseqs.c +++ b/src/ctlseqs.c @@ -69,25 +69,24 @@ #define CTLSEQS_VALUE_STR(stop_cond) \ for (cnt = 0; ; ++cnt) { \ - num = seq_val[cnt]; \ + num = seq[cnt]; \ if (stop_cond) { \ break; \ } \ } \ buf_val[0].num = cnt; \ - buf_val[1].str = seq_val; \ - *seq = seq_val + cnt; \ + buf_val[1].str = seq; \ *buf += 2; \ - return true + return seq + cnt #define CTLSEQS_VALUE_NUM(base) \ errno = 0; \ - num = strtoul(seq_val, seq, base); \ - if (errno || seq_val == *seq) { \ - return false; \ + num = strtoul(seq, &endptr, base); \ + if (errno || seq == endptr) { \ + return NULL; \ } \ buf_val[0].num = num; \ ++*buf; \ - return true + return endptr enum ctlseqs_placeholder { ctlseqs_ph_begin = 0x0e, @@ -122,10 +121,20 @@ struct ctlseqs_trie_node { struct ctlseqs_trie_node *children[128]; }; +struct ctlseqs_match_args { + char const *seq; + size_t seq_len; + size_t offset; + union ctlseqs_value *result; + size_t result_idx; + enum ctlseqs_state state; + bool save_seq; +}; + struct ctlseqs_match_ctx { ssize_t value; struct ctlseqs_trie_node const *node; - char *rbuf; + char const *seq; union ctlseqs_value *result; }; @@ -238,11 +247,11 @@ ctlseqs_state_transition(enum ctlseqs_state state, char ch) } } -CTLSEQS_HOT static bool -ctlseqs_fetch(char **seq, int type, union ctlseqs_value **buf) +CTLSEQS_HOT static char const * +ctlseqs_fetch_value(char const *seq, int type, union ctlseqs_value **buf) { unsigned long cnt, num; - char *seq_val = *seq; + char *endptr = NULL; union ctlseqs_value *buf_val = *buf; switch (type) { case ctlseqs_ph_num: @@ -250,19 +259,19 @@ ctlseqs_fetch(char **seq, int type, union ctlseqs_value **buf) case ctlseqs_ph_nums: for (cnt = 1; ; ++cnt) { errno = 0; - num = strtoul(seq_val, seq, 10); - if (errno || seq_val == *seq) { - return false; + num = strtoul(seq, &endptr, 10); + if (errno || seq == endptr) { + return NULL; } buf_val[cnt].num = num; - if ((*seq)[0] != ';') { + if (endptr[0] != ';') { break; } - seq_val = *seq + 1; + seq = endptr + 1; } buf_val[0].num = cnt; *buf += cnt + 1; - return true; + return endptr; case ctlseqs_ph_str: CTLSEQS_VALUE_STR(num < ' ' || num > '~'); case ctlseqs_ph_cmdstr: @@ -277,25 +286,25 @@ ctlseqs_fetch(char **seq, int type, union ctlseqs_value **buf) CTLSEQS_VALUE_STR(num > 0x7f); default: CTLSEQS_UNREACHABLE(); - return false; + return NULL; } } CTLSEQS_HOT static ssize_t -ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher) +ctlseqs_match_pattern(struct ctlseqs_matcher const *matcher, struct ctlseqs_match_args const *args) { struct ctlseqs_trie_node const *old_node, empty_node = { 0 }; struct ctlseqs_match_ctx match_ctx = { .node = matcher == NULL ? &empty_node : &matcher->root, - .rbuf = reader->rbuf + reader->buf_start + 1, - .result = reader->buffer + (reader->save_matched ? 2 : 0), + .seq = args->seq + 1, + .result = args->result + (args->save_seq ? 2 : 0), }; ssize_t match_stack_top = -1; while (true) { match_ctx.value = match_ctx.node->value; if (match_ctx.value == -1) { match_character: - match_ctx.node = match_ctx.node->children[(unsigned)match_ctx.rbuf++[0]]; + match_ctx.node = match_ctx.node->children[(unsigned)match_ctx.seq++[0]]; if (match_ctx.node == NULL) { break; } @@ -307,10 +316,11 @@ ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher cons matcher->match_stack[++match_stack_top] = (struct ctlseqs_match_ctx) { .value = next_node == NULL ? 0 : -next_node->placeholder, .node = old_node, - .rbuf = match_ctx.rbuf, + .seq = match_ctx.seq, .result = match_ctx.result, }; - if (!ctlseqs_fetch(&match_ctx.rbuf, -match_ctx.value, &match_ctx.result)) { + match_ctx.seq = ctlseqs_fetch_value(match_ctx.seq, -match_ctx.value, &match_ctx.result); + if (match_ctx.seq == NULL) { break; } } else { @@ -329,19 +339,19 @@ ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher cons return CTLSEQS_NOMATCH; } -CTLSEQS_HOT static ssize_t -ctlseqs_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher) +CTLSEQS_HOT static inline ssize_t +ctlseqs_do_match(struct ctlseqs_matcher const *matcher, struct ctlseqs_match_args *args) { ssize_t retval = CTLSEQS_PARTIAL; - char const *buf = reader->rbuf + reader->buf_start; - size_t idx, len = reader->buf_end - reader->buf_start; - enum ctlseqs_state state = reader->state; - for (idx = reader->last_idx; idx < len; ++idx) { - state = ctlseqs_state_transition(state, buf[idx]); + char const *seq = args->seq; + size_t idx, len = args->seq_len; + enum ctlseqs_state state = args->state; + for (idx = args->offset; idx < len; ++idx) { + state = ctlseqs_state_transition(state, seq[idx]); if (state == ctlseqs_state_err) { // Anything before next ESC is definitely not a control sequence. for (; idx < len; ++idx) { - if (buf[idx] == 0x1b) { + if (seq[idx] == 0x1b) { break; } } @@ -349,25 +359,42 @@ ctlseqs_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *match break; } if (state == ctlseqs_state_done) { - retval = ctlseqs_match_pattern(reader, matcher); + retval = ctlseqs_match_pattern(matcher, args); ++idx; break; } } - if (retval < 0 || reader->save_matched) { - reader->buffer[0].num = idx; - reader->buffer[1].str = buf; + if (retval < 0 || args->save_seq) { + args->result[0].num = idx; + args->result[1].str = seq; } + args->result_idx = idx; + args->state = state; + return retval; +} + +CTLSEQS_HOT static ssize_t +ctlseqs_reader_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher) +{ + struct ctlseqs_match_args args = { + .seq = reader->rbuf + reader->buf_start, + .seq_len = reader->buf_end - reader->buf_start, + .offset = reader->last_idx, + .result = reader->buffer, + .state = reader->state, + .save_seq = reader->save_matched, + }; + ssize_t retval = ctlseqs_do_match(matcher, &args); if (retval == CTLSEQS_PARTIAL) { - reader->last_idx = idx; - if (CTLSEQS_UNLIKELY(reader->buf_start + idx == reader->readlen)) { + reader->last_idx = args.result_idx; + if (CTLSEQS_UNLIKELY(reader->buf_start + args.result_idx == reader->readlen)) { // Buffer is full but a match is still pending. // This may happen when the reader's maxlen option is not large enough to hold a sequence, // or when the the sequences are produced faster than consumed. if (reader->buf_start > reader->readlen / 2) { - memcpy(reader->rbuf, reader->rbuf + reader->buf_start, idx); + memcpy(reader->rbuf, reader->rbuf + reader->buf_start, args.result_idx); reader->buf_start = 0; - reader->buf_end = idx; + reader->buf_end = args.result_idx; } else { // We could memmove() here, but having a buffer no larger than twice the size of a sequence // is hardly what a normal program would desire. @@ -375,14 +402,14 @@ ctlseqs_match(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *match } } } else { - reader->buf_start += idx; + reader->buf_start += args.result_idx; reader->last_idx = 0; if (reader->buf_start == reader->buf_end) { reader->buf_start = 0; reader->buf_end = 0; } } - reader->state = state >= ctlseqs_state_done ? ctlseqs_state_none : state; + reader->state = args.state >= ctlseqs_state_done ? ctlseqs_state_none : args.state; return retval; } @@ -464,6 +491,18 @@ ctlseqs_matcher_config(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_o return CTLSEQS_OK; } +CTLSEQS_HOT ssize_t +ctlseqs_match(struct ctlseqs_matcher const *matcher, char const *seq, size_t seq_len, union ctlseqs_value *result) +{ + struct ctlseqs_match_args args = { + .seq = seq, + .seq_len = seq_len, + .result = result, + .save_seq = true, + }; + return ctlseqs_do_match(matcher, &args); +} + CTLSEQS_COLD void ctlseqs_matcher_free(struct ctlseqs_matcher *matcher) { @@ -481,9 +520,7 @@ ctlseqs_reader_init() { struct ctlseqs_reader *reader = malloc(sizeof(struct ctlseqs_reader)); if (CTLSEQS_LIKELY(reader != NULL)) { - *reader = (struct ctlseqs_reader) { - .pollfd.events = POLLIN, - }; + *reader = (struct ctlseqs_reader) { .pollfd.events = POLLIN }; } return reader; } @@ -520,7 +557,7 @@ ctlseqs_read(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matche ssize_t result; // Whether we have read more than we could match in the preview call. if (reader->state == ctlseqs_state_none && reader->buf_start != 0) { - result = ctlseqs_match(reader, matcher); + result = ctlseqs_reader_match(reader, matcher); if (result != CTLSEQS_PARTIAL) { return result; } @@ -535,7 +572,7 @@ ctlseqs_read(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matche if (CTLSEQS_UNLIKELY(result < 0)) { return reader->state == ctlseqs_state_none ? result : CTLSEQS_PARTIAL; } - return ctlseqs_match(reader, matcher); + return ctlseqs_reader_match(reader, matcher); } void diff --git a/src/ctlseqs.h b/src/ctlseqs.h index 2507a17..92ffc98 100644 --- a/src/ctlseqs.h +++ b/src/ctlseqs.h @@ -406,6 +406,9 @@ ctlseqs_matcher_config(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_o void ctlseqs_matcher_free(struct ctlseqs_matcher *matcher); +ssize_t +ctlseqs_match(struct ctlseqs_matcher const *matcher, char const *seq, size_t seq_len, union ctlseqs_value *result); + struct ctlseqs_reader * ctlseqs_reader_init();