Patch summary (review notes; text before the first "diff --git" line is
skipped by git-apply and patch):
  * man/ctlseqs.7: list ctlseqs_purge(3) after ctlseqs_read(3).
  * src/ctlseqs.c: widen the trie node's placeholder field to ssize_t, carry
    result_idx and rbuf in the match context, and always push a retry entry
    before ctlseqs_fetch() in ctlseqs_match_pattern(); a popped entry with
    value 0 resumes at match_character, any other value at match_placeholder.
  * tests/tcsgrep.c: print " SP" for a space and read placeholders from
    seq->args instead of seq->pattern.
Review fix: the added space check in print_generic_seq() now chains with
"else if", so a space is printed once (as " SP") instead of twice, and the
isprint() argument is cast to unsigned char.  Because of this the resulting
tests/tcsgrep.c no longer matches the post-image blob id on its index line.
Indentation was reconstructed; use --ignore-whitespace if context lines do
not match.

diff --git a/man/ctlseqs.7 b/man/ctlseqs.7
index 94eb90b..55077f5 100644
--- a/man/ctlseqs.7
+++ b/man/ctlseqs.7
@@ -55,6 +55,7 @@ C1 (8-bit) control characters are not supported.
 .BR ctlseqs_reader_free (3)
 .PP
 .BR ctlseqs_read (3)
+.BR ctlseqs_purge (3)
 .
 .SH COPYRIGHT
 Copyright (c) 2020 CismonX
diff --git a/src/ctlseqs.c b/src/ctlseqs.c
index d01c33e..3840903 100644
--- a/src/ctlseqs.c
+++ b/src/ctlseqs.c
@@ -113,15 +113,16 @@ enum ctlseqs_state {
 
 struct ctlseqs_trie_node {
     ssize_t value;
-    int placeholder;
+    ssize_t placeholder;
     struct ctlseqs_trie_node *next;
     struct ctlseqs_trie_node *children[128];
 };
 
 struct ctlseqs_match_ctx {
     ssize_t value;
+    size_t result_idx;
     struct ctlseqs_trie_node const *node;
-    char *buf;
+    char *rbuf;
 };
 
 struct ctlseqs_matcher {
@@ -277,36 +278,32 @@ ctlseqs_fetch(char **seq, int type, union ctlseqs_value *buf, size_t *buf_offset
 CTLSEQS_HOT static ssize_t
 ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher)
 {
-    static struct ctlseqs_trie_node const *old_node, empty_node = { 0 };
+    struct ctlseqs_trie_node const *old_node, empty_node = { 0 };
     struct ctlseqs_match_ctx match_ctx = {
         .node = matcher == NULL ? &empty_node : &matcher->root,
-        .buf = reader->rbuf + reader->buf_start + 1,
+        .rbuf = reader->rbuf + reader->buf_start + 1,
     };
     ssize_t match_stack_top = -1;
-    size_t buffer_offset = 0;
     while (true) {
         match_ctx.value = match_ctx.node->value;
         if (match_ctx.value == -1) {
-            // Match character as-is.
-            match_ctx.node = match_ctx.node->children[(unsigned)match_ctx.buf++[0]];
+          match_character:
+            match_ctx.node = match_ctx.node->children[(unsigned)match_ctx.rbuf++[0]];
             if (match_ctx.node == NULL) {
                 break;
             }
         } else if (match_ctx.value < -1) {
-            // Match placeholder.
-          start_match:
+          match_placeholder:
             old_node = match_ctx.node;
             match_ctx.node = match_ctx.node->children[-match_ctx.value];
-            // If other placeholders exist on the same level, save them
-            // to the stack for future retry.
-            if (match_ctx.node->next != NULL) {
-                matcher->match_stack[++match_stack_top] = (struct ctlseqs_match_ctx) {
-                    .value = match_ctx.node->next->placeholder,
-                    .node = old_node,
-                    .buf = match_ctx.buf,
-                };
-            }
-            if (!ctlseqs_fetch(&match_ctx.buf, -match_ctx.value, reader->buffer, &buffer_offset)) {
+            struct ctlseqs_trie_node *next_node = match_ctx.node->next;
+            matcher->match_stack[++match_stack_top] = (struct ctlseqs_match_ctx) {
+                .value = next_node == NULL ? 0 : -next_node->placeholder,
+                .node = old_node,
+                .rbuf = match_ctx.rbuf,
+                .result_idx = match_ctx.result_idx,
+            };
+            if (!ctlseqs_fetch(&match_ctx.rbuf, -match_ctx.value, reader->buffer, &match_ctx.result_idx)) {
                 break;
             }
         } else {
@@ -316,7 +313,11 @@ ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher cons
     }
     if (match_stack_top >= 0) {
         match_ctx = matcher->match_stack[match_stack_top--];
-        goto start_match;
+        if (match_ctx.value == 0) {
+            goto match_character;
+        } else {
+            goto match_placeholder;
+        }
     }
     return CTLSEQS_NOMATCH;
 }
@@ -403,6 +404,7 @@ ctlseqs_matcher_config(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_o
     for (size_t i = 0; i < options->npatterns; ++i) {
         char const *pattern = options->patterns[i];
         struct ctlseqs_trie_node *node = &matcher->root;
+        // We assume that pattern[0] is always ESC.
         for (size_t j = 1; ; ++j) {
             int ch = pattern[j];
             if (ch == '\0') {
@@ -426,15 +428,17 @@ ctlseqs_matcher_config(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_o
                 matcher->node_pool_size *= 2;
             }
             old_node->children[ch] = node = &matcher->node_pool[node_pool_idx];
-            *node = (struct ctlseqs_trie_node) { .value = -1 };
-            if (ch < ctlseqs_ph_begin || ch >= ctlseqs_ph_end) {
+            *node = (struct ctlseqs_trie_node) {
+                .value = -1, // Value -1 indicates that there's no match on current node.
+                .placeholder = ch < ctlseqs_ph_begin || ch >= ctlseqs_ph_end ? 0 : ch,
+            };
+            if (node->placeholder == 0) {
                 continue;
             }
-            // Node with multiple placeholders contains negated offset of child
-            // which is the head of the linked list.
             if (old_node->value < -1) {
+                // Node with multiple placeholders contains negated offset of the child node
+                // which is the head of the linked list.
                 node->next = old_node->children[-old_node->value];
-                node->next->value = old_node->value;
             }
             old_node->value = -ch;
         }
diff --git a/tests/tcsgrep.c b/tests/tcsgrep.c
index 21a7f83..fa71d58 100644
--- a/tests/tcsgrep.c
+++ b/tests/tcsgrep.c
@@ -86,7 +86,9 @@ print_generic_seq(char const *header, union ctlseqs_value *buffer)
     printf("%s %zu", header, length);
     for (size_t idx = 0; idx < length; ++idx) {
         char ch = seq[idx];
-        if (isprint(ch)) {
+        if (ch == ' ') {
+            printf(" SP");
+        } else if (isprint((unsigned char)ch)) {
             printf(" %c", ch);
         } else if (ch == 0x1b) {
             printf(" ESC");
@@ -102,7 +104,7 @@ print_matching_seq(struct tcsgrep_sequence *seq, union ctlseqs_value *buffer)
 {
     printf("OK %s", seq->name);
     for (int idx = 0; idx < 8; ++idx) {
-        char placeholder = seq->pattern[idx];
+        char placeholder = seq->args[idx];
         switch (placeholder) {
         case 0x0e: // CTLSEQS_PH_NUM
             printf(" %lu", buffer[idx].num);