Fix bugs.

This commit is contained in:
CismonX 2020-12-16 18:35:24 +08:00
parent f646b2d9de
commit 7bee35ba94
Signed by: cismonx
GPG Key ID: 3094873E29A482FB
3 changed files with 34 additions and 27 deletions

View File

@ -55,6 +55,7 @@ C1 (8-bit) control characters are not supported.
.BR ctlseqs_reader_free (3) .BR ctlseqs_reader_free (3)
.PP .PP
.BR ctlseqs_read (3) .BR ctlseqs_read (3)
.BR ctlseqs_purge (3)
. .
.SH COPYRIGHT .SH COPYRIGHT
Copyright (c) 2020 CismonX <admin@cismon.net> Copyright (c) 2020 CismonX <admin@cismon.net>

View File

@ -113,15 +113,16 @@ enum ctlseqs_state {
struct ctlseqs_trie_node { struct ctlseqs_trie_node {
ssize_t value; ssize_t value;
int placeholder; ssize_t placeholder;
struct ctlseqs_trie_node *next; struct ctlseqs_trie_node *next;
struct ctlseqs_trie_node *children[128]; struct ctlseqs_trie_node *children[128];
}; };
struct ctlseqs_match_ctx { struct ctlseqs_match_ctx {
ssize_t value; ssize_t value;
size_t result_idx;
struct ctlseqs_trie_node const *node; struct ctlseqs_trie_node const *node;
char *buf; char *rbuf;
}; };
struct ctlseqs_matcher { struct ctlseqs_matcher {
@ -277,36 +278,32 @@ ctlseqs_fetch(char **seq, int type, union ctlseqs_value *buf, size_t *buf_offset
CTLSEQS_HOT static ssize_t CTLSEQS_HOT static ssize_t
ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher) ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher const *matcher)
{ {
static struct ctlseqs_trie_node const *old_node, empty_node = { 0 }; struct ctlseqs_trie_node const *old_node, empty_node = { 0 };
struct ctlseqs_match_ctx match_ctx = { struct ctlseqs_match_ctx match_ctx = {
.node = matcher == NULL ? &empty_node : &matcher->root, .node = matcher == NULL ? &empty_node : &matcher->root,
.buf = reader->rbuf + reader->buf_start + 1, .rbuf = reader->rbuf + reader->buf_start + 1,
}; };
ssize_t match_stack_top = -1; ssize_t match_stack_top = -1;
size_t buffer_offset = 0;
while (true) { while (true) {
match_ctx.value = match_ctx.node->value; match_ctx.value = match_ctx.node->value;
if (match_ctx.value == -1) { if (match_ctx.value == -1) {
// Match character as-is. match_character:
match_ctx.node = match_ctx.node->children[(unsigned)match_ctx.buf++[0]]; match_ctx.node = match_ctx.node->children[(unsigned)match_ctx.rbuf++[0]];
if (match_ctx.node == NULL) { if (match_ctx.node == NULL) {
break; break;
} }
} else if (match_ctx.value < -1) { } else if (match_ctx.value < -1) {
// Match placeholder. match_placeholder:
start_match:
old_node = match_ctx.node; old_node = match_ctx.node;
match_ctx.node = match_ctx.node->children[-match_ctx.value]; match_ctx.node = match_ctx.node->children[-match_ctx.value];
// If other placeholders exist on the same level, save them struct ctlseqs_trie_node *next_node = match_ctx.node->next;
// to the stack for future retry. matcher->match_stack[++match_stack_top] = (struct ctlseqs_match_ctx) {
if (match_ctx.node->next != NULL) { .value = next_node == NULL ? 0 : -next_node->placeholder,
matcher->match_stack[++match_stack_top] = (struct ctlseqs_match_ctx) { .node = old_node,
.value = match_ctx.node->next->placeholder, .rbuf = match_ctx.rbuf,
.node = old_node, .result_idx = match_ctx.result_idx,
.buf = match_ctx.buf, };
}; if (!ctlseqs_fetch(&match_ctx.rbuf, -match_ctx.value, reader->buffer, &match_ctx.result_idx)) {
}
if (!ctlseqs_fetch(&match_ctx.buf, -match_ctx.value, reader->buffer, &buffer_offset)) {
break; break;
} }
} else { } else {
@ -316,7 +313,11 @@ ctlseqs_match_pattern(struct ctlseqs_reader *reader, struct ctlseqs_matcher cons
} }
if (match_stack_top >= 0) { if (match_stack_top >= 0) {
match_ctx = matcher->match_stack[match_stack_top--]; match_ctx = matcher->match_stack[match_stack_top--];
goto start_match; if (match_ctx.value == 0) {
goto match_character;
} else {
goto match_placeholder;
}
} }
return CTLSEQS_NOMATCH; return CTLSEQS_NOMATCH;
} }
@ -403,6 +404,7 @@ ctlseqs_matcher_config(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_o
for (size_t i = 0; i < options->npatterns; ++i) { for (size_t i = 0; i < options->npatterns; ++i) {
char const *pattern = options->patterns[i]; char const *pattern = options->patterns[i];
struct ctlseqs_trie_node *node = &matcher->root; struct ctlseqs_trie_node *node = &matcher->root;
// We assume that pattern[0] is always ESC.
for (size_t j = 1; ; ++j) { for (size_t j = 1; ; ++j) {
int ch = pattern[j]; int ch = pattern[j];
if (ch == '\0') { if (ch == '\0') {
@ -426,15 +428,17 @@ ctlseqs_matcher_config(struct ctlseqs_matcher *matcher, struct ctlseqs_matcher_o
matcher->node_pool_size *= 2; matcher->node_pool_size *= 2;
} }
old_node->children[ch] = node = &matcher->node_pool[node_pool_idx]; old_node->children[ch] = node = &matcher->node_pool[node_pool_idx];
*node = (struct ctlseqs_trie_node) { .value = -1 }; *node = (struct ctlseqs_trie_node) {
if (ch < ctlseqs_ph_begin || ch >= ctlseqs_ph_end) { .value = -1, // Value -1 indicates that there's no match on current node.
.placeholder = ch < ctlseqs_ph_begin || ch >= ctlseqs_ph_end ? 0 : ch,
};
if (node->placeholder == 0) {
continue; continue;
} }
// Node with multiple placeholders contains negated offset of child
// which is the head of the linked list.
if (old_node->value < -1) { if (old_node->value < -1) {
// Node with multiple placeholders contains negated offset of the child node
// which is the head of the linked list.
node->next = old_node->children[-old_node->value]; node->next = old_node->children[-old_node->value];
node->next->value = old_node->value;
} }
old_node->value = -ch; old_node->value = -ch;
} }

View File

@ -86,7 +86,9 @@ print_generic_seq(char const *header, union ctlseqs_value *buffer)
printf("%s %zu", header, length); printf("%s %zu", header, length);
for (size_t idx = 0; idx < length; ++idx) { for (size_t idx = 0; idx < length; ++idx) {
char ch = seq[idx]; char ch = seq[idx];
if (isprint(ch)) { if (ch == ' ') {
printf(" SP");
} if (isprint(ch)) {
printf(" %c", ch); printf(" %c", ch);
} else if (ch == 0x1b) { } else if (ch == 0x1b) {
printf(" ESC"); printf(" ESC");
@ -102,7 +104,7 @@ print_matching_seq(struct tcsgrep_sequence *seq, union ctlseqs_value *buffer)
{ {
printf("OK %s", seq->name); printf("OK %s", seq->name);
for (int idx = 0; idx < 8; ++idx) { for (int idx = 0; idx < 8; ++idx) {
char placeholder = seq->pattern[idx]; char placeholder = seq->args[idx];
switch (placeholder) { switch (placeholder) {
case 0x0e: // CTLSEQS_PH_NUM case 0x0e: // CTLSEQS_PH_NUM
printf(" %lu", buffer[idx].num); printf(" %lu", buffer[idx].num);