htex

simple incorrect html parser
git clone git://git.relim.de/htex.git
Log | Files | Refs | README

commit 275940c7345d05564943a2bb14dfe0ccc306a95e
parent 0e33554f458efcf0b45d3985db29a333c2c35385
Author: Robin <kroekerrobin@gmail.com>
Date:   Tue,  8 Aug 2023 21:43:24 +0200

Support finding by just an attr value

Diffstat:
M html.c | 19 ++++++++++---------
M todo   |  2 +-
2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/html.c b/html.c
@@ -518,22 +518,19 @@ struct tag *findTag(struct tag *tag, struct tag_list *list, struct filter_opts *
 {
 	bool matchesTag = false;
 	bool matchesAttrKey = false;
-	bool matchesAttrKeyAndValue = false;
+	bool matchesAttrValue = false;
 	if (strcmp(tag->name, opt->tag) == 0)
 		matchesTag = true;
 	for (int i=0; i<tag->attrsLen; i++)
 	{
 		if (strcmp(tag->attrs[i]->name, opt->key) == 0)
 			matchesAttrKey = true;
-		if (matchesAttrKey)
-		{
-			if (strcmp(tag->attrs[i]->value, opt->attr) == 0)
-				matchesAttrKeyAndValue = true;
-		}
+		if (strcmp(tag->attrs[i]->value, opt->attr) == 0)
+			matchesAttrValue = true;
 	}
 	if (strlen(opt->tag) > 0 && strlen(opt->key) > 0 && strlen(opt->attr) > 0)
 	{
-		if (matchesTag && matchesAttrKeyAndValue)
+		if (matchesTag && matchesAttrKey && matchesAttrValue)
 			return tag;
 	}
 	else if (strlen(opt->tag) > 0 && strlen(opt->key) > 0)
@@ -548,7 +545,7 @@ struct tag *findTag(struct tag *tag, struct tag_list *list, struct filter_opts *
 	}
 	else if (strlen(opt->key) > 0 && strlen(opt->attr) > 0)
 	{
-		if (matchesAttrKeyAndValue)
+		if (matchesAttrKey && matchesAttrValue)
 			return tag;
 	}
 	else if (strlen(opt->key) > 0)
@@ -556,6 +553,11 @@ struct tag *findTag(struct tag *tag, struct tag_list *list, struct filter_opts *
 		if (matchesAttrKey)
 			return tag;
 	}
+	else if (strlen(opt->attr) > 0)
+	{
+		if (matchesAttrValue)
+			return tag;
+	}
 	for (int i=0; i<tag->childrenLen; i++)
 	{
 		struct tag *foundTag = findTag(tag->children[i], list, opt);
@@ -582,7 +584,6 @@ void filterHtml(char *text, struct filter_opts *opts)
 	else
 	{
 		printf("result: %s\n", result->name);
-		printf("%ld %ld\n", result->_outerHtmlBeginOffset, result->_outerHtmlEndOffset);
 		if (!result->_isVoidElement)
 		{
 			printf("innerHtml: %s\n", result->innerHtml);
diff --git a/todo b/todo
@@ -1,2 +1,2 @@
 strip beginning and ending whitespace of inner and outer html
-find element by attr value (something's still wrong)
+parse html comments right