htex

simple incorrect html parser
git clone git://git.relim.de/htex.git
Log | Files | Refs | README

commit a06875046fabb87acddbfb1400a904528bef7a0d
parent 27e1b9ab0d7cab9372e897a59f6c81aa3b7b2e6b
Author: Robin <kroekerrobin@gmail.com>
Date:   Wed,  9 Aug 2023 08:03:35 +0200

Print all matching tags

Diffstat:
M html.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
1 file changed, 54 insertions(+), 31 deletions(-)

diff --git a/html.c b/html.c @@ -522,7 +522,7 @@ void printHtml(struct tag *t) } } -struct tag *findTag(struct tag *tag, struct tag_list *list, struct filter_opts *opt) +void findTag(struct tag *tag, struct filter_opts *opt, struct tag_list *foundTags) { bool matchesTag = false; bool matchesAttrKey = false; @@ -539,40 +539,77 @@ struct tag *findTag(struct tag *tag, struct tag_list *list, struct filter_opts * if (strlen(opt->tag) > 0 && strlen(opt->key) > 0 && strlen(opt->attr) > 0) { if (matchesTag && matchesAttrKey && matchesAttrValue) - return tag; + { + foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag)); + foundTags->tags[foundTags->len] = tag; + foundTags->len++; + } } else if (strlen(opt->tag) > 0 && strlen(opt->key) > 0) { if (matchesTag && matchesAttrKey) - return tag; + { + foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag)); + foundTags->tags[foundTags->len] = tag; + foundTags->len++; + } } else if (strlen(opt->tag) > 0) { if (matchesTag) - return tag; + { + foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag)); + foundTags->tags[foundTags->len] = tag; + foundTags->len++; + } } else if (strlen(opt->key) > 0 && strlen(opt->attr) > 0) { if (matchesAttrKey && matchesAttrValue) - return tag; + { + foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag)); + foundTags->tags[foundTags->len] = tag; + foundTags->len++; + } } else if (strlen(opt->key) > 0) { if (matchesAttrKey) - return tag; + { + foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag)); + foundTags->tags[foundTags->len] = tag; + foundTags->len++; + } } else if (strlen(opt->attr) > 0) { if (matchesAttrValue) - return tag; + { + foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag)); + foundTags->tags[foundTags->len] = tag; + foundTags->len++; + } } for (int i=tag->childrenLen-1; i>-1; i--) { - struct tag *foundTag = 
findTag(tag->children[i], list, opt); - if (foundTag != NULL) - return foundTag; + findTag(tag->children[i], opt, foundTags); + } +} + +void printResult(struct tag_list *foundTags, struct filter_opts *opts) +{ + char *trimmedOutput; + for (int i=0; i<foundTags->len; i++) + { + if (foundTags->tags[i]->_isVoidElement) + opts->out = OUT_OUTER_HTML; + if (opts->out == OUT_OUTER_HTML) + trimmedOutput = trim(foundTags->tags[i]->outerHtml); + else if (opts->out == OUT_INNER_HTML) + trimmedOutput = trim(foundTags->tags[i]->innerHtml); + printf("%s\n", trimmedOutput); + free(trimmedOutput); } - return NULL; } void filterHtml(char *text, struct filter_opts *opts) @@ -586,26 +623,12 @@ void filterHtml(char *text, struct filter_opts *opts) rootTag = parseTag(text+len, 0, STATE_INNER_TEXT, tagList); else rootTag = parseTag(text, 0, STATE_INNER_TEXT, tagList); - struct tag *result = findTag(rootTag, tagList, opts); - if (result == NULL) - printError("No tag found."); - else - { - char *trimmedOutput; - if (result->_isVoidElement) - opts->out = OUT_OUTER_HTML; - switch (opts->out) - { - case OUT_INNER_HTML: - trimmedOutput = trim(result->innerHtml); - break; - case OUT_OUTER_HTML: - trimmedOutput = trim(result->outerHtml); - break; - } - printf("%s\n", trimmedOutput); - free(trimmedOutput); - } + struct tag_list *foundTags = malloc(sizeof(struct tag_list)); + foundTags->tags = NULL; + foundTags->len = 0; + findTag(rootTag, opts, foundTags); + printResult(foundTags, opts); freeTag(rootTag); freeTagList(tagList); + freeTagList(foundTags); }