htex

simple incorrect html parser
git clone git://git.relim.de/htex.git
Log | Files | Refs | README

commit 27e1b9ab0d7cab9372e897a59f6c81aa3b7b2e6b
parent d847e830279219425e57ef45bde04eed13f2ad4b
Author: Robin <kroekerrobin@gmail.com>
Date:   Tue,  8 Aug 2023 22:33:15 +0200

Trim text before outputting

Diffstat:
M htex.c |  8 ++------
M html.c | 18 +++++++++++++-----
M html.h | 16 +++++++---------
M lib.c  | 46 ++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 68 insertions(+), 20 deletions(-)

diff --git a/htex.c b/htex.c
@@ -45,8 +45,7 @@ char *readFile(FILE *fp)
 struct filter_opts *parseFilterOpts(const char *pattern)
 {
 	struct filter_opts *opt = malloc(sizeof(struct filter_opts));
-	opt->outerHtml = true;
-	opt->innerHtml = false;
+	opt->out = OUT_OUTER_HTML;
 	opt->tag = malloc(sizeof(char));
 	opt->tag[0] = 0;
 	opt->attr = malloc(sizeof(char));
@@ -224,10 +223,7 @@ int main(int argc, char *argv[])
 	}
 	struct filter_opts *options = parseFilterOpts(searchPattern);
 	if (isInnerHtml)
-	{
-		options->innerHtml = true;
-		options->outerHtml = false;
-	}
+		options->out = OUT_INNER_HTML;
 	filterHtml(text, options);
 	freeOpts(options);
 	free(text);
diff --git a/html.c b/html.c
@@ -566,7 +566,7 @@ struct tag *findTag(struct tag *tag, struct tag_list *list, struct filter_opts *
 		if (matchesAttrValue)
 			return tag;
 	}
-	for (int i=0; i<tag->childrenLen; i++)
+	for (int i=tag->childrenLen-1; i>-1; i--)
 	{
 		struct tag *foundTag = findTag(tag->children[i], list, opt);
 		if (foundTag != NULL)
@@ -591,12 +591,20 @@ void filterHtml(char *text, struct filter_opts *opts)
 		printError("No tag found.");
 	else
 	{
-		printf("result: %s\n", result->name);
-		if (!result->_isVoidElement)
+		char *trimmedOutput;
+		if (result->_isVoidElement)
+			opts->out = OUT_OUTER_HTML;
+		switch (opts->out)
 		{
-			printf("innerHtml: %s\n", result->innerHtml);
+			case OUT_INNER_HTML:
+				trimmedOutput = trim(result->innerHtml);
+				break;
+			case OUT_OUTER_HTML:
+				trimmedOutput = trim(result->outerHtml);
+				break;
 		}
-		printf("outerHtml: %s\n", result->outerHtml);
+		printf("%s\n", trimmedOutput);
+		free(trimmedOutput);
 	}
 	freeTag(rootTag);
 	freeTagList(tagList);
diff --git a/html.h b/html.h
@@ -20,13 +20,18 @@ const char *voidElements[] = {
 	"input", "link", "meta", "source", "track", "wbr"
 };
 
+enum output_type
+{
+	OUT_INNER_HTML,
+	OUT_OUTER_HTML
+};
+
 struct filter_opts
 {
 	char *tag;
 	char *attr;
 	char *key;
-	bool innerHtml;
-	bool outerHtml;
+	enum output_type out;
 };
 
 struct attr
@@ -77,10 +82,3 @@ enum attr_value_syntax
 	AVS_APOSTROPHE,
 	AVS_UNQUOTED
 };
-
-/* enum search_type
-{
-	ST_NO,
-	ST_LIST,
-	ST_HIERARCHY
-}; */
diff --git a/lib.c b/lib.c
@@ -30,3 +30,49 @@ char *cpToChars(uint_least32_t cp, size_t len)
 	str[len] = 0;
 	return str;
 }
+
+char *trim(char *text)
+{
+	char *trimmedText = NULL;
+	int begin = 0;
+	int end = 0;
+	for (int i=0; i<strlen(text); i++)
+	{
+		if
+		(
+			text[i] == ' ' ||
+			text[i] == '\n' ||
+			text[i] == '\t' ||
+			text[i] == '\r'
+		)
+			begin++;
+		else
+			break;
+	}
+	for (int i=strlen(text)-1; i>=0; i--)
+	{
+		if
+		(
+			text[i] == ' '||
+			text[i] == '\n' ||
+			text[i] == '\t' ||
+			text[i] == '\r'
+		)
+			end++;
+		else
+			break;
+	}
+	int k = 0;
+	for (int i=0; i<strlen(text); i++)
+	{
+		if (i >= begin && i < strlen(text) - end)
+		{
+			trimmedText = realloc(trimmedText, (k+1) * sizeof(char));
+			trimmedText[k] = text[i];
+			k++;
+		}
+	}
+	trimmedText = realloc(trimmedText, (k+1) * sizeof(char));
+	trimmedText[k] = 0;
+	return trimmedText;
+}