htex

simple incorrect html parser
git clone git://git.relim.de/htex.git
Log | Files | Refs | README

commit 633e945a287ee1fc0b03d7bab858bbc8e7cdba09
parent 95542daf7c0bd7d289d61c34e6c2522c101e7a12
Author: Robin <kroekerrobin@gmail.com>
Date:   Sat,  5 Aug 2023 21:23:39 +0200

No memory leaks.

YES!!!!!

Diffstat:
M Makefile | 2 ++
M htex.c | 4 ++++
M html.c | 66 +++++++++++++++++++++++++++++++++++++++++-------------------------
3 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/Makefile b/Makefile @@ -3,6 +3,8 @@ MANPREFIX = $(PREFIX)/share/man all: $(CC) -O -Werror -o htex htex.c -lgrapheme +debug: + $(CC) -fsanitize=address -O -Werror -o htex htex.c -lgrapheme clean: rm htex install: all diff --git a/htex.c b/htex.c @@ -101,6 +101,7 @@ int main(int argc, char *argv[]) return -1; } text = readFile(fp); + fclose(fp); if (strlen(text) == 0) { printf("No data in file.\n"); @@ -108,6 +109,9 @@ int main(int argc, char *argv[]) } } parseHtml(text); + free(tag); + free(attribute); + free(key); free(text); return 0; } diff --git a/html.c b/html.c @@ -213,6 +213,7 @@ struct tag *getLastOpenTag(struct tag_list *tagList) if (!tagList->tags[i]->_isVoidElement && !tagList->tags[i]->_isClosed) return tagList->tags[i]; } + return tagList->tags[0]; } struct tag *parseTag(const char *text, enum state state, struct tag_list *tagList) @@ -221,7 +222,7 @@ struct tag *parseTag(const char *text, enum state state, struct tag_list *tagLis tagList->tags = realloc(tagList->tags, (tagList->len+1) * sizeof(struct tag)); tagList->tags[tagList->len] = tag; tagList->len++; - struct tag *innerTextTag = tag; + struct tag *stillOpenTag = tag; char *endTag = malloc(sizeof(char)); endTag[0] = 0; size_t a = 0; @@ -231,7 +232,6 @@ struct tag *parseTag(const char *text, enum state state, struct tag_list *tagLis uint_least32_t cp; size_t len = strlen(text); size_t ret, off; - int n = 0; for (off = 0; off<len; off += ret) { if ((ret = grapheme_decode_utf8(text+off, len-off, &cp)) > len-off) @@ -240,9 +240,9 @@ struct tag *parseTag(const char *text, enum state state, struct tag_list *tagLis } else { - char *the_codepoint = cpToChars(cp, ret); - printf("cp: %02X, %s, %s", cp, the_codepoint, stateToString(state)); - free(the_codepoint); + // char *the_codepoint = cpToChars(cp, ret); + // printf("cp: %02X, %s, %s\n", cp, the_codepoint, stateToString(state)); + // free(the_codepoint); switch (state) { case STATE_INNER_TEXT: @@ -251,8 +251,8 @@ struct tag *parseTag(const 
char *text, enum state state, struct tag_list *tagLis state = STATE_TAG; break; } - innerTextTag = getLastOpenTag(tagList); - innerTextTag->innerText = stringCat(innerTextTag->innerText, cpToChars(cp, ret)); + stillOpenTag = getLastOpenTag(tagList); + stillOpenTag->innerText = stringCat(stillOpenTag->innerText, cpToChars(cp, ret)); break; case STATE_TAG: if (cp == SOLIDUS) @@ -265,9 +265,14 @@ struct tag *parseTag(const char *text, enum state state, struct tag_list *tagLis state = STATE_COMMENT_ETC; break; } - tag->children = realloc(tag->children, (tag->childrenLen+1) * sizeof(struct tag)); - tag->children[tag->childrenLen] = parseTag(text+off, STATE_BEGIN_TAG_NAME, tagList); - tag->childrenLen++; + stillOpenTag->children = realloc( + stillOpenTag->children, + (stillOpenTag->childrenLen+1) * sizeof(struct tag) + ); + struct tag *oneTag = parseTag(text+off, STATE_BEGIN_TAG_NAME, tagList); + stillOpenTag->children[stillOpenTag->childrenLen] = oneTag; + stillOpenTag->childrenLen++; + free(endTag); return tag; case STATE_BEGIN_TAG_NAME: if (cp == GREATER_THAN_SIGN) @@ -393,35 +398,44 @@ struct tag *parseTag(const char *text, enum state state, struct tag_list *tagLis state = STATE_INNER_TEXT; break; } - printf("\n"); } } - return tag; + free(endTag); } void freeTag(struct tag *t) { - if (t->name != NULL) - free(t->name); - if (t->innerText != NULL) - free(t->innerText); + free(t->name); + free(t->innerText); for (int i=0; i<t->attrsLen; i++) { free(t->attrs[i]->name); free(t->attrs[i]->value); free(t->attrs[i]); } + free(t->attrs); for (int i=0; i<t->childrenLen; i++) - freeTag(t->children[i]); + { + if (t->children[i] != NULL) + freeTag(t->children[i]); + } + free(t->children); free(t); } -void printHtml(struct tag *tag, int i) +void freeTagList(struct tag_list *t) { - printf("%d: %s, %s\n", i, tag->name, tag->innerText); - // printf("%*s\n", width + strlen(tag->name), tag->name); - for (int i=0; i<tag->childrenLen; i++) - printHtml(tag->children[i], i); + 
free(t->tags); + free(t); +} + +void printHtml(struct tag *t) +{ + printf("name: %s\n", t->name); + for (int i=0; i<t->childrenLen; i++) + { + printHtml(t->children[i]); + } } void parseHtml(const char *text) @@ -435,8 +449,10 @@ void parseHtml(const char *text) rootTag = parseTag(text+len, STATE_INNER_TEXT, tagList); else rootTag = parseTag(text, STATE_INNER_TEXT, tagList); - printHtml(rootTag, 0); + printHtml(rootTag); + /* printf("%s\n", rootTag->children[0]->children[0]->name); + printf("%s\n", rootTag->children[0]->children[0]->children[1]->name); + printf("%s\n", rootTag->children[0]->children[0]->children[1]->attrs[0]->value); */ freeTag(rootTag); - free(tagList->tags); - free(tagList); + freeTagList(tagList); }