commit 95542daf7c0bd7d289d61c34e6c2522c101e7a12
parent 8b7678b3f56e1b9d1a4fdc58b1d0635bfba473fe
Author: Robin <kroekerrobin@gmail.com>
Date: Thu, 3 Aug 2023 22:47:22 +0200
Parse innerText correctly
Diffstat:
| M | html.c | | | 59 | ++++++++++++++++++++++++++++++++++++++++++++++------------- |
1 file changed, 46 insertions(+), 13 deletions(-)
diff --git a/html.c b/html.c
@@ -197,18 +197,33 @@ size_t parseDOCTYPE(const char *text)
return 0;
}
-struct tag *parseTag(const char *text, enum state state, struct tag_list **tagList)
+/*
+ * Mark the most recently added tag named endTag that is still open as
+ * closed. Only that single tag is changed, so same-named tags that were
+ * added earlier stay open until their own end tag is seen.
+ * Does nothing when no open tag with that name exists.
+ *
+ * tagList: tags to search; scanned from the last entry backwards.
+ * endTag:  NUL-terminated tag name collected from the end tag.
+ */
+void closeLastUnclosedTag(struct tag_list *tagList, const char *endTag)
{
- struct tag *tag = initTag();
- (*tagList)->tags = realloc((*tagList)->tags, ((*tagList)->len+1) * sizeof(struct tag));
- (*tagList)->tags[(*tagList)->len] = tag;
- (*tagList)->len++;
- /* printf("tagList: ");
- for (int i=0; i<tagList->len; i++)
+	for (int i=tagList->len-1; i>-1; i--)
+	{
+		struct tag *candidate = tagList->tags[i];
+		/* freeTag() treats name as possibly NULL; strcmp() on a null
+		 * pointer is undefined, so such tags can never match. */
+		if (candidate->_isClosed || candidate->name == NULL)
+			continue;
+		if (strcmp(candidate->name, endTag) == 0)
+		{
+			candidate->_isClosed = true;
+			/* One end tag closes exactly one element. */
+			break;
+		}
+	}
+}
+
+/*
+ * Return the last tag in tagList that is neither a void element nor
+ * already closed, scanning from the end of the list backwards.
+ *
+ * Returns NULL when the list holds no such tag; callers must check the
+ * result before dereferencing it.
+ */
+struct tag *getLastOpenTag(struct tag_list *tagList)
+{
+	for (int i=tagList->len-1; i>-1; i--)
{
- printf("%02X, ", tagList->tags[i]);
+		if (!tagList->tags[i]->_isVoidElement && !tagList->tags[i]->_isClosed)
+			return tagList->tags[i];
}
- printf("\n"); */
+	/* Every path must return a value: falling off the end of a non-void
+	 * function is undefined behaviour as soon as the caller uses the result. */
+	return NULL;
+}
+
+struct tag *parseTag(const char *text, enum state state, struct tag_list *tagList)
+{
+ struct tag *tag = initTag();
+ tagList->tags = realloc(tagList->tags, (tagList->len+1) * sizeof(struct tag));
+ tagList->tags[tagList->len] = tag;
+ tagList->len++;
+ struct tag *innerTextTag = tag;
+ char *endTag = malloc(sizeof(char));
+ endTag[0] = 0;
size_t a = 0;
size_t attrNameCount = 0;
size_t attrValueCount = 0;
@@ -236,7 +251,8 @@ struct tag *parseTag(const char *text, enum state state, struct tag_list **tagLi
state = STATE_TAG;
break;
}
- tag->innerText = stringCat(tag->innerText, cpToChars(cp, ret));
+ innerTextTag = getLastOpenTag(tagList);
+ innerTextTag->innerText = stringCat(innerTextTag->innerText, cpToChars(cp, ret));
break;
case STATE_TAG:
if (cp == SOLIDUS)
@@ -272,7 +288,16 @@ struct tag *parseTag(const char *text, enum state state, struct tag_list **tagLi
break;
case STATE_END_TAG_NAME:
if (cp == GREATER_THAN_SIGN)
+ {
+ closeLastUnclosedTag(tagList, endTag);
+ free(endTag);
+ endTag = malloc(sizeof(char));
+ endTag[0] = 0;
state = STATE_INNER_TEXT;
+ break;
+ }
+ if (!isASCIIWhitespace(cp))
+ endTag = stringCat(endTag, cpToChars(cp, ret));
break;
case STATE_ATTR_NAME:
if (cp == GREATER_THAN_SIGN)
@@ -376,7 +401,6 @@ struct tag *parseTag(const char *text, enum state state, struct tag_list **tagLi
void freeTag(struct tag *t)
{
- printf("freeing a tag...\n");
if (t->name != NULL)
free(t->name);
if (t->innerText != NULL)
@@ -392,6 +416,14 @@ void freeTag(struct tag *t)
free(t);
}
+/*
+ * Debug helper: print one line of the form "<i>: <name>, <innerText>" for
+ * tag, then do the same for every entry of tag->children.
+ *
+ * tag: node to print; a NULL tag prints nothing.
+ * i:   number shown in front of this node's line. The top-level caller
+ *      chooses it; each child is printed with its index in tag->children.
+ */
+void printHtml(struct tag *tag, int i)
+{
+	if (tag == NULL)
+		return;
+	/* freeTag() treats name and innerText as possibly NULL, and handing a
+	 * null pointer to %s is undefined behaviour, so print a marker instead. */
+	printf("%d: %s, %s\n", i,
+		tag->name != NULL ? tag->name : "(null)",
+		tag->innerText != NULL ? tag->innerText : "(null)");
+	/* Dedicated counter so the parameter i is no longer shadowed. */
+	for (int child=0; child<tag->childrenLen; child++)
+		printHtml(tag->children[child], child);
+}
+
void parseHtml(const char *text)
{
struct tag *rootTag;
@@ -400,9 +432,10 @@ void parseHtml(const char *text)
tagList->len = 0;
size_t len = parseDOCTYPE(text);
if (len)
- rootTag = parseTag(text+len, STATE_INNER_TEXT, &tagList);
+ rootTag = parseTag(text+len, STATE_INNER_TEXT, tagList);
else
- rootTag = parseTag(text, STATE_INNER_TEXT, &tagList);
+ rootTag = parseTag(text, STATE_INNER_TEXT, tagList);
+ printHtml(rootTag, 0);
freeTag(rootTag);
free(tagList->tags);
free(tagList);