htex

simple incorrect html parser
git clone git://git.relim.de/htex.git
Log | Files | Refs | README

commit f527a413ca2f869e114bee4d0b3db7f55dbfaf21
parent 3bb5b904098dd95d37006dfef9bc9316cab2dc6d
Author: Robin <kroekerrobin@gmail.com>
Date:   Sat, 12 Aug 2023 11:23:19 +0200

Refactor

Diffstat:
M html.c | 51 +++++++++++++++++----------------------------------
1 file changed, 17 insertions(+), 34 deletions(-)

diff --git a/html.c b/html.c @@ -275,6 +275,20 @@ void setInnerHtmlEndOffset(struct tag *closedTag, char *text, size_t off) closedTag->_innerHtmlEndOffset = i; } +enum state endOfBeginTag(struct tag *t, size_t offset) +{ + t->_innerHtmlBeginOffset = offset+1; + t->_isVoidElement = isVoidElement(t->name); + if (t->_isVoidElement) + t->_outerHtmlEndOffset = offset+1; + if (strcmp(t->name, "script") == 0) + return STATE_SCRIPT; + else if (strcmp(t->name, "style") == 0) + return STATE_STYLE; + else + return STATE_INNER_TEXT; +} + struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_list *tagList) { struct tag *tag = initTag(); @@ -336,16 +350,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis case STATE_BEGIN_TAG_NAME: if (cp == GREATER_THAN_SIGN) { - tag->_innerHtmlBeginOffset = off+1; - tag->_isVoidElement = isVoidElement(tag->name); - if (tag->_isVoidElement) - tag->_outerHtmlEndOffset = off+1; - if (strcmp(tag->name, "script") == 0) - state = STATE_SCRIPT; - else if (strcmp(tag->name, "style") == 0) - state = STATE_STYLE; - else - state = STATE_INNER_TEXT; + state = endOfBeginTag(tag, off); break; } if (isASCIIWhitespace(cp)) @@ -375,18 +380,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis case STATE_ATTR_NAME: if (cp == GREATER_THAN_SIGN) { - tag->_innerHtmlBeginOffset = off+1; - tag->_isVoidElement = isVoidElement(tag->name); - if (tag->_isVoidElement) - { - tag->_outerHtmlEndOffset = off+1; - } - if (strcmp(tag->name, "script") == 0) - state = STATE_SCRIPT; - else if (strcmp(tag->name, "style") == 0) - state = STATE_STYLE; - else - state = STATE_INNER_TEXT; + state = endOfBeginTag(tag, off); break; } if (isASCIIWhitespace(cp)) @@ -458,18 +452,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis } if (cp == GREATER_THAN_SIGN) { - tag->_innerHtmlBeginOffset = off+1; - tag->_isVoidElement = isVoidElement(tag->name); - if 
(tag->_isVoidElement) - { - tag->_outerHtmlEndOffset = off+1; - } - if (strcmp(tag->name, "script") == 0) - state = STATE_SCRIPT; - else if (strcmp(tag->name, "style") == 0) - state = STATE_STYLE; - else - state = STATE_INNER_TEXT; + state = endOfBeginTag(tag, off); break; } if (