commit d847e830279219425e57ef45bde04eed13f2ad4b
parent 275940c7345d05564943a2bb14dfe0ccc306a95e
Author: Robin <kroekerrobin@gmail.com>
Date: Tue, 8 Aug 2023 22:18:02 +0200
Improve comment parsing
Diffstat:
3 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/html.c b/html.c
@@ -10,7 +10,7 @@ const char *stateToString(enum state s)
case STATE_END_TAG_NAME: return "STATE_END_TAG_NAME";
case STATE_ATTR_NAME: return "STATE_ATTR_NAME";
case STATE_ATTR_VALUE: return "STATE_ATTR_VALUE";
- case STATE_COMMENT_ETC: return "STATE_COMMENT_ETC";
+ case STATE_COMMENT: return "STATE_COMMENT";
}
}
@@ -276,6 +276,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
size_t attrNameCount = 0;
size_t attrValueCount = 0;
enum attr_value_syntax attrValueSyntax = AVS_NO;
+ size_t hyphenCount = 0;
uint_least32_t cp;
size_t len = strlen(text);
size_t ret, off;
@@ -309,7 +310,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
}
if (cp == EXCLAMATION_MARK)
{
- state = STATE_COMMENT_ETC;
+ state = STATE_COMMENT;
break;
}
stillOpenTag->children = realloc(
@@ -467,9 +468,16 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
);
}
break;
- case STATE_COMMENT_ETC:
- if (cp == GREATER_THAN_SIGN)
+ case STATE_COMMENT:
+ if (cp == GREATER_THAN_SIGN && hyphenCount == 2)
+ {
state = STATE_INNER_TEXT;
+ break;
+ }
+ if (cp == HYPHEN_MINUS)
+ hyphenCount++;
+ else
+ hyphenCount = 0;
break;
}
}
diff --git a/html.h b/html.h
@@ -13,6 +13,7 @@
#define QUOTATION_MARK 0x22
#define APOSTROPHE 0x27
#define GRAVE_ACCENT 0x60
+#define HYPHEN_MINUS 0x2D
const char *voidElements[] = {
"area", "base", "br", "col", "embed", "hr", "img",
@@ -66,7 +67,7 @@ enum state
STATE_END_TAG_NAME,
STATE_ATTR_NAME,
STATE_ATTR_VALUE,
- STATE_COMMENT_ETC
+ STATE_COMMENT
};
enum attr_value_syntax
diff --git a/todo b/todo
@@ -1,2 +1 @@
strip beginning and ending whitespace of inner and outer html
-parse html comments right