Change curly brace style - htex - simple incorrect html parser

commit 468e6772f71842a17b2013db6f92f8ca908d6648
parent 29cc7cac94d871dc422a3e2b42c3138f359bb275
Author: Robin <kroekerrobin@gmail.com>
Date:   Tue,  2 Apr 2024 22:28:00 +0200

Change curly brace style

Diffstat:
M htex.c  | 59 ++++++++++++++++++++---------------------------------------
M html.c  | 331 ++++++++++++++++++++++++++-----------------------------------------------------
M html.h  | 54 ++++++++++++++++++++++--------------------------------
M misc.c  | 27 +++++++++------------------

4 files changed, 159 insertions(+), 312 deletions(-)
diff --git a/htex.c b/htex.c
@@ -27,8 +27,7 @@ struct find_opts *parseFilterOpts(const char *pattern)
 	int aot = 0;
 	int ak = 0;
 	int av = 0;
-	switch (pattern[0])
-	{
+	switch (pattern[0]) {
 		case '.':
 			isClassValue = true;
 			i = 1;
@@ -38,36 +37,31 @@ struct find_opts *parseFilterOpts(const char *pattern)
 			i = 1;
 			break;
 	}
-	for (; i<strlen(pattern); i++)
-	{
+	for (; i<strlen(pattern); i++) {
 		if (pattern[i] == ']')
 			break;
 		if (
-			!isAttrKey &&
+		    !isAttrKey &&
 			!isAttrOrTag &&
 			pattern[i] != ']' &&
 			pattern[i] != '"'
-		)
-		{
+		) {
 			opt->attr = realloc(opt->attr, (av+1) * sizeof(char));
 			opt->attr[av] = pattern[i];
 			av++;
 		}
 		if (pattern[i] == '=')
 			isAttrKey = false;
-		if (isAttrKey && !isAttrOrTag)
-		{
+		if (isAttrKey && !isAttrOrTag) {
 			opt->key = realloc(opt->key, (ak+1) * sizeof(char));
 			opt->key[ak] = pattern[i];
 			ak++;
 		}
-		if (pattern[i] == '[')
-		{
+		if (pattern[i] == '[') {
 			isAttrKey = true;
 			isAttrOrTag = false;
 		}
-		if (isAttrOrTag)
-		{
+		if (isAttrOrTag) {
 			attrOrTag = realloc(attrOrTag, (aot+1) * sizeof(char));
 			attrOrTag[aot] = pattern[i];
 			aot++;
@@ -75,8 +69,7 @@ struct find_opts *parseFilterOpts(const char *pattern)
 	}
 	attrOrTag = realloc(attrOrTag, (aot+1) * sizeof(char));
 	attrOrTag[aot] = 0;
-	if (isIdValue)
-	{
+	if (isIdValue) {
 		free(opt->key);
 		opt->key = NULL;
 		free(opt->attr);
@@ -87,8 +80,7 @@ struct find_opts *parseFilterOpts(const char *pattern)
 		opt->key[1] = 'd';
 		opt->key[2] = 0;
 	}
-	else if (isClassValue)
-	{
+	else if (isClassValue) {
 		free(opt->key);
 		opt->key = NULL;
 		free(opt->attr);
@@ -102,17 +94,14 @@ struct find_opts *parseFilterOpts(const char *pattern)
 		opt->key[4] = 's';
 		opt->key[5] = 0;
 	}
-	else
-	{
+	else {
 		free(opt->tag);
 		opt->tag = attrOrTag;
-		if (av > 0)
-		{
+		if (av > 0) {
 			opt->attr = realloc(opt->attr, (av+1) * sizeof(char));
 			opt->attr[av] = 0;
 		}
-		if (ak > 0)
-		{
+		if (ak > 0) {
 			opt->key = realloc(opt->key, (ak+1) * sizeof(char));
 			opt->key[ak] = 0;
 		}
@@ -173,47 +162,39 @@ int main(int argc, char *argv[])
 		}
 	}
 	enum output_type out = parseOutputArg(output);
-	if (out == -1)
-	{
+	if (out == -1) {
 		fprintf(stderr, "Provide a valid output type!\n");
 		free(output);
 		return -1;
 	}
-	if (limit == 0)
-	{
+	if (limit == 0) {
 		fprintf(stderr, "Provide a valid limit value.\n");
 		free(output);
 		return -1;
 	}
-	if (argc == optind)
-	{
+	if (argc == optind) {
 		fprintf(stderr, "Provide a search pattern!\n");
 		return -1;
 	}
-	if (argc > optind+2)
-	{
+	if (argc > optind+2) {
 		fprintf(stderr, "Provide only one file!\n");
 		return -1;
 	}
-	if (argc == optind+1)
-	{
+	if (argc == optind+1) {
 		searchPattern = argv[argc-1];
 		text = readFile(stdin);
 	}
-	else if (argc == optind+2)
-	{
+	else if (argc == optind+2) {
 		searchPattern = argv[argc-2];
 		char *filepath = argv[argc-1];
 		FILE *fp = fopen(filepath, "r");
-		if (fp == NULL)
-		{
+		if (fp == NULL) {
 			perror("fopen failed: ");
 			return -1;
 		}
 		text = readFile(fp);
 		fclose(fp);
-		if (strlen(text) == 0)
-		{
+		if (strlen(text) == 0) {
 			fprintf(stderr, "No data in file.\n");
 			free(output);
 			free(text);
diff --git a/html.c b/html.c
@@ -2,8 +2,7 @@
 
 const char *stateToString(enum state s)
 {
-	switch(s)
-	{
+	switch(s) {
 		case STATE_INNER_TEXT: return "STATE_INNER_TEXT";
 		case STATE_TAG: return "STATE_TAG";
 		case STATE_BEGIN_TAG_NAME: return "STATE_BEGIN_TAG_NAME";
@@ -104,8 +103,7 @@ static inline bool isASCIIWhitespace(uint_least32_t cp)
 
 static inline bool isVoidElement(const char *tagName)
 {
-	for (int i=0; i<13; i++)
-	{
+	for (int i=0; i<13; i++) {
 		if (strcmp(tagName, voidElements[i]) == 0)
 			return true;
 	}
@@ -199,24 +197,18 @@ size_t parseDoctype(const char *text)
 	uint_least32_t cp;
 	size_t len = strlen(text);
 	size_t ret, off;
-	for (off = 0; off<len; off += ret)
-	{
-		if ((ret = grapheme_decode_utf8(text+off, len-off, &cp)) > len-off)
-		{
+	for (off = 0; off<len; off += ret) {
+		if ((ret = grapheme_decode_utf8(text+off, len-off, &cp)) > len-off) {
 			printError("Something wrong with ending of text");
 		}
-		else
-		{
-			switch (state)
-			{
+		else {
+			switch (state) {
 				case DSTATE_TEXT:
-					if (cp == LESS_THAN_SIGN)
-					{
+					if (cp == LESS_THAN_SIGN) {
 						state = DSTATE_POSSIBLE_DTYPE;
 						break;
 					}
-					if (cp == GREATER_THAN_SIGN)
-					{
+					if (cp == GREATER_THAN_SIGN) {
 						offset = off;
 						goto CLEANUP;
 					}
@@ -228,25 +220,24 @@ size_t parseDoctype(const char *text)
 						goto CLEANUP;
 					break;
 				case DSTATE_DTYPE_OR_COMMENT:
-					if (cp == HYPHEN_MINUS)
+					if (cp == HYPHEN_MINUS) {
 						goto CLEANUP;
-					else
-					{
+                    }
+					else {
 						doctype = stringCat(doctype, cpToChars(cp, ret));
 						state = DSTATE_DTYPE;
 						break;
 					}
 					break;
 				case DSTATE_DTYPE:
-					if (isASCIIWhitespace(cp))
-					{
+					if (isASCIIWhitespace(cp)) {
 						size_t dlen = strlen(doctype)+1;
 						lowerDoctype = malloc(dlen * sizeof(char));
 						grapheme_to_lowercase_utf8(doctype, dlen, lowerDoctype, dlen);
-						if (strcmp(lowerDoctype, "doctype") == 0)
+						if (strcmp(lowerDoctype, "doctype") == 0) {
 							state = DSTATE_TEXT;
-						else
-						{
+                        }
+						else {
 							offset = -1;
 							goto CLEANUP;
 						}
@@ -265,10 +256,8 @@ CLEANUP:
 
 struct tag *closeLastUnclosedTag(struct tag_list *tagList, const char *endTag, size_t endOffset)
 {
-	for (int i=tagList->len-1; i>-1; i--)
-	{
-		if (strcmp(tagList->tags[i]->name, endTag) == 0 && !tagList->tags[i]->_isClosed)
-		{
+	for (int i=tagList->len-1; i>-1; i--) {
+		if (strcmp(tagList->tags[i]->name, endTag) == 0 && !tagList->tags[i]->_isClosed) {
 			tagList->tags[i]->_isClosed = true;
 			tagList->tags[i]->_outerHtmlEndOffset = endOffset;
 			return tagList->tags[i];
@@ -279,10 +268,8 @@ struct tag *closeLastUnclosedTag(struct tag_list *tagList, const char *endTag, s
 
 struct tag *getLastOpenTag(struct tag_list *tagList)
 {
-	for (int i=tagList->len-1; i>-1; i--)
-	{
-		if (!tagList->tags[i]->_isVoidElement && !tagList->tags[i]->_isClosed)
-		{
+	for (int i=tagList->len-1; i>-1; i--) {
+		if (!tagList->tags[i]->_isVoidElement && !tagList->tags[i]->_isClosed) {
 			return tagList->tags[i];
 		}
 	}
@@ -293,8 +280,7 @@ char *getOuterHtml(char *text, struct tag *t)
 {
 	char *outerHtml = NULL;
 	int o = 0;
-	for (int i=t->_outerHtmlBeginOffset; i<t->_outerHtmlEndOffset; i++)
-	{
+	for (int i=t->_outerHtmlBeginOffset; i<t->_outerHtmlEndOffset; i++) {
 		outerHtml = realloc(outerHtml, (o+1) * sizeof(char));
 		outerHtml[o] = text[i];
 		o++;
@@ -308,8 +294,7 @@ char *getInnerHtml(char *text, struct tag *t)
 {
 	char *innerHtml = NULL;
 	int o = 0;
-	for (int i=t->_innerHtmlBeginOffset; i<t->_innerHtmlEndOffset; i++)
-	{
+	for (int i=t->_innerHtmlBeginOffset; i<t->_innerHtmlEndOffset; i++) {
 		innerHtml = realloc(innerHtml, (o+1) * sizeof(char));
 		innerHtml[o] = text[i];
 		o++;
@@ -337,8 +322,7 @@ enum state endOfBeginTag(struct tag *t, size_t offset)
 		return STATE_SCRIPT;
 	else if (strcmp(t->name, "style") == 0)
 		return STATE_STYLE;
-	else
-		return STATE_INNER_TEXT;
+    return STATE_INNER_TEXT;
 }
 
 char *parseNumericCharRef(char *text, size_t off, int base, size_t *newOffset)
@@ -353,8 +337,7 @@ char *parseNumericCharRef(char *text, size_t off, int base, size_t *newOffset)
         ret = grapheme_decode_utf8(text+off, strlen(text+off), &cp);
         numericCharRef = stringCat(numericCharRef, cpToChars(cp, ret));
         off += ret;
-    }
-    while (cp != SEMICOLON);
+    } while (cp != SEMICOLON);
     *newOffset = off - oldOffset;
     long i = strtol(numericCharRef, NULL, base);
     ret = grapheme_encode_utf8((uint_least32_t)i, character, MAX_CODEPOINT_SIZE);
@@ -366,8 +349,7 @@ char *parseNumericCharRef(char *text, size_t off, int base, size_t *newOffset)
 char *parseNamedCharRef(char *text, size_t off, size_t len, enum attr_value_syntax avs)
 {
     uint_least32_t stopAt = 0;
-    switch(avs)
-    {
+    switch(avs) {
         case AVS_QUOTATION_MARK:
             stopAt = QUOTATION_MARK;
             break;
@@ -385,8 +367,7 @@ char *parseNamedCharRef(char *text, size_t off, size_t len, enum attr_value_synt
     size_t ret;
     uint_least32_t cp;
     int i = 0;
-    for (;;)
-    {
+    for (;;) {
         ret = grapheme_decode_utf8(text+off, strlen(text+off), &cp);
         if (cp == AMPERSAND || isASCIIWhitespace(cp))
             break;
@@ -407,22 +388,18 @@ char *encodeNamedCharRef(const char *name)
     char cp[MAX_CODEPOINT_SIZE];
     memset(&cp, 0, MAX_CODEPOINT_SIZE);
     size_t len;
-    for (int i=0; i<NAMED_CHAR_REF_COUNT; i++)
-    {
-        if (startsWith(name, entities[i].name))
-        {
+    for (int i=0; i<NAMED_CHAR_REF_COUNT; i++) {
+        if (startsWith(name, entities[i].name)) {
             len = grapheme_encode_utf8(entities[i].cp[0], cp, MAX_CODEPOINT_SIZE);
             strcpy(buf, cp);
-            if (entities[i].cp[1] != 0)
-            {
+            if (entities[i].cp[1] != 0) {
                 len += grapheme_encode_utf8(entities[i].cp[1], cp, MAX_CODEPOINT_SIZE);
                 strcat(buf, cp);
             }
             buf[len] = 0;
             const char *part = &name[strlen(entities[i].name)];
             size_t partLen = strlen(part);
-            if (partLen > 0)
-            {
+            if (partLen > 0) {
                 if (partLen == 1 && part[0] == ';')
                 return buf;
                 buf = realloc(buf, 2*MAX_CODEPOINT_SIZE+1+partLen);
@@ -457,27 +434,20 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
     uint_least32_t cp;
     size_t len = strlen(text);
     size_t ret, off;
-    for (off = offset; off<len; off += ret)
-    {
-        if ((ret = grapheme_decode_utf8(text+off, len-off, &cp)) > len-off)
-        {
-            printError("Something wrong with ending of text");
-        }
-        else
-        {
+    for (off = offset; off<len; off += ret) {
+        if ((ret = grapheme_decode_utf8(text+off, len-off, &cp)) > len-off) {
+            fprintf(stderr, "parseTag.grapheme_decode_utf8 failed.\n");
+        } else {
             // char *the_codepoint = cpToChars(cp, ret);
             // printf("cp: %02X, %s, %s\n", cp, the_codepoint, stateToString(state));
             // free(the_codepoint);
-            switch (state)
-            {
+            switch (state) {
                 case STATE_INNER_TEXT:
-                    if (cp == LESS_THAN_SIGN)
-                    {
+                    if (cp == LESS_THAN_SIGN) {
                         state = STATE_TAG;
                         break;
                     }
-                    if (cp == AMPERSAND)
-                    {
+                    if (cp == AMPERSAND) {
                         returnToState = STATE_INNER_TEXT;
                         state = STATE_CHAR_REF;
                         break;
@@ -486,13 +456,11 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
                     stillOpenTag->innerText = stringCat(stillOpenTag->innerText, cpToChars(cp, ret));
                     break;
                 case STATE_TAG:
-                    if (cp == SOLIDUS)
-                    {
+                    if (cp == SOLIDUS) {
                         state = STATE_END_TAG_NAME;
                         break;
                     }
-                    if (cp == EXCLAMATION_MARK)
-                    {
+                    if (cp == EXCLAMATION_MARK) {
                         state = STATE_COMMENT;
                         break;
                     }
@@ -507,24 +475,20 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
                     free(endTag);
                     return tag;
                 case STATE_BEGIN_TAG_NAME:
-                    if (cp == GREATER_THAN_SIGN)
-                    {
+                    if (cp == GREATER_THAN_SIGN) {
                         state = endOfBeginTag(tag, off);
                         break;
                     }
-                    if (isASCIIWhitespace(cp))
-                    {
+                    if (isASCIIWhitespace(cp)) {
                         state = STATE_ATTR_NAME;
                         break;
                     }
-                    if (isASCIIDigit(cp) || isASCIIAlpha(cp))
-                    {
+                    if (isASCIIDigit(cp) || isASCIIAlpha(cp)) {
                         tag->name = stringCat(tag->name, cpToChars(cp, ret));
                     }
                     break;
                 case STATE_END_TAG_NAME:
-                    if (cp == GREATER_THAN_SIGN)
-                    {
+                    if (cp == GREATER_THAN_SIGN) {
                         struct tag *closedTag = closeLastUnclosedTag(tagList, endTag, off+ret);
                         if (closedTag != NULL)
                             setInnerHtmlEndOffset(closedTag, text, off);
@@ -538,26 +502,21 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
                         endTag = stringCat(endTag, cpToChars(cp, ret));
                     break;
                 case STATE_ATTR_NAME:
-                    if (cp == GREATER_THAN_SIGN)
-                    {
+                    if (cp == GREATER_THAN_SIGN) {
                         state = endOfBeginTag(tag, off);
                         break;
                     }
-                    if (isASCIIWhitespace(cp))
-                    {
+                    if (isASCIIWhitespace(cp)) {
                         if (attrNameCount == a+1)
                             a++;
                         break;
                     }
-                    if (cp == EQUALS_SIGN)
-                    {
+                    if (cp == EQUALS_SIGN) {
                         state = STATE_ATTR_VALUE;
                         break;
                     }
-                    if (isValidAttrName(cp))
-                    {
-                        if (attrNameCount != a+1)
-                        {
+                    if (isValidAttrName(cp)) {
+                        if (attrNameCount != a+1) {
                             tag->attrs = realloc(
                                 tag->attrs,
                                 (a+1) * sizeof(struct attr)
@@ -573,20 +532,15 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
                     }
                     break;
                 case STATE_ATTR_VALUE:
-                    if (isASCIIWhitespace(cp))
-                    {
-                        if (attrValueSyntax == AVS_UNQUOTED)
-                        {
+                    if (isASCIIWhitespace(cp)) {
+                        if (attrValueSyntax == AVS_UNQUOTED) {
                             attrValueSyntax = AVS_NO;
                             state = STATE_ATTR_NAME;
-                        }
-                        else if (attrValueSyntax == AVS_QUOTATION_MARK || attrValueSyntax == AVS_APOSTROPHE)
-                        {
+                        } else if (attrValueSyntax == AVS_QUOTATION_MARK || attrValueSyntax == AVS_APOSTROPHE) {
                             if (
                                 strcmp("id", tag->attrs[a]->name) == 0 ||
                                 strcmp("class", tag->attrs[a]->name) == 0
-                            )
-                            {
+                            ) {
                                 char *tmpName = malloc((strlen(tag->attrs[a]->name)+1) * sizeof(char));
                                 strcpy(tmpName, tag->attrs[a]->name);
                                 tag->attrs = realloc(
@@ -599,9 +553,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
                                 tag->attrs[a]->name = tmpName;
                                 tag->attrsLen++;
                                 attrNameCount = a + 1;
-                            }
-                            else
-                            {
+                            } else {
                                 tag->attrs[a]->value = stringCat(
                                     tag->attrs[a]->value,
                                     cpToChars(cp, ret)
@@ -610,50 +562,40 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
                         }
                         break;
                     }
-                    if (cp == QUOTATION_MARK)
-                    {
-                        if (attrValueSyntax == AVS_NO)
-                        {
+                    if (cp == QUOTATION_MARK) {
+                        if (attrValueSyntax == AVS_NO) {
                             attrValueSyntax = AVS_QUOTATION_MARK;
                             break;
                         }
-                        if (attrValueSyntax == AVS_QUOTATION_MARK)
-                        {
+                        if (attrValueSyntax == AVS_QUOTATION_MARK) {
                             attrValueSyntax = AVS_NO;
                             state = STATE_ATTR_NAME;
                             break;
                         }
                     }
-                    if (cp == APOSTROPHE)
-                    {
-                        if (attrValueSyntax == AVS_NO)
-                        {
+                    if (cp == APOSTROPHE) {
+                        if (attrValueSyntax == AVS_NO) {
                             attrValueSyntax = AVS_APOSTROPHE;
                             break;
                         }
-                        if (attrValueSyntax == AVS_APOSTROPHE)
-                        {
+                        if (attrValueSyntax == AVS_APOSTROPHE) {
                             attrValueSyntax = AVS_NO;
                             state = STATE_ATTR_NAME;
                             break;
                         }
                     }
-                    if (cp == GREATER_THAN_SIGN)
-                    {
+                    if (cp == GREATER_THAN_SIGN) {
                         state = endOfBeginTag(tag, off);
                         break;
                     }
                     if (
                         attrValueSyntax == AVS_NO &&
                         isValidUnquotedAttrValue(cp)
-                    )
-                    {
+                    ) {
                         attrValueSyntax = AVS_UNQUOTED;
                     }
-                    if (attrValueSyntax > AVS_NO)
-                    {
-                        if (cp == AMPERSAND)
-                        {
+                    if (attrValueSyntax > AVS_NO) {
+                        if (cp == AMPERSAND) {
                             state = STATE_CHAR_REF;
                             returnToState = STATE_ATTR_VALUE;
                             break;
@@ -665,8 +607,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
                     }
                     break;
                 case STATE_COMMENT:
-                    if (cp == GREATER_THAN_SIGN && hyphenCount >= 2)
-                    {
+                    if (cp == GREATER_THAN_SIGN && hyphenCount >= 2) {
                         state = STATE_INNER_TEXT;
                         break;
                     }
@@ -676,8 +617,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
                         hyphenCount = 0;
                     break;
                 case STATE_STYLE:
-                    if (cp == LESS_THAN_SIGN)
-                    {
+                    if (cp == LESS_THAN_SIGN) {
                         state = STATE_STYLE_POSSIBLE_END_TAG;
                         break;
                     }
@@ -689,8 +629,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
                         state = STATE_STYLE;
                     break;
                 case STATE_STYLE_END_TAG:
-                    if (cp == GREATER_THAN_SIGN)
-                    {
+                    if (cp == GREATER_THAN_SIGN) {
                         struct tag *closedTag = closeLastUnclosedTag(tagList, endTag, off+ret);
                         if (closedTag != NULL)
                             setInnerHtmlEndOffset(closedTag, text, off);
@@ -704,8 +643,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
                         endTag = stringCat(endTag, cpToChars(cp, ret));
                     break;
                 case STATE_SCRIPT:
-                    if (cp == LESS_THAN_SIGN)
-                    {
+                    if (cp == LESS_THAN_SIGN) {
                         state = STATE_SCRIPT_POSSIBLE_END_TAG;
                         break;
                     }
@@ -717,8 +655,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
                         state = STATE_SCRIPT;
                     break;
                 case STATE_SCRIPT_END_TAG:
-                    if (cp == GREATER_THAN_SIGN)
-                    {
+                    if (cp == GREATER_THAN_SIGN) {
                         struct tag *closedTag = closeLastUnclosedTag(tagList, endTag, off+ret);
                         if (closedTag != NULL)
                             setInnerHtmlEndOffset(closedTag, text, off);
@@ -732,21 +669,17 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
                         endTag = stringCat(endTag, cpToChars(cp, ret));
                     break;
                 case STATE_CHAR_REF:
-                    if (cp == NUMBER_SIGN) // hashtag
-                    {
+                    if (cp == NUMBER_SIGN) { /* hashtag */
                         state = STATE_CHAR_REF_NUMERIC;
                         break;
                     }
                     char *namedCharRef = parseNamedCharRef(text, off, len, attrValueSyntax);
                     off += strlen(namedCharRef)-1;
                     char *encodedNamedCharRef = encodeNamedCharRef(namedCharRef);
-                    if (returnToState == STATE_INNER_TEXT)
-                    {
+                    if (returnToState == STATE_INNER_TEXT) {
                         stillOpenTag = getLastOpenTag(tagList);
                         stillOpenTag->innerText = stringCat(stillOpenTag->innerText, encodedNamedCharRef);
-                    }
-                    else if (returnToState == STATE_ATTR_VALUE)
-                    {
+                    } else if (returnToState == STATE_ATTR_VALUE) {
                         tag->attrs[a]->value = stringCat(
                             tag->attrs[a]->value,
                             encodedNamedCharRef
@@ -756,18 +689,14 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
                     state = returnToState;
                     break;
                 case STATE_CHAR_REF_NUMERIC:
-                    if (cp == SMALL_LETTER_X || cp == CAPITAL_LETTER_X)
-                    {
+                    if (cp == SMALL_LETTER_X || cp == CAPITAL_LETTER_X) {
                         size_t newOffset;
                         char *numericCharRef = parseNumericCharRef(text, off+1, 16, &newOffset);
                         off += newOffset;
-                        if (returnToState == STATE_INNER_TEXT)
-                        {
+                        if (returnToState == STATE_INNER_TEXT) {
                             stillOpenTag = getLastOpenTag(tagList);
                             stillOpenTag->innerText = stringCat(stillOpenTag->innerText, numericCharRef);
-                        }
-                        else if (returnToState == STATE_ATTR_VALUE)
-                        {
+                        } else if (returnToState == STATE_ATTR_VALUE) {
                             tag->attrs[a]->value = stringCat(
                                 tag->attrs[a]->value,
                                 numericCharRef
@@ -775,19 +704,14 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
                         }
                         state = returnToState;
                         break;
-                    }
-                    else if (isASCIIDigit(cp))
-                    {
+                    } else if (isASCIIDigit(cp)) {
                         size_t newOffset;
                         char *numericCharRef = parseNumericCharRef(text, off, 10, &newOffset);
                         off += newOffset-1;
-                        if (returnToState == STATE_INNER_TEXT)
-                        {
+                        if (returnToState == STATE_INNER_TEXT) {
                             stillOpenTag = getLastOpenTag(tagList);
                             stillOpenTag->innerText = stringCat(stillOpenTag->innerText, numericCharRef);
-                        }
-                        else if (returnToState == STATE_ATTR_VALUE)
-                        {
+                        } else if (returnToState == STATE_ATTR_VALUE) {
                             tag->attrs[a]->value = stringCat(
                                 tag->attrs[a]->value,
                                 numericCharRef
@@ -809,15 +733,13 @@ void freeTag(struct tag *t)
 {
     free(t->name);
 	free(t->innerText);
-	for (int i=0; i<t->attrsLen; i++)
-	{
+	for (int i=0; i<t->attrsLen; i++) {
 		free(t->attrs[i]->name);
 		free(t->attrs[i]->value);
 		free(t->attrs[i]);
 	}
 	free(t->attrs);
-	for (int i=0; i<t->childrenLen; i++)
-	{
+	for (int i=0; i<t->childrenLen; i++) {
 		if (t->children[i] != NULL)
 			freeTag(t->children[i]);
 	}
@@ -840,69 +762,50 @@ void findTag(struct tag *tag, struct find_opts *opt, struct tag_list *foundTags)
 	bool matchesAttrValue = false;
 	if (strcmp(tag->name, opt->tag) == 0)
 		matchesTag = true;
-	for (int i=0; i<tag->attrsLen; i++)
-	{
+	for (int i=0; i<tag->attrsLen; i++) {
 		if (strcmp(tag->attrs[i]->name, opt->key) == 0)
 			matchesAttrKey = true;
 		if (strcmp(tag->attrs[i]->value, opt->attr) == 0)
 			matchesAttrValue = true;
 	}
-	if (strlen(opt->tag) > 0 && strlen(opt->key) > 0 && strlen(opt->attr) > 0)
-	{
-		if (matchesTag && matchesAttrKey && matchesAttrValue)
-		{
+	if (strlen(opt->tag) > 0 && strlen(opt->key) > 0 && strlen(opt->attr) > 0) {
+		if (matchesTag && matchesAttrKey && matchesAttrValue) {
 			foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag));
 			foundTags->tags[foundTags->len] = tag;
 			foundTags->len++;
 		}
-	}
-	else if (strlen(opt->tag) > 0 && strlen(opt->key) > 0)
-	{
-		if (matchesTag && matchesAttrKey)
-		{
+	} else if (strlen(opt->tag) > 0 && strlen(opt->key) > 0) {
+		if (matchesTag && matchesAttrKey) {
 			foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag));
 			foundTags->tags[foundTags->len] = tag;
 			foundTags->len++;
 		}
-	}
-	else if (strlen(opt->tag) > 0)
-	{
-		if (matchesTag)
-		{
+	} else if (strlen(opt->tag) > 0) {
+		if (matchesTag) {
 			foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag));
 			foundTags->tags[foundTags->len] = tag;
 			foundTags->len++;
 		}
-	}
-	else if (strlen(opt->key) > 0 && strlen(opt->attr) > 0)
-	{
-		if (matchesAttrKey && matchesAttrValue)
-		{
+	} else if (strlen(opt->key) > 0 && strlen(opt->attr) > 0) {
+		if (matchesAttrKey && matchesAttrValue) {
 			foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag));
 			foundTags->tags[foundTags->len] = tag;
 			foundTags->len++;
 		}
-	}
-	else if (strlen(opt->key) > 0)
-	{
-		if (matchesAttrKey)
-		{
+	} else if (strlen(opt->key) > 0) {
+		if (matchesAttrKey) {
 			foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag));
 			foundTags->tags[foundTags->len] = tag;
 			foundTags->len++;
 		}
-	}
-	else if (strlen(opt->attr) > 0)
-	{
-		if (matchesAttrValue)
-		{
+	} else if (strlen(opt->attr) > 0) {
+		if (matchesAttrValue) {
 			foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag));
 			foundTags->tags[foundTags->len] = tag;
 			foundTags->len++;
 		}
 	}
-	for (int i=tag->childrenLen-1; i>-1; i--)
-	{
+	for (int i=tag->childrenLen-1; i>-1; i--) {
 		findTag(tag->children[i], opt, foundTags);
 	}
 }
@@ -913,15 +816,11 @@ void printHtml(struct tag *t, int indent)
 		putchar(' ');
 	printf("%s", t->name);
 	for (int i=0; i<t->attrsLen; i++)
-	{
 		printf(" %s=%s", t->attrs[i]->name, t->attrs[i]->value);
-	}
 	printf("\n");
 	indent++;
 	for (int i=t->childrenLen-1; i>-1; i--)
-	{
 		printHtml(t->children[i], indent);
-	}
 }
 
 void printResult
@@ -932,14 +831,11 @@ void printResult
 	struct tag_list *foundTags
 )
 {
-	if (opts->isExcept)
-	{
+	if (opts->isExcept) {
 		bool isMatch = false;
-		for (int i=0; i<strlen(text); i++)
-		{
+		for (int i=0; i<strlen(text); i++) {
 			isMatch = false;
-			for (int k=0; k<foundTags->len; k++)
-			{
+			for (int k=0; k<foundTags->len; k++) {
 				if (
 					foundTags->tags[k]->_outerHtmlBeginOffset <= i &&
 					foundTags->tags[k]->_outerHtmlEndOffset > i
@@ -949,15 +845,11 @@ void printResult
 			if (!isMatch)
 				putchar(text[i]);
 		}
-	}
-	else
-	{
+	} else {
 		char *requestedText = NULL;
 		char *trimmedText = NULL;
-		for (int i=0; i<foundTags->len; i++)
-		{
-			switch (opts->out)
-			{
+		for (int i=0; i<foundTags->len; i++) {
+			switch (opts->out) {
 				case OUT_INNER_HTML:
 					requestedText = getInnerHtml(text, foundTags->tags[i]);
 					trimmedText = trim(requestedText);
@@ -972,23 +864,18 @@ void printResult
 					trimmedText = trim(foundTags->tags[i]->innerText);
 					break;
 				case OUT_ATTR_VALUE:
-					if (strlen(opts->key) > 0 && strlen(opts->tag) > 0)
-					{
-						for (int k=0; k<foundTags->tags[i]->attrsLen; k++)
-						{
+					if (strlen(opts->key) > 0 && strlen(opts->tag) > 0) {
+						for (int k=0; k<foundTags->tags[i]->attrsLen; k++) {
 							if (strcmp(foundTags->tags[i]->attrs[k]->name, opts->key) == 0)
 								printf("%s\n", foundTags->tags[i]->attrs[k]->value);
 						}
-					}
-					else if (strlen(opts->tag) > 0)
-					{
+					} else if (strlen(opts->tag) > 0) {
 						for (int k=0; k<foundTags->tags[i]->attrsLen; k++)
 							printf("%s\n", foundTags->tags[i]->attrs[k]->value);
 					}
 					break;
 			}
-			if (trimmedText)
-			{
+			if (trimmedText) {
 				if (strlen(trimmedText) > 0)
 					printf("%s\n", trimmedText);
 				free(trimmedText);
@@ -1013,22 +900,20 @@ void filterHtml(char *text, struct find_opts *opts)
 	struct tag_list *tagList = initTagList();
 	struct tag_list *foundTags = initTagList();
 	size_t len = parseDoctype(text);
-	if (len == -1)
-	{
+	if (len == -1) {
 		fprintf(stderr, "Error parsing <!DOCTYPE ....\n");
 		goto CLEAN;
-	}
-	else
+	} else {
 		text += len;
+    }
 	struct tag *rootTag = parseTag(text, 0, STATE_INNER_TEXT, tagList);
-	if (!existFindPattern(opts))
-	{
+	if (!existFindPattern(opts)) {
 		foundTags->tags = realloc(foundTags->tags, sizeof(struct tag));
 		foundTags->tags[0] = rootTag;
 		foundTags->len = 1;
-	}
-	else
+	} else {
 		findTag(rootTag, opts, foundTags);
+    }
 	printResult(text, rootTag, opts, foundTags);
 	// printHtml(rootTag, -1);
 	freeTag(rootTag);
diff --git a/html.h b/html.h
@@ -1,21 +1,19 @@
-#define printError(msg) do { fprintf(stderr, "%s: %s\n", __func__, msg); } while (0)
-
-#define LESS_THAN_SIGN			0x3C
-#define GREATER_THAN_SIGN		0x3E
-#define EQUALS_SIGN					0x3D
-#define TAB									0x09
-#define LF									0x0A
-#define FF									0x0C
-#define CR									0x0D
-#define SPACE								0x20
-#define SOLIDUS							0x2F
-#define EXCLAMATION_MARK		0x21
-#define QUOTATION_MARK			0x22
+#define LESS_THAN_SIGN		0x3C
+#define GREATER_THAN_SIGN	0x3E
+#define EQUALS_SIGN			0x3D
+#define TAB					0x09
+#define LF					0x0A
+#define FF					0x0C
+#define CR					0x0D
+#define SPACE				0x20
+#define SOLIDUS				0x2F
+#define EXCLAMATION_MARK	0x21
+#define QUOTATION_MARK		0x22
 #define NUMBER_SIGN         0x23
 #define AMPERSAND           0x26
-#define APOSTROPHE					0x27
-#define GRAVE_ACCENT				0x60
-#define HYPHEN_MINUS				0x2D
+#define APOSTROPHE			0x27
+#define GRAVE_ACCENT		0x60
+#define HYPHEN_MINUS		0x2D
 #define SEMICOLON           0x3B
 #define SMALL_LETTER_X      0x78
 #define CAPITAL_LETTER_X    0x58
@@ -29,16 +27,14 @@ static const char *voidElements[] = {
 	"input", "link", "meta", "source", "track", "wbr"
 };
 
-enum output_type
-{
+enum output_type {
 	OUT_INNER_HTML,
 	OUT_OUTER_HTML,
 	OUT_INNER_TEXT,
 	OUT_ATTR_VALUE
 };
 
-struct find_opts
-{
+struct find_opts {
 	char *tag;
 	char *attr;
 	char *key;
@@ -47,14 +43,12 @@ struct find_opts
 	int limit;
 };
 
-struct attr
-{
+struct attr {
 	char *name;
 	char *value; // optional
 };
 
-struct tag
-{
+struct tag {
 	char *name;
 	struct attr **attrs;
 	struct tag **children;
@@ -69,14 +63,12 @@ struct tag
 	size_t _innerHtmlEndOffset;
 };
 
-struct tag_list
-{
+struct tag_list {
 	struct tag **tags;
 	size_t len;
 };
 
-enum state
-{
+enum state {
 	STATE_INNER_TEXT,
 	STATE_TAG,
 	STATE_BEGIN_TAG_NAME,
@@ -94,16 +86,14 @@ enum state
     STATE_CHAR_REF_NUMERIC
 };
 
-enum doctype_state
-{
+enum doctype_state {
 	DSTATE_TEXT,
 	DSTATE_POSSIBLE_DTYPE,
 	DSTATE_DTYPE_OR_COMMENT,
 	DSTATE_DTYPE
 };
 
-enum attr_value_syntax
-{
+enum attr_value_syntax {
 	AVS_NO,
 	AVS_QUOTATION_MARK,
 	AVS_APOSTROPHE,
diff --git a/misc.c b/misc.c
@@ -32,8 +32,7 @@ char *trim(char *text)
 	char *trimmedText = NULL;
 	int begin = 0;
 	int end = 0;
-	for (int i=0; i<strlen(text); i++)
-	{
+	for (int i=0; i<strlen(text); i++) {
 		if (
 			text[i] == ' ' ||
             text[i] == '\n' ||
@@ -44,8 +43,7 @@ char *trim(char *text)
 		else
 			break;
 	}
-	for (int i=strlen(text)-1; i>=0; i--)
-	{
+	for (int i=strlen(text)-1; i>=0; i--) {
 		if (
 			text[i] == ' '||
 			text[i] == '\n' ||
@@ -57,10 +55,8 @@ char *trim(char *text)
 			break;
 	}
 	int k = 0;
-	for (int i=0; i<strlen(text); i++)
-	{
-		if (i >= begin && i < strlen(text) - end)
-		{
+	for (int i=0; i<strlen(text); i++) {
+		if (i >= begin && i < strlen(text) - end) {
 			trimmedText = realloc(trimmedText, (k+1) * sizeof(char));
 			trimmedText[k] = text[i];
 			k++;
@@ -76,12 +72,9 @@ bool startsWith(const char *string, const char *part)
     size_t partLen = strlen(part);
     if (partLen > strlen(string))
         return false;
-    for (int i=0; i<partLen; i++)
-    {
+    for (int i=0; i<partLen; i++) {
         if (string[i] != part[i])
-        {
             return false;
-        }
     }
     return true;
 }
@@ -104,16 +97,14 @@ char *readFile(FILE *fp)
 	char *text = NULL;
 	int i = 0;
 	char buf;
-	while (1)
-	{
-		if (tryRead(&buf, fp))
-		{
+	while (1) {
+		if (tryRead(&buf, fp)) {
 			text = realloc(text, (i+1) * sizeof(char));
 			text[i] = buf;
 			i++;
-		}
-		else
+		} else {
 			break;
+        }
 	}
 	text = realloc(text, (i+1) * sizeof(char));
 	text[i] = 0;

	htex - simple incorrect html parser
	git clone git://git.relim.de/htex.git
	Log | Files | Refs | README

M	htex.c	|	59	++++++++++++++++++++---------------------------------------
M	html.c	|	331	++++++++++++++++++++++++++-----------------------------------------------------
M	html.h	|	54	++++++++++++++++++++++--------------------------------
M	misc.c	|	27	+++++++++------------------