commit 468e6772f71842a17b2013db6f92f8ca908d6648
parent 29cc7cac94d871dc422a3e2b42c3138f359bb275
Author: Robin <kroekerrobin@gmail.com>
Date: Tue, 2 Apr 2024 22:28:00 +0200
Change curly brace style; replace printError call in parseTag with fprintf and remove the printError macro from html.h
Diffstat:
| M | htex.c | | | 59 | ++++++++++++++++++++--------------------------------------- |
| M | html.c | | | 331 | ++++++++++++++++++++++++++----------------------------------------------------- |
| M | html.h | | | 54 | ++++++++++++++++++++++-------------------------------- |
| M | misc.c | | | 27 | +++++++++------------------ |
4 files changed, 159 insertions(+), 312 deletions(-)
diff --git a/htex.c b/htex.c
@@ -27,8 +27,7 @@ struct find_opts *parseFilterOpts(const char *pattern)
int aot = 0;
int ak = 0;
int av = 0;
- switch (pattern[0])
- {
+ switch (pattern[0]) {
case '.':
isClassValue = true;
i = 1;
@@ -38,36 +37,31 @@ struct find_opts *parseFilterOpts(const char *pattern)
i = 1;
break;
}
- for (; i<strlen(pattern); i++)
- {
+ for (; i<strlen(pattern); i++) {
if (pattern[i] == ']')
break;
if (
- !isAttrKey &&
+ !isAttrKey &&
!isAttrOrTag &&
pattern[i] != ']' &&
pattern[i] != '"'
- )
- {
+ ) {
opt->attr = realloc(opt->attr, (av+1) * sizeof(char));
opt->attr[av] = pattern[i];
av++;
}
if (pattern[i] == '=')
isAttrKey = false;
- if (isAttrKey && !isAttrOrTag)
- {
+ if (isAttrKey && !isAttrOrTag) {
opt->key = realloc(opt->key, (ak+1) * sizeof(char));
opt->key[ak] = pattern[i];
ak++;
}
- if (pattern[i] == '[')
- {
+ if (pattern[i] == '[') {
isAttrKey = true;
isAttrOrTag = false;
}
- if (isAttrOrTag)
- {
+ if (isAttrOrTag) {
attrOrTag = realloc(attrOrTag, (aot+1) * sizeof(char));
attrOrTag[aot] = pattern[i];
aot++;
@@ -75,8 +69,7 @@ struct find_opts *parseFilterOpts(const char *pattern)
}
attrOrTag = realloc(attrOrTag, (aot+1) * sizeof(char));
attrOrTag[aot] = 0;
- if (isIdValue)
- {
+ if (isIdValue) {
free(opt->key);
opt->key = NULL;
free(opt->attr);
@@ -87,8 +80,7 @@ struct find_opts *parseFilterOpts(const char *pattern)
opt->key[1] = 'd';
opt->key[2] = 0;
}
- else if (isClassValue)
- {
+ else if (isClassValue) {
free(opt->key);
opt->key = NULL;
free(opt->attr);
@@ -102,17 +94,14 @@ struct find_opts *parseFilterOpts(const char *pattern)
opt->key[4] = 's';
opt->key[5] = 0;
}
- else
- {
+ else {
free(opt->tag);
opt->tag = attrOrTag;
- if (av > 0)
- {
+ if (av > 0) {
opt->attr = realloc(opt->attr, (av+1) * sizeof(char));
opt->attr[av] = 0;
}
- if (ak > 0)
- {
+ if (ak > 0) {
opt->key = realloc(opt->key, (ak+1) * sizeof(char));
opt->key[ak] = 0;
}
@@ -173,47 +162,39 @@ int main(int argc, char *argv[])
}
}
enum output_type out = parseOutputArg(output);
- if (out == -1)
- {
+ if (out == -1) {
fprintf(stderr, "Provide a valid output type!\n");
free(output);
return -1;
}
- if (limit == 0)
- {
+ if (limit == 0) {
fprintf(stderr, "Provide a valid limit value.\n");
free(output);
return -1;
}
- if (argc == optind)
- {
+ if (argc == optind) {
fprintf(stderr, "Provide a search pattern!\n");
return -1;
}
- if (argc > optind+2)
- {
+ if (argc > optind+2) {
fprintf(stderr, "Provide only one file!\n");
return -1;
}
- if (argc == optind+1)
- {
+ if (argc == optind+1) {
searchPattern = argv[argc-1];
text = readFile(stdin);
}
- else if (argc == optind+2)
- {
+ else if (argc == optind+2) {
searchPattern = argv[argc-2];
char *filepath = argv[argc-1];
FILE *fp = fopen(filepath, "r");
- if (fp == NULL)
- {
+ if (fp == NULL) {
perror("fopen failed: ");
return -1;
}
text = readFile(fp);
fclose(fp);
- if (strlen(text) == 0)
- {
+ if (strlen(text) == 0) {
fprintf(stderr, "No data in file.\n");
free(output);
free(text);
diff --git a/html.c b/html.c
@@ -2,8 +2,7 @@
const char *stateToString(enum state s)
{
- switch(s)
- {
+ switch(s) {
case STATE_INNER_TEXT: return "STATE_INNER_TEXT";
case STATE_TAG: return "STATE_TAG";
case STATE_BEGIN_TAG_NAME: return "STATE_BEGIN_TAG_NAME";
@@ -104,8 +103,7 @@ static inline bool isASCIIWhitespace(uint_least32_t cp)
static inline bool isVoidElement(const char *tagName)
{
- for (int i=0; i<13; i++)
- {
+ for (int i=0; i<13; i++) {
if (strcmp(tagName, voidElements[i]) == 0)
return true;
}
@@ -199,24 +197,18 @@ size_t parseDoctype(const char *text)
uint_least32_t cp;
size_t len = strlen(text);
size_t ret, off;
- for (off = 0; off<len; off += ret)
- {
- if ((ret = grapheme_decode_utf8(text+off, len-off, &cp)) > len-off)
- {
+ for (off = 0; off<len; off += ret) {
+ if ((ret = grapheme_decode_utf8(text+off, len-off, &cp)) > len-off) {
printError("Something wrong with ending of text");
}
- else
- {
- switch (state)
- {
+ else {
+ switch (state) {
case DSTATE_TEXT:
- if (cp == LESS_THAN_SIGN)
- {
+ if (cp == LESS_THAN_SIGN) {
state = DSTATE_POSSIBLE_DTYPE;
break;
}
- if (cp == GREATER_THAN_SIGN)
- {
+ if (cp == GREATER_THAN_SIGN) {
offset = off;
goto CLEANUP;
}
@@ -228,25 +220,24 @@ size_t parseDoctype(const char *text)
goto CLEANUP;
break;
case DSTATE_DTYPE_OR_COMMENT:
- if (cp == HYPHEN_MINUS)
+ if (cp == HYPHEN_MINUS) {
goto CLEANUP;
- else
- {
+ }
+ else {
doctype = stringCat(doctype, cpToChars(cp, ret));
state = DSTATE_DTYPE;
break;
}
break;
case DSTATE_DTYPE:
- if (isASCIIWhitespace(cp))
- {
+ if (isASCIIWhitespace(cp)) {
size_t dlen = strlen(doctype)+1;
lowerDoctype = malloc(dlen * sizeof(char));
grapheme_to_lowercase_utf8(doctype, dlen, lowerDoctype, dlen);
- if (strcmp(lowerDoctype, "doctype") == 0)
+ if (strcmp(lowerDoctype, "doctype") == 0) {
state = DSTATE_TEXT;
- else
- {
+ }
+ else {
offset = -1;
goto CLEANUP;
}
@@ -265,10 +256,8 @@ CLEANUP:
struct tag *closeLastUnclosedTag(struct tag_list *tagList, const char *endTag, size_t endOffset)
{
- for (int i=tagList->len-1; i>-1; i--)
- {
- if (strcmp(tagList->tags[i]->name, endTag) == 0 && !tagList->tags[i]->_isClosed)
- {
+ for (int i=tagList->len-1; i>-1; i--) {
+ if (strcmp(tagList->tags[i]->name, endTag) == 0 && !tagList->tags[i]->_isClosed) {
tagList->tags[i]->_isClosed = true;
tagList->tags[i]->_outerHtmlEndOffset = endOffset;
return tagList->tags[i];
@@ -279,10 +268,8 @@ struct tag *closeLastUnclosedTag(struct tag_list *tagList, const char *endTag, s
struct tag *getLastOpenTag(struct tag_list *tagList)
{
- for (int i=tagList->len-1; i>-1; i--)
- {
- if (!tagList->tags[i]->_isVoidElement && !tagList->tags[i]->_isClosed)
- {
+ for (int i=tagList->len-1; i>-1; i--) {
+ if (!tagList->tags[i]->_isVoidElement && !tagList->tags[i]->_isClosed) {
return tagList->tags[i];
}
}
@@ -293,8 +280,7 @@ char *getOuterHtml(char *text, struct tag *t)
{
char *outerHtml = NULL;
int o = 0;
- for (int i=t->_outerHtmlBeginOffset; i<t->_outerHtmlEndOffset; i++)
- {
+ for (int i=t->_outerHtmlBeginOffset; i<t->_outerHtmlEndOffset; i++) {
outerHtml = realloc(outerHtml, (o+1) * sizeof(char));
outerHtml[o] = text[i];
o++;
@@ -308,8 +294,7 @@ char *getInnerHtml(char *text, struct tag *t)
{
char *innerHtml = NULL;
int o = 0;
- for (int i=t->_innerHtmlBeginOffset; i<t->_innerHtmlEndOffset; i++)
- {
+ for (int i=t->_innerHtmlBeginOffset; i<t->_innerHtmlEndOffset; i++) {
innerHtml = realloc(innerHtml, (o+1) * sizeof(char));
innerHtml[o] = text[i];
o++;
@@ -337,8 +322,7 @@ enum state endOfBeginTag(struct tag *t, size_t offset)
return STATE_SCRIPT;
else if (strcmp(t->name, "style") == 0)
return STATE_STYLE;
- else
- return STATE_INNER_TEXT;
+ return STATE_INNER_TEXT;
}
char *parseNumericCharRef(char *text, size_t off, int base, size_t *newOffset)
@@ -353,8 +337,7 @@ char *parseNumericCharRef(char *text, size_t off, int base, size_t *newOffset)
ret = grapheme_decode_utf8(text+off, strlen(text+off), &cp);
numericCharRef = stringCat(numericCharRef, cpToChars(cp, ret));
off += ret;
- }
- while (cp != SEMICOLON);
+ } while (cp != SEMICOLON);
*newOffset = off - oldOffset;
long i = strtol(numericCharRef, NULL, base);
ret = grapheme_encode_utf8((uint_least32_t)i, character, MAX_CODEPOINT_SIZE);
@@ -366,8 +349,7 @@ char *parseNumericCharRef(char *text, size_t off, int base, size_t *newOffset)
char *parseNamedCharRef(char *text, size_t off, size_t len, enum attr_value_syntax avs)
{
uint_least32_t stopAt = 0;
- switch(avs)
- {
+ switch(avs) {
case AVS_QUOTATION_MARK:
stopAt = QUOTATION_MARK;
break;
@@ -385,8 +367,7 @@ char *parseNamedCharRef(char *text, size_t off, size_t len, enum attr_value_synt
size_t ret;
uint_least32_t cp;
int i = 0;
- for (;;)
- {
+ for (;;) {
ret = grapheme_decode_utf8(text+off, strlen(text+off), &cp);
if (cp == AMPERSAND || isASCIIWhitespace(cp))
break;
@@ -407,22 +388,18 @@ char *encodeNamedCharRef(const char *name)
char cp[MAX_CODEPOINT_SIZE];
memset(&cp, 0, MAX_CODEPOINT_SIZE);
size_t len;
- for (int i=0; i<NAMED_CHAR_REF_COUNT; i++)
- {
- if (startsWith(name, entities[i].name))
- {
+ for (int i=0; i<NAMED_CHAR_REF_COUNT; i++) {
+ if (startsWith(name, entities[i].name)) {
len = grapheme_encode_utf8(entities[i].cp[0], cp, MAX_CODEPOINT_SIZE);
strcpy(buf, cp);
- if (entities[i].cp[1] != 0)
- {
+ if (entities[i].cp[1] != 0) {
len += grapheme_encode_utf8(entities[i].cp[1], cp, MAX_CODEPOINT_SIZE);
strcat(buf, cp);
}
buf[len] = 0;
const char *part = &name[strlen(entities[i].name)];
size_t partLen = strlen(part);
- if (partLen > 0)
- {
+ if (partLen > 0) {
if (partLen == 1 && part[0] == ';')
return buf;
buf = realloc(buf, 2*MAX_CODEPOINT_SIZE+1+partLen);
@@ -457,27 +434,20 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
uint_least32_t cp;
size_t len = strlen(text);
size_t ret, off;
- for (off = offset; off<len; off += ret)
- {
- if ((ret = grapheme_decode_utf8(text+off, len-off, &cp)) > len-off)
- {
- printError("Something wrong with ending of text");
- }
- else
- {
+ for (off = offset; off<len; off += ret) {
+ if ((ret = grapheme_decode_utf8(text+off, len-off, &cp)) > len-off) {
+ fprintf(stderr, "parseTag.grapheme_decode_utf8 failed.\n");
+ } else {
// char *the_codepoint = cpToChars(cp, ret);
// printf("cp: %02X, %s, %s\n", cp, the_codepoint, stateToString(state));
// free(the_codepoint);
- switch (state)
- {
+ switch (state) {
case STATE_INNER_TEXT:
- if (cp == LESS_THAN_SIGN)
- {
+ if (cp == LESS_THAN_SIGN) {
state = STATE_TAG;
break;
}
- if (cp == AMPERSAND)
- {
+ if (cp == AMPERSAND) {
returnToState = STATE_INNER_TEXT;
state = STATE_CHAR_REF;
break;
@@ -486,13 +456,11 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
stillOpenTag->innerText = stringCat(stillOpenTag->innerText, cpToChars(cp, ret));
break;
case STATE_TAG:
- if (cp == SOLIDUS)
- {
+ if (cp == SOLIDUS) {
state = STATE_END_TAG_NAME;
break;
}
- if (cp == EXCLAMATION_MARK)
- {
+ if (cp == EXCLAMATION_MARK) {
state = STATE_COMMENT;
break;
}
@@ -507,24 +475,20 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
free(endTag);
return tag;
case STATE_BEGIN_TAG_NAME:
- if (cp == GREATER_THAN_SIGN)
- {
+ if (cp == GREATER_THAN_SIGN) {
state = endOfBeginTag(tag, off);
break;
}
- if (isASCIIWhitespace(cp))
- {
+ if (isASCIIWhitespace(cp)) {
state = STATE_ATTR_NAME;
break;
}
- if (isASCIIDigit(cp) || isASCIIAlpha(cp))
- {
+ if (isASCIIDigit(cp) || isASCIIAlpha(cp)) {
tag->name = stringCat(tag->name, cpToChars(cp, ret));
}
break;
case STATE_END_TAG_NAME:
- if (cp == GREATER_THAN_SIGN)
- {
+ if (cp == GREATER_THAN_SIGN) {
struct tag *closedTag = closeLastUnclosedTag(tagList, endTag, off+ret);
if (closedTag != NULL)
setInnerHtmlEndOffset(closedTag, text, off);
@@ -538,26 +502,21 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
endTag = stringCat(endTag, cpToChars(cp, ret));
break;
case STATE_ATTR_NAME:
- if (cp == GREATER_THAN_SIGN)
- {
+ if (cp == GREATER_THAN_SIGN) {
state = endOfBeginTag(tag, off);
break;
}
- if (isASCIIWhitespace(cp))
- {
+ if (isASCIIWhitespace(cp)) {
if (attrNameCount == a+1)
a++;
break;
}
- if (cp == EQUALS_SIGN)
- {
+ if (cp == EQUALS_SIGN) {
state = STATE_ATTR_VALUE;
break;
}
- if (isValidAttrName(cp))
- {
- if (attrNameCount != a+1)
- {
+ if (isValidAttrName(cp)) {
+ if (attrNameCount != a+1) {
tag->attrs = realloc(
tag->attrs,
(a+1) * sizeof(struct attr)
@@ -573,20 +532,15 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
}
break;
case STATE_ATTR_VALUE:
- if (isASCIIWhitespace(cp))
- {
- if (attrValueSyntax == AVS_UNQUOTED)
- {
+ if (isASCIIWhitespace(cp)) {
+ if (attrValueSyntax == AVS_UNQUOTED) {
attrValueSyntax = AVS_NO;
state = STATE_ATTR_NAME;
- }
- else if (attrValueSyntax == AVS_QUOTATION_MARK || attrValueSyntax == AVS_APOSTROPHE)
- {
+ } else if (attrValueSyntax == AVS_QUOTATION_MARK || attrValueSyntax == AVS_APOSTROPHE) {
if (
strcmp("id", tag->attrs[a]->name) == 0 ||
strcmp("class", tag->attrs[a]->name) == 0
- )
- {
+ ) {
char *tmpName = malloc((strlen(tag->attrs[a]->name)+1) * sizeof(char));
strcpy(tmpName, tag->attrs[a]->name);
tag->attrs = realloc(
@@ -599,9 +553,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
tag->attrs[a]->name = tmpName;
tag->attrsLen++;
attrNameCount = a + 1;
- }
- else
- {
+ } else {
tag->attrs[a]->value = stringCat(
tag->attrs[a]->value,
cpToChars(cp, ret)
@@ -610,50 +562,40 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
}
break;
}
- if (cp == QUOTATION_MARK)
- {
- if (attrValueSyntax == AVS_NO)
- {
+ if (cp == QUOTATION_MARK) {
+ if (attrValueSyntax == AVS_NO) {
attrValueSyntax = AVS_QUOTATION_MARK;
break;
}
- if (attrValueSyntax == AVS_QUOTATION_MARK)
- {
+ if (attrValueSyntax == AVS_QUOTATION_MARK) {
attrValueSyntax = AVS_NO;
state = STATE_ATTR_NAME;
break;
}
}
- if (cp == APOSTROPHE)
- {
- if (attrValueSyntax == AVS_NO)
- {
+ if (cp == APOSTROPHE) {
+ if (attrValueSyntax == AVS_NO) {
attrValueSyntax = AVS_APOSTROPHE;
break;
}
- if (attrValueSyntax == AVS_APOSTROPHE)
- {
+ if (attrValueSyntax == AVS_APOSTROPHE) {
attrValueSyntax = AVS_NO;
state = STATE_ATTR_NAME;
break;
}
}
- if (cp == GREATER_THAN_SIGN)
- {
+ if (cp == GREATER_THAN_SIGN) {
state = endOfBeginTag(tag, off);
break;
}
if (
attrValueSyntax == AVS_NO &&
isValidUnquotedAttrValue(cp)
- )
- {
+ ) {
attrValueSyntax = AVS_UNQUOTED;
}
- if (attrValueSyntax > AVS_NO)
- {
- if (cp == AMPERSAND)
- {
+ if (attrValueSyntax > AVS_NO) {
+ if (cp == AMPERSAND) {
state = STATE_CHAR_REF;
returnToState = STATE_ATTR_VALUE;
break;
@@ -665,8 +607,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
}
break;
case STATE_COMMENT:
- if (cp == GREATER_THAN_SIGN && hyphenCount >= 2)
- {
+ if (cp == GREATER_THAN_SIGN && hyphenCount >= 2) {
state = STATE_INNER_TEXT;
break;
}
@@ -676,8 +617,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
hyphenCount = 0;
break;
case STATE_STYLE:
- if (cp == LESS_THAN_SIGN)
- {
+ if (cp == LESS_THAN_SIGN) {
state = STATE_STYLE_POSSIBLE_END_TAG;
break;
}
@@ -689,8 +629,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
state = STATE_STYLE;
break;
case STATE_STYLE_END_TAG:
- if (cp == GREATER_THAN_SIGN)
- {
+ if (cp == GREATER_THAN_SIGN) {
struct tag *closedTag = closeLastUnclosedTag(tagList, endTag, off+ret);
if (closedTag != NULL)
setInnerHtmlEndOffset(closedTag, text, off);
@@ -704,8 +643,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
endTag = stringCat(endTag, cpToChars(cp, ret));
break;
case STATE_SCRIPT:
- if (cp == LESS_THAN_SIGN)
- {
+ if (cp == LESS_THAN_SIGN) {
state = STATE_SCRIPT_POSSIBLE_END_TAG;
break;
}
@@ -717,8 +655,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
state = STATE_SCRIPT;
break;
case STATE_SCRIPT_END_TAG:
- if (cp == GREATER_THAN_SIGN)
- {
+ if (cp == GREATER_THAN_SIGN) {
struct tag *closedTag = closeLastUnclosedTag(tagList, endTag, off+ret);
if (closedTag != NULL)
setInnerHtmlEndOffset(closedTag, text, off);
@@ -732,21 +669,17 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
endTag = stringCat(endTag, cpToChars(cp, ret));
break;
case STATE_CHAR_REF:
- if (cp == NUMBER_SIGN) // hashtag
- {
+ if (cp == NUMBER_SIGN) { /* hashtag */
state = STATE_CHAR_REF_NUMERIC;
break;
}
char *namedCharRef = parseNamedCharRef(text, off, len, attrValueSyntax);
off += strlen(namedCharRef)-1;
char *encodedNamedCharRef = encodeNamedCharRef(namedCharRef);
- if (returnToState == STATE_INNER_TEXT)
- {
+ if (returnToState == STATE_INNER_TEXT) {
stillOpenTag = getLastOpenTag(tagList);
stillOpenTag->innerText = stringCat(stillOpenTag->innerText, encodedNamedCharRef);
- }
- else if (returnToState == STATE_ATTR_VALUE)
- {
+ } else if (returnToState == STATE_ATTR_VALUE) {
tag->attrs[a]->value = stringCat(
tag->attrs[a]->value,
encodedNamedCharRef
@@ -756,18 +689,14 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
state = returnToState;
break;
case STATE_CHAR_REF_NUMERIC:
- if (cp == SMALL_LETTER_X || cp == CAPITAL_LETTER_X)
- {
+ if (cp == SMALL_LETTER_X || cp == CAPITAL_LETTER_X) {
size_t newOffset;
char *numericCharRef = parseNumericCharRef(text, off+1, 16, &newOffset);
off += newOffset;
- if (returnToState == STATE_INNER_TEXT)
- {
+ if (returnToState == STATE_INNER_TEXT) {
stillOpenTag = getLastOpenTag(tagList);
stillOpenTag->innerText = stringCat(stillOpenTag->innerText, numericCharRef);
- }
- else if (returnToState == STATE_ATTR_VALUE)
- {
+ } else if (returnToState == STATE_ATTR_VALUE) {
tag->attrs[a]->value = stringCat(
tag->attrs[a]->value,
numericCharRef
@@ -775,19 +704,14 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
}
state = returnToState;
break;
- }
- else if (isASCIIDigit(cp))
- {
+ } else if (isASCIIDigit(cp)) {
size_t newOffset;
char *numericCharRef = parseNumericCharRef(text, off, 10, &newOffset);
off += newOffset-1;
- if (returnToState == STATE_INNER_TEXT)
- {
+ if (returnToState == STATE_INNER_TEXT) {
stillOpenTag = getLastOpenTag(tagList);
stillOpenTag->innerText = stringCat(stillOpenTag->innerText, numericCharRef);
- }
- else if (returnToState == STATE_ATTR_VALUE)
- {
+ } else if (returnToState == STATE_ATTR_VALUE) {
tag->attrs[a]->value = stringCat(
tag->attrs[a]->value,
numericCharRef
@@ -809,15 +733,13 @@ void freeTag(struct tag *t)
{
free(t->name);
free(t->innerText);
- for (int i=0; i<t->attrsLen; i++)
- {
+ for (int i=0; i<t->attrsLen; i++) {
free(t->attrs[i]->name);
free(t->attrs[i]->value);
free(t->attrs[i]);
}
free(t->attrs);
- for (int i=0; i<t->childrenLen; i++)
- {
+ for (int i=0; i<t->childrenLen; i++) {
if (t->children[i] != NULL)
freeTag(t->children[i]);
}
@@ -840,69 +762,50 @@ void findTag(struct tag *tag, struct find_opts *opt, struct tag_list *foundTags)
bool matchesAttrValue = false;
if (strcmp(tag->name, opt->tag) == 0)
matchesTag = true;
- for (int i=0; i<tag->attrsLen; i++)
- {
+ for (int i=0; i<tag->attrsLen; i++) {
if (strcmp(tag->attrs[i]->name, opt->key) == 0)
matchesAttrKey = true;
if (strcmp(tag->attrs[i]->value, opt->attr) == 0)
matchesAttrValue = true;
}
- if (strlen(opt->tag) > 0 && strlen(opt->key) > 0 && strlen(opt->attr) > 0)
- {
- if (matchesTag && matchesAttrKey && matchesAttrValue)
- {
+ if (strlen(opt->tag) > 0 && strlen(opt->key) > 0 && strlen(opt->attr) > 0) {
+ if (matchesTag && matchesAttrKey && matchesAttrValue) {
foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag));
foundTags->tags[foundTags->len] = tag;
foundTags->len++;
}
- }
- else if (strlen(opt->tag) > 0 && strlen(opt->key) > 0)
- {
- if (matchesTag && matchesAttrKey)
- {
+ } else if (strlen(opt->tag) > 0 && strlen(opt->key) > 0) {
+ if (matchesTag && matchesAttrKey) {
foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag));
foundTags->tags[foundTags->len] = tag;
foundTags->len++;
}
- }
- else if (strlen(opt->tag) > 0)
- {
- if (matchesTag)
- {
+ } else if (strlen(opt->tag) > 0) {
+ if (matchesTag) {
foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag));
foundTags->tags[foundTags->len] = tag;
foundTags->len++;
}
- }
- else if (strlen(opt->key) > 0 && strlen(opt->attr) > 0)
- {
- if (matchesAttrKey && matchesAttrValue)
- {
+ } else if (strlen(opt->key) > 0 && strlen(opt->attr) > 0) {
+ if (matchesAttrKey && matchesAttrValue) {
foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag));
foundTags->tags[foundTags->len] = tag;
foundTags->len++;
}
- }
- else if (strlen(opt->key) > 0)
- {
- if (matchesAttrKey)
- {
+ } else if (strlen(opt->key) > 0) {
+ if (matchesAttrKey) {
foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag));
foundTags->tags[foundTags->len] = tag;
foundTags->len++;
}
- }
- else if (strlen(opt->attr) > 0)
- {
- if (matchesAttrValue)
- {
+ } else if (strlen(opt->attr) > 0) {
+ if (matchesAttrValue) {
foundTags->tags = realloc(foundTags->tags, (foundTags->len+1) * sizeof(struct tag));
foundTags->tags[foundTags->len] = tag;
foundTags->len++;
}
}
- for (int i=tag->childrenLen-1; i>-1; i--)
- {
+ for (int i=tag->childrenLen-1; i>-1; i--) {
findTag(tag->children[i], opt, foundTags);
}
}
@@ -913,15 +816,11 @@ void printHtml(struct tag *t, int indent)
putchar(' ');
printf("%s", t->name);
for (int i=0; i<t->attrsLen; i++)
- {
printf(" %s=%s", t->attrs[i]->name, t->attrs[i]->value);
- }
printf("\n");
indent++;
for (int i=t->childrenLen-1; i>-1; i--)
- {
printHtml(t->children[i], indent);
- }
}
void printResult
@@ -932,14 +831,11 @@ void printResult
struct tag_list *foundTags
)
{
- if (opts->isExcept)
- {
+ if (opts->isExcept) {
bool isMatch = false;
- for (int i=0; i<strlen(text); i++)
- {
+ for (int i=0; i<strlen(text); i++) {
isMatch = false;
- for (int k=0; k<foundTags->len; k++)
- {
+ for (int k=0; k<foundTags->len; k++) {
if (
foundTags->tags[k]->_outerHtmlBeginOffset <= i &&
foundTags->tags[k]->_outerHtmlEndOffset > i
@@ -949,15 +845,11 @@ void printResult
if (!isMatch)
putchar(text[i]);
}
- }
- else
- {
+ } else {
char *requestedText = NULL;
char *trimmedText = NULL;
- for (int i=0; i<foundTags->len; i++)
- {
- switch (opts->out)
- {
+ for (int i=0; i<foundTags->len; i++) {
+ switch (opts->out) {
case OUT_INNER_HTML:
requestedText = getInnerHtml(text, foundTags->tags[i]);
trimmedText = trim(requestedText);
@@ -972,23 +864,18 @@ void printResult
trimmedText = trim(foundTags->tags[i]->innerText);
break;
case OUT_ATTR_VALUE:
- if (strlen(opts->key) > 0 && strlen(opts->tag) > 0)
- {
- for (int k=0; k<foundTags->tags[i]->attrsLen; k++)
- {
+ if (strlen(opts->key) > 0 && strlen(opts->tag) > 0) {
+ for (int k=0; k<foundTags->tags[i]->attrsLen; k++) {
if (strcmp(foundTags->tags[i]->attrs[k]->name, opts->key) == 0)
printf("%s\n", foundTags->tags[i]->attrs[k]->value);
}
- }
- else if (strlen(opts->tag) > 0)
- {
+ } else if (strlen(opts->tag) > 0) {
for (int k=0; k<foundTags->tags[i]->attrsLen; k++)
printf("%s\n", foundTags->tags[i]->attrs[k]->value);
}
break;
}
- if (trimmedText)
- {
+ if (trimmedText) {
if (strlen(trimmedText) > 0)
printf("%s\n", trimmedText);
free(trimmedText);
@@ -1013,22 +900,20 @@ void filterHtml(char *text, struct find_opts *opts)
struct tag_list *tagList = initTagList();
struct tag_list *foundTags = initTagList();
size_t len = parseDoctype(text);
- if (len == -1)
- {
+ if (len == -1) {
fprintf(stderr, "Error parsing <!DOCTYPE ....\n");
goto CLEAN;
- }
- else
+ } else {
text += len;
+ }
struct tag *rootTag = parseTag(text, 0, STATE_INNER_TEXT, tagList);
- if (!existFindPattern(opts))
- {
+ if (!existFindPattern(opts)) {
foundTags->tags = realloc(foundTags->tags, sizeof(struct tag));
foundTags->tags[0] = rootTag;
foundTags->len = 1;
- }
- else
+ } else {
findTag(rootTag, opts, foundTags);
+ }
printResult(text, rootTag, opts, foundTags);
// printHtml(rootTag, -1);
freeTag(rootTag);
diff --git a/html.h b/html.h
@@ -1,21 +1,19 @@
-#define printError(msg) do { fprintf(stderr, "%s: %s\n", __func__, msg); } while (0)
-
-#define LESS_THAN_SIGN 0x3C
-#define GREATER_THAN_SIGN 0x3E
-#define EQUALS_SIGN 0x3D
-#define TAB 0x09
-#define LF 0x0A
-#define FF 0x0C
-#define CR 0x0D
-#define SPACE 0x20
-#define SOLIDUS 0x2F
-#define EXCLAMATION_MARK 0x21
-#define QUOTATION_MARK 0x22
+#define LESS_THAN_SIGN 0x3C
+#define GREATER_THAN_SIGN 0x3E
+#define EQUALS_SIGN 0x3D
+#define TAB 0x09
+#define LF 0x0A
+#define FF 0x0C
+#define CR 0x0D
+#define SPACE 0x20
+#define SOLIDUS 0x2F
+#define EXCLAMATION_MARK 0x21
+#define QUOTATION_MARK 0x22
#define NUMBER_SIGN 0x23
#define AMPERSAND 0x26
-#define APOSTROPHE 0x27
-#define GRAVE_ACCENT 0x60
-#define HYPHEN_MINUS 0x2D
+#define APOSTROPHE 0x27
+#define GRAVE_ACCENT 0x60
+#define HYPHEN_MINUS 0x2D
#define SEMICOLON 0x3B
#define SMALL_LETTER_X 0x78
#define CAPITAL_LETTER_X 0x58
@@ -29,16 +27,14 @@ static const char *voidElements[] = {
"input", "link", "meta", "source", "track", "wbr"
};
-enum output_type
-{
+enum output_type {
OUT_INNER_HTML,
OUT_OUTER_HTML,
OUT_INNER_TEXT,
OUT_ATTR_VALUE
};
-struct find_opts
-{
+struct find_opts {
char *tag;
char *attr;
char *key;
@@ -47,14 +43,12 @@ struct find_opts
int limit;
};
-struct attr
-{
+struct attr {
char *name;
char *value; // optional
};
-struct tag
-{
+struct tag {
char *name;
struct attr **attrs;
struct tag **children;
@@ -69,14 +63,12 @@ struct tag
size_t _innerHtmlEndOffset;
};
-struct tag_list
-{
+struct tag_list {
struct tag **tags;
size_t len;
};
-enum state
-{
+enum state {
STATE_INNER_TEXT,
STATE_TAG,
STATE_BEGIN_TAG_NAME,
@@ -94,16 +86,14 @@ enum state
STATE_CHAR_REF_NUMERIC
};
-enum doctype_state
-{
+enum doctype_state {
DSTATE_TEXT,
DSTATE_POSSIBLE_DTYPE,
DSTATE_DTYPE_OR_COMMENT,
DSTATE_DTYPE
};
-enum attr_value_syntax
-{
+enum attr_value_syntax {
AVS_NO,
AVS_QUOTATION_MARK,
AVS_APOSTROPHE,
diff --git a/misc.c b/misc.c
@@ -32,8 +32,7 @@ char *trim(char *text)
char *trimmedText = NULL;
int begin = 0;
int end = 0;
- for (int i=0; i<strlen(text); i++)
- {
+ for (int i=0; i<strlen(text); i++) {
if (
text[i] == ' ' ||
text[i] == '\n' ||
@@ -44,8 +43,7 @@ char *trim(char *text)
else
break;
}
- for (int i=strlen(text)-1; i>=0; i--)
- {
+ for (int i=strlen(text)-1; i>=0; i--) {
if (
text[i] == ' '||
text[i] == '\n' ||
@@ -57,10 +55,8 @@ char *trim(char *text)
break;
}
int k = 0;
- for (int i=0; i<strlen(text); i++)
- {
- if (i >= begin && i < strlen(text) - end)
- {
+ for (int i=0; i<strlen(text); i++) {
+ if (i >= begin && i < strlen(text) - end) {
trimmedText = realloc(trimmedText, (k+1) * sizeof(char));
trimmedText[k] = text[i];
k++;
@@ -76,12 +72,9 @@ bool startsWith(const char *string, const char *part)
size_t partLen = strlen(part);
if (partLen > strlen(string))
return false;
- for (int i=0; i<partLen; i++)
- {
+ for (int i=0; i<partLen; i++) {
if (string[i] != part[i])
- {
return false;
- }
}
return true;
}
@@ -104,16 +97,14 @@ char *readFile(FILE *fp)
char *text = NULL;
int i = 0;
char buf;
- while (1)
- {
- if (tryRead(&buf, fp))
- {
+ while (1) {
+ if (tryRead(&buf, fp)) {
text = realloc(text, (i+1) * sizeof(char));
text[i] = buf;
i++;
- }
- else
+ } else {
break;
+ }
}
text = realloc(text, (i+1) * sizeof(char));
text[i] = 0;