commit 6c04cb48bb011fd5e498f770447bbb1e3d98947d
parent 572566e1c67d1d3e41dc16dbce8b1cec56107ff9
Author: Robin <kroekerrobin@gmail.com>
Date: Sat, 6 Apr 2024 18:29:17 +0200
Refactor
Diffstat:
| M | src/html.c | | | 145 | +++++++++++++++++++++++++++++++++++++++++-------------------------------------- |
| M | src/misc.c | | | 24 | ++++++++++++++++-------- |
| M | src/misc.h | | | 3 | ++- |
3 files changed, 93 insertions(+), 79 deletions(-)
diff --git a/src/html.c b/src/html.c
@@ -179,12 +179,9 @@ struct FindOpts *find_opts_parse(const char *pattern)
{
struct FindOpts *opts = malloc(sizeof(struct FindOpts));
opts->out = OUT_OUTER_HTML;
- opts->tag = malloc(sizeof(char));
- opts->tag[0] = 0;
- opts->attr = malloc(sizeof(char));
- opts->attr[0] = 0;
- opts->key = malloc(sizeof(char));
- opts->key[0] = 0;
+ opts->tag = NULL;
+ opts->attr = NULL;
+ opts->key = NULL;
bool is_class_value = false;
bool is_id_value = false;
int i = 0;
@@ -300,10 +297,8 @@ enum OutType output_type_parse(const char *type)
static struct Attr *attr_init(void)
{
struct Attr *attr = malloc(sizeof(struct Attr));
- attr->name = malloc(sizeof(char));
- attr->name[0] = 0;
- attr->value = malloc(sizeof(char));
- attr->value[0] = 0;
+ attr->name = NULL; /* no buffer is allocated up front any more; starts out NULL */
+ attr->value = NULL; /* NOTE(review): tag_find hands attrs[i]->value to strcmp; confirm a value is always set before that, or guard against NULL there */
return attr;
}
@@ -311,8 +306,7 @@ static char *charref_numeric_parse_and_encode(char *text, size_t offset, size_t
{
size_t old_offset = offset;
char *character = malloc(MAX_CODEPOINT_SIZE * sizeof(char));
- char *numeric_charref = malloc(sizeof(char));
- numeric_charref[0] = 0;
+ char *numeric_charref = NULL;
size_t ret;
uint_least32_t cp;
do {
@@ -344,8 +338,7 @@ static char *charref_named_parse(char *text, size_t offset, size_t len, enum Att
case AVS_NO: /* Just to silence the compilier warning */
break;
}
- char *named_charref = malloc(sizeof(char));
- named_charref[0] = 0;
+ char *named_charref = NULL;
size_t ret;
uint_least32_t cp;
int i = 0;
@@ -382,43 +375,48 @@ static char *charref_named_encode(const char *name)
char *buf = NULL;
size_t len;
int i;
- for (i=0; i<2138; i++) {
- if (string_starts_with(name, single_cp_entities[i].name)) {
- buf = realloc(buf, MAX_CODEPOINT_SIZE+1);
- len = grapheme_encode_utf8(single_cp_entities[i].cp, buf, MAX_CODEPOINT_SIZE);
- buf[len] = 0;
- charref_named_concat_remaining_string(name, single_cp_entities[i].name, &buf);
- return buf;
+ if (name) {
+ for (i=0; i<2138; i++) {
+ if (string_starts_with(name, single_cp_entities[i].name)) {
+ buf = realloc(buf, MAX_CODEPOINT_SIZE+1);
+ len = grapheme_encode_utf8(single_cp_entities[i].cp, buf, MAX_CODEPOINT_SIZE);
+ buf[len] = 0;
+ charref_named_concat_remaining_string(name, single_cp_entities[i].name, &buf);
+ return buf;
+ }
}
- }
- for (i=0; i<93; i++) {
- if (string_starts_with(name, double_cp_entities[i].name)) {
- size_t buf_len = 0;
- buf = realloc(buf, 2*MAX_CODEPOINT_SIZE+1);
- len = grapheme_encode_utf8(double_cp_entities[i].cp[0], buf, MAX_CODEPOINT_SIZE);
- buf_len += len;
- buf += len;
- len = grapheme_encode_utf8(double_cp_entities[i].cp[1], buf, MAX_CODEPOINT_SIZE);
- buf_len += len;
- buf[buf_len] = 0;
- charref_named_concat_remaining_string(name, double_cp_entities[i].name, &buf);
- return buf;
+ for (i=0; i<93; i++) {
+ if (string_starts_with(name, double_cp_entities[i].name)) {
+ size_t buf_len = 0;
+ buf = realloc(buf, 2*MAX_CODEPOINT_SIZE+1);
+ len = grapheme_encode_utf8(double_cp_entities[i].cp[0], buf, MAX_CODEPOINT_SIZE);
+ buf_len += len;
+ buf += len;
+ len = grapheme_encode_utf8(double_cp_entities[i].cp[1], buf, MAX_CODEPOINT_SIZE);
+ buf_len += len;
+ buf[buf_len] = 0;
+ charref_named_concat_remaining_string(name, double_cp_entities[i].name, &buf);
+ return buf;
+ }
}
+ buf = realloc(buf, (strlen(name)+2) * sizeof(char));
+ buf[0] = '&';
+ buf[1] = 0;
+ strcat(buf, name);
+ return buf;
+ } else {
+ buf = realloc(buf, 2 * sizeof(char));
+ buf[0] = '&';
+ buf[1] = 0;
+ return buf;
}
- buf = realloc(buf, (strlen(name)+2) * sizeof(char));
- buf[0] = '&';
- buf[1] = 0;
- strcat(buf, name);
- return buf;
}
static struct Tag *tag_init(void)
{
struct Tag *tag = malloc(sizeof(struct Tag));
- tag->name = malloc(sizeof(char));
- tag->name[0] = 0;
- tag->inner_text = malloc(sizeof(char));
- tag->inner_text[0] = 0;
+ tag->name = NULL;
+ tag->inner_text = NULL;
tag->attrs = NULL;
tag->children = NULL;
tag->attrs_len = 0;
@@ -528,45 +526,53 @@ static void tag_find(struct Tag *tag, struct FindOpts *opts, struct TagList *fou
bool matches_tag = false;
bool matches_attr_key = false;
bool matches_attr_value = false;
- if (strcmp(tag->name, opts->tag) == 0)
- matches_tag = true;
- for (int i=0; i<tag->attrs_len; i++) {
- if (strcmp(tag->attrs[i]->name, opts->key) == 0)
- matches_attr_key = true;
- if (strcmp(tag->attrs[i]->value, opts->attr) == 0)
- matches_attr_value = true;
- }
- if (strlen(opts->tag) > 0 && strlen(opts->key) > 0 && strlen(opts->attr) > 0) {
+ if (!string_is_empty(opts->tag)) {
+ if (strcmp(tag->name, opts->tag) == 0)
+ matches_tag = true;
+ }
+ if (!string_is_empty(opts->key)) {
+ for (int i=0; i<tag->attrs_len; i++) {
+ if (strcmp(tag->attrs[i]->name, opts->key) == 0)
+ matches_attr_key = true;
+ }
+ }
+ if (!string_is_empty(opts->attr)) {
+ for (int i=0; i<tag->attrs_len; i++) {
+ if (strcmp(tag->attrs[i]->value, opts->attr) == 0)
+ matches_attr_value = true;
+ }
+ }
+ if (!string_is_empty(opts->tag) && !string_is_empty(opts->key) && !string_is_empty(opts->attr)) {
if (matches_tag && matches_attr_key && matches_attr_value) {
found_tags->tags = realloc(found_tags->tags, (found_tags->len+1) * sizeof(struct Tag));
found_tags->tags[found_tags->len] = tag;
found_tags->len++;
}
- } else if (strlen(opts->tag) > 0 && strlen(opts->key) > 0) {
+ } else if (!string_is_empty(opts->tag) && !string_is_empty(opts->key)) {
if (matches_tag && matches_attr_key) {
found_tags->tags = realloc(found_tags->tags, (found_tags->len+1) * sizeof(struct Tag));
found_tags->tags[found_tags->len] = tag;
found_tags->len++;
}
- } else if (strlen(opts->tag) > 0) {
+ } else if (!string_is_empty(opts->tag)) {
if (matches_tag) {
found_tags->tags = realloc(found_tags->tags, (found_tags->len+1) * sizeof(struct Tag));
found_tags->tags[found_tags->len] = tag;
found_tags->len++;
}
- } else if (strlen(opts->key) > 0 && strlen(opts->attr) > 0) {
+ } else if (!string_is_empty(opts->key) && !string_is_empty(opts->attr)) {
if (matches_attr_key && matches_attr_value) {
found_tags->tags = realloc(found_tags->tags, (found_tags->len+1) * sizeof(struct Tag));
found_tags->tags[found_tags->len] = tag;
found_tags->len++;
}
- } else if (strlen(opts->key) > 0) {
+ } else if (!string_is_empty(opts->key)) {
if (matches_attr_key) {
found_tags->tags = realloc(found_tags->tags, (found_tags->len+1) * sizeof(struct Tag));
found_tags->tags[found_tags->len] = tag;
found_tags->len++;
}
- } else if (strlen(opts->attr) > 0) {
+ } else if (!string_is_empty(opts->attr)) {
if (matches_attr_value) {
found_tags->tags = realloc(found_tags->tags, (found_tags->len+1) * sizeof(struct Tag));
found_tags->tags[found_tags->len] = tag;
@@ -649,8 +655,7 @@ static struct Tag *tag_parse(struct TagList *tag_list, char *text, size_t offset
tag_list->tags[tag_list->len] = tag;
tag_list->len++;
struct Tag *still_open_tag = tag;
- char *end_tag = malloc(sizeof(char));
- end_tag[0] = 0;
+ char *end_tag = NULL;
enum State return_to_state = STATE_INNER_TEXT;
size_t a = 0;
size_t attr_name_count = 0;
@@ -718,8 +723,7 @@ static struct Tag *tag_parse(struct TagList *tag_list, char *text, size_t offset
if (closed_tag != NULL)
tag_set_inner_html_end_offset(closed_tag, text, off);
free(end_tag);
- end_tag = malloc(sizeof(char));
- end_tag[0] = 0;
+ end_tag = NULL;
state = STATE_INNER_TEXT;
break;
}
@@ -742,10 +746,7 @@ static struct Tag *tag_parse(struct TagList *tag_list, char *text, size_t offset
}
if (attr_name_char_is_valid(cp)) {
if (attr_name_count != a+1) {
- tag->attrs = realloc(
- tag->attrs,
- (a+1) * sizeof(struct Attr)
- );
+ tag->attrs = realloc(tag->attrs, (a+1) * sizeof(struct Attr));
tag->attrs[a] = attr_init();
attr_name_count = a + 1;
tag->attrs_len = attr_name_count;
@@ -847,8 +848,7 @@ static struct Tag *tag_parse(struct TagList *tag_list, char *text, size_t offset
if (closed_tag != NULL)
tag_set_inner_html_end_offset(closed_tag, text, off);
free(end_tag);
- end_tag = malloc(sizeof(char));
- end_tag[0] = 0;
+ end_tag = NULL;
state = STATE_INNER_TEXT;
break;
}
@@ -873,8 +873,7 @@ static struct Tag *tag_parse(struct TagList *tag_list, char *text, size_t offset
if (closed_tag != NULL)
tag_set_inner_html_end_offset(closed_tag, text, off);
free(end_tag);
- end_tag = malloc(sizeof(char));
- end_tag[0] = 0;
+ end_tag = NULL;
state = STATE_INNER_TEXT;
break;
}
@@ -887,7 +886,11 @@ static struct Tag *tag_parse(struct TagList *tag_list, char *text, size_t offset
break;
}
char *named_charref = charref_named_parse(text, off, len, avs);
- off += strlen(named_charref)-1;
+ if (named_charref) {
+ off += strlen(named_charref)-1;
+ } else {
+ off--;
+ }
char *encoded_named_charref = charref_named_encode(named_charref);
if (return_to_state == STATE_INNER_TEXT) {
still_open_tag = tag_get_last_open(tag_list);
@@ -991,6 +994,8 @@ struct HTMLDocument *html_document_parse(char *buffer)
document->buffer += len;
}
document->tag = tag_parse(document->tag_list, document->buffer, 0, STATE_INNER_TEXT);
+ document->tag->name = malloc(sizeof(char));
+ document->tag->name[0] = 0;
return document;
}
diff --git a/src/misc.c b/src/misc.c
@@ -6,6 +6,14 @@
#include <grapheme.h>
#include "misc.h"
+char *cp_to_string(uint_least32_t cp, size_t len)
+{ /* Encode cp as UTF-8 into a new heap buffer of len+1 bytes and return it; returns NULL if the allocation fails. */
+	char *str = calloc(len+1, sizeof(char)); /* zero-filled, so the result is NUL-terminated even if fewer than len bytes get written */
+	if (str) /* never write through a failed allocation */
+		grapheme_encode_utf8(cp, str, len);
+	return str;
+}
+
char *string_concat(char *str1, char *str2)
{
size_t len2 = strlen(str2);
@@ -20,14 +28,6 @@ char *string_concat(char *str1, char *str2)
return str1;
}
-char *cp_to_string(uint_least32_t cp, size_t len)
-{
- char *str = malloc((len+1) * sizeof(char));
- grapheme_encode_utf8(cp, str, len);
- str[len] = 0;
- return str;
-}
-
char *string_trim(char *text)
{
char *trimmed_text = NULL;
@@ -81,6 +81,14 @@ bool string_starts_with(const char *string, const char *part)
return true;
}
+bool string_is_empty(char *string)
+{
+	/* A NULL pointer and a zero-length string both count as empty. */
+	if (string == NULL)
+		return true;
+	return string[0] == 0;
+}
+
// Do not use for reading from a socket fd
bool file_try_read(char *buf, FILE *fp)
{
diff --git a/src/misc.h b/src/misc.h
@@ -1,6 +1,7 @@
-char *string_concat(char *str1, char *str2);
char *cp_to_string(uint_least32_t cp, size_t len);
+char *string_concat(char *str1, char *str2);
char *string_trim(char *text);
bool string_starts_with(const char *string, const char *part);
+bool string_is_empty(char *string); /* true when string is NULL or "" */
bool file_try_read(char *buf, FILE *fp);
char *file_read(FILE *fp);